class Raev::Article
Constants
- REGEX_NODE_EMPTY
Attributes
body[R]
doc[R]
Public Class Methods
new(body)
click to toggle source
# File lib/raev/article.rb, line 10
#
# Builds an article from a raw HTML string.
#
# The string first has its non-breaking spaces replaced, then is parsed as an
# HTML document fragment (kept in @doc). The fragment is cleaned in place:
# paragraph-less divs become paragraphs, empty paragraphs are dropped and
# surplus line breaks are removed. Finally the fragment is serialized and
# stored in @body with every newline character stripped.
#
# body:: the HTML source text of the article
def initialize(body)
  normalized_html = replace_non_breaking_space(body)
  @doc = Nokogiri::HTML::DocumentFragment.parse(normalized_html)

  # Order matters: divs are turned into paragraphs before the empty-paragraph
  # sweep so that freshly renamed nodes are also considered.
  replace_divs_with_paragraphs(@doc)
  remove_empty_paragraphs(@doc)
  remove_extra_linebreaks(@doc)

  serialized = @doc.to_s
  @body = serialized.delete("\n".freeze)
end
Private Instance Methods
node_empty?(node)
click to toggle source
# File lib/raev/article.rb, line 58
#
# Tells whether +node+ counts as empty: it must have no element children and
# its inner text must match REGEX_NODE_EMPTY (the pattern is defined elsewhere
# in this class).
#
# Returns +false+ when the node has element children; otherwise returns the
# result of the regex match (a MatchData when it matches, +nil+ when not).
def node_empty?(node)
  return false unless node.element_children.empty?

  text = node.inner_text
  REGEX_NODE_EMPTY.match(text)
end
remove_empty_paragraphs(doc)
click to toggle source
# File lib/raev/article.rb, line 36
#
# Removes from +doc+ every <p> element that node_empty? reports as empty.
#
# doc:: the parsed fragment to clean; it is modified in place
def remove_empty_paragraphs(doc)
  doc.css("p".freeze).each { |paragraph| paragraph.remove if node_empty?(paragraph) }
end
remove_extra_linebreaks(doc)
click to toggle source
# File lib/raev/article.rb, line 44
#
# Removes superfluous <br> elements from +doc+. A <br> is removed when
# * nothing follows it (it has no next sibling), or
# * the next sibling is another <br>, or
# * the next sibling is empty according to node_empty?.
#
# doc:: the parsed fragment to clean; it is modified in place
def remove_extra_linebreaks(doc)
  doc.css("br".freeze).each do |line_break|
    follower = line_break.next

    # Short-circuiting keeps the sibling checks from running on a missing node.
    redundant = !follower || follower.matches?("br".freeze) || node_empty?(follower)
    line_break.remove if redundant
  end
end
replace_divs_with_paragraphs(doc)
click to toggle source
# File lib/raev/article.rb, line 28
#
# Renames every <div> in +doc+ that contains no <p> descendant to a <p>.
# Divs that already wrap at least one paragraph are left untouched.
#
# doc:: the parsed fragment to rewrite; it is modified in place
def replace_divs_with_paragraphs(doc)
  doc.css("div".freeze).each do |container|
    container.name = "p".freeze if container.css("p".freeze).length.zero?
  end
end
replace_non_breaking_space(str)
click to toggle source
# File lib/raev/article.rb, line 24
#
# Returns a copy of +str+ in which every non-breaking space (U+00A0) is
# replaced by a regular space. The input string is not modified.
#
# The non-breaking space is spelled as an explicit "\u00A0" escape rather than
# a literal character: the literal is visually identical to a plain space, so
# it is easy to lose in an editor or copy/paste, which would silently turn
# this method into a no-op.
#
# str:: the text to normalize
def replace_non_breaking_space(str)
  # tr performs the one-character-for-one-character substitution directly.
  str.tr("\u00A0".freeze, " ".freeze)
end