class Raev::Article

Constants

REGEX_NODE_EMPTY

Attributes

body[R]
doc[R]

Public Class Methods

new(body)
# File lib/raev/article.rb, line 10
# Builds an article from a raw HTML string.
#
# The input is passed through replace_non_breaking_space, parsed as an HTML
# document fragment, and then cleaned up by three passes that modify the
# parsed fragment in place. The result is exposed through the +doc+ and
# +body+ readers.
#
# @param body [String] raw HTML markup of the article
def initialize(body)
  normalized_html = replace_non_breaking_space(body)
  @doc = Nokogiri::HTML::DocumentFragment.parse(normalized_html)

  # Order matters: divs are turned into paragraphs first so that the
  # empty-paragraph pass sees them too.
  replace_divs_with_paragraphs(@doc)
  remove_empty_paragraphs(@doc)
  remove_extra_linebreaks(@doc)

  # Serialise the cleaned fragment and strip every newline character.
  @body = @doc.to_s.delete("\n")
end

Private Instance Methods

node_empty?(node)
# File lib/raev/article.rb, line 58
# Tells whether a node has no element children and its text matches
# REGEX_NODE_EMPTY.
#
# @param node [#element_children, #inner_text] node to inspect
# @return [MatchData, nil, false] truthy (the MatchData) when the node counts
#   as empty; +false+ when it has element children; +nil+ when its text does
#   not match
def node_empty?(node)
  # A node that wraps other elements is never considered empty.
  return false unless node.element_children.empty?

  REGEX_NODE_EMPTY.match(node.inner_text)
end
remove_empty_paragraphs(doc)
# File lib/raev/article.rb, line 36
# Removes every <p> element that node_empty? reports as empty.
#
# @param doc [#css] parsed fragment to clean; modified in place
# @return [Object] whatever +each+ returns for the matched <p> collection
def remove_empty_paragraphs(doc)
  doc.css("p").each { |paragraph| paragraph.remove if node_empty?(paragraph) }
end
remove_extra_linebreaks(doc)
# File lib/raev/article.rb, line 44
# Removes <br> elements that are redundant: a <br> is dropped when it has no
# following sibling, when the following sibling is another <br>, or when the
# following sibling is empty according to node_empty?.
#
# @param doc [#css] parsed fragment to clean; modified in place
# @return [Object] whatever +each+ returns for the matched <br> collection
def remove_extra_linebreaks(doc)
  doc.css("br").each do |linebreak|
    follower = linebreak.next

    # Short-circuit keeps the original evaluation order: missing sibling,
    # then the <br> check, then the emptiness check.
    redundant = !follower || follower.matches?("br") || node_empty?(follower)
    linebreak.remove if redundant
  end
end
replace_divs_with_paragraphs(doc)
# File lib/raev/article.rb, line 28
# Renames every <div> that contains no <p> descendant to <p>.
# Divs that already wrap at least one paragraph are left untouched.
#
# @param doc [#css] parsed fragment to clean; modified in place
# @return [Object] whatever +each+ returns for the matched <div> collection
def replace_divs_with_paragraphs(doc)
  doc.css("div").each do |container|
    container.name = "p" if container.css("p").empty?
  end
end
replace_non_breaking_space(str)
# File lib/raev/article.rb, line 24
# Replaces non-breaking spaces in +str+ with ordinary spaces.
#
# Both spellings are handled: the HTML entity "&nbsp;" and the literal
# U+00A0 character. The character is written as an escape so the pattern
# cannot be mistaken for (or silently turned into) a plain space.
#
# @param str [String] raw HTML markup
# @return [String] a new string with every non-breaking space replaced
def replace_non_breaking_space(str)
  str.gsub(/&nbsp;|\u00A0/, " ")
end