class Slasher::DOM

Constants

REMOVED_ELEMENTS
STRIPPED_ELEMENTS

Attributes

document[RW]

Public Class Methods

new(document) click to toggle source
# File lib/slasher/dom.rb, line 10
# Builds the DOM wrapper around the given markup.
#
# document - the raw HTML handed to Nokogiri::HTML for parsing.
#
# The parsed result is kept in @document, which the other methods of this
# class query and mutate.
def initialize(document)
  parsed = Nokogiri::HTML(document)
  @document = parsed
end

Public Instance Methods

get_paragraphs_content(node) click to toggle source
# File lib/slasher/dom.rb, line 28
# Gathers the text of the "p" elements selected by `node > "p"` and detaches
# each of them from the tree as it is read.
#
# node - an object responding to `>` with a CSS selector (a Nokogiri node in
#        practice; per Nokogiri's documentation `>` searches the node's
#        immediate children).
#
# Returns a String with the paragraph texts joined by a single space; an
# empty String when no paragraph matches.
#
# Side effect: every matched paragraph has `remove` called on it.
def get_paragraphs_content(node)
  (node > "p").map do |paragraph|
    # Capture the text before detaching the node and make it the block's
    # value explicitly. The block's last expression is what `map` collects,
    # so ending on `remove` would collect that call's result and leave the
    # text extraction to an implicit string conversion inside `join`.
    content = paragraph.text
    paragraph.remove
    content
  end.join(" ")
end
get_texts(node) click to toggle source
# File lib/slasher/dom.rb, line 35
# Concatenates the text held directly by +node+'s text children.
#
# Only children whose `text?` is truthy contribute. Each contribution has its
# newline characters deleted and its surrounding whitespace stripped, and the
# pieces are joined with no separator.
#
# Returns a String (empty when there are no text children).
def get_texts(node)
  text_nodes = node.children.select(&:text?)
  text_nodes.map { |text_node| text_node.text.delete("\n").strip }.join
end
remove_elements() click to toggle source
# File lib/slasher/dom.rb, line 14
# Deletes from @document every element whose name is listed in
# REMOVED_ELEMENTS.
#
# For each name, the XPath query "//<name>" is run against @document and
# `remove` is called on the result of that query.
def remove_elements
  REMOVED_ELEMENTS.each do |tag_name|
    matches = @document.xpath("//#{tag_name}")
    matches.remove
  end
end
strip_elements() click to toggle source
# File lib/slasher/dom.rb, line 20
# Replaces every element named in STRIPPED_ELEMENTS with a plain text node
# carrying that element's text, so the markup disappears but the text stays.
#
# For each name, "//<name>" is searched in @document; each match is swapped
# for a Nokogiri::XML::Text built from the match's text and owning document.
def strip_elements
  STRIPPED_ELEMENTS.each do |tag_name|
    matches = @document.search("//#{tag_name}")
    matches.each do |element|
      plain_text = Nokogiri::XML::Text.new(element.text, element.document)
      element.replace(plain_text)
    end
  end
end