class Slasher::DOM
Constants
- REMOVED_ELEMENTS
- STRIPPED_ELEMENTS
Attributes
document[RW]
Public Class Methods
new(document)
click to toggle source
# File lib/slasher/dom.rb, line 10
#
# Builds a DOM wrapper around the given markup.
#
# document - the input handed to Nokogiri::HTML for parsing.
#
# The parsed result is stored in @document.
def initialize(document)
  parsed = Nokogiri::HTML(document)
  @document = parsed
end
Public Instance Methods
get_paragraphs_content(node)
click to toggle source
# File lib/slasher/dom.rb, line 28
#
# Collects the text of every element matched by `node > "p"` (with Nokogiri,
# the <p> elements that are immediate children of +node+), removes each of
# those elements, and returns their texts joined by a single space.
#
# node - an object whose `>` method takes a selector and returns an
#        enumerable of elements responding to #text and #remove.
#
# Returns a String; empty when nothing matches.
def get_paragraphs_content(node)
  (node > "p").map do |paragraph|
    # The block must yield the text. Previously its last expression was the
    # result of #remove, so the removed nodes themselves were joined and the
    # text was thrown away.
    content = paragraph.text
    paragraph.remove
    content
  end.join(" ")
end
get_texts(node)
click to toggle source
# File lib/slasher/dom.rb, line 35
#
# Concatenates the text of the children of +node+ that report themselves as
# text nodes (child.text? is truthy). Newlines are deleted from each piece and
# surrounding whitespace is stripped; the pieces are joined with no separator.
#
# node - an object responding to #children, whose elements respond to
#        #text? and #text.
#
# Returns a String; empty when there are no text children.
def get_texts(node)
  pieces = []
  node.children.each do |child|
    next unless child.text?

    pieces << child.text.delete("\n").strip
  end
  pieces.join
end
remove_elements()
click to toggle source
# File lib/slasher/dom.rb, line 14
#
# For each name in REMOVED_ELEMENTS, runs the XPath query "//<name>" against
# @document and removes every match from the document.
def remove_elements
  REMOVED_ELEMENTS.each do |tag_name|
    matches = @document.xpath("//#{tag_name}")
    matches.remove
  end
end
strip_elements()
click to toggle source
# File lib/slasher/dom.rb, line 20
#
# For each name in STRIPPED_ELEMENTS, searches @document for "//<name>" and
# replaces every match with a plain text node holding that element's text,
# so the markup is dropped while the text stays in place.
def strip_elements
  STRIPPED_ELEMENTS.each do |tag_name|
    @document.search("//#{tag_name}").each do |element|
      plain_text = Nokogiri::XML::Text.new(element.text, element.document)
      element.replace(plain_text)
    end
  end
end