class Pincers::Nokogiri::Backend
Constants
- BOOL_PROPERTIES
This is a small subset of the boolean properties; I believe it's enough for scraping. For information on where to find the full list, see: stackoverflow.com/questions/706384/boolean-html-attributes
Public Class Methods
new(_document)
click to toggle source
# File lib/pincers/nokogiri/backend.rb, line 17
# Builds a backend around the given document.
#
# @param _document the document object to wrap; it is stored untouched in
#   @document (presumably an already-parsed Nokogiri document -- confirm
#   against callers).
def initialize(_document)
  @document = _document
end
Public Instance Methods
close_document()
click to toggle source
# File lib/pincers/nokogiri/backend.rb, line 33
# Intentionally does nothing and returns nil.
def close_document
  # this backend has nothing to close
end
document()
click to toggle source
# File lib/pincers/nokogiri/backend.rb, line 21
# Returns the document object held in @document.
#
# @return the value stored in @document (nil if it was never assigned)
def document
  @document
end
document_root()
click to toggle source
# File lib/pincers/nokogiri/backend.rb, line 25
# Returns the root of the document as a one-element list.
#
# @return [Array] a new array whose only member is the value of #document
def document_root
  root = document
  [root]
end
document_title()
click to toggle source
# File lib/pincers/nokogiri/backend.rb, line 29
# Returns the title of the wrapped document.
#
# @return whatever #document returns from its `title` method
def document_title
  document.title
end
extract_element_attribute(_element, _name)
click to toggle source
# File lib/pincers/nokogiri/backend.rb, line 59
# Reads an attribute from an element, with special handling for :value and
# for boolean attributes.
#
# @param _element the element to read from
# @param _name the attribute name; converted with #to_sym before use
# @return for a boolean attribute (per #is_boolean?), true/false depending on
#   whether the attribute is present; for :value, a result that depends on
#   the element's classification (see the case below); otherwise the raw
#   attribute as returned by `_element[name]`
def extract_element_attribute(_element, _name)
  attribute = _name.to_sym

  if attribute != :value
    # Boolean attributes are reported by presence, not by their content.
    return !_element[attribute].nil? if is_boolean?(_element, attribute)

    return _element[attribute]
  end

  # :value is resolved differently depending on what kind of element this is.
  case classify(_element)
  when :input_checkbox, :input_radio then extract_checkable_value(_element)
  when :select then extract_select_value(_element)
  when :option then extract_option_value(_element)
  when :textarea then _element.content
  else _element[:value]
  end
end
extract_element_html(_element)
click to toggle source
# File lib/pincers/nokogiri/backend.rb, line 55
# Serializes an element to HTML.
#
# @param _element the element to serialize
# @return the result of calling `to_html` on the element
def extract_element_html(_element)
  _element.to_html
end
extract_element_tag(_element)
click to toggle source
# File lib/pincers/nokogiri/backend.rb, line 47
# Returns the tag name of an element.
#
# @param _element the element to inspect
# @return the element's `name`
def extract_element_tag(_element)
  _element.name
end
extract_element_text(_element)
click to toggle source
# File lib/pincers/nokogiri/backend.rb, line 51
# Returns the text of an element.
#
# @param _element the element to inspect
# @return the element's `content`
def extract_element_text(_element)
  _element.content
end
search_by_css(_element, _selector, _limit)
click to toggle source
# File lib/pincers/nokogiri/backend.rb, line 37
# Runs a CSS query starting at the given element.
#
# @param _element the node to search from
# @param _selector the CSS selector, handed to `_element.css` unchanged
# @param _limit accepted but not used by this implementation
# @return the result of `_element.css`
def search_by_css(_element, _selector, _limit)
  # nokogiri does not do any query level optimization when searching just one
  # node, so the limit is not forwarded
  _element.css(_selector)
end
search_by_xpath(_element, _selector, _limit)
click to toggle source
# File lib/pincers/nokogiri/backend.rb, line 42
# Runs an XPath query starting at the given element.
#
# @param _element the node to search from
# @param _selector the XPath expression, handed to `_element.xpath` unchanged
# @param _limit accepted but not used by this implementation
# @return the result of `_element.xpath`
def search_by_xpath(_element, _selector, _limit)
  # nokogiri does not do any query level optimization when searching just one
  # node, so the limit is not forwarded
  _element.xpath(_selector)
end
set_element_attribute(_element, _name, _value)
click to toggle source
# File lib/pincers/nokogiri/backend.rb, line 81
# Writes an attribute on an element, with special handling for :value and
# for boolean attributes.
#
# @param _element the element to modify
# @param _name the attribute name; converted with #to_sym before use
# @param _value the value to store
# @return the result of whichever setter ends up being invoked
def set_element_attribute(_element, _name, _value)
  attribute = _name.to_sym

  if attribute == :value
    # Selects and textareas do not keep their value in a `value` attribute.
    case classify(_element)
    when :select then return set_select_value(_element, _value)
    when :textarea then return (_element.content = _value)
    end
  elsif is_boolean?(_element, attribute)
    return set_boolean(_element, attribute, _value)
  end

  # Everything else is a plain attribute write.
  _element.set_attribute(attribute, _value)
end
Private Instance Methods
classify(_element)
click to toggle source
# File lib/pincers/nokogiri/backend.rb, line 102
# Classifies an element into a symbol built from its tag name and, for
# <input> and <button>, its `type` attribute.
#
# The `type` attribute is matched case-insensitively (it is downcased), and a
# missing or empty `type` falls back to the default: 'text' for inputs and
# 'submit' for buttons. So `<input type="CHECKBOX">` yields :input_checkbox
# and `<input type="">` yields :input_text.
#
# @param _element an element responding to `name` and `[]`
# @return [Symbol] e.g. :input_checkbox, :button_submit, or the bare tag name
#   (such as :select) for any other element
def classify(_element)
  name = _element.name

  default_type =
    case name
    when 'input' then 'text'
    when 'button' then 'submit'
    end

  # Only inputs and buttons are refined by their type.
  return name.to_sym if default_type.nil?

  type = _element[:type].to_s.downcase
  type = default_type if type.empty?

  "#{name}_#{type}".to_sym
end
extract_checkable_value(_element)
click to toggle source
# File lib/pincers/nokogiri/backend.rb, line 116
# Returns the value of a checkbox/radio style element.
#
# @param _element the element to read
# @return the element's `value` attribute, or the string 'on' when that
#   attribute is absent
def extract_checkable_value(_element)
  _element[:value] || 'on'
end
extract_option_value(_element)
click to toggle source
# File lib/pincers/nokogiri/backend.rb, line 131
# Returns the value of an <option>-like element.
#
# @param _element the option element, or nil
# @return nil when no element is given; otherwise the `value` attribute,
#   falling back to the element's `content` when the attribute is absent
def extract_option_value(_element)
  unless _element.nil?
    explicit = _element[:value]
    explicit || _element.content
  end
end
extract_select_value(_element)
click to toggle source
# File lib/pincers/nokogiri/backend.rb, line 121
# Returns the current value of a <select>-like element based on its options
# carrying a `selected` attribute.
#
# @param _element the select element
# @return when the element has a `multiple` attribute, a list with the value
#   of every selected option; otherwise the value of the first selected
#   option (nil when no option is selected)
def extract_select_value(_element)
  single = _element[:multiple].nil?
  chosen = _element.css('option[selected]')

  return extract_option_value(chosen.first) if single

  chosen.map { |option| extract_option_value(option) }
end
is_boolean?(_element, _name)
click to toggle source
# File lib/pincers/nokogiri/backend.rb, line 109
# Tells whether an attribute should be treated as boolean for an element,
# according to the BOOL_PROPERTIES table.
#
# @param _element the element the attribute belongs to
# @param _name the attribute name used as the BOOL_PROPERTIES key
# @return [Boolean] false when the table has no entry for the name, true when
#   the entry is :all, otherwise whether the entry includes the element's
#   classification (see #classify)
def is_boolean?(_element, _name)
  case (allowed = BOOL_PROPERTIES[_name])
  when nil then false
  when :all then true
  else allowed.include?(classify(_element))
  end
end
set_boolean(_element, _name, _value)
click to toggle source
# File lib/pincers/nokogiri/backend.rb, line 143
# Turns a boolean attribute on or off.
#
# @param _element the element to modify
# @param _name the attribute name
# @param _value truthy to add the attribute (its value is set to its own
#   name), falsy to remove it
# @return the result of the underlying set/remove call
def set_boolean(_element, _name, _value)
  return _element.set_attribute(_name, _name) if _value

  _element.remove_attribute(_name.to_s)
end
set_select_value(_element, _value)
click to toggle source
# File lib/pincers/nokogiri/backend.rb, line 136
# Selects the option of a <select>-like element that matches the given value.
#
# Every option currently carrying a `selected` attribute is deselected first.
# The first option whose `value` attribute equals the requested value is then
# selected; if there is none, the first option whose text content equals it
# is selected instead. When nothing matches, all options stay deselected.
#
# Matching is done in Ruby rather than by interpolating the value into an
# XPath string literal, so values containing quotes (e.g. "o'brien") are
# handled correctly.
#
# @param _element the select element; must respond to `xpath`
# @param _value the value or label to select; compared via its #to_s form
# @return the result of the final #set_boolean call, or nil when no option
#   matched
def set_select_value(_element, _value)
  wanted = _value.to_s
  options = _element.xpath('.//option')

  # Clear the current selection.
  options.each { |option| set_boolean(option, :selected, false) unless option[:selected].nil? }

  # Prefer a match on the value attribute, then fall back to the label.
  to_select = options.find { |option| option[:value] == wanted } ||
              options.find { |option| option.content == wanted }

  set_boolean(to_select, :selected, true) unless to_select.nil?
end