class HtmlEntry::Page::EntityFetcher
This entity-html_entry class is designed for reading data from an HTML/XML block according to instructions.
@see tests/html_entry/page/test_entity_fetcher.rb
Attributes
Get instructions
@return [Array]
Public Class Methods
Init
# File lib/html_entry/page/entity_fetcher.rb, line 30
# Set up the selector cache for a new fetcher.
#
# The cache starts as an empty Hash; a value already stored in
# +@selector_cache+ is left untouched.
def initialize
  @selector_cache ||= {}
end
Public Instance Methods
Fetch data from document
@param [Nokogiri::HTML::Document, Nokogiri::XML::Element] document @param [TrueClass, FalseClass] plenty Fetch a collection of elements when true,
or only a single one otherwise
@return [Hash, Array]
# File lib/html_entry/page/entity_fetcher.rb, line 84
# Read data from the document, either as a collection or as one entity.
#
# @param document [Nokogiri::HTML::Document, Nokogiri::XML::Element]
# @param plenty [Boolean] when truthy delegate to #fetch_plenty,
#   otherwise to #fetch_single
# @return [Hash, Array] whatever the delegated reader returns
def fetch(document:, plenty: false)
  return fetch_plenty(document) if plenty

  fetch_single(document)
end
Fetch collection data from document
@param [Nokogiri::HTML::Document, Nokogiri::XML::Element] document @return [Array] one data entry per values collector
# File lib/html_entry/page/entity_fetcher.rb, line 129
# Read a collection of entities from the document.
#
# Runs #process_instructions and appends the +data+ of every resulting
# collector to the list that #process_instructions returned.
#
# @param document [Nokogiri::HTML::Document, Nokogiri::XML::Element]
# @return [Array] collected data, one entry per collector
# @raise [RuntimeError] when +instructions+ is not exactly an Array
def fetch_plenty(document)
  raise 'Instructions must be an array.' unless instructions.instance_of?(Array)

  collectors, data = process_instructions(document)
  # @type [HtmlEntry::Page::ValuesCollector] collector
  collectors.each_value { |collector| data << collector.data }
  data
end
Fetch single data from document
@param [Nokogiri::HTML::Document, Nokogiri::XML::Element] document @return [Hash]
# File lib/html_entry/page/entity_fetcher.rb, line 98
# Read a single entity from the document.
#
# One values collector is shared by all instructions. For each
# instruction a node is looked up via Page.fetch_node, and every entry of
# the instruction's +:data+ is passed to the collector together with that
# node. Instructions without +:data+ are skipped after the node lookup.
#
# @param document [Nokogiri::HTML::Document, Nokogiri::XML::Element]
# @return [Object] the collector's +data+ (documented upstream as a Hash)
def fetch_single(document)
  values = get_values_collector(document)

  instructions.each do |instruction|
    found = Page.fetch_node(document, instruction)
    fields = instruction[:data]
    next unless fields

    fields.each { |name, rule| values.fetch(name, rule, found) }
  end

  values.data
end
Get value collector
@param [Nokogiri::HTML::Document, Nokogiri::XML::Element] document @return [Page::ValuesCollector]
# File lib/html_entry/page/entity_fetcher.rb, line 119
# Build a new values collector for the given document, handing it the
# fetcher's current instructions.
#
# @param document [Nokogiri::HTML::Document, Nokogiri::XML::Element]
# @return [Page::ValuesCollector]
def get_values_collector(document)
  Page::ValuesCollector.new(document: document, instructions: instructions)
end
Set instructions
Example for reading simple text by CSS selector: {
:name1 => { :type => :instruction, :selector => '.test-block a.deep-in', }
} There are filters allowed for type :instruction :
-
:node_text, returns XML of found node
-
:node, returns object Nokogiri::XML::Element of found node
-
:no_strip, returns non-stripped text
-
by default, .strip is applied to the found text
Example for calculating instruction according to fetch fields: {
:vote_up => { :type => :instruction, :selector => '.vote-up', }, :vote_down => { :type => :instruction, :selector => '.vote-down', }, :vote_diff => { :type => :function, :function => Proc.new { |info, name, document, instruction| info[:vote_up].to_i - info[:vote_down].to_i }, }
}
@param [Array] instructions @return [self]
# File lib/html_entry/page/entity_fetcher.rb, line 70
# Store the instructions, always as an Array.
#
# A value that is not exactly an Array (for example a single instruction
# Hash) is wrapped into a one-element Array; an Array is stored as given.
#
# @param instructions [Array, Object] list of instructions or a single one
def instructions=(instructions)
  @instructions = instructions.instance_of?(Array) ? instructions : [instructions]
end
Protected Instance Methods
Check whether merging of node data must be disabled
# File lib/html_entry/page/entity_fetcher.rb, line 189
# Tell whether any data instruction of +instruction+ has +option+ set to
# +value+.
#
# The check is read-only: +instruction[:data]+ is never modified.
#
# @param instruction [Hash] a fetch instruction, optionally with +:data+
# @param option [Object] key to look up inside each data instruction
# @param value [Object] value the option must be equal to
# @return [Boolean] false when the instruction has a +:merge+ key or no
#   +:data+; otherwise whether at least one Hash entry matches
def data_has_option?(instruction, option:, value:)
  # NOTE(review): the previous code called Hash#key(:merge), which is a
  # reverse lookup by value; a check for the :merge key is presumably what
  # was meant -- confirm against callers.
  return false if instruction.key?(:merge)

  fields = instruction[:data]
  # Other methods in this class tolerate instructions without :data.
  return false unless fields

  fields.any? { |_name, rule| rule.is_a?(Hash) && rule[option] == value }
end
# File lib/html_entry/page/entity_fetcher.rb, line 146
# Walk over all instructions and feed every retrieved node to
# #process_node, which fills the shared +collectors+ Hash.
#
# @param document [Nokogiri::HTML::Document, Nokogiri::XML::Element]
# @return [Array] a pair of the collectors Hash and a new empty Array
#   that the caller fills with data
# @raise [RuntimeError] when an instruction is not exactly a Hash
def process_instructions(document)
  # @type [HtmlEntry::Page::ValuesCollector[]] collectors
  collectors = {}

  instructions.each do |instruction|
    raise 'Instruction must be Hash.' unless instruction.instance_of?(Hash)

    retrieve_nodes(document, instruction).each_with_index do |node, position|
      process_node(document, node, instruction, collectors, position)
    end
  end

  [collectors, []]
end
# File lib/html_entry/page/entity_fetcher.rb, line 169
# Pass one node's data to the collector that belongs to its position.
#
# @param document [Nokogiri::HTML::Document, Nokogiri::XML::Element]
# @param node [Object, nil] node the data instructions are applied to
# @param instruction [Hash] instruction with optional +:merge+ and +:data+
# @param collectors [Hash] collectors by index; a missing one is created
# @param index [Integer] position of the node among the retrieved nodes
# @return [Hash, nil] the +:data+ Hash, or nil when there is none
def process_node(document, node, instruction, collectors, index)
  # With :merge all items are gathered under the same collector (slot 0).
  slot = instruction[:merge] ? 0 : index
  collectors[slot] = get_values_collector(document) unless collectors.key?(slot)

  fields = instruction[:data]
  return unless fields

  collector = collectors[slot]
  fields.each { |name, rule| collector.fetch(name, rule, node) }
end
@param [Nokogiri::HTML::Document, Nokogiri::XML::Element] document @param [Hash] instruction
# File lib/html_entry/page/entity_fetcher.rb, line 161
# Look up the nodes for an instruction via Page.fetch_nodes.
#
# A single nil placeholder (+[nil]+) is returned instead when the lookup
# yields nil, or when it yields an empty result and the instruction sets
# +:allow_empty+.
#
# @param document [Nokogiri::HTML::Document, Nokogiri::XML::Element]
# @param instruction [Hash]
# @return [Object] the fetched nodes, or +[nil]+ as described above
def retrieve_nodes(document, instruction)
  found = Page.fetch_nodes(document, instruction)
  return [nil] if found.nil?
  return [nil] if instruction[:allow_empty] && found.count.zero?

  found
end