class HtmlEntry::Page::EntityFetcher

This class is designed for reading data from an HTML/XML block according to instructions.

@see tests/html_entry/page/test_entity_fetcher.rb

Attributes

instructions[R]

Get instructions

@return [Array]

Public Class Methods

new() click to toggle source

Init

# File lib/html_entry/page/entity_fetcher.rb, line 30
##
# Prepare the fetcher: make sure the selector cache exists.
#
# A cache that is already set is kept as-is; only a missing (nil/false)
# one is replaced with an empty Hash.
def initialize
  @selector_cache = {} unless @selector_cache
end

Public Instance Methods

fetch(document:, plenty: false) click to toggle source

Fetch data from document

@param [Nokogiri::HTML::Document, Nokogiri::XML::Element] document @param [TrueClass, FalseClass] plenty Fetch a collection of elements (true) or only a single one (false)

@return [Hash, Array]

# File lib/html_entry/page/entity_fetcher.rb, line 84
##
# Fetch data from the document.
#
# Delegates to fetch_plenty when +plenty+ is truthy, otherwise to
# fetch_single.
#
# @param [Nokogiri::HTML::Document, Nokogiri::XML::Element] document
# @param [TrueClass, FalseClass] plenty collect a collection instead of
#   a single entry
# @return [Hash, Array] whatever the selected fetch method returns
def fetch(document:, plenty: false)
  return fetch_plenty(document) if plenty

  fetch_single(document)
end
fetch_plenty(document) click to toggle source

Fetch collection data from document

@param [Nokogiri::HTML::Document, Nokogiri::XML::Element] document @return [Array] collected data, one entry per values collector

# File lib/html_entry/page/entity_fetcher.rb, line 129
##
# Fetch collection data from the document.
#
# @param [Nokogiri::HTML::Document, Nokogiri::XML::Element] document
# @return [Array] one entry (the collector's data) per values collector
# @raise [RuntimeError] when instructions is not exactly an Array
def fetch_plenty(document)
  raise 'Instructions must be an array.' unless instructions.instance_of?(Array)

  gathered, result = process_instructions(document)

  # @type [HtmlEntry::Page::ValuesCollector] values
  gathered.each { |_position, values| result.push(values.data) }

  result
end
fetch_single(document) click to toggle source

Fetch single data from document

@param [Nokogiri::HTML::Document, Nokogiri::XML::Element] document @return [Hash]

# File lib/html_entry/page/entity_fetcher.rb, line 98
##
# Fetch a single data set from the document.
#
# Every instruction's node is looked up via Page.fetch_node; instructions
# without :data contribute nothing to the result.
#
# @param [Nokogiri::HTML::Document, Nokogiri::XML::Element] document
# @return [Object] the data accumulated by the values collector
def fetch_single(document)
  values = get_values_collector(document)

  instructions.each do |step|
    found = Page.fetch_node(document, step)
    fields = step[:data]
    next unless fields

    fields.each { |name, data_instruction| values.fetch(name, data_instruction, found) }
  end

  values.data
end
get_values_collector(document) click to toggle source

Get value collector

@param [Nokogiri::HTML::Document, Nokogiri::XML::Element] document @return [Page::ValuesCollector]

# File lib/html_entry/page/entity_fetcher.rb, line 119
##
# Build a values collector for the document and the current instructions.
#
# @param [Nokogiri::HTML::Document, Nokogiri::XML::Element] document
# @return [Page::ValuesCollector]
def get_values_collector(document)
  Page::ValuesCollector.new(document: document, instructions: instructions)
end
instructions=(instructions) click to toggle source

Set instructions

Example for reading simple text by CSS selector: {

:name1 => {
  :type     => :instruction,
  :selector => '.test-block a.deep-in',
}

} The following filters are allowed for type :instruction:

  • :node_text, returns XML of found node

  • :node, returns object Nokogiri::XML::Element of found node

  • :no_strip, returns non-stripped text

  • by default, .strip is applied to the found text

Example for calculating a value from already fetched fields: {

:vote_up   => {
  :type     => :instruction,
  :selector => '.vote-up',
},
  :vote_down => {
  :type     => :instruction,
  :selector => '.vote-down',
},
  :vote_diff => {
  :type     => :function,
  :function => Proc.new { |info, name, document, instruction|
    info[:vote_up].to_i - info[:vote_down].to_i
  },
}

}

@param [Array, Hash] instructions (a non-Array value is wrapped into a one-element Array) @return [Array]

# File lib/html_entry/page/entity_fetcher.rb, line 70
##
# Set instructions.
#
# Anything that is not exactly an Array (Array subclasses included) is
# wrapped into a one-element Array.
#
# @param [Array, Object] instructions
# @return [Array] the stored instructions
def instructions=(instructions)
  @instructions = instructions.instance_of?(Array) ? instructions : [instructions]
end

Protected Instance Methods

data_has_option?(instruction, option:, value:) click to toggle source

Check whether merging of nodes data must be disabled

# File lib/html_entry/page/entity_fetcher.rb, line 189
##
# Check whether any data instruction carries the given option value.
#
# The instruction and its :data are only read, never modified.
#
# @param [Hash] instruction instruction holding an optional :data collection
# @param [Object] option key looked up in every Hash data element
# @param [Object] value value the option is compared against with ==
# @return [Boolean] false when the instruction has a :merge key or no
#   :data; otherwise true if at least one Hash element has option == value
def data_has_option?(instruction, option:, value:)
  # Hash#key looks an entry up by VALUE; the presence of the :merge key is
  # what matters here, so key? is the right check.
  return false if instruction.key?(:merge)

  # any? answers the question without filtering :data in place (select!
  # drops entries and returns nil when nothing was dropped, which both
  # destroyed the caller's data and inverted the result).
  (instruction[:data] || {}).any? do |_name, element|
    element.is_a?(Hash) && element[option] == value
  end
end
process_instructions(document) click to toggle source
# File lib/html_entry/page/entity_fetcher.rb, line 146
##
# Run every instruction against the document and gather a values
# collector per node index.
#
# @param [Nokogiri::HTML::Document, Nokogiri::XML::Element] document
# @return [Array] two-element array: the collectors Hash (keyed by node
#   index) and an empty data Array
# @raise [RuntimeError] when an instruction is not exactly a Hash
def process_instructions(document)
  # @type [HtmlEntry::Page::ValuesCollector[]] collectors
  collectors = {}

  instructions.each do |instruction|
    raise 'Instruction must be Hash.' unless instruction.instance_of?(Hash)

    retrieve_nodes(document, instruction).each_with_index do |node, position|
      process_node(document, node, instruction, collectors, position)
    end
  end

  # The data array is returned empty.
  [collectors, []]
end
process_node(document, node, instruction, collectors, index) click to toggle source
# File lib/html_entry/page/entity_fetcher.rb, line 169
##
# Feed one node into the collector that belongs to it.
#
# @param [Nokogiri::HTML::Document, Nokogiri::XML::Element] document
# @param [Object] node node handed to the collector
# @param [Hash] instruction instruction with optional :merge and :data
# @param [Hash] collectors collectors keyed by index; filled in place
# @param [Integer] index position of the node among the found nodes
# @return [Object, nil] nil when the instruction has no :data
def process_node(document, node, instruction, collectors, index)
  # Merged instructions gather all their items under the collector at 0.
  slot = instruction[:merge] ? 0 : index

  collectors[slot] = get_values_collector(document) unless collectors.key?(slot)

  fields = instruction[:data]
  return unless fields

  fields.each do |name, data_instruction|
    collectors[slot].fetch(name, data_instruction, node)
  end
end
retrieve_nodes(document, instruction) click to toggle source

@param [Hash] instruction

# File lib/html_entry/page/entity_fetcher.rb, line 161
##
# Find the nodes an instruction points at.
#
# A single nil placeholder is returned when nothing was fetched at all,
# or when the result is empty and the instruction sets :allow_empty.
#
# @param [Nokogiri::HTML::Document, Nokogiri::XML::Element] document
# @param [Hash] instruction
# @return [Object] the fetched nodes, or [nil] as a placeholder
def retrieve_nodes(document, instruction)
  found = Page.fetch_nodes(document, instruction)

  return [nil] if found.nil?
  return [nil] if instruction[:allow_empty] && found.count.zero?

  found
end