class HtmlEntry::PageFetcher

Page fetcher

Attributes

instructions[RW]

Set instructions

@param [Hash] instructions @return [self]

Public Instance Methods

fetch(document) click to toggle source

Fetch entities from document

@param [Nokogiri::HTML::Document] document @return [Array]

# File lib/html_entry/page_fetcher.rb, line 27
# Collect entity data from a document.
#
# When a :block instruction is present, the document is first split into
# block documents and entities are fetched from each of them in turn.
# Without a :block instruction the document is handled as one single block;
# a Nokogiri::HTML::Document is narrowed to the first node matched by the
# 'body' selector beforehand, anything else is used as given.
#
# @param [Nokogiri::HTML::Document] document
# @return [Array] every element produced by fetch_data, in block order
def fetch(document)
  block_instructions = instructions[:block]

  block_documents =
    if !block_instructions.nil?
      # Explicit "block" instructions: every fetched block is processed.
      fetch_block_document(document, block_instructions)
    elsif document.instance_of?(Nokogiri::HTML::Document)
      # No "block" instructions: fall back to the document body.
      [fetch_block_document(document, type: :selector, selector: 'body').first]
    else
      [document]
    end

  entity_instructions = instructions[:entity]
  collected           = []
  block_documents.each do |block_document|
    fetch_data(block_document, entity_instructions).each { |element| collected << element }
  end
  collected
end
last_page?(document) click to toggle source

Check whether the document is the last page

@param [Nokogiri::HTML::Document] document @return [TrueClass, FalseClass]

# File lib/html_entry/page_fetcher.rb, line 61
# Tell whether the given document is the last page.
#
# Driven by the :last_page instruction: a :function instruction delegates
# to the custom callable and coerces its result to a boolean; any other
# type counts the nodes matched in the document and reports whether at
# least one was found.
#
# @param [Nokogiri::HTML::Document] document
# @return [TrueClass, FalseClass]
def last_page?(document)
  last_page_instruction = instructions[:last_page]

  if last_page_instruction[:type] == :function
    return call_function(document, last_page_instruction) ? true : false
  end

  Page.fetch_nodes(document, last_page_instruction).count.positive?
end

Protected Instance Methods

call_function(document, instruction) click to toggle source

Call custom function

@param [Nokogiri::HTML::Document] document @param [Hash] instruction @return [*]

# File lib/html_entry/page_fetcher.rb, line 105
# Invoke the custom callable stored under the instruction's :function key.
#
# The callable receives the document and the full instruction hash.
#
# @param [Nokogiri::HTML::Document] document
# @param [Hash] instruction
# @return [*] whatever the callable returns
def call_function(document, instruction)
  handler = instruction[:function]
  handler.call(document, instruction)
end
fetch_block_document(document, instructions) click to toggle source

Fetch block documents from a page

@param [Nokogiri::HTML::Document] document @param [Hash] instructions @return [Nokogiri::XML::NodeSet]

# File lib/html_entry/page_fetcher.rb, line 90
# Fetch the block documents described by the given instructions.
#
# A :function instruction is delegated to the custom callable; every other
# type is resolved through Page.fetch_nodes.
#
# @param [Nokogiri::HTML::Document] document
# @param [Hash] instructions
# @return [Nokogiri::XML::NodeSet] (or the custom function's result)
# @raise [RuntimeError] when instructions is nil
def fetch_block_document(document, instructions)
  raise 'Instructions are not set.' if instructions.nil?

  if instructions[:type] == :function
    call_function(document, instructions)
  else
    Page.fetch_nodes(document, instructions)
  end
end
fetch_data(entity_document, instructions) click to toggle source

Fetch entity data

@param [Nokogiri::XML::Element] entity_document @param [Hash] instructions @return [Hash]

# File lib/html_entry/page_fetcher.rb, line 78
# Fetch entity data from a single entity document.
#
# Builds a fresh Page::EntityFetcher, hands it the instructions and runs it
# against the document with the plenty flag enabled.
#
# @param [Nokogiri::XML::Element] entity_document
# @param [Hash] instructions
# @return [Hash]
def fetch_data(entity_document, instructions)
  entity_fetcher = Page::EntityFetcher.new.tap do |configured|
    configured.instructions = instructions
  end

  entity_fetcher.fetch(document: entity_document, plenty: true)
end