class HtmlEntry::PageFetcher
Page
fetcher
Attributes
instructions[RW]
Set instructions
@param [Hash] instructions @return [self]
Public Instance Methods
fetch(document)
click to toggle source
Fetch entities from document
@param [Nokogiri::HTML::Document] document @return [Array]
# File lib/html_entry/page_fetcher.rb, line 27
# Collect entities from a document.
#
# Without a :block instruction the document is handled as one single block;
# if it is exactly a Nokogiri::HTML::Document, the first node matched by the
# 'body' selector is used instead of the document itself. With a :block
# instruction, every block returned for it is processed in turn.
#
# @param [Nokogiri::HTML::Document] document
# @return [Array] entities of every processed block, in processing order
def fetch(document)
  whole_page = instructions[:block].nil?

  block_documents =
    if whole_page && document.instance_of?(Nokogiri::HTML::Document)
      # no "block" instruction: narrow a full HTML document down to its body
      [fetch_block_document(document, type: :selector, selector: 'body').first]
    elsif whole_page
      # no "block" instruction: anything else is used as it was given
      [document]
    else
      fetch_block_document(document, instructions[:block])
    end

  items = []
  block_documents.each do |block_document|
    fetch_data(block_document, instructions[:entity]).each { |entity| items << entity }
  end
  items
end
last_page?(document)
click to toggle source
Check whether the given document is the last page
@param [Nokogiri::HTML::Document] document @return [Boolean]
# File lib/html_entry/page_fetcher.rb, line 61
# Tell whether the given document is the last page.
#
# The :last_page instruction decides how: a :function instruction is called
# and its result is coerced to a boolean; any other instruction is handed to
# Page.fetch_nodes and the page counts as last when at least one node is found.
#
# @param [Nokogiri::HTML::Document] document
# @return [Boolean]
def last_page?(document)
  rule = instructions[:last_page]
  return (call_function(document, rule) ? true : false) if rule[:type] == :function

  Page.fetch_nodes(document, rule).count.positive?
end
Protected Instance Methods
call_function(document, instruction)
click to toggle source
Call custom function
@param [Nokogiri::HTML::Document] document @param [Hash] instruction @return [Object] whatever the function returns
# File lib/html_entry/page_fetcher.rb, line 105
# Invoke the custom callable stored under the :function key.
#
# The callable receives the document and the complete instruction hash.
#
# @param [Nokogiri::HTML::Document] document
# @param [Hash] instruction
# @return [Object] whatever the callable returns
def call_function(document, instruction)
  callback = instruction[:function]
  callback.call(document, instruction)
end
fetch_block_document(document, instructions)
click to toggle source
Fetch the block nodes of a document
@param [Nokogiri::HTML::Document] document @param [Hash] instructions @return [Nokogiri::XML::NodeSet]
# File lib/html_entry/page_fetcher.rb, line 90
# Resolve a block instruction against a document.
#
# Note that the +instructions+ parameter shadows the attribute of the same
# name inside this method.
#
# @param [Nokogiri::HTML::Document] document
# @param [Hash] instructions a single block instruction
# @return the result of the custom function for a :function instruction,
#   otherwise the result of Page.fetch_nodes
# @raise [RuntimeError] when instructions is nil
def fetch_block_document(document, instructions)
  raise 'Instructions are not set.' if instructions.nil?

  if instructions[:type] == :function
    call_function(document, instructions)
  else
    Page.fetch_nodes(document, instructions)
  end
end
fetch_data(entity_document, instructions)
click to toggle source
Fetch entity data
@param [Nokogiri::XML::Element] entity_document @param [Hash] instructions @return [Hash]
# File lib/html_entry/page_fetcher.rb, line 78
# Fetch entity data from one block document.
#
# A fresh Page::EntityFetcher is configured with the given instructions and
# asked to fetch with plenty: true.
#
# @param [Nokogiri::XML::Element] entity_document
# @param [Hash] instructions entity instructions (shadows the attribute here)
# @return the result of Page::EntityFetcher#fetch
def fetch_data(entity_document, instructions)
  Page::EntityFetcher
    .new
    .tap { |entity_fetcher| entity_fetcher.instructions = instructions }
    .fetch(document: entity_document, plenty: true)
end