module Horsefield::Scraper

Public Class Methods

included(base)
# File lib/horsefield/scraper.rb, line 14
# Hook that Ruby invokes when this module is mixed into +base+ with
# +include+. Extends +base+ with ClassMethods, so the methods defined there
# become singleton (class-level) methods of the including class.
def self.included(base)
  base.extend(ClassMethods)
end
new(html_xml_or_url, remove_namespaces: false)
# File lib/horsefield/scraper.rb, line 18
def initialize(html_xml_or_url, remove_namespaces: false)
  html_xml_or_url = open(html_xml_or_url).read if html_xml_or_url =~ /\A#{URI::regexp}\Z/

  @doc = if html_xml_or_url =~ /\A<\?xml/
           doc = Nokogiri::XML(html_xml_or_url)
           doc = doc.remove_namespaces! if remove_namespaces
           doc
         else
           Nokogiri::HTML(html_xml_or_url)
         end
end
scrape(html_or_url, &block)
# File lib/horsefield/scraper.rb, line 7
# One-shot convenience entry point: builds an anonymous class that includes
# Horsefield::Scraper, evaluates +block+ with that class as +self+ (so the
# block can call the class-level methods the module adds), then instantiates
# it with +html_or_url+ and returns the result of the instance's #scrape.
def self.scrape(html_or_url, &block)
  scraper_class = Class.new
  scraper_class.include(Horsefield::Scraper)
  scraper_class.instance_eval(&block)

  scraper = scraper_class.new(html_or_url)
  scraper.scrape
end

Public Instance Methods

[](field)
# File lib/horsefield/scraper.rb, line 30
# Looks up a single entry of the scraped result: returns whatever #fields
# holds under +field+.
def [](field)
  fields[field]
end
fields()
# File lib/horsefield/scraper.rb, line 38
# Returns the scraped result, computing it on first use and caching it in
# <tt>@fields</tt>.
#
# Every callable in <tt>self.class.lookups</tt> is invoked with <tt>@doc</tt>
# and the results are merged in order, so later lookups win on key clashes.
# The merged hash is then handed to <tt>self.class.postprocessor</tt>, which
# is evaluated with that hash as +self+; its return value is what gets cached.
def fields
  @fields ||= begin
    scraper_class = self.class
    merged = scraper_class.lookups.each_with_object({}) do |lookup, collected|
      collected.merge!(lookup.call(@doc))
    end
    merged.instance_eval(&scraper_class.postprocessor)
  end
end
scrape()
# File lib/horsefield/scraper.rb, line 34
# Runs the scraper and returns its result; this is a plain delegation to
# #fields.
def scrape
  fields
end