class Reid
Public Class Methods
new(requester_options = {})
click to toggle source
# File lib/reid/reid.rb, line 4
#
# Creates the Requester that the other methods of this class use (via
# +@requester.request+) to obtain documents.
#
# @param requester_options options forwarded unchanged to +Requester.new+;
#   defaults to an empty Hash
def initialize(requester_options = {})
  @requester = Requester.new(requester_options)
end
Public Instance Methods
crawl(url_crawler, operations, store_function)
click to toggle source
# File lib/reid/reid.rb, line 24
#
# Repeatedly asks +url_crawler+ for the next URL, requests it, scrapes the
# resulting document and hands the scraped record to +store_function+.
#
# @param url_crawler [#next] called with the previously requested document
#   (+nil+ on the first call); a falsy return value ends the crawl
# @param operations passed through unchanged to +scrape_doc+
# @param store_function [#call] invoked once per requested document with the
#   record returned by +scrape_doc+
# @return [nil]
def crawl(url_crawler, operations, store_function)
  doc = nil
  # The assignment in the condition is intentional: the crawler decides the
  # next URL from the last document and stops the loop by returning nil/false.
  while (url = url_crawler.next(doc))
    doc = @requester.request(url)
    store_function.call(scrape_doc(doc, operations))
  end
end
scrape_doc(doc, operations)
click to toggle source
# File lib/reid/reid.rb, line 8
#
# Runs every operation against +doc+ and collects the results in one Hash.
#
# Each operation is an indexable entry read as follows:
# * +[0]+ - a callable invoked as +call(nodes, record)+; it is expected to
#   write whatever it extracts into +record+
# * +[1]+ - the selector string handed to +doc.xpath+ or +doc.css+
# * +[3]+ - +:xpath+ to query with +doc.xpath+; any other value (including
#   a missing entry) queries with +doc.css+
#
# @param doc [#xpath, #css] the document to query (presumably a Nokogiri
#   document - confirm against Requester#request)
# @param operations [#each] the operations to apply, in order
# @return [Hash] the record filled in by the operations
def scrape_doc(doc, operations)
  record = {}
  operations.each do |operation|
    handler = operation[0]
    selector = operation[1]
    nodes = operation[3] == :xpath ? doc.xpath(selector) : doc.css(selector)
    handler.call(nodes, record)
  end
  record
end
scrape_page(url, operations)
click to toggle source
# File lib/reid/reid.rb, line 20
#
# Requests +url+ through +@requester+ and scrapes the returned document.
#
# @param url the address handed to +@requester.request+
# @param operations passed through unchanged to +scrape_doc+
# @return whatever +scrape_doc+ returns for the requested document
def scrape_page(url, operations)
  scrape_doc(@requester.request(url), operations)
end