module Makuri::Spider
Include this module in your spider class.
Attributes
engine[RW]
response[RW]
Public Class Methods
included(base)
click to toggle source
# File lib/makuri/spider.rb, line 10
# Hook that Ruby invokes when this module is included into +base+.
# Extends +base+ with ClassMethods, so the methods defined there become
# class-level methods of the including class.
def self.included(base)
  base.extend(ClassMethods)
end
new(**config)
click to toggle source
# File lib/makuri/spider.rb, line 36
# Builds a spider and, when a start URL is supplied, loads that page into
# #response right away.
#
# Recognised +config+ keys:
# [:start_url] URL fetched on construction (default: nil).
# [:engine]    engine handed to Makuri::Browser (default: :net_http).
def initialize(**config)
  @start_url = config[:start_url]
  @engine = config.fetch(:engine, :net_http)

  # :start_url is optional. Resolving a nil/blank URL would fail inside
  # #absolute_url, so the initial fetch only happens when there is something
  # to fetch; #response stays nil until the first #request_to with a :url.
  update_response(@start_url) if valid_url?(@start_url)
end
Public Instance Methods
absolute_url(relative_url)
click to toggle source
# File lib/makuri/spider.rb, line 64
# Resolves +relative_url+ against #base_url using Addressable::URI.join and
# returns the joined URL as a String.
def absolute_url(relative_url)
  joined = Addressable::URI.join(base_url, relative_url)
  joined.to_s
end
browser()
click to toggle source
# File lib/makuri/spider.rb, line 43
# Returns the Makuri::Browser for this spider, creating it with the
# configured #engine on first use and reusing the same instance afterwards.
def browser
  return @browser if @browser

  @browser = Makuri::Browser.new(engine: engine)
end
parse()
click to toggle source
# File lib/makuri/spider.rb, line 47
# Placeholder that the including class is expected to override.
# Always raises NotImplementedError, naming the class that lacks its own #parse.
def parse
  message = "Define #parse method for #{self.class}."
  raise NotImplementedError, message
end
request_to(handler, **params)
click to toggle source
# File lib/makuri/spider.rb, line 51
# Calls the public method named +handler+ on this spider, optionally loading
# a new page first.
#
# When +params+ contains a non-blank :url, that page is fetched into
# #response and :url is dropped from +params+. A blank :url is left in place.
# Whatever remains in +params+ is forwarded to the handler; with nothing left,
# the handler is called without arguments. Returns the handler's result.
def request_to(handler, **params)
  if valid_url?(params[:url])
    update_response(params[:url])
    params.delete(:url)
  end

  # Kept as an explicit branch: older Rubies could turn an empty `**{}` into a
  # positional `{}` argument, which would break zero-arity handlers.
  return public_send(handler) if params.empty?

  # Forward as keywords. A positional hash is rejected on Ruby 3+ by handlers
  # declared with keyword parameters, while a handler taking one positional
  # hash still receives these keywords as that hash.
  public_send(handler, **params)
end
Private Instance Methods
base_url()
click to toggle source
# File lib/makuri/spider.rb, line 70
# URL that relative links are resolved against: the configured start URL
# when one was given, otherwise the browser's current URL as a String.
def base_url
  return @start_url if @start_url

  browser.url.to_s
end
update_response(url)
click to toggle source
# File lib/makuri/spider.rb, line 78
# Requests +url+ (resolved through #absolute_url) with the browser, parses
# the returned HTML with Nokogiri and stores the document in @response.
# Returns the parsed document.
def update_response(url)
  target = absolute_url(url)
  # `.html` is taken from the result for every engine; an earlier,
  # commented-out variant applied it only when @engine == :chrome.
  markup = browser.request(target).html
  @response = Nokogiri::HTML(markup)
end
valid_url?(url)
click to toggle source
# File lib/makuri/spider.rb, line 74
# Reports whether +url+ is present: true unless its string form is empty
# (so nil and "" both count as missing). No URL syntax check is performed.
def valid_url?(url)
  # `defined?(url)` is always truthy for a method parameter, so only the
  # emptiness check carries meaning.
  !url.to_s.empty?
end