module Makuri::Spider

Include this module in your spider class. Including it also extends the class with the methods of ClassMethods.

Attributes

engine[RW]
response[RW]

Public Class Methods

included(base) click to toggle source
# File lib/makuri/spider.rb, line 10
# Hook invoked with the class (or module) that includes this module.
# Extends that host with ClassMethods so its methods become class-level
# methods of the host.
#
# @param base [Module] the class or module performing the include
def self.included(base)
  base.extend ClassMethods
end
new(**config) click to toggle source
# File lib/makuri/spider.rb, line 36
# Builds a spider and, when a start URL is supplied, loads its first response.
#
# Recognised keys in +config+:
#   :start_url - URL fetched on construction (defaults to nil)
#   :engine    - engine identifier stored in @engine (defaults to :net_http)
#
# @param config [Hash] construction options; other keys are ignored here
def initialize(**config)
  @start_url = config.fetch(:start_url, nil)
  @engine    = config.fetch(:engine, :net_http)

  # Without a start URL there is nothing to fetch yet. Calling
  # update_response(nil) would hand nil to Addressable::URI.join, which
  # rejects arguments that cannot be converted to a String.
  update_response(@start_url) if valid_url?(@start_url)
end

Public Instance Methods

absolute_url(relative_url) click to toggle source
# File lib/makuri/spider.rb, line 64
# Resolves +relative_url+ against #base_url and returns the result as a String.
#
# @param relative_url [String] URL to resolve against the base
# @return [String] the joined URL
def absolute_url(relative_url)
  joined = Addressable::URI.join(base_url, relative_url)
  joined.to_s
end
browser() click to toggle source
# File lib/makuri/spider.rb, line 43
# Returns the Makuri::Browser for this spider, creating it with the spider's
# engine on first use and reusing the same instance afterwards.
#
# @return [Makuri::Browser]
def browser
  return @browser if @browser

  @browser = Makuri::Browser.new(engine: engine)
end
parse() click to toggle source
# File lib/makuri/spider.rb, line 47
# Placeholder that always raises; a spider class is expected to supply its
# own #parse.
#
# @raise [NotImplementedError] always, naming the receiver's class
def parse
  message = "Define #parse method for #{self.class}."
  raise NotImplementedError, message
end
request_to(handler, **params) click to toggle source
# File lib/makuri/spider.rb, line 51
# Optionally loads a new page, then dispatches to a handler method.
#
# When +params+ carries a non-empty :url, the response is refreshed from that
# URL and :url is removed before dispatch. A nil or empty :url is left in
# +params+ untouched. The remaining params are forwarded to +handler+ as
# keyword arguments; a handler that declares a single positional parameter
# still receives them as one Hash.
#
# @param handler [Symbol, String] name of a public method on this spider
# @param params [Hash] optional :url plus any arguments for the handler
# @return [Object] whatever the handler returns
def request_to(handler, **params)
  if valid_url?(params[:url])
    update_response(params[:url])
    params.delete(:url)
  end

  return public_send(handler) if params.empty?

  # Splat instead of passing the Hash positionally: on Ruby 3+ a positional
  # Hash is no longer converted to keywords, so handlers declared with keyword
  # parameters would otherwise raise ArgumentError.
  public_send(handler, **params)
end

Private Instance Methods

base_url() click to toggle source
# File lib/makuri/spider.rb, line 70
# Base against which relative URLs are resolved: the configured start URL
# when one is set, otherwise the browser's current URL converted to a String.
#
# @return [String, Object] @start_url as stored, or browser.url.to_s
def base_url
  return @start_url if @start_url

  browser.url.to_s
end
update_response(url) click to toggle source
# File lib/makuri/spider.rb, line 78
# Requests +url+ (resolved through #absolute_url) with the browser, parses the
# returned HTML with Nokogiri and stores the document in @response.
#
# @param url [String] absolute or relative URL to load
# @return [Object] the parsed document that was stored in @response
def update_response(url)
  session = browser
  page = session.request(absolute_url(url))
  @response = Nokogiri::HTML(page.html)
end
valid_url?(url) click to toggle source
# File lib/makuri/spider.rb, line 74
# Reports whether +url+ is present, i.e. its String form is non-empty.
# nil counts as absent because nil.to_s is "".
#
# The former `defined?(url)` guard was dropped: for a method parameter it is
# always truthy, so it never affected the result.
#
# @param url [Object] candidate URL (anything responding to #to_s)
# @return [Boolean]
def valid_url?(url)
  !url.to_s.empty?
end