class Vessel::Scheduler
Attributes
browser[R]
delay[R]
headers[R]
queue[R]
Public Class Methods
new(queue, settings)
click to toggle source
# File lib/vessel/scheduler.rb, line 13
# Builds a scheduler around +queue+ and starts a Ferrum browser.
#
# @param queue [#<<] sink that receives the result of every processed request
# @param settings [Hash] reads :min_threads, :max_threads, :delay, :headers,
#   :ferrum (browser options), :timeout (merged into the browser options when
#   truthy) and :intercept (callable registered as a request handler)
def initialize(queue, settings)
  @queue = queue
  @min_threads, @max_threads, @delay, @headers =
    settings.values_at(:min_threads, :max_threads, :delay, :headers)

  # Build a fresh options hash instead of calling merge! on settings[:ferrum]:
  # the caller's hash must not gain a :timeout key as a side effect, and a
  # missing :ferrum entry is treated as "no extra options".
  options = settings[:ferrum].to_h
  options = options.merge(timeout: settings[:timeout]) if settings[:timeout]
  @browser = Ferrum::Browser.new(**options)

  return unless settings[:intercept]

  # Enable interception first, then route every request event to the handler.
  @browser.network.intercept
  @browser.on(:request, &settings[:intercept])
end
Public Instance Methods
post(*requests)
click to toggle source
# File lib/vessel/scheduler.rb, line 28
# Schedules every given request on the thread pool.
#
# For each request a future is created on +pool+; inside the future the
# request is visited via +goto+ and the outcome is appended to +queue+.
#
# @param requests [Array] requests to schedule
# @return [Array] one future (as returned by Concurrent::Promises.future_on)
#   per request, in the order given
def post(*requests)
  requests.map do |request|
    # queue and request are handed to the future as arguments rather than
    # captured, so the block works on the values it was given.
    Concurrent::Promises.future_on(pool, queue, request) { |sink, job| sink << goto(job) }
  end
end
stop()
click to toggle source
# File lib/vessel/scheduler.rb, line 36
# Shuts the scheduler down: stops the thread pool, then quits the browser.
#
# The pool is asked to shut down and given 30 seconds to terminate; if
# wait_for_termination reports failure the pool is killed.
#
# @return whatever +browser.quit+ returns
def stop
  pool.shutdown
  terminated = pool.wait_for_termination(30)
  pool.kill unless terminated
  browser.quit
end
Private Instance Methods
goto(request)
click to toggle source
# File lib/vessel/scheduler.rb, line 52
# Visits the URL of +request+ in a freshly created browser page.
#
# @param request [#stub?, #url] request to perform
# @return [Array, StandardError] +[nil, request]+ for a stub request,
#   +[page, request]+ after a successful visit, or the rescued exception
#   object (returned, not raised) when anything in the method fails
def goto(request)
  if request.stub?
    [nil, request]
  else
    page = browser.create_page
    page.headers.set(headers) if headers

    # A delay between requests exists to avoid bombarding the server, which
    # is only meaningful for a single-threaded crawler; with more threads the
    # delay is ignored.
    throttled = @max_threads == 1 && delay > 0
    sleep(delay) if throttled

    page.goto(request.url)
    [page, request]
  end
rescue StandardError => error
  # Errors are handed back as values so the caller decides how to treat them.
  error
end
pool()
click to toggle source
# File lib/vessel/scheduler.rb, line 44
# Lazily builds and memoizes the thread pool used to run requests.
#
# The pool is sized from @min_threads / @max_threads and created with
# max_queue: 0 (NOTE(review): presumably "unbounded queue" in concurrent-ruby;
# confirm against the library docs).
#
# @return [Concurrent::ThreadPoolExecutor] the same instance on every call
def pool
  return @pool if @pool

  @pool = Concurrent::ThreadPoolExecutor.new(
    max_queue: 0,
    min_threads: @min_threads,
    max_threads: @max_threads
  )
end