class Vessel::Engine
Attributes
crawler_class[R]
middleware[R]
scheduler[R]
settings[R]
Public Class Methods
new(klass, &block)
click to toggle source
# File lib/vessel/engine.rb, line 11
#
# Builds an engine for the given crawler class.
#
# klass - crawler class; must respond to +settings+, whose result is
#         indexed with :middleware and :max_threads below.
# block - optional block stored as the middleware; when absent, a
#         middleware is built from settings[:middleware].
def initialize(klass, &block)
  @crawler_class = klass
  @settings = klass.settings

  # An explicitly supplied block takes precedence over configured middleware.
  @middleware =
    if block
      block
    else
      Middleware.build(*settings[:middleware])
    end

  # The queue capacity comes from the :max_threads setting; the scheduler
  # is handed the same queue that #run pops from.
  capacity = settings[:max_threads]
  @queue = SizedQueue.new(capacity)
  @scheduler = Scheduler.new(@queue, settings)
end
run(*args, &block)
click to toggle source
# File lib/vessel/engine.rb, line 5
#
# Convenience entry point: instantiates the engine with the given
# arguments and block, runs it, and returns the engine instance.
def self.run(*args, &block)
  engine = new(*args, &block)
  engine.run
  engine
end
Public Instance Methods
handle(page, args)
click to toggle source
# File lib/vessel/engine.rb, line 38
#
# Runs one crawler callback against a page and routes whatever it yields.
#
# page - object handed to the crawler constructor; closed afterwards when
#        it is truthy.
# args - array splatted into +send+ on the crawler: the first element is
#        the method name, the rest are its arguments.
#
# Each yield from the crawler is flattened: if every element is a Request
# the elements are posted to the scheduler, otherwise the original yielded
# values are passed to the middleware (when one is set).
def handle(page, args)
  crawler = @crawler_class.new(page)

  crawler.send(*args) do |*result|
    items = result.flatten
    only_requests = items.all? { |item| item.is_a?(Request) }

    if only_requests
      scheduler.post(*items)
    else
      @middleware&.call(*result)
    end
  end
ensure
  # Always release the page, even when the crawler callback raised.
  page.close if page
end
run()
click to toggle source
# File lib/vessel/engine.rb, line 19
#
# Main loop: seeds the scheduler with the start requests, then pops
# messages from the queue until the queue is closed.
#
# A popped message is either an Exception, which is raised here, or a
# [page, request] pair, which is dispatched to #handle with the request's
# method and (non-nil) data. The queue is closed once #idle? reports that
# no work is left. The scheduler is stopped on every exit path.
#
# Raises whatever Exception instance arrives through the queue.
def run
  scheduler.post(*start_requests)

  # If nothing is pending right after seeding (e.g. there were no start
  # requests), close the queue now. Otherwise the first pop below would
  # block on a queue that, presumably, nothing will ever push to.
  @queue.close if idle?

  until @queue.closed?
    message = @queue.pop
    raise(message) if message.is_a?(Exception)

    page, request = message
    # compact drops a nil data payload so the callback gets no extra argument.
    args = [request.method, request.data].compact
    handle(page, args)

    @queue.close if idle?
  end
ensure
  scheduler.stop
end
Private Instance Methods
idle?()
click to toggle source
# File lib/vessel/engine.rb, line 57
#
# Reports whether there is no work left: the engine queue is empty, the
# scheduler's queue length is zero, and the scheduler's scheduled and
# completed task counts are equal.
def idle?
  return false unless @queue.empty?
  return false unless @scheduler.queue_length.zero?

  @scheduler.scheduled_task_count == @scheduler.completed_task_count
end
start_requests()
click to toggle source
# File lib/vessel/engine.rb, line 53
#
# Builds the initial requests from the :start_urls setting by splatting
# it into Request.build.
def start_requests
  urls = settings[:start_urls]
  Request.build(*urls)
end