class Vessel::Engine

Attributes

crawler_class[R]
middleware[R]
scheduler[R]
settings[R]

Public Class Methods

new(klass, &block)
# File lib/vessel/engine.rb, line 11
# Builds an engine for the given crawler class.
#
# Reads the crawler's settings, picks the middleware (the supplied block wins
# over the one built from settings[:middleware]), and creates the bounded
# message queue together with the scheduler that is handed that queue.
#
# @param klass [Class] crawler class; must respond to `settings`
# @param block [Proc, nil] optional callable used as middleware instead of
#   the configured one
def initialize(klass, &block)
  @crawler_class = klass
  @settings = klass.settings
  # Only fall back to the configured middleware when no block was passed.
  @middleware = block.nil? ? Middleware.build(*settings[:middleware]) : block
  # Queue capacity comes from settings[:max_threads].
  @queue = SizedQueue.new(settings[:max_threads])
  @scheduler = Scheduler.new(@queue, settings)
end
run(*args, &block)
# File lib/vessel/engine.rb, line 5
# Convenience entry point: constructs an engine and runs it immediately.
#
# @param args [Array] forwarded unchanged to `new`
# @param block [Proc, nil] forwarded unchanged to `new`
# @return [Object] the engine instance (not the result of `#run`)
def self.run(*args, &block)
  engine = new(*args, &block)
  engine.run
  engine
end

Public Instance Methods

handle(page, args)
# File lib/vessel/engine.rb, line 38
# Runs one crawler callback against a page and routes everything it yields.
#
# A fresh crawler instance is created for the page and the method named by
# the first element of +args+ is invoked with the remaining elements. Each
# yielded result is flattened: when every element is a Request the elements
# are posted to the scheduler, otherwise the original (unflattened) values
# are passed to the middleware, if one is set.
#
# @param page [Object, nil] handed to the crawler constructor; closed on the
#   way out (even when the callback raises) if it is truthy
# @param args [Array] method name followed by optional arguments for `send`
def handle(page, args)
  @crawler_class.new(page).send(*args) do |*yielded|
    items = yielded.flatten
    # Note: an empty yield also takes this branch, since `all?` on [] is true.
    next scheduler.post(*items) if items.all? { |item| item.is_a?(Request) }

    @middleware&.call(*yielded)
  end
ensure
  page.close if page
end
run()
# File lib/vessel/engine.rb, line 19
# Main loop: seeds the scheduler with the start requests, then consumes
# messages from the queue until the queue is closed.
#
# A message is either an Exception (re-raised here) or a [page, request]
# pair, which is dispatched to #handle. After each handled message the queue
# is closed once the engine reports itself idle, which ends the loop.
# The scheduler is always stopped on the way out, including on error.
#
# @raise [Exception] any exception object received through the queue
def run
  scheduler.post(*start_requests)

  until @queue.closed?
    incoming = @queue.pop

    # Exceptions travel through the queue as plain messages; surface them here.
    raise(incoming) if incoming.is_a?(Exception)

    page, request = incoming
    # Callback name plus optional payload; a nil payload is dropped.
    handle(page, [request.method, request.data].compact)

    @queue.close if idle?
  end
ensure
  scheduler.stop
end

Private Instance Methods

idle?()
# File lib/vessel/engine.rb, line 57
# Whether there is nothing left to process.
#
# True only when the message queue is empty, the scheduler reports a queue
# length of zero, and its scheduled and completed task counts are equal.
# Checks run in that order and stop at the first failing one.
#
# @return [Boolean]
def idle?
  return false unless @queue.empty?
  return false unless @scheduler.queue_length.zero?

  @scheduler.scheduled_task_count == @scheduler.completed_task_count
end
start_requests()
# File lib/vessel/engine.rb, line 53
# Builds the initial requests from the configured start URLs.
#
# @return [Object] whatever Request.build returns for settings[:start_urls]
def start_requests
  urls = settings[:start_urls]
  Request.build(*urls)
end