class WebFetch::Gatherer

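Handles requests to gather URLs: tags each request with a uid and a content hash, dispatches it asynchronously through the injected HTTP client (EM::HttpRequest, the EventMachine HTTP client, by default) and stores each outcome in the supplied storage.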

Constants

HASHABLE_KEYS

Public Class Methods

new(storage, params, logger = Logger, http = EM::HttpRequest) click to toggle source
# File lib/web_fetch/gatherer.rb, line 11
# Sets up a gatherer for one batch of requests.
#
# @param storage [#store] collaborator that receives each finished response
# @param params [Hash] only +params[:requests]+ is read here
# @param logger [#debug] sink for progress messages
# @param http [#new] factory invoked with a URL to build an async request
def initialize(storage, params, logger = Logger, http = EM::HttpRequest)
  @storage = storage
  @logger = logger
  @requests = params[:requests]
  @http = http
end

Public Instance Methods

start() click to toggle source
# File lib/web_fetch/gatherer.rb, line 18
# Tags every pending request, kicks off the asynchronous fetches and hands
# the tagged list back to the caller.
#
# @return [Hash] +{ requests: [...] }+ holding the entries produced by
#   +tag_requests+ (the very same objects that were passed to +gather+)
def start
  { requests: tag_requests }.tap { |tagged| gather(tagged[:requests]) }
end

Private Instance Methods

apply_callbacks(request, deferred) click to toggle source
# File lib/web_fetch/gatherer.rb, line 37
# Wires success and failure handlers onto an in-flight request.
#
# Both handlers log the outcome and store the built response under the
# request's uid; they differ only in the message and the +success+ flag.
# A "started" message is logged once the handlers are registered.
#
# @param request [Hash] must provide +:uid+; passed through to +response+
# @param deferred [#callback, #errback] the pending async request
def apply_callbacks(request, deferred)
  uid = request[:uid]
  record = lambda do |success|
    @storage.store(uid, response(request, deferred, success: success))
  end

  deferred.callback do
    @logger.debug("HTTP fetch successful for uid: #{uid}")
    record.call(true)
  end

  deferred.errback do
    @logger.debug("HTTP fetch failure for uid: #{uid}")
    record.call(false)
  end

  @logger.debug("HTTP fetch started for uid: #{uid}")
end
gather(targets) click to toggle source
# File lib/web_fetch/gatherer.rb, line 26
# Launches an asynchronous fetch for each tagged target and attaches the
# storing callbacks to it.
#
# @param targets [Array<Hash>] entries providing +:uid+ and +:request+
# @return [Array<Hash>] the +targets+ collection that was passed in
def gather(targets)
  targets.each do |target|
    uid = target[:uid]
    @logger.debug("Initialising async for uid: #{uid}")
    pending = request_async(target)
    # Read after request_async, which stamps :start_time onto the target.
    started_at, payload = target.values_at(:start_time, :request)
    apply_callbacks({ uid: uid, start_time: started_at, request: payload },
                    pending)
  end
end
hash(obj) click to toggle source
# File lib/web_fetch/gatherer.rb, line 93
# Derives a SHA1 hex digest from the parts of +obj+ whose keys appear in
# HASHABLE_KEYS; all other entries are ignored.
#
# @param obj [Hash] request description
# @return [String] hex-encoded SHA1 of the JSON dump of the retained entries
def hash(obj)
  hashable = obj.select { |key| HASHABLE_KEYS.include?(key) }
  Digest::SHA1.hexdigest(JSON.dump(hashable))
end
missing_url?() click to toggle source
# File lib/web_fetch/gatherer.rb, line 83
# Reports whether any entry of the requests array lacks a URL.
#
# An entry that is not a Hash (for example +nil+ or a bare string) cannot
# carry a +:url+ and is therefore counted as missing one; previously such an
# entry raised NoMethodError/TypeError from inside this check.
#
# @return [Boolean] false when +@requests+ is not an Array, otherwise true if
#   at least one entry is not a Hash or has a nil +:url+
def missing_url?
  return false unless @requests.is_a?(Array)

  @requests.any? { |req| !req.is_a?(Hash) || req[:url].nil? }
end
request_async(target) click to toggle source
# File lib/web_fetch/gatherer.rb, line 52
# Starts the asynchronous HTTP request described by +target+.
#
# Stamps +target[:start_time]+ with the current UTC time, builds a
# connection with +@http.new(url)+ and invokes the HTTP verb on it, passing
# the request's headers, query and body.
#
# The verb comes from the caller-supplied request, so it is checked against
# an explicit list before being dispatched; without the check any public
# method of the connection object could be invoked by name. The verb is
# dispatched as a String so arbitrary input is never turned into a Symbol.
#
# @param target [Hash] must provide +:request+, a Hash with +:url+ and
#   optionally +:method+ (defaults to GET), +:headers+, +:query+, +:body+
# @return [Object] whatever the connection's verb method returns
# @raise [ArgumentError] if the requested verb is not in the supported list
def request_async(target)
  request = target[:request]
  verb = (request[:method] || 'GET').to_s.downcase
  unless %w[get head delete put post patch options].include?(verb)
    raise ArgumentError, "Unsupported HTTP method: #{verb.inspect}"
  end

  target[:start_time] = Time.now.utc
  @http.new(request[:url]).public_send(
    verb,
    head: request[:headers],
    query: request.fetch(:query, {}),
    body: request.fetch(:body, nil)
  )
end
requests_empty?() click to toggle source
# File lib/web_fetch/gatherer.rb, line 79
# Reports whether the requests parameter is an Array with no entries.
#
# @return [Boolean] false for non-Array values, including nil
def requests_empty?
  @requests.is_a?(Array) && @requests.empty?
end
requests_missing?() click to toggle source
# File lib/web_fetch/gatherer.rb, line 71
# Reports whether no requests value was captured at construction time.
#
# @return [Boolean] true only when +@requests+ is nil
def requests_missing?
  @requests.nil?
end
requests_not_array?() click to toggle source
# File lib/web_fetch/gatherer.rb, line 75
# Reports whether a requests value was supplied but is not an Array.
#
# @return [Boolean] false when +@requests+ is nil or an Array, true otherwise
def requests_not_array?
  !(@requests.nil? || @requests.is_a?(Array))
end
response(request, result, options = {}) click to toggle source
# File lib/web_fetch/gatherer.rb, line 106
# Wraps the outcome of a fetch together with the request that produced it.
#
# @param request [Hash] request record; its +:uid+ is copied to the top level
# @param result [Object] finished async request, handed on to +result+
# @param options [Hash] must contain +:success+ (KeyError is raised otherwise)
# @return [Hash] with keys +:response+, +:request+ and +:uid+
def response(request, result, options = {})
  succeeded = options.fetch(:success)
  # Parenthesised call reaches the +result+ method, not the local of that name.
  outcome = result(request, result, succeeded)
  { response: outcome, request: request, uid: request[:uid] }
end
response_time(request) click to toggle source
# File lib/web_fetch/gatherer.rb, line 102
# Measures how long ago the request was started.
#
# @param request [Hash] must provide +:start_time+ (a Time)
# @return [Float] seconds between +:start_time+ and now
def response_time(request)
  finished_at = Time.now.utc
  finished_at - request[:start_time]
end
result(request, result, success) click to toggle source
# File lib/web_fetch/gatherer.rb, line 114
# Builds the response payload for a finished fetch.
#
# @param request [Hash] request record, used to compute the response time
# @param result [Object] finished async request; +response+, +headers+,
#   +response_header.status+ and (on failure) +error+ are read from it
# @param success [Boolean] whether the fetch succeeded
# @return [Hash] +:success+, Base64-encoded +:body+, +:headers+, +:status+ and
#   +:response_time+, plus +:error+ (inspected, or nil) when +success+ is falsy
def result(request, result, success)
  payload = {
    success: success,
    body: Base64.encode64(result.response),
    headers: result.headers,
    status: result.response_header.status,
    response_time: response_time(request)
  }
  payload[:error] = result.error&.inspect unless success
  payload
end
tag_requests() click to toggle source
# File lib/web_fetch/gatherer.rb, line 87
# Pairs every incoming request with its content hash and a fresh uid.
#
# @return [Array<Hash>] one +{ request:, hash:, uid: }+ entry per request,
#   in the same order as +@requests+
def tag_requests
  @requests.map do |req|
    digest = hash(req)
    { request: req, hash: digest, uid: uid }
  end
end
uid() click to toggle source
# File lib/web_fetch/gatherer.rb, line 98
# Generates the identifier attached to a tagged request.
#
# @return [String] the value of +SecureRandom.uuid+
def uid
  SecureRandom.uuid
end
validate() click to toggle source
# File lib/web_fetch/gatherer.rb, line 64
# Runs the request-list checks in order and calls +error+ with the matching
# symbol for each check that holds: missing, not an array, empty, and
# finally any entry without a URL.
#
# NOTE(review): +error+ is defined outside this view; presumably it raises or
# otherwise halts processing - confirm, because if it returns normally the
# remaining checks still run.
def validate
  error(:requests_missing) if requests_missing?
  error(:requests_not_array) if requests_not_array?
  error(:requests_empty) if requests_empty?
  error(:missing_url) if missing_url?
end