class WebFetch::Gatherer
Handles requests to gather URLs and delegates to the EventMachine web server
Constants
- HASHABLE_KEYS
Public Class Methods
new(storage, params, logger = Logger, http = EM::HttpRequest)
click to toggle source
# File lib/web_fetch/gatherer.rb, line 11
# Sets up a gatherer for the request list found under +params[:requests]+.
#
# @param storage [#store] receives each finished response, keyed by uid
# @param params [Hash] only the +:requests+ entry is read here
# @param logger [#debug] sink for progress messages
# @param http [#new] factory called with a URL to build one HTTP request
def initialize(storage, params, logger = Logger, http = EM::HttpRequest)
  @requests = params[:requests]
  @storage = storage
  @logger = logger
  @http = http
end
Public Instance Methods
start()
click to toggle source
# File lib/web_fetch/gatherer.rb, line 18
# Tags every request, hands the tagged list to +gather+, and returns it.
#
# @return [Hash] +{ requests: [...] }+ wrapping the same array that was
#   passed to +gather+
def start
  tagged_requests = tag_requests
  gather(tagged_requests)
  { requests: tagged_requests }
end
Private Instance Methods
apply_callbacks(request, deferred)
click to toggle source
# File lib/web_fetch/gatherer.rb, line 37
# Registers success and failure handlers on +deferred+. Either handler logs
# the outcome and stores the built response under the request's uid.
#
# @param request [Hash] must carry +:uid+; passed through to +response+
# @param deferred [#callback, #errback] the in-flight HTTP request
# @return whatever the final +@logger.debug+ call returns
def apply_callbacks(request, deferred)
  uid = request[:uid]

  # Shared tail of both handlers: log, then persist the outcome.
  record = lambda do |message, succeeded|
    @logger.debug(message)
    @storage.store(uid, response(request, deferred, success: succeeded))
  end

  deferred.callback { record.call("HTTP fetch successful for uid: #{uid}", true) }
  deferred.errback { record.call("HTTP fetch failure for uid: #{uid}", false) }

  @logger.debug("HTTP fetch started for uid: #{uid}")
end
gather(targets)
click to toggle source
# File lib/web_fetch/gatherer.rb, line 26
# Starts an asynchronous fetch for every tagged request and wires up its
# completion handlers.
#
# @param targets [Array<Hash>] entries carrying +:uid+ and +:request+
# @return [Array<Hash>] +targets+ itself
def gather(targets)
  targets.each do |target|
    uid = target[:uid]
    @logger.debug("Initialising async for uid: #{uid}")
    in_flight = request_async(target)

    # Built only after request_async, which stamps target[:start_time].
    summary = {
      uid: uid,
      start_time: target[:start_time],
      request: target[:request]
    }
    apply_callbacks(summary, in_flight)
  end
end
hash(obj)
click to toggle source
# File lib/web_fetch/gatherer.rb, line 93
# Fingerprints a request: keeps only the entries whose key is listed in
# HASHABLE_KEYS, serialises them as JSON, and returns the SHA1 hex digest.
#
# @param obj [Hash] a single request description
# @return [String] 40-character lowercase hex digest
def hash(obj)
  fingerprint_source = obj.select { |key| HASHABLE_KEYS.include?(key) }
  Digest::SHA1.hexdigest(JSON.dump(fingerprint_source))
end
missing_url?()
click to toggle source
# File lib/web_fetch/gatherer.rb, line 83
# Reports whether any entry of the request list lacks a URL.
#
# An entry that is not a Hash (nil, a bare string, ...) cannot carry a
# +:url+ and is therefore counted as missing one, rather than letting
# +req[:url]+ raise while validating.
#
# @return [Boolean] false when +@requests+ is not an Array
def missing_url?
  return false unless @requests.is_a?(Array)

  @requests.any? { |req| !req.is_a?(Hash) || req[:url].nil? }
end
request_async(target)
click to toggle source
# File lib/web_fetch/gatherer.rb, line 52
# Stamps the target with a start time and issues its HTTP request.
#
# The verb comes from the request description, so it is checked against an
# explicit allow-list before being dispatched; without that check any public
# method of the HTTP object could be invoked by name.
#
# @param target [Hash] must carry +:request+ (with +:url+, and optionally
#   +:method+, +:headers+, +:query+, +:body+); +:start_time+ is written to it
# @return the object returned by the HTTP verb call
# @raise [ArgumentError] when +:method+ is not a supported HTTP verb
def request_async(target)
  request = target[:request]

  # A missing or nil method falls back to GET.
  verb = (request[:method] || 'GET').to_s.downcase
  unless %w[get head post put patch delete options].include?(verb)
    raise ArgumentError, "Unsupported HTTP method: #{request[:method].inspect}"
  end

  target[:start_time] = Time.now.utc
  @http.new(request[:url]).public_send(
    verb,
    head: request[:headers],
    query: request.fetch(:query, {}),
    body: request.fetch(:body, nil)
  )
end
requests_empty?()
click to toggle source
# File lib/web_fetch/gatherer.rb, line 79
# True only when the request list is an Array with no entries.
#
# @return [Boolean]
def requests_empty?
  @requests.is_a?(Array) && @requests.empty?
end
requests_missing?()
click to toggle source
# File lib/web_fetch/gatherer.rb, line 71
# True when no request list was supplied at all.
#
# @return [Boolean]
def requests_missing?
  @requests.nil?
end
requests_not_array?()
click to toggle source
# File lib/web_fetch/gatherer.rb, line 75
# True when a request list was supplied but is not an Array. A nil list is
# not covered here; +requests_missing?+ checks for that.
#
# @return [Boolean]
def requests_not_array?
  !(@requests.nil? || @requests.is_a?(Array))
end
response(request, result, options = {})
click to toggle source
# File lib/web_fetch/gatherer.rb, line 106
# Wraps the outcome of one fetch together with the request that caused it.
#
# @param request [Hash] request summary; must carry +:uid+
# @param result the finished HTTP request object, passed on to +result+
# @param options [Hash] must contain +:success+
# @return [Hash] with keys +:response+, +:request+, +:uid+
# @raise [KeyError] when +options+ has no +:success+ entry
def response(request, result, options = {})
  succeeded = options.fetch(:success)
  {
    response: result(request, result, succeeded),
    request: request,
    uid: request[:uid]
  }
end
response_time(request)
click to toggle source
# File lib/web_fetch/gatherer.rb, line 102
# Seconds elapsed between the request's recorded start time and now.
#
# @param request [Hash] must carry +:start_time+ (a Time)
# @return [Float] elapsed seconds
def response_time(request)
  finished_at = Time.now.utc
  finished_at - request[:start_time]
end
result(request, result, success)
click to toggle source
# File lib/web_fetch/gatherer.rb, line 114
# Converts a finished HTTP request object into a plain Hash.
#
# NOTE(review): em-http-request's client appears to define +headers+ as a
# block-registering setter rather than a reader, in which case +:headers+
# would be nil there - confirm against the HTTP implementation in use.
#
# @param request [Hash] request summary, used for the response time
# @param result [#response, #headers, #response_header, #error]
# @param success [Boolean] whether the fetch succeeded
# @return [Hash] +:success+, +:body+ (Base64), +:headers+, +:status+,
#   +:response_time+, plus +:error+ when the fetch did not succeed
def result(request, result, success)
  payload = {
    success: success,
    body: Base64.encode64(result.response),
    headers: result.headers,
    status: result.response_header.status,
    response_time: response_time(request)
  }
  # The error is attached for failures only; it may itself be nil.
  payload[:error] = result.error&.inspect unless success
  payload
end
tag_requests()
click to toggle source
# File lib/web_fetch/gatherer.rb, line 87
# Pairs every incoming request with its content hash and a fresh uid.
#
# @return [Array<Hash>] one +{ request:, hash:, uid: }+ entry per request
def tag_requests
  @requests.map do |request|
    digest = hash(request)
    { request: request, hash: digest, uid: uid }
  end
end
uid()
click to toggle source
# File lib/web_fetch/gatherer.rb, line 98
# Generates a new random identifier for a request.
#
# @return [String] a UUID string from SecureRandom
def uid
  SecureRandom.uuid
end
validate()
click to toggle source
# File lib/web_fetch/gatherer.rb, line 64
# Runs every request-list check in turn and reports each failure through
# +error+. All four checks are always evaluated; none short-circuits the
# others.
#
# @return the value of +error(:missing_url)+ when a URL is missing,
#   otherwise nil
def validate
  error(:requests_missing) if requests_missing?
  error(:requests_not_array) if requests_not_array?
  error(:requests_empty) if requests_empty?
  return unless missing_url?

  error(:missing_url)
end