class WaybackArchiver::Request

Make HTTP requests

Constants

GETStruct

GET response wrapper

MAX_REDIRECTS

Max number of redirects before an error is raised

REQUEST_ERRORS

Known request errors

Public Class Methods

blank?(value) click to toggle source

Return whether a value is blank or not. @return [Boolean] whether the value is blank or not. @param [Object] value the value to check if it's blank or not. @example Returns true for nil.

Request.blank?(nil)

@example Returns true for empty string.

Request.blank?('')

@example Returns true for string with only spaces.

Request.blank?('  ')
# File lib/wayback_archiver/request.rb, line 190
# Tell whether a value should be treated as blank.
#
# nil and false are blank; any other value is blank when stripping its
# surrounding whitespace leaves an empty string.
#
# @param [Object] value the value to inspect (anything other than nil/false
#   must respond to #strip).
# @return [Boolean] true when the value is blank, false otherwise.
# @example
#   Request.blank?(nil)   # => true
#   Request.blank?('  ')  # => true
#   Request.blank?(' a ') # => false
def self.blank?(value)
  !value || value.strip.empty?
end
build_redirect_uri(uri, response) click to toggle source

Builds a URI for a redirect response. @return [URI] to redirect to. @param [URI] uri that was requested. @param [Net::HTTPResponse] response the server response. @example Build redirect URI for example.com (let's pretend it will redirect..)

Request.build_redirect_uri('http://example.com', net_http_response)
# File lib/wayback_archiver/request.rb, line 142
# Builds the URI that a redirect response points to.
#
# An absolute Location header is returned as a parsed URI; a relative one is
# resolved against the URI that was requested.
#
# @param [URI, String] uri that was requested.
# @param [Net::HTTPResponse] response the server response.
# @return [URI] to redirect to.
# @raise [InvalidRedirectError] if the Location header is missing or cannot
#   be parsed as a URI.
def self.build_redirect_uri(uri, response)
  # Net::HTTPResponse#header is obsolete and simply returns the response
  # itself, so the header is read straight from the response.
  location_header = response.fetch('location') do
    raise InvalidRedirectError, "No location header found on redirect when requesting #{uri}"
  end

  location = URI.parse(location_header)
  return build_uri(uri) + location_header if location.relative?

  location
rescue URI::InvalidURIError => e
  # Surface malformed redirect targets as a redirect error rather than
  # leaking the parser's exception to callers.
  raise InvalidRedirectError,
        "Invalid location header #{location_header.inspect} on redirect when requesting #{uri}: #{e.message}"
end
build_response(uri, response) click to toggle source

Builds a Response object. @return [Response] @param [URI] uri that was requested. @param [Net::HTTPResponse] response the server response. @example Build Response object for example.com

Request.build_response(uri, net_http_response)
# File lib/wayback_archiver/request.rb, line 127
# Wraps a raw server response in a Response object.
#
# @param [URI] uri that was requested.
# @param [Net::HTTPResponse] response the server response.
# @return [Response] built from the response code, the response message, the
#   body as returned by parse_body, and the requested URI as a String.
def self.build_response(uri, response)
  status_code = response.code
  status_message = response.message
  body = parse_body(response.body)

  Response.new(status_code, status_message, body, uri.to_s)
end
build_uri(uri) click to toggle source

Build URI. @return [URI] the built URI. @param [URI, String] uri to build. @example Build URI for example.com

Request.build_uri('http://example.com')

@example Build URI for #<URI::HTTP example.com>

uri = URI.parse('http://example.com')
Request.build_uri(uri)
# File lib/wayback_archiver/request.rb, line 161
# Build URI.
#
# URI instances are returned as-is. Any other value gets "http://" prepended
# unless it already starts with an http(s) scheme, and is then parsed.
#
# @param [URI, String] uri to build.
# @return [URI] the given URI, or the parsed result.
# @example Build URI for example.com
#   Request.build_uri('example.com') # => #<URI::HTTP http://example.com>
def self.build_uri(uri)
  return uri if uri.is_a?(URI)

  # \A anchors to the start of the whole string (^ would match after any
  # newline); URI schemes are case-insensitive, hence the /i flag.
  uri = "http://#{uri}" unless uri =~ %r{\Ahttps?://}i
  URI.parse(uri)
end
get( uri, max_redirects: MAX_REDIRECTS, raise_on_http_error: false, follow_redirects: true ) click to toggle source

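Get response. @return [Response] the http response representation. @param [String, URI] uri to retrieve. @param max_redirects [Integer] max redirects (default: 10). @param raise_on_http_error [Boolean] raise ResponseError on an error HTTP status code instead of returning the response (default: false). @param follow_redirects [Boolean] follow redirects (default: true). @example Get example.com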

Request.get('example.com')

@example Get example.com and follow max 3 redirects

Request.get('http://example.com', max_redirects: 3)

@example Get example.com and don't follow redirects

Request.get('http://example.com', follow_redirects: false)

@raise [Error] super class of all exceptions that this method can raise @raise [ServerError] all server errors @raise [ClientError] all client errors @raise [HTTPError] all HTTP errors @raise [MaxRedirectError] too many redirects, subclass of HTTPError (raised regardless of the raise_on_http_error flag) @raise [ResponseError] server responded with a 4xx or 5xx HTTP status code, subclass of HTTPError (only raised if raise_on_http_error flag is true) @raise [UnknownResponseCodeError] server responded with an unknown HTTP status code, subclass of HTTPError (raised regardless of the raise_on_http_error flag) @raise [InvalidRedirectError] server responded with an invalid redirect, subclass of HTTPError (raised regardless of the raise_on_http_error flag)

# File lib/wayback_archiver/request.rb, line 68
# Perform a GET request, optionally following redirects.
#
# @param [String, URI] uri to retrieve.
# @param max_redirects [Integer] max number of redirects to follow
#   (default: MAX_REDIRECTS).
# @param raise_on_http_error [Boolean] raise ResponseError instead of
#   returning the response when the status code is classified as an error
#   (default: false).
# @param follow_redirects [Boolean] follow redirects (default: true).
# @return [Response] the http response representation.
# @raise [ResponseError] the status code is classified as an error and
#   raise_on_http_error is true.
# @raise [UnknownResponseCodeError] the status code could not be classified.
# @raise [MaxRedirectError] more than max_redirects redirects were encountered.
# @raise [StandardError] whatever error perform_request reports for a failed
#   request is re-raised as-is.
def self.get(
  uri,
  max_redirects: MAX_REDIRECTS,
  raise_on_http_error: false,
  follow_redirects: true
)
  uri = build_uri(uri)
  redirects_followed = 0

  until redirects_followed > max_redirects
    WaybackArchiver.logger.debug "Requesting #{uri}"

    http = Net::HTTP.new(uri.host, uri.port)
    if uri.scheme == 'https'
      http.use_ssl = true
      # NOTE(review): certificate verification is switched off here, so TLS
      # peers are not authenticated - confirm this is intentional.
      http.verify_mode = OpenSSL::SSL::VERIFY_NONE
    end

    request = Net::HTTP::Get.new(uri.request_uri)
    request['User-Agent'] = WaybackArchiver.user_agent

    outcome = perform_request(uri, http, request)
    response = outcome.response
    failure = outcome.error
    raise failure if failure

    code = response.code
    WaybackArchiver.logger.debug "[#{code}, #{response.message}] Requested #{uri}"

    case HTTPCode.type(code)
    when :redirect
      return build_response(uri, response) unless follow_redirects

      # Point the next iteration at the redirect target.
      uri = build_redirect_uri(uri, response)
      redirects_followed += 1
    when :success
      return build_response(uri, response)
    when :error
      raise ResponseError, "Failed with response code: #{code} when requesting #{uri}" if raise_on_http_error

      return build_response(uri, response)
    else
      raise UnknownResponseCodeError, "Unknown HTTP response code #{code} when requesting #{uri}"
    end
  end

  # Only reached when the redirect budget has been exhausted.
  raise MaxRedirectError, "Redirected too many times when requesting #{uri}"
end
parse_body(response_body) click to toggle source

Parse response body, handles regular and gzipped response bodies. @return [String] the response body. @param [String] response_body the server response body. @example Return response body for response.

Request.parse_body(net_http_response.body)
# File lib/wayback_archiver/request.rb, line 173
# Parse response body, handles regular and gzipped response bodies.
#
# @param [String, nil] response_body the server response body.
# @return [String] the gunzipped body; the body unchanged when it cannot be
#   decoded as gzip; or an empty string when the body is nil.
# @example Return response body for response.
#   Request.parse_body(net_http_response.body)
def self.parse_body(response_body)
  return '' unless response_body

  # The block form closes the reader once the body has been read.
  Zlib::GzipReader.wrap(StringIO.new(response_body), &:read)
rescue Zlib::Error
  # Not gzip at all, or a damaged gzip stream (Zlib::DataError is not a
  # Zlib::GzipFile::Error): fall back to the raw body.
  response_body
end

Private Class Methods

build_request_error(uri, error, error_wrapper_klass) click to toggle source
# File lib/wayback_archiver/request.rb, line 207
# Logs a failed request and wraps the failure in a GETStruct.
#
# @param [URI, String] uri that was requested.
# @param [Exception] error the error raised while performing the request.
# @param [Class] error_wrapper_klass error class instantiated with a message
#   made of the original error's class and message.
# @return [GETStruct] holding a new, empty Response and the wrapping error.
def self.build_request_error(uri, error, error_wrapper_klass)
  failure_summary = "#{error.class}, #{error.message}"
  WaybackArchiver.logger.error "Request to #{uri} failed: #{error_wrapper_klass}, #{failure_summary}"

  empty_response = Response.new
  wrapped_error = error_wrapper_klass.new(failure_summary)
  GETStruct.new(empty_response, wrapped_error)
end
perform_request(uri, http, request) click to toggle source
# File lib/wayback_archiver/request.rb, line 199
# Performs the request and wraps the outcome in a GETStruct.
#
# Errors matching a key of REQUEST_ERRORS are not raised; they are passed to
# build_request_error together with the wrapper class registered for them.
#
# @param [URI, String] uri that is requested (only used for error reporting).
# @param [Net::HTTP] http the connection used to perform the request.
# @param [Net::HTTPRequest] request the request to perform.
# @return [GETStruct] the response on success, otherwise the result of
#   build_request_error.
def self.perform_request(uri, http, request)
  # TODO: Consider retrying on certain HTTP response codes, i.e 429, 503
  response = http.request(request)
  GETStruct.new(response)
rescue *REQUEST_ERRORS.keys => e
  # rescue also matches subclasses of the listed errors, so look the wrapper
  # up through the ancestor chain (the error's own class comes first) rather
  # than by exact class, which would raise KeyError for a subclass.
  matched_klass = e.class.ancestors.find { |klass| REQUEST_ERRORS.key?(klass) }
  build_request_error(uri, e, REQUEST_ERRORS.fetch(matched_klass))
end