class URLCanonicalize::HTTP

Persistent connection for possible repeated requests to the same host

Attributes

last_known_good[RW]

Public Class Methods

new(raw_url) click to toggle source
# File lib/url_canonicalize/http.rb, line 27
# Remember the URL exactly as the caller supplied it. No conversion happens
# here; #url turns it into a String the first time it is read.
#
# @param raw_url [#to_s] the starting URL
def initialize(raw_url)
  @raw_url = raw_url
end

Public Instance Methods

do_request(http_request) click to toggle source
# File lib/url_canonicalize/http.rb, line 19
# Send a prepared request over the connection returned by #http.
#
# @param http_request [Object] passed unchanged to the connection's #request
# @return [Object] whatever the connection's #request returns
def do_request(http_request)
  connection = http
  connection.request(http_request)
end
fetch() click to toggle source
# File lib/url_canonicalize/http.rb, line 6
# Keep handling responses until #handle_response returns a truthy value,
# then hand back whatever last_known_good holds at that point.
#
# @return [Object] the value of last_known_good
def fetch
  loop do
    return last_known_good if handle_response
  end
end
uri() click to toggle source
# File lib/url_canonicalize/http.rb, line 10
# Parsed form of the current URL, cached in @uri until #url= clears it.
# Malformed URLs will raise a URLCanonicalize exception.
#
# @return [Object] the result of URLCanonicalize::URI.parse(url)
def uri
  return @uri if @uri

  @uri = URLCanonicalize::URI.parse(url)
end
url=(value) click to toggle source
# File lib/url_canonicalize/http.rb, line 14
# Point this object at a different URL.
#
# @param value [#to_s] the new URL; stored as a String
def url=(value)
  @url = value.to_s
  @uri = nil # drop the cached parse so #uri re-parses the new URL on next use
end

Private Instance Methods

fetch_response() click to toggle source
# File lib/url_canonicalize/http.rb, line 44
# Aim the shared request object at the current URI and run it.
#
# @return [Object] whatever the request's #fetch returns
def fetch_response
  prepared = request.with_uri(uri)
  prepared.fetch
end
handle_canonical_found() click to toggle source
# File lib/url_canonicalize/http.rb, line 102
# Handle a response that points at a canonical URL.
#
# Stores response.response as last_known_good (presumably the underlying HTTP
# response - confirm against the Response classes). Returns true when the
# response URL is the current URL or is already in redirect_list; otherwise
# switches to the response URL and returns false.
#
# @return [Boolean]
def handle_canonical_found
  self.last_known_good = response.response
  target = response_url

  if target == url || redirect_list.include?(target)
    true
  else
    set_url_from_response
    false
  end
end
handle_failure() click to toggle source
# File lib/url_canonicalize/http.rb, line 113
# Handle a failed response. If an earlier response was recorded as
# last_known_good, return true; otherwise raise.
#
# @return [true]
# @raise [URLCanonicalize::Exception::Failure] when there is no last_known_good
def handle_failure
  unless last_known_good
    raise URLCanonicalize::Exception::Failure, "#{response.failure_class}: #{response.message}"
  end

  true
end
handle_response() click to toggle source

Parse the response, then clear the cached response so that the next redirect can be followed

# File lib/url_canonicalize/http.rb, line 49
# Parse the current response, then forget the cached response and response
# URL so that the next read of #response fetches again.
#
# @return [Object] the value returned by #parse_response
def handle_response
  parse_response.tap do
    @response = nil
    @response_url = nil
  end
end
handle_success() click to toggle source
# File lib/url_canonicalize/http.rb, line 122
# Record the current response as last_known_good.
#
# @return [true] always
def handle_success
  self.last_known_good = response
  true
end
handle_unhandled_response() click to toggle source
# File lib/url_canonicalize/http.rb, line 118
# Fallback for a response whose class has no dedicated handler.
#
# @raise [URLCanonicalize::Exception::Failure] always; the message names the response's class
def handle_unhandled_response
  message = "Unhandled response type: #{response.class}"
  raise URLCanonicalize::Exception::Failure, message
end
http() click to toggle source
# File lib/url_canonicalize/http.rb, line 131
# Connection for the current URI. The existing connection is reused only when
# all of the following hold:
#   * a connection has already been built,
#   * the current URI has the same host and port as the one it was built for,
#   * its TLS setting still matches the current scheme.
# The scheme check matters because a redirect between http and https on the
# same host:port must not reuse a connection configured for the other scheme.
# In every other case a new connection is built and its URI is remembered.
#
# @return [Object] the connection built by #new_http
def http
  return @http if @http && same_host_and_port && @http.use_ssl? == (uri.scheme == 'https')

  @previous = uri
  @http = new_http
end
increment_redirects() click to toggle source
# File lib/url_canonicalize/http.rb, line 94
# Bump the redirect counter by one.
#
# @return [Integer] the new count
def increment_redirects
  @redirects = redirects.succ
end
max_redirects_reached?() click to toggle source
# File lib/url_canonicalize/http.rb, line 84
# Decide whether the redirect limit has been exceeded.
#
# Returns false while the count is within options[:max_redirects]. Once it is
# over the limit, returns true if a last_known_good response exists, and
# raises otherwise.
#
# @return [Boolean]
# @raise [URLCanonicalize::Exception::Redirect] over the limit with no last_known_good
def max_redirects_reached?
  # Use the memoized reader rather than the raw ivar: before the first
  # redirect is counted @redirects is nil, and nil > Integer would raise.
  count = redirects
  return false unless count > options[:max_redirects]
  return true if last_known_good

  raise URLCanonicalize::Exception::Redirect, "#{count} redirects is too many"
end
new_http() click to toggle source
# File lib/url_canonicalize/http.rb, line 146
# Build an unstarted Net::HTTP object for the current URI's host and port,
# with the timeouts taken from #options and TLS switched on for https URLs.
#
# @return [Net::HTTP]
def new_http
  Net::HTTP.new(uri.host, uri.port).tap do |conn|
    conn.open_timeout = options[:open_timeout]
    conn.read_timeout = options[:read_timeout]

    secure = uri.scheme == 'https'
    conn.use_ssl = secure # Can generate exception
    # NOTE(review): VERIFY_NONE disables certificate verification for every
    # https request made through this connection - confirm this is intended.
    conn.verify_mode = OpenSSL::SSL::VERIFY_NONE if secure
  end
end
options() click to toggle source
# File lib/url_canonicalize/http.rb, line 162
# Connection settings, built once and cached in @options.
#
# @return [Hash{Symbol => Integer}] :open_timeout, :read_timeout and :max_redirects
def options
  return @options if @options

  @options = {
    open_timeout: 8, # Twitter responds in >5s
    read_timeout: 15,
    max_redirects: 10
  }
end
parse_response() click to toggle source

Parse the response

# File lib/url_canonicalize/http.rb, line 57
# Dispatch on the class of the current response and return the chosen
# handler's result. For a redirect, the result is truthy when either the loop
# check or the redirect-limit check returns truthy. Any class not listed goes
# to #handle_unhandled_response.
#
# @return [Object] the handler's return value
def parse_response
  current = response

  case current
  when URLCanonicalize::Response::Success then handle_success
  when URLCanonicalize::Response::Redirect then redirect_loop_detected? || max_redirects_reached?
  when URLCanonicalize::Response::CanonicalFound then handle_canonical_found
  when URLCanonicalize::Response::Failure then handle_failure
  else handle_unhandled_response
  end
end
previous() click to toggle source
# File lib/url_canonicalize/http.rb, line 142
# The value stored in @previous, or - when nothing has been stored yet - a
# blank placeholder whose host and port are both nil.
#
# @return [#host, #port]
def previous
  return @previous if @previous

  placeholder = Struct.new(:host, :port)
  @previous = placeholder.new
end
redirect_list() click to toggle source
# File lib/url_canonicalize/http.rb, line 90
# URLs recorded so far, created empty on first use and cached in @redirect_list.
#
# @return [Array]
def redirect_list
  return @redirect_list if @redirect_list

  @redirect_list = []
end
redirect_loop_detected?() click to toggle source
# File lib/url_canonicalize/http.rb, line 72
# Check whether the response URL has been seen before.
#
# A new URL is appended to redirect_list, the redirect counter is bumped, the
# current URL is switched to it, and false is returned. A repeated URL returns
# true when a last_known_good response exists, and raises otherwise.
#
# @return [Boolean]
# @raise [URLCanonicalize::Exception::Redirect] on a repeated URL with no last_known_good
def redirect_loop_detected?
  target = response_url

  unless redirect_list.include?(target)
    redirect_list << target
    increment_redirects
    set_url_from_response
    return false
  end

  return true if last_known_good

  raise URLCanonicalize::Exception::Redirect, 'Redirect loop detected'
end
redirects() click to toggle source
# File lib/url_canonicalize/http.rb, line 98
# Number of redirects counted so far; starts at 0.
#
# @return [Integer]
def redirects
  return @redirects if @redirects

  @redirects = 0
end
request() click to toggle source
# File lib/url_canonicalize/http.rb, line 40
# The Request helper bound to this object, built on first use and then reused.
#
# @return [Request]
def request
  return @request if @request

  @request = Request.new(self)
end
response() click to toggle source

Fetch the response, caching it until handle_response clears it

# File lib/url_canonicalize/http.rb, line 32
# The response for the current URL. Fetched on first access and cached in
# @response; once that is reset to nil the next access fetches again.
#
# @return [Object] the value returned by #fetch_response
def response
  return @response if @response

  @response = fetch_response
end
response_url() click to toggle source
# File lib/url_canonicalize/http.rb, line 36
# URL reported by the current response, cached in @response_url.
#
# @return [Object] the response's #url
def response_url
  return @response_url if @response_url

  @response_url = response.url
end
same_host_and_port() click to toggle source
# File lib/url_canonicalize/http.rb, line 138
# Whether the current URI has the same host and the same port as #previous.
#
# @return [Boolean]
def same_host_and_port
  current = uri
  last = previous

  current.host == last.host && current.port == last.port
end
set_url_from_response() click to toggle source
# File lib/url_canonicalize/http.rb, line 109
# Make the response's URL the current URL by assigning it through #url=.
def set_url_from_response
  self.url = response_url
end
url() click to toggle source
# File lib/url_canonicalize/http.rb, line 127
# The current URL as a String. Until a URL has been assigned, this is the
# String form of the raw URL given to the constructor, cached on first read.
#
# @return [String]
def url
  return @url if @url

  @url = @raw_url.to_s
end