class URLCanonicalize::HTTP
Persistent connection for possible repeated requests to the same host
Attributes
last_known_good[RW]
Public Class Methods
new(raw_url)
click to toggle source
# File lib/url_canonicalize/http.rb, line 27
# Remember the URL this instance should start from. The value is stored
# untouched; #url stringifies it the first time it is read.
#
# raw_url:: the starting URL (anything responding to +to_s+)
def initialize(raw_url)
  @raw_url = raw_url
end
Public Instance Methods
do_request(http_request)
click to toggle source
# File lib/url_canonicalize/http.rb, line 19
# Send +http_request+ through the connection object returned by #http and
# hand back whatever that connection's +request+ method returns.
def do_request(http_request)
  connection = http
  connection.request(http_request)
end
fetch()
click to toggle source
# File lib/url_canonicalize/http.rb, line 6
# Keep handling responses until #handle_response reports completion (a
# truthy value), then return +last_known_good+.
def fetch
  loop do
    finished = handle_response
    break last_known_good if finished
  end
end
uri()
click to toggle source
# File lib/url_canonicalize/http.rb, line 10
# Parsed form of the current #url, memoized in @uri until #url= resets it.
def uri
  return @uri if @uri

  # Malformed URLs will raise a URLCanonicalize exception
  @uri = URLCanonicalize::URI.parse(url)
end
url=(value)
click to toggle source
# File lib/url_canonicalize/http.rb, line 14
# Point this instance at a new URL.
#
# value:: the new URL; it is stored as a String via +to_s+
def url=(value)
  @url = value.to_s
  # Drop the memoized parse so #uri re-parses the new URL on next access.
  @uri = nil
end
Private Instance Methods
fetch_response()
click to toggle source
# File lib/url_canonicalize/http.rb, line 44
# Bind the request object to the current #uri and return the result of its
# +fetch+ call.
def fetch_response
  bound_request = request.with_uri(uri)
  bound_request.fetch
end
handle_canonical_found()
click to toggle source
# File lib/url_canonicalize/http.rb, line 102
# Handle a response that advertises a canonical URL.
#
# The wrapped response (+response.response+) is always recorded as
# +last_known_good+. Returns true (finished) when the advertised URL is the
# current URL or one already in the redirect list; otherwise switches to the
# advertised URL and returns false so the caller fetches again.
def handle_canonical_found
  self.last_known_good = response.response

  if response_url == url || redirect_list.include?(response_url)
    true
  else
    set_url_from_response
    false
  end
end
handle_failure()
click to toggle source
# File lib/url_canonicalize/http.rb, line 113
# Handle a failed response. If an earlier response was already recorded as
# +last_known_good+ the failure is tolerated and true is returned; otherwise
# URLCanonicalize::Exception::Failure is raised with the failure class and
# message taken from the response.
def handle_failure
  unless last_known_good
    raise URLCanonicalize::Exception::Failure, "#{response.failure_class}: #{response.message}"
  end

  true
end
handle_response()
click to toggle source
Parse the response, and clear the response ready to follow the next redirect
# File lib/url_canonicalize/http.rb, line 49
# Parse the current response and return the parse result. After a successful
# parse the memoized response and response URL are cleared so the next call
# fetches afresh. If parsing raises, the memoized values are left in place.
def handle_response
  parse_response.tap do
    @response = nil
    @response_url = nil
  end
end
handle_success()
click to toggle source
# File lib/url_canonicalize/http.rb, line 122
# Record the current response as +last_known_good+ and report completion.
#
# Always returns true.
def handle_success
  successful = response
  self.last_known_good = successful
  true
end
handle_unhandled_response()
click to toggle source
# File lib/url_canonicalize/http.rb, line 118
# Raise URLCanonicalize::Exception::Failure naming the class of a response
# that #parse_response has no branch for. Never returns.
def handle_unhandled_response
  raise(
    URLCanonicalize::Exception::Failure,
    "Unhandled response type: #{response.class}"
  )
end
http()
click to toggle source
# File lib/url_canonicalize/http.rb, line 131
# Connection object for the current #uri.
#
# An existing connection is reused only when host, port AND scheme all match
# the URI it was built for. Comparing host and port alone would reuse a
# connection configured without SSL after an http -> https redirect on the
# same explicit port (and vice versa), because #new_http decides +use_ssl+
# from the scheme at construction time.
#
# Returns the reused or newly built connection.
def http
  # @previous is always the URI that @http was built from once @http is set.
  return @http if @http && same_host_and_port && @previous.scheme == uri.scheme # reuse connection

  @previous = uri
  @http = new_http
end
increment_redirects()
click to toggle source
# File lib/url_canonicalize/http.rb, line 94
# Bump the redirect counter by one and return the new count. Reads through
# #redirects so an unset counter starts from that method's default.
def increment_redirects
  @redirects = redirects.succ
end
max_redirects_reached?()
click to toggle source
# File lib/url_canonicalize/http.rb, line 84
# Decide whether the redirect budget has been exceeded.
#
# Returns false while the redirect count is at or below
# +options[:max_redirects]+. Once the count exceeds it, returns true if a
# +last_known_good+ response exists (so the caller can stop and use it), and
# otherwise raises URLCanonicalize::Exception::Redirect.
#
# The count is read through #redirects rather than the raw instance
# variable, so calling this before any redirect has been counted yields
# false instead of failing on a nil comparison.
def max_redirects_reached?
  count = redirects
  return false unless count > options[:max_redirects]
  return true if last_known_good

  raise URLCanonicalize::Exception::Redirect, "#{count} redirects is too many"
end
new_http()
click to toggle source
# File lib/url_canonicalize/http.rb, line 146
# Build a Net::HTTP object for the current #uri's host and port, with the
# open/read timeouts taken from #options. SSL is switched on only for the
# 'https' scheme.
#
# NOTE(review): for https the verify mode is OpenSSL::SSL::VERIFY_NONE, i.e.
# the peer certificate is not verified. Presumably deliberate so that sites
# with broken certificates can still be canonicalized -- confirm.
def new_http
  Net::HTTP.new(uri.host, uri.port).tap do |conn|
    conn.open_timeout = options[:open_timeout]
    conn.read_timeout = options[:read_timeout]

    secure = uri.scheme == 'https'
    conn.use_ssl = secure # Can generate exception
    conn.verify_mode = OpenSSL::SSL::VERIFY_NONE if secure
  end
end
options()
click to toggle source
# File lib/url_canonicalize/http.rb, line 162
# Connection settings, built once and memoized in @options:
# open timeout, read timeout and the redirect limit.
def options
  return @options if @options

  @options = {
    open_timeout: 8, # Twitter responds in >5s
    read_timeout: 15,
    max_redirects: 10
  }
end
parse_response()
click to toggle source
Parse the response
# File lib/url_canonicalize/http.rb, line 57
# Dispatch on the class of the current response and return the handler's
# result. A Redirect counts as handled when either a redirect loop is
# detected or the redirect limit is reached; any response class without a
# branch goes to #handle_unhandled_response.
def parse_response
  current = response

  case current
  when URLCanonicalize::Response::Success        then handle_success
  when URLCanonicalize::Response::Redirect       then redirect_loop_detected? || max_redirects_reached?
  when URLCanonicalize::Response::CanonicalFound then handle_canonical_found
  when URLCanonicalize::Response::Failure        then handle_failure
  else handle_unhandled_response
  end
end
previous()
click to toggle source
# File lib/url_canonicalize/http.rb, line 142
# The value stored in @previous. Until something assigns it, a placeholder
# struct with nil +host+ and +port+ is created and memoized, so callers can
# compare against it without a nil check.
def previous
  return @previous if @previous

  placeholder = Struct.new(:host, :port)
  @previous = placeholder.new
end
redirect_list()
click to toggle source
# File lib/url_canonicalize/http.rb, line 90
# Memoized array held in @redirect_list; starts out empty. Callers append to
# and search the same array instance.
def redirect_list
  return @redirect_list if @redirect_list

  @redirect_list = []
end
redirect_loop_detected?()
click to toggle source
# File lib/url_canonicalize/http.rb, line 72
# Process a redirect target.
#
# A target not seen before is appended to the redirect list, the redirect
# counter is bumped, the current URL is switched to the target, and false is
# returned. A target already in the list is a loop: true is returned when a
# +last_known_good+ response exists, otherwise
# URLCanonicalize::Exception::Redirect is raised.
def redirect_loop_detected?
  unless redirect_list.include?(response_url)
    redirect_list << response_url
    increment_redirects
    set_url_from_response
    return false
  end

  return true if last_known_good

  raise URLCanonicalize::Exception::Redirect, 'Redirect loop detected'
end
redirects()
click to toggle source
# File lib/url_canonicalize/http.rb, line 98
# Current value of the @redirects counter, initialised to 0 on first read.
def redirects
  return @redirects if @redirects

  @redirects = 0
end
request()
click to toggle source
# File lib/url_canonicalize/http.rb, line 40
# Memoized Request object, constructed with this instance as its argument
# the first time it is needed.
def request
  return @request if @request

  @request = Request.new(self)
end
response()
click to toggle source
Fetch the response
# File lib/url_canonicalize/http.rb, line 32
# The response for the current URL, fetched via #fetch_response on first
# access and memoized in @response until that variable is cleared.
def response
  return @response if @response

  @response = fetch_response
end
response_url()
click to toggle source
# File lib/url_canonicalize/http.rb, line 36
# The +url+ reported by the current response, memoized in @response_url.
def response_url
  return @response_url if @response_url

  @response_url = response.url
end
same_host_and_port()
click to toggle source
# File lib/url_canonicalize/http.rb, line 138
# True when the current #uri has the same host and the same port as
# #previous. The port is only compared when the hosts match.
def same_host_and_port
  return false unless uri.host == previous.host

  uri.port == previous.port
end
set_url_from_response()
click to toggle source
# File lib/url_canonicalize/http.rb, line 109
# Make the response's URL the current URL by assigning it through #url=.
def set_url_from_response
  next_url = response_url
  self.url = next_url
end
url()
click to toggle source
# File lib/url_canonicalize/http.rb, line 127
# The current URL as a String. Defaults to the stringified raw URL given at
# construction until a value has been stored in @url.
def url
  return @url if @url

  @url = @raw_url.to_s
end