class Unwind::RedirectFollower
Attributes
final_url[R]
original_url[R]
redirect_limit[R]
redirects[R]
response[R]
Public Class Methods
new(original_url, limit=5)
click to toggle source
# File lib/unwind.rb, line 14
# Prepares a follower for +original_url+ without issuing any request.
#
# original_url:: URL at which resolution will start.
# limit::        value stored as the redirect limit (defaults to 5).
#
# The list of recorded redirects starts out empty.
def initialize(original_url, limit=5)
  @original_url = original_url
  @redirect_limit = limit
  @redirects = []
end
resolve(original_url, limit=5)
click to toggle source
# File lib/unwind.rb, line 57
# Convenience entry point: builds a follower for +original_url+ with the
# given +limit+ and immediately calls #resolve on it.
#
# Returns whatever the instance-level #resolve returns.
def self.resolve(original_url, limit=5)
  follower = new(original_url, limit)
  follower.resolve
end
Public Instance Methods
redirected?()
click to toggle source
# File lib/unwind.rb, line 19
# True when the final URL differs from the URL the follower was created
# with, false when the two are equal.
def redirected?
  final_url != original_url
end
resolve(current_url=nil, options={})
click to toggle source
# File lib/unwind.rb, line 23
# Issues a GET for +current_url+ (falling back to the original URL when
# none is given) and dispatches on the response:
#
# * an HTTP redirection response is followed via #handle_redirect,
# * a meta-refresh target found by #meta_refresh? is followed the same way,
# * anything else is passed to #handle_final_response.
#
# current_url:: URL to request; nil means "start from original_url".
# options::     extra request headers (a nil value is treated as empty).
#
# Calls #ok_to_continue? before doing any work, so the redirect-limit
# check runs on every hop. Returns +self+.
def resolve(current_url=nil, options={})
  ok_to_continue?

  current_url ||= original_url

  # Only the URL resolution matters here, so this header is always added
  # on top of the caller-supplied ones.
  headers = (options || {}).merge("accept-encoding" => "none")

  target = URI.parse(current_url)
  get = Net::HTTP::Get.new(target)
  headers.each { |name, value| get.add_field(name, value) }

  # TLS is switched on purely from the URL scheme.
  use_ssl = target.scheme == 'https'
  response = Net::HTTP.start(target.host, target.port, :use_ssl => use_ssl) do |http|
    http.request(get)
  end

  if is_response_redirect?(response)
    handle_redirect(redirect_url(response), current_url, response, headers)
  else
    # The meta-refresh check only runs for non-redirect responses.
    meta_uri = meta_refresh?(current_url, response)
    if meta_uri
      handle_redirect(meta_uri, current_url, response, headers)
    else
      handle_final_response(current_url, response)
    end
  end

  self
end
Private Instance Methods
canonical_link?(response)
click to toggle source
# File lib/unwind.rb, line 118
# Looks for a <tt><link rel="canonical" href="..."></tt> tag in the
# response body (case-insensitive; single or double quotes; attributes in
# exactly that order).
#
# response:: object responding to +body+.
#
# Returns the href value as a String (round-tripped through
# Addressable::URI.parse), or +false+ when no such tag is found.
def canonical_link?(response)
  # A response may carry no body at all; treat that as "no canonical link"
  # instead of calling #match on nil.
  body = response.body || ""
  body_match = body.match(/<link rel=[\'\"]canonical[\'\"] href=[\'\"](.*?)[\'\"]/i)
  body_match ? Addressable::URI.parse(body_match[1]).to_s : false
end
handle_final_response(current_url, response)
click to toggle source
# File lib/unwind.rb, line 77
# Records the outcome for a response that is not being followed further.
#
# current_url:: URL that produced +response+ (a String copy is used).
# response::    the response object; stored in @response.
#
# For a Net::HTTPSuccess response that carries a canonical link (see
# #canonical_link?), the current URL is appended to @redirects and the
# canonical URL becomes @final_url (made absolute against the current URL
# when it is relative). In every other case @final_url is the current URL.
#
# Returns +response+.
def handle_final_response(current_url, response)
  landing_url = current_url.dup.to_s

  # The canonical lookup is only attempted for successful responses.
  canonical = response.is_a?(Net::HTTPSuccess) && canonical_link?(response)

  if canonical
    @redirects << landing_url
    canonical_uri = Addressable::URI.parse(canonical)
    @final_url =
      if canonical_uri.relative?
        make_url_absolute(landing_url, canonical_uri).to_s
      else
        canonical
      end
  else
    @final_url = landing_url
  end

  @response = response
end
handle_redirect(uri_to_redirect, url, response, headers)
click to toggle source
# File lib/unwind.rb, line 72
# Follows one redirect hop.
#
# uri_to_redirect:: target of the redirect; its +normalize+ result is what
#                   gets requested next.
# url::             URL that issued the redirect; passed to #record_redirect.
# response::        response that triggered the redirect.
# headers::         headers used for the request just made.
#
# The headers for the next request come from apply_cookie(response, headers).
# Returns the result of the nested #resolve call.
def handle_redirect(uri_to_redirect, url, response, headers)
  record_redirect(url)
  next_url = uri_to_redirect.normalize
  next_headers = apply_cookie(response, headers)
  resolve(next_url, next_headers)
end
is_response_redirect?(response)
click to toggle source
# File lib/unwind.rb, line 68
# True when +response+ is an instance of Net::HTTPRedirection (or one of
# its subclasses), false otherwise.
def is_response_redirect?(response)
  response.is_a?(Net::HTTPRedirection)
end
make_url_absolute(current_url, relative_url)
click to toggle source
# File lib/unwind.rb, line 132
# Turns +relative_url+ into an absolute URI using +current_url+ as the base.
#
# current_url::  base URL (anything Addressable::URI.parse accepts).
# relative_url:: an Addressable::URI; returned untouched when it is already
#                absolute.
#
# Resolution is delegated to Addressable::URI#join, the same call
# #redirect_url uses for relative Location headers. Hand-assembling the
# result from the base's scheme/authority and the reference's raw path
# cannot express references such as "page.html", "?q=1" or "//host/x",
# which have to be merged with the base rather than copied verbatim.
#
# Returns an Addressable::URI.
def make_url_absolute(current_url, relative_url)
  return relative_url unless relative_url.relative?

  Addressable::URI.parse(current_url).join(relative_url)
end
meta_refresh?(current_url, response)
click to toggle source
# File lib/unwind.rb, line 108
# Detects an immediate HTML meta refresh
# (<tt><meta http-equiv="refresh" content="0; URL=..."></tt>) in a
# successful response.
#
# current_url:: URL that produced +response+; used as the base when the
#               refresh target is relative.
# response::    the response to inspect.
#
# Returns the refresh target as produced by #make_url_absolute, or nil when
# the response is not a Net::HTTPSuccess or contains no such tag.
def meta_refresh?(current_url, response)
  return unless Net::HTTPSuccess === response

  # A successful response may have no body; treat that as "no meta tag"
  # instead of calling #match on nil.
  body = response.body || ""
  body_match = body.match(/<meta http-equiv=\"refresh\" content=\"0; URL=(.*?)\"\s*\/*>/i)
  return unless body_match

  make_url_absolute(current_url, Addressable::URI.parse(body_match[1]))
end
ok_to_continue?()
click to toggle source
# File lib/unwind.rb, line 93
# Guard called before each request: raises TooManyRedirects once the
# remaining redirect limit has dropped below zero. Returns nil otherwise.
def ok_to_continue?
  return unless redirect_limit < 0

  raise TooManyRedirects
end
record_redirect(url)
click to toggle source
# File lib/unwind.rb, line 63
# Notes that +url+ redirected somewhere else: its String form is appended
# to @redirects and the remaining redirect limit is decreased by one.
#
# Returns the new value of the redirect limit.
def record_redirect(url)
  @redirects.push(url.to_s)
  @redirect_limit = @redirect_limit - 1
end
redirect_url(response)
click to toggle source
# File lib/unwind.rb, line 97
# Works out where a redirect response points.
#
# response:: the redirect response; must respond to <tt>['location']</tt>,
#            +body+ and +uri+.
#
# With a Location header: the header value is returned as an
# Addressable::URI, joined onto <tt>response.uri</tt> when it is relative.
#
# Without a Location header: the first <tt><a href="..."></tt> in the body
# is used as the target.
#
# Raises MissingRedirectLocation when neither source yields a target.
def redirect_url(response)
  location = response['location']

  if location.nil?
    body_match = (response.body || "").match(/<a href=\"([^>]+)\">/i)
    raise MissingRedirectLocation unless body_match

    # Capture group 1 is the href value; index 0 would be the whole
    # '<a href="...">' tag, which is not a URL.
    Addressable::URI.parse(body_match[1])
  else
    redirect_uri = Addressable::URI.parse(location)
    redirect_uri.relative? ? Addressable::URI.parse(response.uri).join(location) : redirect_uri
  end
end