class Medusa::HTTP
Constants
- REDIRECT_LIMIT
Maximum number of redirects to follow on each
get_response
- RETRY_LIMIT
Maximum number of times get_response retries a request after a timeout or connection error
Attributes
Public Class Methods
# File lib/medusa/http.rb, line 14
#
# Stores the option hash and builds the cookie store from the
# +:cookies+ option.
#
# @param opts [Hash] crawler options; the other methods of this class read
#   their settings from it
def initialize(opts = {})
  @opts = opts
  @cookie_store = CookieStore.new(opts[:cookies])
end
Public Instance Methods
Create new Pages from the response of an HTTP
request to url, including redirects
# File lib/medusa/http.rb, line 31
#
# Builds one Page per response received while requesting +url+
# (redirects included).
#
# @param url [URI, String] address to fetch; anything that is not already
#   a URI is converted with +URI()+
# @param referer [Object, nil] forwarded to +get+ and stored on each Page
# @param depth [Object, nil] stored on each Page as +:depth+
# @return [Array<Page>] the pages collected so far; when a StandardError
#   is raised, a Page carrying that error is appended
def fetch_pages(url, referer = nil, depth = nil)
  pages = []
  url = URI(url) unless url.is_a?(URI)

  get(url, referer) do |body, headers, code, location, redirect_to, response_time|
    pages << Page.new(location,
                      body: body,
                      headers: headers,
                      code: code,
                      referer: referer,
                      depth: depth,
                      redirect_to: redirect_to,
                      response_time: response_time)
  end

  pages
rescue StandardError => e
  # Report the failure as a Page instead of propagating the exception.
  pages << Page.new(url, error: e)
end
The HTTP basic authentication options, as described at www.ruby-doc.org/stdlib/libdoc/open-uri/rdoc/OpenURI/OpenRead.html. Note that userinfo is deprecated [RFC 3986].
# File lib/medusa/http.rb, line 77
#
# @return [Object, nil] the +:http_basic_authentication+ option, or nil
#   when it was not supplied
def http_basic_authentication
  @opts[:http_basic_authentication]
end
The proxy options as in www.ruby-doc.org/stdlib/libdoc/open-uri/rdoc/OpenURI/OpenRead.html
# File lib/medusa/http.rb, line 91
#
# @return [Object, nil] the +:proxy+ option, or nil when it was not supplied
def proxy
  @opts[:proxy]
end
The proxy address string
# File lib/medusa/http.rb, line 98
#
# @return [Object, nil] the +:proxy_host+ option, or nil when it was not
#   supplied
def proxy_host
  @opts[:proxy_host]
end
The proxy authentication options as in www.ruby-doc.org/stdlib/libdoc/open-uri/rdoc/OpenURI/OpenRead.html
# File lib/medusa/http.rb, line 84
#
# @return [Object, nil] the +:proxy_http_basic_authentication+ option, or
#   nil when it was not supplied
def proxy_http_basic_authentication
  @opts[:proxy_http_basic_authentication]
end
The proxy port
# File lib/medusa/http.rb, line 105
#
# @return [Object, nil] the +:proxy_port+ option, or nil when it was not
#   supplied
def proxy_port
  @opts[:proxy_port]
end
HTTP
read timeout in seconds
# File lib/medusa/http.rb, line 112
#
# @return [Object, nil] the +:read_timeout+ option, or nil when it was not
#   supplied
def read_timeout
  @opts[:read_timeout]
end
The maximum number of redirects to follow
# File lib/medusa/http.rb, line 54
#
# @return [Object] the +:redirect_limit+ option when it is set to a truthy
#   value, otherwise the REDIRECT_LIMIT constant
def redirect_limit
  configured = @opts[:redirect_limit]
  configured || REDIRECT_LIMIT
end
The user-agent string which will be sent with each request, or nil if no such option is set
# File lib/medusa/http.rb, line 62
#
# @return [Object, nil] the +:user_agent+ option, or nil when it was not
#   supplied
def user_agent
  @opts[:user_agent]
end
Private Instance Methods
Allowed to connect to the requested url?
# File lib/medusa/http.rb, line 188
#
# Decides whether a request that started at +from_url+ may continue to
# +to_url+.
#
# @param to_url [URI] candidate target
# @param from_url [URI] URL of the original request
# @return [Boolean] true when +to_url+ has no host or has the same host as
#   +from_url+
def allowed?(to_url, from_url)
  # A target without a host is always accepted.
  return true if to_url.host.nil?

  to_url.host == from_url.host
end
Retrieve HTTP
responses for url, including redirects. Yields the response body, headers, response code, URI location, redirect target, and response time for each response.
# File lib/medusa/http.rb, line 123
#
# Requests +url+ and keeps following redirects, yielding once per response.
#
# The chain stops when a response carries no redirect target, when
# +allowed?+ rejects the target, or when +redirect_limit+ requests have
# been made.
#
# @param url [URI] address of the first request
# @param referer [Object, nil] passed through to +get_response+
# @yield [body, headers, code, location, redirect_to, response_time]
#   the values returned by +get_response+, with the status converted by
#   +Integer()+ and +location+ being the URI that was requested
# @return [nil]
def get(url, referer = nil)
  remaining = redirect_limit
  location = url
  following = true

  while following
    # A relative redirect target is resolved against the original
    # request url.
    location = url.merge(location) if location.relative?

    body, headers, response_time, status, redirect_to = get_response(location, referer)
    yield body, headers, Integer(status), location, redirect_to, response_time

    remaining -= 1
    location = redirect_to
    following = location && allowed?(redirect_to, url) && remaining > 0
  end
end
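Fetch url via open-uri, sending the appropriate User-Agent string. Returns the response body, headers, response time in milliseconds, status code, and redirect target (nil unless the server redirected).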
# File lib/medusa/http.rb, line 141
#
# Performs a single request for +url+ through open-uri, without following
# redirects, and returns the pieces of the response.
#
# Timeouts and connection errors are retried up to RETRY_LIMIT times with
# an exponential backoff of 3, 9, 27, ... seconds. When the retries are
# exhausted the last network error is re-raised, so the caller sees the
# real failure rather than a nil result.
#
# @param url [URI] absolute address to request
# @param referer [Object, nil] sent as the Referer header (via +to_s+) when
#   given
# @return [Array] body, headers (+meta+), response time in milliseconds,
#   status code, and redirect target (nil unless the server redirected)
# @raise [Timeout::Error, EOFError, Errno::ECONNREFUSED, Errno::ETIMEDOUT,
#   Errno::ECONNRESET] when the request still fails after RETRY_LIMIT retries
def get_response(url, referer = nil)
  opts = {}
  opts['User-Agent'] = user_agent if user_agent
  opts['Referer'] = referer.to_s if referer
  # Send stored cookies only when there are any, and either cookies are
  # accepted or an explicit :cookies option was supplied.
  send_cookies = !@cookie_store.empty? && (accept_cookies? || !@opts[:cookies].nil?)
  opts['Cookie'] = @cookie_store.to_s if send_cookies
  opts[:http_basic_authentication] = http_basic_authentication if http_basic_authentication
  opts[:proxy] = proxy if proxy
  opts[:proxy_http_basic_authentication] = proxy_http_basic_authentication if proxy_http_basic_authentication
  opts[:read_timeout] = read_timeout if read_timeout
  # Redirects are handled by the caller, so open-uri must not follow them.
  opts[:redirect] = false

  redirect_to = nil
  retries = 0
  begin
    # Monotonic clock: the measured duration is unaffected by wall-clock
    # adjustments.
    started = Process.clock_gettime(Process::CLOCK_MONOTONIC)
    begin
      resource =
        if Gem::Requirement.new('< 2.5').satisfied_by?(Gem::Version.new(RUBY_VERSION))
          open(url, opts)
        else
          URI.open(url, opts)
        end
    rescue OpenURI::HTTPRedirect => e_redirect
      # With :redirect => false open-uri reports a redirect as an exception
      # carrying both the response and the target.
      resource = e_redirect.io
      redirect_to = e_redirect.uri
    rescue OpenURI::HTTPError => e_http
      # Error statuses still carry a response worth returning.
      resource = e_http.io
    end
    finished = Process.clock_gettime(Process::CLOCK_MONOTONIC)
    response_time = ((finished - started) * 1000).round

    @cookie_store.merge!(resource.meta['set-cookie']) if accept_cookies?
    return resource.read, resource.meta, response_time, resource.status.first, redirect_to
  rescue Timeout::Error, EOFError, Errno::ECONNREFUSED, Errno::ETIMEDOUT, Errno::ECONNRESET
    retries += 1
    # Give up without a final pointless sleep and surface the real error.
    raise if retries > RETRY_LIMIT

    # Exponential backoff; `**` is exponentiation (`^` would be XOR).
    sleep(3**retries)
    retry
  ensure
    resource&.close unless resource&.closed?
  end
end