class Medusa::HTTP

Constants

REDIRECT_LIMIT

Maximum number of redirects to follow on each get_response

RETRY_LIMIT

Attributes

Public Class Methods

new(opts = {}) click to toggle source
# File lib/medusa/http.rb, line 14
# Set up a client from an options hash. The hash itself is retained for
# the option readers, and its :cookies entry is handed to a new CookieStore.
def initialize(opts = {})
  @opts = opts
  initial_cookies = opts[:cookies]
  @cookie_store = CookieStore.new(initial_cookies)
end

Public Instance Methods

accept_cookies?() click to toggle source

Does this HTTP client accept cookies from the server?

# File lib/medusa/http.rb, line 69
# Value of the :accept_cookies option (nil when the option was not given).
def accept_cookies?
  cookie_setting = @opts[:accept_cookies]
  cookie_setting
end
fetch_page(url, referer = nil, depth = nil) click to toggle source

Fetch a single Page from the response of an HTTP request to url. Just gets the final destination page.

# File lib/medusa/http.rb, line 23
# Fetch url through fetch_pages and hand back only the last page of the
# resulting list.
def fetch_page(url, referer = nil, depth = nil)
  all_pages = fetch_pages(url, referer, depth)
  all_pages.last
end
fetch_pages(url, referer = nil, depth = nil) click to toggle source

Create new Pages from the response of an HTTP request to url, including redirects

# File lib/medusa/http.rb, line 31
# Request url and build one Page per response yielded by #get (redirect
# hops included). If anything raises a StandardError along the way, a Page
# carrying that error is appended to whatever was collected so far and the
# list is returned instead of raising.
def fetch_pages(url, referer = nil, depth = nil)
  collected = []
  url = URI(url) unless url.is_a?(URI)

  get(url, referer) do |body, headers, code, location, redirect_to, response_time|
    collected << Page.new(
      location,
      body: body,
      headers: headers,
      code: code,
      referer: referer,
      depth: depth,
      redirect_to: redirect_to,
      response_time: response_time
    )
  end

  collected
rescue StandardError => e
  collected << Page.new(url, error: e)
end
http_basic_authentication() click to toggle source

The HTTP basic authentication options, as described at www.ruby-doc.org/stdlib/libdoc/open-uri/rdoc/OpenURI/OpenRead.html. Passing credentials as userinfo in the URL is deprecated [RFC3986].

# File lib/medusa/http.rb, line 77
# Value of the :http_basic_authentication option (nil when not given).
def http_basic_authentication
  credentials = @opts[:http_basic_authentication]
  credentials
end
proxy() click to toggle source

The proxy options as in www.ruby-doc.org/stdlib/libdoc/open-uri/rdoc/OpenURI/OpenRead.html

# File lib/medusa/http.rb, line 91
# Value of the :proxy option (nil when not given).
def proxy
  proxy_setting = @opts[:proxy]
  proxy_setting
end
proxy_host() click to toggle source

The proxy address string

# File lib/medusa/http.rb, line 98
# Value of the :proxy_host option (nil when not given).
def proxy_host
  host_setting = @opts[:proxy_host]
  host_setting
end
proxy_http_basic_authentication() click to toggle source

The proxy authentication options as in www.ruby-doc.org/stdlib/libdoc/open-uri/rdoc/OpenURI/OpenRead.html

# File lib/medusa/http.rb, line 84
# Value of the :proxy_http_basic_authentication option (nil when not given).
def proxy_http_basic_authentication
  proxy_credentials = @opts[:proxy_http_basic_authentication]
  proxy_credentials
end
proxy_port() click to toggle source

The proxy port

# File lib/medusa/http.rb, line 105
# Value of the :proxy_port option (nil when not given).
def proxy_port
  port_setting = @opts[:proxy_port]
  port_setting
end
read_timeout() click to toggle source

HTTP read timeout in seconds

# File lib/medusa/http.rb, line 112
# Value of the :read_timeout option (nil when not given).
def read_timeout
  timeout_setting = @opts[:read_timeout]
  timeout_setting
end
redirect_limit() click to toggle source

The maximum number of redirects to follow

# File lib/medusa/http.rb, line 54
# The :redirect_limit option when it is set to a truthy value, otherwise
# the REDIRECT_LIMIT constant.
def redirect_limit
  configured = @opts[:redirect_limit]
  return configured if configured

  REDIRECT_LIMIT
end
user_agent() click to toggle source

The user-agent string which will be sent with each request, or nil if no such option is set

# File lib/medusa/http.rb, line 62
# Value of the :user_agent option (nil when not given).
def user_agent
  agent = @opts[:user_agent]
  agent
end

Private Instance Methods

allowed?(to_url, from_url) click to toggle source

Allowed to connect to the requested url?

# File lib/medusa/http.rb, line 188
# May a request that started at from_url go on to to_url?
#
# A to_url without a host (a relative reference) is always allowed.
# Otherwise both URLs must name the same host. Hosts are compared without
# regard to case, because the host component of a URI is case-insensitive.
#
# to_url   - object responding to #host (the redirect target)
# from_url - object responding to #host (the originally requested URL)
#
# Returns true or false.
def allowed?(to_url, from_url)
  target_host = to_url.host
  return true if target_host.nil?

  origin_host = from_url.host
  # A host-bearing target can never match an origin that has no host.
  return false if origin_host.nil?

  target_host.downcase == origin_host.downcase
end
get(url, referer = nil) { |response, headers, Integer(response_code), loc, redirect_to, response_time| ... } click to toggle source

Retrieve HTTP responses for url, including redirects. For each response, yields the response body, the headers, the integer response code, the URI location, the redirect target (if any), and the response time.

# File lib/medusa/http.rb, line 123
# Request url and keep requesting redirect targets, yielding once per
# response with: body, headers, Integer status code, the location that was
# requested, the redirect target (or nil) and the response time.
#
# At least one request is always made. Following stops when there is no
# redirect target, when allowed? rejects the target, or when the redirect
# budget taken from redirect_limit is used up.
def get(url, referer = nil)
  remaining = redirect_limit
  loc = url
  follow = true

  while follow
    # A relative redirect target is resolved against the original request url.
    loc = url.merge(loc) if loc.relative?

    body, headers, response_time, status, redirect_to = get_response(loc, referer)
    yield body, headers, Integer(status), loc, redirect_to, response_time

    remaining -= 1
    loc = redirect_to
    follow = loc && allowed?(redirect_to, url) && remaining > 0
  end
end
get_response(url, referer = nil) click to toggle source

Get an HTTPResponse for url, sending the appropriate User-Agent string

# File lib/medusa/http.rb, line 141
# Perform one GET for url through open-uri and return the parts of the
# response.
#
# Request headers come from this client's options (User-Agent, Referer,
# Cookie), together with the open-uri options for basic authentication,
# proxy and read timeout. Redirect following is switched off, so a redirect
# surfaces as OpenURI::HTTPRedirect and its target is returned to the
# caller instead of being followed here. An OpenURI::HTTPError is not
# propagated; the error response itself is returned.
#
# url     - the URI to request
# referer - optional value for the Referer header
#
# Returns [body, meta, response_time_ms, status_code, redirect_to];
# redirect_to is nil unless the request was answered with a redirect.
#
# Timeouts and connection errors are retried up to RETRY_LIMIT times with a
# growing pause (3, 9, 27, ... seconds). When the limit is exceeded the last
# error is re-raised so the caller sees the real cause.
def get_response(url, referer = nil)
  opts = {}
  opts['User-Agent'] = user_agent if user_agent
  opts['Referer'] = referer.to_s if referer
  opts['Cookie'] = @cookie_store.to_s unless @cookie_store.empty? || (!accept_cookies? && @opts[:cookies].nil?)
  opts[:http_basic_authentication] = http_basic_authentication if http_basic_authentication
  opts[:proxy] = proxy if proxy
  opts[:proxy_http_basic_authentication] = proxy_http_basic_authentication if proxy_http_basic_authentication
  opts[:read_timeout] = read_timeout if read_timeout
  opts[:redirect] = false
  redirect_to = nil
  resource = nil
  retries = 0
  begin
    # Monotonic clock: the measurement is unaffected by wall-clock changes.
    started = Process.clock_gettime(Process::CLOCK_MONOTONIC)

    begin
      # URI.open only exists from Ruby 2.5 on; older rubies use Kernel#open.
      if Gem::Requirement.new('< 2.5').satisfied_by?(Gem::Version.new(RUBY_VERSION))
        resource = open(url, opts)
      else
        resource = URI.open(url, opts)
      end
    rescue OpenURI::HTTPRedirect => e_redirect
      resource = e_redirect.io
      redirect_to = e_redirect.uri
    rescue OpenURI::HTTPError => e_http
      resource = e_http.io
    end

    elapsed = Process.clock_gettime(Process::CLOCK_MONOTONIC) - started
    response_time = (elapsed * 1000).round
    @cookie_store.merge!(resource.meta['set-cookie']) if accept_cookies?
    return resource.read, resource.meta, response_time, resource.status.first, redirect_to
  rescue Timeout::Error, EOFError, Errno::ECONNREFUSED, Errno::ETIMEDOUT, Errno::ECONNRESET
    retries += 1
    # Out of retries: surface the real network error instead of returning nil.
    raise if retries > RETRY_LIMIT

    # Exponential backoff; `**` is exponentiation (`^` would be bitwise XOR).
    sleep(3**retries)
    retry
  ensure
    resource.close if resource && !resource.closed?
  end
end