module GoGetter
Constants
- USER_AGENTS
Some user agents for use with websites that change their behavior according to your browser. Set by adding to http_headers: "User-Agent" => USER_AGENTS. Use www.useragentstring.com/pages/useragentstring.php to find more user agent strings.
Public Class Methods
get(uri, http_headers = {}, params = {})
click to toggle source
# File lib/go_getter/go_getter.rb, line 11
#
# Performs an HTTP(S) GET request and returns the response object.
#
# uri::          a URI, or any object whose +to_s+ parse_url can turn into one.
# http_headers:: hash of header name => value; each pair is added to the request.
# params::       options hash. Keys read here:
#                * :auth_user / :auth_pass   - basic-auth credentials (used only
#                  when both are present).
#                * :proxy_host / :proxy_port - proxy to connect through (used
#                  only when both are present); :proxy_user and :proxy_pass are
#                  passed along with them.
#                * :read_timeout             - read timeout in seconds, default 600.
#                * :max_redirects            - redirects to follow, default 1.
#
# A redirect response is handed to handle_redirection and its result is
# returned; any other response gets +final_uri+ set to the requested URI.
# Neither the URI object nor the caller's +params+ hash is modified.
def GoGetter.get(uri, http_headers = {}, params = {})
  uri = parse_url(uri.to_s) unless uri.is_a? URI

  # Build the request target in a fresh string. Appending the query to
  # uri.path itself would change the URI object (and with it uri.to_s and
  # the value later stored as final_uri).
  path = uri.path.empty? ? "/" : uri.path
  path = "#{path}?#{uri.query}" if uri.query

  request = Net::HTTP::Get.new(path)
  http_headers.each { |key, value| request.add_field key, value }

  # basic authentication
  request.basic_auth(params[:auth_user], params[:auth_pass]) if params[:auth_user] && params[:auth_pass]

  # proxy
  klass =
    if params[:proxy_host] && params[:proxy_port]
      Net::HTTP::Proxy(params[:proxy_host], params[:proxy_port], params[:proxy_user], params[:proxy_pass])
    else
      Net::HTTP
    end

  # SSL
  # NOTE(review): VERIFY_NONE turns off certificate verification for every
  # https request made through this method - confirm this is intended.
  opt = (uri.scheme == "https") ? { use_ssl: true, verify_mode: OpenSSL::SSL::VERIFY_NONE } : {}

  response = klass.start(uri.host, uri.port, opt) do |http|
    http.read_timeout = params.fetch(:read_timeout, 600)
    http.request(request)
  end

  if response.is_a?(Net::HTTPRedirection)
    # Allow a single redirection by default. The redirect handling works on a
    # copy of the options so the caller's hash can be reused for later calls.
    redirect_params = params.merge(max_redirects: params.fetch(:max_redirects, 1))
    response = handle_redirection(uri, response, http_headers, redirect_params)
  else
    # NOTE(review): final_uri= is not a standard-library Net::HTTPResponse
    # method; presumably it is added elsewhere in this project - confirm.
    response.final_uri = uri
  end

  response
end
handle_redirection(from_uri, response, http_headers, params)
click to toggle source
# File lib/go_getter/go_getter.rb, line 57
#
# Follows the redirect announced by +response+ when the redirect budget allows.
#
# from_uri::     URI of the request that produced +response+.
# response::     the redirect response; only its 'Location' header is read.
# http_headers:: passed unchanged to the follow-up request.
# params::       options hash. :max_redirects is the number of redirects that
#                may still be followed (a missing key counts as 0); :uris_seen
#                is the Set of URIs already requested in this redirect chain.
#
# Returns the result of GoGetter.get for the new location. Returns +response+
# itself when no redirects remain, when there is no 'Location' header, or when
# the new location was already requested in this chain. +params+ is not
# modified; the follow-up request receives an updated copy.
def GoGetter.handle_redirection(from_uri, response, http_headers, params)
  remaining = params.fetch(:max_redirects, 0)
  location = response['Location']
  return response unless remaining > 0 && location

  seen = Set.new(params[:uris_seen]) << from_uri

  # Resolve the header against the URI we came from, so that a bare path, a
  # relative path or a scheme-less "//host/path" all become a full URI of the
  # same kind as from_uri (which also makes them comparable with seen URIs).
  new_uri = from_uri.merge(location)

  # avoid infinite redirect loops
  return response if seen.member?(new_uri)

  # Request the new location just as we did the old one, with one redirect
  # fewer left to spend.
  GoGetter.get(new_uri, http_headers, params.merge(max_redirects: remaining - 1, uris_seen: seen))
end
parse_url(url)
click to toggle source
Given a URL, which may not be formatted properly, parse it into a URI.
# File lib/go_getter/go_getter.rb, line 46
#
# Turns a loosely formatted URL string into a URI.
#
# url:: the URL text. Surrounding whitespace is ignored, and "http://" is
#       prepended unless the text already starts with http:// or https://
#       (case-insensitive).
#
# Returns the parsed URI. An empty path is replaced by "/", so the URI always
# yields a valid request target, with or without a query string.
# Raises whatever URI.parse raises for text it cannot parse.
def GoGetter.parse_url(url)
  url = url.to_s.strip
  url = "http://#{url}" unless url =~ %r{\Ahttps?://}i

  uri = URI.parse(url)
  uri.path = "/" if uri.path.empty?
  uri
end