class SiteMapper::Request

Get webpage wrapper.

Constants

Request info link

USER_AGENT

Request User-Agent

Public Class Methods

document(url, options = {}) click to toggle source

Given a URL, fetch it and parse the response body with Nokogiri::HTML. @param [String] url @param [Hash] options @return [Nokogiri::HTML] a nokogiri HTML object

# File lib/site_mapper/request.rb, line 16
# Fetches the body for +url+ through Request.response_body and hands it to
# Nokogiri::HTML for parsing.
#
# @param [String] url
# @param [Hash] options forwarded unchanged to Request.response_body
# @return [Nokogiri::HTML] whatever Nokogiri::HTML returns for the fetched body
def document(url, options = {})
  html = Request.response_body(url, options)
  Nokogiri::HTML(html)
end
resolve_url(url) click to toggle source

Resolves a URL string, following redirects. If the URL can't be resolved, the original URL is returned. @param [String] url to resolve @return [String] a URL string that potentially is a redirected URL @example Resolve google.com

resolve_url('google.com')
# => 'https://www.google.com'
# File lib/site_mapper/request.rb, line 60
# Resolves a URL string through UrlResolver and makes sure the result starts
# with a protocol.
#
# @param [String] url to resolve
# @return [String] the string returned by UrlResolver.resolve, prefixed with
#   'http://' when it does not already start with http:// or https://
# @example Resolve google.com
#   resolve_url('google.com')
#   # => 'https://www.google.com'
def resolve_url(url)
  resolved = UrlResolver.resolve(url)
  return resolved if has_protocol?(resolved)

  # Build a new string instead of calling String#prepend: prepend mutates its
  # receiver, which would alter (or raise FrozenError on) the string handed
  # back by UrlResolver -- which may be the caller's own argument.
  "http://#{resolved}"
end
response(url, options = {}) click to toggle source

Given a URL, get the response. @param [String] url @param [Hash] options @return [Net::HTTPOK] if response is successful, raises error otherwise @example get example.com and resolve the URL

Request.response('example.com', resolve: true)

@example get example.com and do not resolve the URL

Request.response('http://example.com')

@example get example.com and resolve the URL

Request.response('http://example.com', resolve: true)

@example get example.com and resolve the URL and use a custom User-Agent

Request.response('http://example.com', resolve: true, user_agent: 'MyUserAgent')
# File lib/site_mapper/request.rb, line 32
# Performs an HTTP GET request for the given URL and returns the response.
#
# @param [String] url absolute URL to fetch
# @param [Hash] options
# @option options [Boolean] :resolve (false) run the URL through resolve_url
#   before requesting it
# @option options [String] :user_agent (SiteMapper::USER_AGENT) value sent in
#   the User-Agent header
# @return [Net::HTTPResponse] the object returned by Net::HTTP#request; the
#   status code is not inspected here
# @example get example.com and do not resolve the URL
#   Request.response('http://example.com')
# @example get example.com, resolve the URL and use a custom User-Agent
#   Request.response('http://example.com', resolve: true, user_agent: 'MyUserAgent')
def response(url, options = {})
  options = {
    resolve: false,
    user_agent: SiteMapper::USER_AGENT
  }.merge(options)
  target_url = options[:resolve] ? resolve_url(url) : url
  uri        = URI.parse(target_url)
  http       = Net::HTTP.new(uri.host, uri.port)
  # Decide on TLS from the parsed scheme rather than the raw string prefix:
  # URI exposes the scheme lowercased, so 'HTTPS://host' is handled too.
  http.use_ssl = true if uri.scheme == 'https'

  request = Net::HTTP::Get.new(uri.request_uri)
  request['User-Agent'] = options[:user_agent]
  http.request(request)
end
response_body(*args) click to toggle source

Get the response body. Note: the method shown below does not rescue; any exception raised while making the request propagates to the caller. @see Request#response

# File lib/site_mapper/request.rb, line 49
# Returns the body of the response obtained by forwarding all arguments to
# +response+. No exception handling is done here.
#
# @see Request#response
def response_body(*args)
  reply = response(*args)
  reply.body
end

Private Class Methods

has_protocol?(url) click to toggle source
# File lib/site_mapper/request.rb, line 68
# Tells whether the URL string begins with an explicit http:// or https://
# prefix (case-sensitive).
#
# @param [String] url
# @return [Boolean]
def has_protocol?(url)
  url.start_with?('https://', 'http://')
end