class TinyGrabber::Agent

Constants

AGENT_ALIASES

Agent aliases given from www.useragentstring.com/pages/Chrome/

Attributes

basic_auth[W]

Basic authentication configuration

cookies[RW]

Cookies

debug[W]

Debug configuration

follow_location[W]

Follow location

headers[R]

Headers

perfect_url[RW]

Perfect URL flag: when set, the request URL is used as given, without removing the anchor or re-escaping it

proxy[RW]

Remote proxy configuration

read_timeout[W]

Max time to execute request

uri[RW]

Uri

user_agent[W]

Web browser name

verify_mode[W]

Set verify mode

Public Class Methods

new() click to toggle source

Initialize the object

# File lib/tiny_grabber/agent.rb, line 56
# Build an agent with its default configuration: a randomly chosen user
# agent, no proxy, no credentials, no headers or cookies, redirects not
# followed, a 10 second read timeout and SSL certificate verification
# disabled.
def initialize
  @debug = Debug.new

  # Initialize variables agent attributes
  # Array#sample picks one alias uniformly. The previous
  # `rand(count) - 1` index only reached every entry because an index of
  # -1 wraps around to the last element.
  # NOTE(review): assumes AGENT_ALIASES is an Array - confirm.
  @user_agent = AGENT_ALIASES.sample
  @proxy = []
  @basic_auth = {}
  @headers = {}
  @cookies = nil
  @perfect_url = false
  @follow_location = false
  @read_timeout = 10
  # Initialize variable for URI object
  @uri = nil
  # Initialize variable for Net::HTTP request object
  @http = Net::HTTP
  # Initialize variable for Net::HTTP response object
  @response = nil
  @verify_mode = OpenSSL::SSL::VERIFY_NONE
end

Public Instance Methods

convert_to_uri(url) click to toggle source

Initialize URI object from request url

@param url Request link

# File lib/tiny_grabber/agent.rb, line 254
# Build the URI object for a request and store it in @uri.
#
# Unless @perfect_url is set, the anchor is stripped and the url is
# unescaped and escaped again, so both raw and already-escaped urls end up
# escaped exactly once.
#
# @param url [String] Request link
def convert_to_uri(url)
  unless @perfect_url
    # Remove anchor
    url = url.gsub(/#.*\Z/, '')
    # URI.escape / URI.unescape were removed in Ruby 3.0; the parser object
    # provides the same pair of methods on old and new Rubies alike.
    parser = URI::DEFAULT_PARSER
    url = parser.escape(parser.unescape(url))
  end
  @uri = URI(url)
  @debug.save "-> [uri] = #{@uri}" if @debug.active
end
cookies=(cookies) click to toggle source

Set COOKIES agent attribute

@param cookies Request cookies

# File lib/tiny_grabber/agent.rb, line 158
# Set COOKIES agent attribute.
#
# A Hash is serialised into a Cookie header value: "name=value" pairs
# separated by "; ". A String is stored unchanged.
#
# @param cookies [Hash, String] Request cookies
# @raise [RuntimeError] when the value is neither a Hash nor a String
def cookies=(cookies)
  cookies = var_to_sym(cookies)
  # Pairs in a Cookie header are delimited by "; ". Joining with "&"
  # produced a single malformed cookie.
  cookies = cookies.map { |name, value| "#{name}=#{value}" }.join('; ') if cookies.is_a?(Hash)
  raise 'attribute cookies must be String' unless cookies.is_a?(String)
  @cookies = cookies
end
fetch(url, method = :get, headers = {}, params = {}) click to toggle source

Fetch a resource with the GET or POST HTTP method. Sets the USER_AGENT, BASIC_AUTH, HEADERS and COOKIES request attributes, makes the request, and saves the response COOKIES for subsequent requests.

@param url Resource link. @param method Request method. @param headers Request headers. @param params Additional request params.

# File lib/tiny_grabber/agent.rb, line 191
# Perform a GET or POST request and return the response.
#
# Applies the agent's user agent, basic auth, headers and cookies to the
# request, sends it, and stores the response headers and cookies on the
# agent. When @follow_location is set, HTTP redirects and meta refresh
# tags are followed by calling fetch again with the new address; those
# follow-up requests are always plain GETs without params.
#
# @param url [String] Resource link
# @param method [Symbol, String] Request method, :get or :post
# @param headers [Hash] Request headers; when not empty they replace the
#   headers stored on the agent
# @param params [Hash] Form data sent with a POST request
# @return the response of the last request performed
# @raise [ArgumentError] when method is neither GET nor POST
def fetch(url, method = :get, headers = {}, params = {})
  if @debug.active
    @debug.save '=============================='
    @debug.save "#{method.upcase} #{url}"
    @debug.save "-> [proxy] = #{@proxy}" if @proxy
    @debug.save "-> [params] = #{params}"
    @debug.save '------------------------------'
  end
  convert_to_uri url
  # Accept 'get' / 'POST' style values as well as the symbols.
  case method.to_s.downcase.to_sym
  when :get
    @request = Net::HTTP::Get.new(@uri.request_uri)
  when :post
    @request = Net::HTTP::Post.new(@uri.request_uri)
    @request.set_form_data(params)
  else
    # Without this branch @request stayed nil, or silently kept the
    # request object built by a previous call.
    raise ArgumentError, "unsupported HTTP method: #{method.inspect}"
  end
  # NOTE(review): set_user_agent writes into @headers, which the next line
  # replaces when headers are passed in, so the configured user agent does
  # not seem to reach the request in that case - confirm intent.
  set_user_agent if @user_agent
  set_basic_auth unless @basic_auth.empty?
  @headers = headers unless headers.empty?
  set_headers if @headers
  set_cookies if @cookies
  @response = send_request
  case @response
  # HTTP response code 1xx
  when Net::HTTPInformation
    @debug.save '<- [response] = Net::HTTPInformation' if @debug.active
  # HTTP response code 2xx
  when Net::HTTPSuccess
    save_headers
    save_cookies
    @debug.save "<- [response] = #{@response.code} Net::HTTPSuccess" if @debug.active
    # Follow meta refresh
    if @follow_location
      refresh = @response.ng.at_css('meta[http-equiv="refresh"]')
      @response = fetch refresh.attr('content').gsub(/\A.*?(http)/, 'http') if refresh
    end
  # HTTP response code 3xx
  when Net::HTTPRedirection
    @debug.save "<- [response] = #{@response.code} Net::HTTPRedirection" if @debug.active
    @debug.save 'try curl user_agent: tg.user_agent=\'curl\'' if @debug.active
    location = @response.header['Location']
    # Follow location
    if @follow_location && location
      # A Location header may hold a relative reference; resolve it against
      # the URI just requested. Absolute locations are passed on untouched.
      location = URI.join(@uri.to_s, location).to_s if location !~ %r{\A[a-z][a-z0-9+.\-]*://}i
      @response = fetch location
    else
      # Redirects are not followed, or there is no Location to follow.
      save_headers
      save_cookies
    end
  # HTTP response code 4xx
  when Net::HTTPClientError
    @debug.save "<- [response] = #{@response.code} Net::HTTPClientError" if @debug.active
  # HTTP response code 5xx
  when Net::HTTPServerError
    @debug.save "<- [response] = #{@response.code} Net::HTTPServerError" if @debug.active
  end
  @response.uri = @uri
  @debug.save_to_file @response.body if @debug.save_html
  @response
end
headers=(headers) click to toggle source

Set HEADERS agent attribute

@param headers Request headers

# File lib/tiny_grabber/agent.rb, line 149
# Set HEADERS agent attribute.
#
# @param headers [Hash] Request headers
# @raise [RuntimeError] when the value is not a Hash
def headers=(headers)
  if headers.is_a?(Hash)
    @headers = headers
  else
    raise 'attribute headers must be Hash'
  end
end
proxy=(proxy) click to toggle source

Initialize the Net::HTTP connection through a proxy provider. The TYPE attribute selects the proxy type: SOCKS4(5) or HTTP(s).

@param proxy Proxy configuration

# File lib/tiny_grabber/agent.rb, line 115
# Configure the proxy and pick the matching Net::HTTP connection class.
#
# Accepts either an "ip:port" / "ip:port:type" String or a Hash with :ip,
# :port and optional :type keys. A :type of :socks or 'socks' selects
# Net::HTTP.SOCKSProxy; anything else selects Net::HTTP::Proxy.
# NOTE(review): SOCKSProxy is not defined in this file - presumably it is
# supplied by a SOCKS extension loaded elsewhere; confirm.
#
# @param proxy [String, Hash] Proxy configuration
# @raise [RuntimeError] when the configuration is malformed
def proxy=(proxy)
  if proxy.is_a?(String)
    host, port_number, kind = proxy.split(':')
    raise 'attribute proxy must be in format ip:port' if host.nil? || port_number.nil?
    proxy = { ip: host, port: port_number, type: kind || :http }
  end

  settings = var_to_sym(proxy)
  raise 'attribute proxy must be Hash' unless settings.is_a?(Hash)
  raise 'attribute proxy must contain :ip and :port keys' if !settings[:ip] || !settings[:port]

  @proxy = settings
  @http =
    case settings[:type]
    when :socks, 'socks'
      Net::HTTP.SOCKSProxy(settings[:ip].to_s, settings[:port].to_s)
    else
      Net::HTTP::Proxy(settings[:ip], settings[:port])
    end
end
reset() click to toggle source

Clears headers and cookies

# File lib/tiny_grabber/agent.rb, line 335
# Clear the stored request state: headers become an empty Hash and
# cookies become nil. Other agent settings are left untouched.
def reset
  @headers = {}
  @cookies = nil
end
save_cookies() click to toggle source

Save response cookies in agent attribute

# File lib/tiny_grabber/agent.rb, line 323
# Save response cookies in agent attribute.
#
# When the response object exposes a `cookies` method its value is stored
# as is. Otherwise the Set-Cookie fields are reduced to their leading
# "name=value" pairs and joined with "; ", so that attributes such as
# Path or Expires are not sent back to the server as if they were cookies.
# The stored cookies are left untouched when the response sets none.
def save_cookies
  if @response.respond_to?(:cookies)
    cookies = @response.cookies
    @cookies = cookies if cookies
  else
    # get_fields keeps one entry per Set-Cookie header instead of a single
    # comma-joined string, which cannot be split reliably because Expires
    # dates contain commas.
    fields = @response.get_fields('Set-Cookie')
    return unless fields
    pairs = fields.map { |field| field.split(';', 2).first.to_s.strip }
    pairs = pairs.reject(&:empty?)
    @cookies = pairs.join('; ') unless pairs.empty?
  end
end
save_headers() click to toggle source

Save response headers in agent attribute

# File lib/tiny_grabber/agent.rb, line 313
# Save response headers in agent attribute.
#
# Does nothing when the response reports no header object. Otherwise the
# header object is stored in @headers and its 'transfer-encoding' entry is
# removed.
# NOTE(review): the stored object is the one returned by the response, so
# the delete below also affects the response itself - confirm intended.
def save_headers
  response_headers = @response.header
  return unless response_headers

  @headers = response_headers
  # Delete header TRANSFER_ENCODING for chain of requests
  @headers.delete('transfer-encoding')
  return unless @debug.active

  @debug.save "<- [headers] = #{@headers}"
end
send_request() click to toggle source

Send the request and get the response. Uses an SSL connection for the HTTPS link scheme.

# File lib/tiny_grabber/agent.rb, line 304
# Open a connection to the host and port of @uri, send @request and return
# the response.
#
# SSL is enabled when the URI scheme is 'https'; the agent's verify mode
# and read timeout are passed to the connection.
def send_request
  options = {
    use_ssl: @uri.scheme == 'https',
    verify_mode: @verify_mode,
    read_timeout: @read_timeout
  }
  @http.start(@uri.host, @uri.port, **options) do |connection|
    @debug.save "-> [read_timeout] = #{@read_timeout}" if @debug.active
    connection.request(@request)
  end
end
set_basic_auth() click to toggle source

Set BASIC_AUTH request authentication

# File lib/tiny_grabber/agent.rb, line 274
# Apply the :username and :password entries of @basic_auth to the current
# request as HTTP basic authentication.
def set_basic_auth
  username = @basic_auth[:username]
  password = @basic_auth[:password]
  @request.basic_auth(username, password)
  return unless @debug.active

  @debug.save "-> [basic_auth] = #{@basic_auth}"
end
set_cookies() click to toggle source

Set request COOKIES

# File lib/tiny_grabber/agent.rb, line 296
# Put the stored cookies into the Cookie header of the current request.
def set_cookies
  @request['Cookie'] = @cookies
  return unless @debug.active

  @debug.save "-> [cookies] = #{@cookies}"
end
set_headers() click to toggle source

Set request HEADERS

# File lib/tiny_grabber/agent.rb, line 281
# Copy the agent headers onto the current request.
#
# Accept and User-Agent replace the value already present on the request;
# every other header is appended with add_field.
def set_headers
  # Net::HTTP request objects are created with default Accept and
  # User-Agent fields. Appending to them would send values such as
  # "Ruby, <agent>", so these two are overwritten, whatever the case of
  # the given name.
  replaced = %w[accept user-agent]
  @headers.each do |name, value|
    name = String(name)
    if replaced.include?(name.downcase)
      @request[name] = value
    else
      @request.add_field(name, value)
    end
  end
  @debug.save "-> [headers] = #{@headers}" if @debug.active
end
set_user_agent() click to toggle source

Set USER_AGENT request attribute

# File lib/tiny_grabber/agent.rb, line 267
# Store the configured user agent under the 'User-Agent' key of the agent
# headers.
def set_user_agent
  @headers['User-Agent'] = @user_agent
  return unless @debug.active

  @debug.save "-> [user_agent] = #{@user_agent}"
end
var_to_sym(var, str_to_sym = false) click to toggle source

Convert a variable and its contents to symbols

@param var Variable need to convert

# File lib/tiny_grabber/agent.rb, line 344
# Recursively symbolize a value.
#
# Hash keys are converted with to_sym and hash values and array elements
# are converted recursively. Strings become symbols only when str_to_sym
# is true. Any other value is returned unchanged.
#
# @param var Variable need to convert
# @param str_to_sym [Boolean] also convert String values to symbols
# @return the converted copy (new Hash / Array) or the value itself
def var_to_sym(var, str_to_sym = false)
  case var
  when Hash
    var.each_with_object({}) do |(key, value), converted|
      converted[key.to_sym] = var_to_sym(value, str_to_sym)
    end
  when Array
    var.map { |item| var_to_sym(item, str_to_sym) }
  when String
    str_to_sym ? var.to_sym : var
  else
    var
  end
end