class TinyGrabber::Agent
Constants
- AGENT_ALIASES
Agent
aliases taken from www.useragentstring.com/pages/Chrome/
Attributes
Basic authentication configuration
Debug
configuration
Follow location
Headers
perfect url
Remote proxy configuration
Max time to execute request
Uri
Web browser name
Set verify mode
Public Class Methods
Initialize the object
# File lib/tiny_grabber/agent.rb, line 56
#
# Build an agent with its default configuration: a random User-Agent alias,
# empty proxy / basic auth / headers, no cookies, url normalisation enabled
# (@perfect_url = false), redirects not followed, a read timeout of 10 and
# OpenSSL::SSL::VERIFY_NONE as SSL verify mode.
def initialize
  @debug = Debug.new
  # Agent attributes.
  # `sample` picks one alias uniformly at random (nil for an empty list),
  # which is what the former `AGENT_ALIASES[rand(count) - 1]` amounted to.
  @user_agent = AGENT_ALIASES.sample
  @proxy = []
  @basic_auth = {}
  @headers = {}
  @cookies = nil
  @perfect_url = false
  @follow_location = false
  @read_timeout = 10
  # URI object of the current request
  @uri = nil
  # Net::HTTP class used to open connections
  @http = Net::HTTP
  # Net::HTTP response object of the last request
  @response = nil
  # NOTE(review): VERIFY_NONE disables certificate verification - confirm
  # this default is intended.
  @verify_mode = OpenSSL::SSL::VERIFY_NONE
end
Public Instance Methods
Initialize URI object from request url
@param url Request link
# File lib/tiny_grabber/agent.rb, line 254
#
# Build the URI object for a request url and store it in @uri.
#
# Unless @perfect_url is set the url is normalised first: the anchor is
# removed and the string is unescaped and then escaped again, so raw and
# already percent-encoded urls end up in the same encoded form.
#
# URI.escape / URI.unescape were removed in Ruby 3.0. URI::RFC2396_Parser
# carries the same implementation and is available on old and new Rubies.
#
# @param url Request link
def convert_to_uri(url)
  unless @perfect_url
    parser = URI::RFC2396_Parser.new
    # Remove anchor
    url = url.gsub(/#.*\Z/, '')
    # Unescape first so an already escaped url is not escaped twice
    url = parser.escape(parser.unescape(url))
  end
  @uri = URI(url)
  @debug.save "-> [uri] = #{@uri}" if @debug.active
end
Fetch a resource with the GET or POST HTTP method. Sets the USER_AGENT, BASIC_AUTH, HEADERS and COOKIES request attributes, sends the request, and saves the response COOKIES for subsequent requests.
@param url Resource link @param method Request method @param headers Request header @param params Request additional params
# File lib/tiny_grabber/agent.rb, line 191
#
# Fetch a resource with the GET or POST HTTP method.
#
# Builds the request, applies USER_AGENT, BASIC_AUTH, HEADERS and COOKIES,
# sends it and handles the response by status class. Headers and cookies of
# the response are saved for next requests. When @follow_location is set,
# 3xx Location headers and meta refresh tags are followed with a GET request.
#
# @param url Resource link
# @param method Request method, :get or :post
# @param headers Request headers; when not empty they replace the stored ones
# @param params Request additional params, sent as form data for :post
# @param redirect_limit How many more redirects / meta refreshes may be
#   followed. When it reaches zero the last response is returned as is, so a
#   redirect loop can no longer recurse without end.
# @return Response object of the last request made
# @raise [ArgumentError] when method is neither :get nor :post
def fetch(url, method = :get, headers = {}, params = {}, redirect_limit = 10)
  if @debug.active
    @debug.save '=============================='
    @debug.save "#{method.upcase} #{url}"
    @debug.save "-> [proxy] = #{@proxy}" if @proxy
    @debug.save "-> [params] = #{params}"
    @debug.save '------------------------------'
  end
  convert_to_uri url
  case method
  when :get
    @request = Net::HTTP::Get.new(@uri.request_uri)
  when :post
    @request = Net::HTTP::Post.new(@uri.request_uri)
    @request.set_form_data(params)
  else
    # Without this branch a stale (or nil) @request of a previous call was used
    raise ArgumentError, "unsupported request method: #{method.inspect}"
  end
  # Per-call headers have to replace the stored ones BEFORE the user agent is
  # written into @headers, otherwise the configured user agent is thrown away
  # together with the old hash. A copy is stored so the caller's hash is not
  # modified by later requests.
  @headers = headers.dup unless headers.empty?
  # A User-Agent given explicitly for this call still wins over the attribute
  explicit_user_agent = headers.any? { |name, _| name.to_s.casecmp('user-agent').zero? }
  set_user_agent if @user_agent && !explicit_user_agent
  set_basic_auth unless @basic_auth.empty?
  set_headers if @headers
  set_cookies if @cookies
  @response = send_request
  case @response
  # HTTP response code 1xx
  when Net::HTTPInformation
    @debug.save '<- [response] = Net::HTTPInformation' if @debug.active
  # HTTP response code 2xx
  when Net::HTTPSuccess
    save_headers
    save_cookies
    @debug.save "<- [response] = #{@response.code} Net::HTTPSuccess" if @debug.active
    # Follow meta refresh
    if @follow_location && redirect_limit > 0
      # NOTE(review): `ng` is not part of Net::HTTP - presumably a parsed
      # document added by a project extension; confirm.
      refresh = @response.ng.at_css('meta[http-equiv="refresh"]')
      if refresh
        # Drop everything in front of the url, e.g. "5; url="
        target = refresh.attr('content').gsub(/\A.*?(http)/, 'http')
        @response = fetch(target, :get, {}, {}, redirect_limit - 1)
      end
    end
  # HTTP response code 3xx
  when Net::HTTPRedirection
    @debug.save "<- [response] = #{@response.code} Net::HTTPRedirection" if @debug.active
    @debug.save 'try curl user_agent: tg.user_agent=\'curl\'' if @debug.active
    # Not every 3xx response has a Location header (304 Not Modified)
    location = @response['Location']
    # Follow location
    if @follow_location && location && redirect_limit > 0
      @response = fetch(location, :get, {}, {}, redirect_limit - 1)
    else
      save_headers
      save_cookies
    end
  # HTTP response code 4xx
  when Net::HTTPClientError
    @debug.save "<- [response] = #{@response.code} Net::HTTPClientError" if @debug.active
  # HTTP response code 5xx
  when Net::HTTPServerError
    @debug.save "<- [response] = #{@response.code} Net::HTTPServerError" if @debug.active
  end
  @response.uri = @uri
  @debug.save_to_file @response.body if @debug.save_html
  @response
end
Set HEADERS agent attribute
@param headers Request headers
# File lib/tiny_grabber/agent.rb, line 149
#
# Store the request headers used by the agent.
#
# @param headers Request headers, has to be a Hash
# @raise [RuntimeError] when headers is not a Hash
def headers=(headers)
  if headers.is_a?(Hash)
    @headers = headers
  else
    raise 'attribute headers must be Hash'
  end
end
Initialize the Net::HTTP connection through a proxy. The TYPE attribute selects the proxy type: SOCKS4(5) or HTTP(S).
@param proxy Proxy configuration
# File lib/tiny_grabber/agent.rb, line 115
#
# Configure the remote proxy and switch @http to a proxy-aware class.
#
# A String is split on ':' into ip, port and an optional type (default
# :http). A Hash needs the keys :ip and :port; its keys are converted to
# symbols. Type :socks / 'socks' selects Net::HTTP.SOCKSProxy, everything
# else Net::HTTP::Proxy.
#
# @param proxy Proxy configuration, "ip:port[:type]" String or Hash
# @raise [RuntimeError] when the configuration is malformed
def proxy=(proxy)
  if proxy.is_a?(String)
    host, port_number, kind = proxy.split(':')
    raise 'attribute proxy must be in format ip:port' if host.nil? || port_number.nil?
    proxy = { ip: host, port: port_number, type: kind || :http }
  end
  settings = var_to_sym(proxy)
  raise 'attribute proxy must be Hash' unless settings.is_a?(Hash)
  has_endpoint = settings[:ip] && settings[:port]
  raise 'attribute proxy must contain :ip and :port keys' unless has_endpoint
  @proxy = settings
  @http =
    case settings[:type]
    when :socks, 'socks'
      Net::HTTP.SOCKSProxy(settings[:ip].to_s, settings[:port].to_s)
    else
      Net::HTTP::Proxy(settings[:ip], settings[:port])
    end
end
Clears headers and cookies
# File lib/tiny_grabber/agent.rb, line 335
#
# Forget the stored headers and cookies.
#
# @return nil
def reset
  @headers, @cookies = {}, nil
  nil
end
Save response headers in agent attribute
# File lib/tiny_grabber/agent.rb, line 313
#
# Save the response headers in the agent attribute @headers.
#
# Does nothing when there is no response. The former guard called
# Net::HTTPResponse#header, an obsolete method that only returns the
# response itself, so it could never be false and failed on a nil response.
#
# NOTE: @headers is the response object itself, not a copy. Removing
# 'transfer-encoding' therefore removes it from @response as well.
def save_headers
  return unless @response
  @headers = @response
  # Delete header TRANSFER_ENCODING for chain of requests
  @headers.delete('transfer-encoding')
  @debug.save "<- [headers] = #{@headers}" if @debug.active
end
Send the request and return the response. Uses an SSL connection for the HTTPS scheme.
# File lib/tiny_grabber/agent.rb, line 304
#
# Open a connection to the host of @uri, send @request and return the
# response. SSL is used when the scheme of @uri is https.
def send_request
  options = {
    use_ssl: @uri.scheme == 'https',
    verify_mode: @verify_mode,
    read_timeout: @read_timeout
  }
  @http.start(@uri.host, @uri.port, **options) do |connection|
    @debug.save "-> [read_timeout] = #{@read_timeout}" if @debug.active
    connection.request(@request)
  end
end
Set BASIC_AUTH request authentication
# File lib/tiny_grabber/agent.rb, line 274
#
# Add basic authentication to @request, using the :username and :password
# entries of @basic_auth.
def set_basic_auth
  username = @basic_auth[:username]
  password = @basic_auth[:password]
  @request.basic_auth(username, password)
  @debug.save("-> [basic_auth] = #{@basic_auth}") if @debug.active
end
Set request HEADERS
# File lib/tiny_grabber/agent.rb, line 281
#
# Copy the agent HEADERS into @request.
#
# Every header is assigned with []=, which replaces a value already present
# on the request. A new Net::HTTP request is created with default Accept and
# User-Agent headers, and add_field appends to an existing field, so adding
# a User-Agent with add_field sent "Ruby, <user agent>" instead of the
# configured user agent. Header names are matched case-insensitively by
# Net::HTTP; a nil value removes the header.
def set_headers
  @headers.each do |name, value|
    @request[String(name)] = value
  end
  @debug.save "-> [headers] = #{@headers}" if @debug.active
end
Set USER_AGENT request attribute
# File lib/tiny_grabber/agent.rb, line 267
#
# Put the agent USER_AGENT into the stored headers under 'User-Agent'.
def set_user_agent
  agent = @user_agent
  @headers['User-Agent'] = agent
  @debug.save("-> [user_agent] = #{agent}") if @debug.active
end
Convert a variable and its contents to symbols
@param var Variable to convert
# File lib/tiny_grabber/agent.rb, line 344
#
# Recursively convert the keys of a Hash (and of Hashes nested in Hashes or
# Arrays) to symbols. With str_to_sym set, String values are converted to
# symbols too. Other values are returned unchanged.
#
# Keys that cannot be turned into a symbol (e.g. Integer) are kept as they
# are instead of raising NoMethodError.
#
# @param var Variable to convert
# @param str_to_sym Convert String values to symbols as well
# @return Converted copy for Hash and Array, otherwise the value itself
def var_to_sym(var, str_to_sym = false)
  case var
  when Hash
    var.each_with_object({}) do |(key, value), result|
      key = key.to_sym if key.respond_to?(:to_sym)
      result[key] = var_to_sym(value, str_to_sym)
    end
  when Array
    var.map { |item| var_to_sym(item, str_to_sym) }
  when String
    str_to_sym ? var.to_sym : var
  else
    var
  end
end