class Apollo::Fetcher::BaseFetcher
Public Class Methods
fetch(url, options = {})
click to toggle source
# File lib/apollo_crawler/fetcher/base_fetcher.rb, line 76
#
# Fetches +url+ through a Mechanize agent and returns the response as a
# plain Hash.
#
# url     - Any object whose +to_s+ yields a URI string.
# options - Accepted for interface compatibility; not read by this method.
#
# Returns a Hash with +:status+ (page.code), +:headers+
# (page.header.to_hash) and +:body+ (page.content), or nil when +url+
# cannot be parsed. Errors raised by the request itself are not rescued
# here and propagate to the caller.
def self.fetch(url, options = {})
  begin
    uri = URI.parse(url.to_s)
  rescue StandardError => e
    # StandardError rather than Exception: interrupts, SystemExit and
    # out-of-memory conditions must not be reported as a bad URL.
    puts "EXCEPTION: BaseFetcher::fetch() - Unable to fetch: '#{e}'"
    return nil
  end

  # The block parameter has its own name so it does not shadow `agent`.
  agent = Mechanize.new do |mech|
    mech.user_agent = 'Apollo Crawler'
  end

  page = agent.get(uri)

  {
    :status => page.code,
    :headers => page.header.to_hash,
    :body => page.content
  }
end
fetch_old(url, options = {})
click to toggle source
# File lib/apollo_crawler/fetcher/base_fetcher.rb, line 47
#
# Older fetch implementation built on Faraday. Issues a GET for +url+ and
# sends only the headers produced by BaseFetcher.get_fake_headers.
#
# url     - Any object whose +to_s+ yields a URI string.
# options - Accepted for interface compatibility; not read by this method.
#
# Returns whatever +conn.get+ returns, or nil when the URL cannot be
# parsed or the request raises a StandardError.
def self.fetch_old(url, options = {})
  begin
    uri = URI.parse(url.to_s)
  rescue StandardError => e
    # StandardError rather than Exception: interrupts, SystemExit and
    # out-of-memory conditions must not be reported as a fetch failure.
    puts "EXCEPTION: BaseFetcher::fetch() - Unable to fetch: '#{e}'"
    return nil
  end

  # See https://github.com/lostisland/faraday
  conn = Faraday.new(:url => url) do |faraday|
    # faraday.request :url_encoded # form-encode POST params
    # faraday.response :logger # log requests to STDOUT
    faraday.adapter Faraday.default_adapter # make requests with Net::HTTP
  end

  # Make the request; the value of this begin/rescue is the return value.
  begin
    conn.get(uri) do |request|
      # Assignment replaces the request's whole header set.
      request.headers = BaseFetcher.get_fake_headers(uri)
    end
  rescue StandardError => e
    puts "EXCEPTION: BaseFetcher::fetch() - Unable to fetch: '#{e}'"
    nil
  end
end
get_fake_headers(url)
click to toggle source
# File lib/apollo_crawler/fetcher/base_fetcher.rb, line 39
#
# Builds request headers carrying a randomly generated IPv4 address.
#
# url - Not read by this method.
#
# Returns a Hash with the single key "X-Forwarded-For" mapped to the
# dotted-quad string of a random 32-bit address.
def self.get_fake_headers(url)
  # rand(2**32) picks an integer in 0...2**32; IPAddr renders it as IPv4.
  ip = IPAddr.new(rand(2**32), Socket::AF_INET).to_s

  { "X-Forwarded-For" => ip }
end