class Apollo::Fetcher::BaseFetcher

Public Class Methods

fetch(url, options = {})
# File lib/apollo_crawler/fetcher/base_fetcher.rb, line 76
# Fetches +url+ with a Mechanize agent and returns the response as a plain hash.
#
# url     - anything responding to +to_s+; the string form is parsed with URI.parse.
# options - accepted for interface compatibility; this method does not read it.
#
# Returns a Hash with the keys :status, :headers and :body taken from the
# fetched page, or nil (after printing a message) when +url+ cannot be parsed.
#
# Errors raised by the Mechanize request itself are not rescued here and
# propagate to the caller.
def self.fetch(url, options = {})
        begin
                uri = URI.parse(url.to_s)
        rescue StandardError => e
                # Rescue StandardError rather than Exception so that interrupts
                # and exit requests are never swallowed by URL validation.
                puts "EXCEPTION: BaseFetcher::fetch() - Unable to fetch: '#{e.to_s}'"
                return nil
        end

        # The block parameter has its own name so it does not shadow the
        # local variable the new agent is assigned to.
        agent = Mechanize.new do |mech|
                mech.user_agent = 'Apollo Crawler'
        end

        page = agent.get(uri)

        {
                :status => page.code,
                :headers => page.header.to_hash,
                :body => page.content
        }
end
fetch_old(url, options = {})
# File lib/apollo_crawler/fetcher/base_fetcher.rb, line 47
# Fetches +url+ through a Faraday connection, sending the headers produced by
# BaseFetcher.get_fake_headers with the request.
#
# url     - anything responding to +to_s+; the string form is parsed with URI.parse.
# options - accepted for interface compatibility; this method does not read it.
#
# Returns whatever the Faraday GET call returns, or nil (after printing a
# message) when +url+ cannot be parsed or the request raises an error.
def self.fetch_old(url, options = {})
        begin
                uri = URI.parse(url.to_s)
        rescue StandardError => e
                # Rescue StandardError rather than Exception so that interrupts
                # and exit requests are never swallowed by URL validation.
                puts "EXCEPTION: BaseFetcher::fetch() - Unable to fetch: '#{e.to_s}'"
                return nil
        end

        # See https://github.com/lostisland/faraday
        conn = Faraday.new(:url => url) do |faraday|
                # faraday.request  :url_encoded             # form-encode POST params
                # faraday.response :logger                  # log requests to STDOUT
                faraday.adapter  Faraday.default_adapter  # make requests with Net::HTTP
        end

        # Make the request. Ordinary failures are reported and turned into nil;
        # non-StandardError exceptions (Interrupt, SystemExit, ...) are left to
        # propagate so the process can still be stopped during a network call.
        begin
                res = conn.get(uri) do |request|
                        request.headers = BaseFetcher.get_fake_headers(uri)
                end
        rescue StandardError => e
                puts "EXCEPTION: BaseFetcher::fetch() - Unable to fetch: '#{e.to_s}'"
                return nil
        end

        res
end
get_fake_headers(url)
# File lib/apollo_crawler/fetcher/base_fetcher.rb, line 39
# Builds a header hash containing a single "X-Forwarded-For" entry whose value
# is a randomly generated IPv4 address in dotted-quad form.
#
# url - accepted by the signature but not used when building the headers.
#
# Returns a Hash with one String key and one String value.
def self.get_fake_headers(url)
        # Any 32-bit integer maps onto exactly one IPv4 address.
        random_address = IPAddr.new(rand(2**32), Socket::AF_INET)

        { "X-Forwarded-For" => random_address.to_s }
end