module EmailCrawler::MechanizeHelper

Constants

READ_TIMEOUT

Public Instance Methods

get(url) click to toggle source
# File lib/email_crawler/mechanize_helper.rb, line 17
# Fetches +url+ through the memoised agent, bounding the call with
# READ_TIMEOUT.
#
# Returns the fetched object when it is a Mechanize::Page, otherwise nil.
# A Timeout::Error or SocketError is retried once; that single retry is
# shared between both error kinds. Once it is used up, or when a
# Mechanize::Error is raised, the result is nil. Any other exception
# propagates to the caller.
def get(url)
  retries_left = 1

  begin
    response = Timeout.timeout(READ_TIMEOUT) { agent.get(url) }
    response.is_a?(Mechanize::Page) ? response : nil
  rescue Timeout::Error, SocketError
    # Falls through to nil once the single retry has been spent.
    if retries_left > 0
      retries_left -= 1
      retry
    end
  rescue Mechanize::Error
    nil
  end
end
new_agent() { |agent| ... } click to toggle source
# File lib/email_crawler/mechanize_helper.rb, line 7
# Returns the Mechanize agent stored under :agent on the current thread,
# building and storing one first when the slot is empty.
#
# A newly built agent is configured here and then yielded to the caller's
# block, if one was given. When an agent is already stored, the block is
# not invoked.
def new_agent
  thread = Thread.current
  existing = thread[:agent]
  return existing if existing

  thread[:agent] = Mechanize.new do |mech|
    mech.user_agent_alias = "Windows Mozilla"
    mech.read_timeout = READ_TIMEOUT
    mech.open_timeout = READ_TIMEOUT
    # TLS certificate verification is switched off for this agent.
    mech.verify_mode = OpenSSL::SSL::VERIFY_NONE
    # Keep only the most recent page in the agent's history.
    mech.history.max_size = 1
    yield(mech) if block_given?
  end
end

Private Instance Methods

agent() click to toggle source
# File lib/email_crawler/mechanize_helper.rb, line 43
# Returns the agent cached on this object in @agent, obtaining it from
# new_agent on first use.
def agent
  return @agent if @agent

  @agent = new_agent
end