module EmailCrawler::MechanizeHelper
Constants
- READ_TIMEOUT
Public Instance Methods
get(url)
click to toggle source
# File lib/email_crawler/mechanize_helper.rb, line 17
#
# Fetches +url+ through the memoized agent, with the whole request wrapped
# in a READ_TIMEOUT-second Timeout.
#
# Returns the fetched object when it is a Mechanize::Page, otherwise nil.
#
# Failure handling:
# * Timeout::Error or SocketError - the request is attempted once more; the
#   single retry is shared between both error kinds. A second failure of
#   either kind yields nil.
# * Mechanize::Error - swallowed without a retry; the method returns nil.
# Any other exception propagates to the caller.
def get(url)
  already_retried = false

  # An explicit begin block is required here: `retry` re-runs only this
  # block, so the flag assigned above survives between attempts.
  begin
    response = Timeout.timeout(READ_TIMEOUT) { agent.get(url) }
    response.is_a?(Mechanize::Page) ? response : nil
  rescue Mechanize::Error
    nil
  rescue Timeout::Error, SocketError
    if already_retried
      nil
    else
      already_retried = true
      retry
    end
  end
end
new_agent() { |agent| ... }
click to toggle source
# File lib/email_crawler/mechanize_helper.rb, line 7
#
# Returns the Mechanize agent stored under Thread.current[:agent], building
# and storing one first when that slot is empty. Despite the name, an agent
# that is already stored is returned as-is.
#
# A freshly built agent is configured with:
# * the "Windows Mozilla" user agent alias,
# * READ_TIMEOUT for both the read and the open timeout,
# * SSL verification turned off (OpenSSL::SSL::VERIFY_NONE),
# * a history limited to one entry.
#
# If a block is given it is yielded the new agent after that configuration;
# the block is not called when a stored agent is reused.
def new_agent
  cached = Thread.current[:agent]
  return cached if cached

  Thread.current[:agent] = Mechanize.new do |mech|
    mech.user_agent_alias = "Windows Mozilla"
    mech.read_timeout = READ_TIMEOUT
    mech.open_timeout = READ_TIMEOUT
    mech.verify_mode = OpenSSL::SSL::VERIFY_NONE
    mech.history.max_size = 1
    yield mech if block_given?
  end
end
Private Instance Methods
agent()
click to toggle source
# File lib/email_crawler/mechanize_helper.rb, line 43
#
# Returns the agent memoized in @agent, obtaining it from new_agent the
# first time it is needed.
def agent
  return @agent if @agent

  @agent = new_agent
end