class RequestManager
Public Class Methods
new(proxy_list, request_interval, browser_num)
click to toggle source
# File lib/requestmanager.rb, line 6
# Store the configuration, start with empty bookkeeping, and launch the
# browser pool right away.
#
# proxy_list       - passed to parse_proxy_list; a falsy value means "no proxies"
# request_interval - indexable pair whose [0] and [1] are used as the bounds
#                    of the random sleeps in gen_driver and get_page
# browser_num      - upper bound of the 1..browser_num range open_n_browsers iterates
def initialize(proxy_list, request_interval, browser_num)
  @browser_num = browser_num
  @request_interval = request_interval
  @proxy_list = parse_proxy_list(proxy_list)

  # Bookkeeping: proxies currently handed out, and the open browsers
  # stored as key => [driver, last_used_time].
  @used_proxies = []
  @browsers = {}

  open_n_browsers
end
Public Instance Methods
close_all_browsers()
click to toggle source
Close all the browsers
# File lib/requestmanager.rb, line 72
# Quit the driver of every tracked browser.
# The entries themselves are left in @browsers; only the drivers are shut down.
# Returns @browsers.
def close_all_browsers
  @browsers.each_value do |entry|
    # Each value is a [driver, last_used_time] pair.
    driver = entry[0]
    driver.quit
  end
end
gen_driver(chosen_proxy)
click to toggle source
Generate driver for searches
# File lib/requestmanager.rb, line 100
# Build a Firefox WebDriver for searches.
#
# chosen_proxy - proxy address used for both HTTP and SSL traffic, or a falsy
#                value to run without a proxy
#
# Without a proxy the method sleeps for a random duration between
# @request_interval[0] and @request_interval[1] before starting the driver.
# Returns the new driver.
def gen_driver(chosen_proxy)
  # Profile settings: request English-language pages.
  firefox_profile = Selenium::WebDriver::Firefox::Profile.new
  firefox_profile['intl.accept_languages'] = 'en'

  if chosen_proxy
    firefox_profile.proxy = Selenium::WebDriver::Proxy.new(http: chosen_proxy, ssl: chosen_proxy)
  else
    shortest = @request_interval[0]
    longest = @request_interval[1]
    sleep(rand(shortest..longest))
  end

  Selenium::WebDriver.for(:firefox, profile: firefox_profile)
end
get_least_recent_browser()
click to toggle source
Get the least recently used browser
# File lib/requestmanager.rb, line 46
# Return the driver whose last-use timestamp is the oldest and stamp it
# with the current time.
#
# @browsers maps key => [driver, last_used_time]. When several entries share
# the oldest timestamp, the one stored first is chosen.
def get_least_recent_browser
  stalest = @browsers.min_by { |_key, entry| entry[1] }
  key = stalest[0]
  driver = stalest[1][0]

  # Update the usage time so the next call moves on to another browser.
  @browsers[key] = [driver, Time.now]
  driver
end
get_most_recent_browser()
click to toggle source
Get the most recently used browser
# File lib/requestmanager.rb, line 34
# Return the [key, [driver, last_used_time]] pair with the newest
# last-use timestamp, or nil when no browser is tracked.
# When several entries share the newest timestamp, the one stored first wins.
def get_most_recent_browser
  @browsers.max_by { |_key, entry| entry[1] }
end
get_page(url, form_input = nil)
click to toggle source
Get the page requested
# File lib/requestmanager.rb, line 79
# Load a URL in the least recently used browser and return the page HTML.
#
# url        - address to navigate to (concatenated into the log line, so it
#              must be a String)
# form_input - optional text; when given, it is typed into the element named
#              "q" and that element is submitted
#
# Sleeps for a random duration between @request_interval[0] and
# @request_interval[1] before reading the page source.
def get_page(url, form_input = nil)
  driver = get_least_recent_browser
  driver.navigate.to(url)
  puts "Getting page " + url

  # Handle form input if there is any.
  if form_input
    sleep(2) # pause before looking up the form element
    query_box = driver.find_element(name: "q")
    query_box.send_keys(form_input)
    query_box.submit
  end

  # Sleep while things load, then capture the output.
  pause = rand(@request_interval[0]..@request_interval[1])
  sleep(pause)
  driver.page_source
end
get_random_proxy()
click to toggle source
Choose a random proxy that hasn't been used recently
# File lib/requestmanager.rb, line 117
# Pick a random proxy that is not currently in @used_proxies, record it as
# used, and return it.
#
# While every proxy is in use, the method polls every 5 ms until one is
# removed from @used_proxies. The wait is a loop rather than self-recursion,
# so a long wait can no longer end in SystemStackError.
#
# Raises ArgumentError when @proxy_list is empty.
def get_random_proxy
  raise ArgumentError, 'proxy list is empty' if @proxy_list.empty?

  loop do
    # Sample only from proxies that are still free instead of drawing from
    # the whole list and retrying on a collision.
    candidate = (@proxy_list - @used_proxies).sample
    unless candidate.nil?
      @used_proxies.push(candidate)
      return candidate
    end

    sleep(0.005)
  end
end
get_updated_current_page()
click to toggle source
Get the current HTML of the page in the most recently used browser
# File lib/requestmanager.rb, line 29
# Return the current page source of the most recently used browser.
# get_most_recent_browser yields a [key, [driver, last_used_time]] pair;
# the driver sits at position [1][0].
def get_updated_current_page
  get_most_recent_browser.dig(1, 0).page_source
end
open_browser()
click to toggle source
Open the browser with a random proxy
# File lib/requestmanager.rb, line 23
# Open one browser and track it in @browsers as key => [driver, opened_at].
#
# With a proxy list, a random unused proxy is taken and used as the key.
# Without one, the driver itself is the key: keying every proxyless browser
# by nil made each new browser overwrite the previous entry, so the earlier
# drivers were no longer tracked and could never be quit.
#
# Returns the stored [driver, opened_at] pair.
def open_browser
  chosen_proxy = @proxy_list ? get_random_proxy : nil
  driver = gen_driver(chosen_proxy)
  @browsers[chosen_proxy || driver] = [driver, Time.now]
end
open_n_browsers()
click to toggle source
Open the specified number of browsers
# File lib/requestmanager.rb, line 16
# Open one browser for each value in 1..@browser_num.
# Returns that range.
def open_n_browsers
  slots = 1..@browser_num
  slots.each { open_browser }
end
parse_proxy_list(proxy_file)
click to toggle source
Parse the proxy list
# File lib/requestmanager.rb, line 132
# Read the proxy file into an array of proxies, one per line.
#
# proxy_file - path of the proxy list, or a falsy value when no proxies are used
#
# Surrounding whitespace is stripped and blank lines are dropped, so a
# trailing newline or spacer line no longer yields an empty-string proxy.
# File.readlines is used instead of IO.readlines so the path is always
# treated as a file name, never as a "|command" pipe.
#
# Returns the array of proxies, or nil when proxy_file is falsy.
def parse_proxy_list(proxy_file)
  return unless proxy_file

  File.readlines(proxy_file).map(&:strip).reject(&:empty?)
end
restart_browser()
click to toggle source
Close the most recently used browser and open a new one in its place
# File lib/requestmanager.rb, line 60
# Quit the most recently used browser and open a replacement.
#
# The old browser's key (its proxy, when proxies are used) is normally
# released only after the replacement is open, so the new browser gets a
# different proxy. When every proxy in @proxy_list is already in use, that
# order leaves no free proxy for the replacement, so the old proxy is
# released first in that case.
#
# Returns the result of removing the old key from @used_proxies.
def restart_browser
  # Get the most recently used browser and close it.
  stale_key, stale_entry = get_most_recent_browser
  stale_entry[0].quit
  @browsers.delete(stale_key)

  pool_exhausted = @proxy_list && (@proxy_list - @used_proxies).empty?
  if pool_exhausted
    # Free the old proxy first; otherwise no proxy would be available.
    released = @used_proxies.delete(stale_key)
    open_browser
    released
  else
    open_browser
    @used_proxies.delete(stale_key)
  end
end