class RequestManager

Public Class Methods

new(proxy_list, request_interval, browser_num) click to toggle source
# File lib/requestmanager.rb, line 6
# Store the configuration, parse the proxy file and start the browser pool.
#
# proxy_list       - passed straight to parse_proxy_list; a falsy value means
#                    no proxy list is loaded
# request_interval - indexable pair; elements 0 and 1 are used elsewhere as
#                    the bounds of a randomized sleep
# browser_num      - number of browsers open_n_browsers will start
def initialize(proxy_list, request_interval, browser_num)
  # Plain bookkeeping state; these assignments do not depend on each other.
  @browser_num = browser_num
  @request_interval = request_interval
  @browsers = {}
  @used_proxies = []

  @proxy_list = parse_proxy_list(proxy_list)

  # Kept last: opening browsers reads all of the state assigned above.
  open_n_browsers
end

Public Instance Methods

close_all_browsers() click to toggle source

Close all the browsers

# File lib/requestmanager.rb, line 72
# Quit the driver of every tracked browser.
#
# Each value in @browsers is a [driver, last_used_time] pair; only the driver
# (index 0) is used here. The entries themselves stay in @browsers.
def close_all_browsers
  @browsers.each_value { |entry| entry[0].quit }
end
gen_driver(chosen_proxy) click to toggle source

Generate driver for searches

# File lib/requestmanager.rb, line 100
# Build a Firefox WebDriver, optionally routed through a proxy.
#
# chosen_proxy - proxy address used for both HTTP and SSL traffic, or a falsy
#                value for a direct connection
#
# Without a proxy the method sleeps for a random duration between
# @request_interval[0] and @request_interval[1] before launching.
#
# Returns the new driver.
def gen_driver(chosen_proxy)
  firefox_profile = Selenium::WebDriver::Firefox::Profile.new
  firefox_profile['intl.accept_languages'] = 'en'

  if chosen_proxy
    firefox_profile.proxy = Selenium::WebDriver::Proxy.new(http: chosen_proxy, ssl: chosen_proxy)
  else
    # No proxy: wait a randomized interval instead.
    pause = rand(@request_interval[0]..@request_interval[1])
    sleep(pause)
  end

  Selenium::WebDriver.for(:firefox, profile: firefox_profile)
end
get_least_recent_browser() click to toggle source

Get the least recently used browser

# File lib/requestmanager.rb, line 46
# Return the driver whose last-used timestamp is the oldest and mark it as
# used right now.
#
# @browsers maps a key to a [driver, last_used_time] pair. When several
# entries share the oldest timestamp, the one stored first is chosen.
def get_least_recent_browser
  stalest = @browsers.min_by { |_proxy, (_driver, used_at)| used_at }
  driver = stalest[1][0]

  # Refresh the timestamp so this browser moves to the back of the rotation.
  @browsers[stalest[0]] = [driver, Time.now]
  driver
end
get_most_recent_browser() click to toggle source

Get the most recently used browser

# File lib/requestmanager.rb, line 34
# Find the entry of @browsers with the newest last-used timestamp.
#
# Returns the whole [key, [driver, last_used_time]] pair (not just the
# driver), or nil when no browsers are tracked. When several entries share
# the newest timestamp, the one stored first is returned.
def get_most_recent_browser
  @browsers.max_by { |_proxy, (_driver, used_at)| used_at }
end
get_page(url, form_input = nil) click to toggle source

Get the page requested

# File lib/requestmanager.rb, line 79
# Load a URL in the least recently used browser and return the page HTML.
#
# url        - address to navigate to (also printed to stdout)
# form_input - optional text; when given, it is typed into the element named
#              "q" and that element is submitted
#
# Before the HTML is read, the method sleeps for a random duration between
# @request_interval[0] and @request_interval[1].
def get_page(url, form_input = nil)
  driver = get_least_recent_browser
  driver.navigate.to(url)
  puts("Getting page " + url)

  if form_input
    # Fixed pause before looking up the form field.
    sleep(2)
    driver.find_element(name: "q").tap do |field|
      field.send_keys(form_input)
      field.submit
    end
  end

  # Randomized wait while things load, then capture the output.
  pause = rand(@request_interval[0]..@request_interval[1])
  sleep(pause)
  driver.page_source
end
get_random_proxy() click to toggle source

Choose a random proxy that hasn't been used recently

# File lib/requestmanager.rb, line 117
# Pick a random proxy that is not currently recorded in @used_proxies.
#
# The chosen proxy is appended to @used_proxies before it is returned, so it
# is not handed out again until something removes it from that list.
#
# Raises RuntimeError when no unused proxy is left (every entry of
# @proxy_list is in use, or the list is empty). The earlier implementation
# retried by calling itself, which kept recursing until the stack overflowed
# unless a proxy was released in the meantime.
def get_random_proxy
  # Array difference keeps duplicates of unused entries, so a proxy listed
  # twice is still twice as likely to be drawn.
  available = @proxy_list - @used_proxies
  raise 'No unused proxy available: every configured proxy is already in use' if available.empty?

  chosen_proxy = available.sample
  @used_proxies.push(chosen_proxy)
  chosen_proxy
end
get_updated_current_page() click to toggle source

Get the html on the page now

# File lib/requestmanager.rb, line 29
# Return the HTML currently shown by the most recently used browser.
#
# get_most_recent_browser yields a [key, [driver, last_used_time]] pair; the
# driver is pulled out of it and asked for its page source.
def get_updated_current_page
  latest = get_most_recent_browser
  driver = latest[1][0]
  driver.page_source
end
open_browser() click to toggle source

Open the browser with a random proxy

# File lib/requestmanager.rb, line 23
# Start one browser and register it in @browsers.
#
# With a proxy list, a random unused proxy is requested and used as the
# @browsers key. Without one, the driver is created with a nil proxy and
# stored under a fresh placeholder object: keying every proxyless browser by
# nil made each new browser overwrite the previous entry, so earlier drivers
# were no longer tracked and could not be reused or quit.
#
# Returns the stored [driver, last_used_time] pair.
def open_browser
  chosen_proxy = @proxy_list.nil? ? nil : get_random_proxy

  # A unique object can never collide with a proxy string or another browser.
  browser_key = chosen_proxy || Object.new
  @browsers[browser_key] = [gen_driver(chosen_proxy), Time.now]
end
open_n_browsers() click to toggle source

Open the specified number of browsers

# File lib/requestmanager.rb, line 16
# Open @browser_num browsers by calling open_browser that many times.
#
# The count is validated with Integer() first. A nil count would otherwise
# build the endless range 1..nil (Ruby 2.6+) and keep opening browsers
# forever; it now raises TypeError. Numeric strings such as "3" are accepted,
# and a non-numeric string raises ArgumentError.
def open_n_browsers
  browser_count = Integer(@browser_num)
  (1..browser_count).each { open_browser }
end
parse_proxy_list(proxy_file) click to toggle source

Parse the proxy list

# File lib/requestmanager.rb, line 132
# Read a proxy list file, one proxy per line.
#
# proxy_file - path of the file to read; a falsy value means "no proxy list"
#
# Surrounding whitespace is stripped from every line and blank lines are
# skipped, so a trailing newline or spacer line no longer yields an
# empty-string proxy. File.readlines is used because it always treats its
# argument as a file path, whereas IO.readlines runs a command when the
# argument starts with "|".
#
# Returns an Array of proxy strings, or nil when proxy_file is falsy.
def parse_proxy_list(proxy_file)
  return unless proxy_file

  File.readlines(proxy_file).map(&:strip).reject(&:empty?)
end
restart_browser() click to toggle source

Restart the browser and open a new one

# File lib/requestmanager.rb, line 60
# Replace the most recently used browser with a freshly opened one.
#
# The old driver is quit and its entry removed from @browsers, then a new
# browser is opened. Only after that is the old key removed from
# @used_proxies, so the old proxy is still marked as used while the
# replacement is being opened.
#
# Returns the result of the final @used_proxies.delete call.
def restart_browser
  proxy_key, entry = get_most_recent_browser
  entry[0].quit

  @browsers.delete(proxy_key)
  open_browser
  @used_proxies.delete(proxy_key)
end