module Twitterscraper::Proxy

Constants

PROXY_URL

Public Instance Methods

get_proxies(retries = 3) click to toggle source
# File lib/twitterscraper/proxy.rb, line 41
# Downloads the page at PROXY_URL and extracts usable proxies from the
# table whose id is "proxylisttable".
#
# For every row under the table's tbody, the text of columns 0, 1, 4 and 6
# is read (whitespace-stripped) as ip, port, anonymity and https. A row is
# kept only when its anonymity is "elite proxy" or "anonymous" and its
# https column is not "no".
#
# Any StandardError raised while fetching or parsing (for example a missing
# table or a row with too few cells) restarts the whole method until the
# attempt budget is used up.
#
# @param retries [Integer] total number of attempts before giving up
# @return [Array<String>] "ip:port" strings in random order
# @raise [RetryExhausted] when the final attempt fails; the message is the
#   inspected form of the last error
def get_proxies(retries = 3)
  page = Nokogiri::HTML(Twitterscraper::Http.get(PROXY_URL))
  rows = page.xpath('//table[@id="proxylisttable"]').first.xpath('tbody/tr')
  accepted_levels = ['elite proxy', 'anonymous']

  usable = rows.each_with_object([]) do |row, found|
    columns = row.xpath('td')
    field = ->(index) { columns[index].text.strip }

    # Read all four columns up front, in column order, so a short row fails
    # before any filtering decision is made.
    ip = field.call(0)
    port = field.call(1)
    anonymity = field.call(4)
    https = field.call(6)

    found << "#{ip}:#{port}" if accepted_levels.include?(anonymity) && https != 'no'
  end

  usable.shuffle
rescue => e
  retries -= 1
  retry if retries > 0

  raise RetryExhausted, e.inspect
end