class GetProxyList::FromProxyCn

Attributes

site[R]

Public Class Methods

new() click to toggle source
# File lib/get_proxy_list/from_proxy_cn.rb, line 10
# Creates the scraper and records the base address of the proxy listing
# site in @site (exposed read-only through the `site` attribute).
def initialize
  @site = 'http://www.cnproxy.com'
end

Public Instance Methods

_getproxylist(url) click to toggle source

Fetches the proxy list from the page at the given URL.

# File lib/get_proxy_list/from_proxy_cn.rb, line 16
def _getproxylist(url)
  p " Url : #{url}"
  begin
    hash_port = {"c"=>"1","a"=>"2","z"=>"3","m"=>"4","b"=>"5","w"=>"6","i"=>"7","x"=>"8","l"=>"9","f"=>"0"}
    html = open(url).read
    html.force_encoding("gbk")
    html.encode!("utf-8", :undef => :replace, :replace => "", :invalid => :replace)
    doc=Nokogiri::HTML.parse html
    #doc = Nokogiri::HTML(open(url))
    proxylist = []
    doc.css('table').each do |_table|
      _table.css('tr').each do |tr|
        if !tr.css('td')[0].content.eql?("IP:Port")&&!tr.css('td')[0].content.empty?
          proxy=tr.css('td')[0].content
          p "Proxy: #{proxy[1]}"
          proxy = proxy.split("document.write")
          _proxy = proxy[1].split(":")
          port = _proxy[1].gsub(")","")
          p "port : #{port}"
          port=port.split("+")
          _port = ""
          1.upto(port.length-1).each do |index|
           p "Hash:#{hash_port[port[index]]}"
            unless hash_port[port[index]].nil?
                    _port+=hash_port[port[index]]
            else
              _port=8080 
              break
            end
          end
          proxylist << {"ip"=>proxy[0],"port"=>_port}
        end
      end
    end
    p "ProxyList: #{proxylist}"
    return proxylist
    #rescue => err
    #  raise "Get Proxy list Error! Class:'FromProxyCn' Gem:'get_proxy_list' Error:'#{err.to_s}'"
  end
end
get_proxylist(page) click to toggle source
# File lib/get_proxy_list/from_proxy_cn.rb, line 69
# Collects the proxies from the first `page` listing pages into one list.
#
# Each page's result is merged with Array#|, so an entry that appears on
# more than one page (or twice on one page) is kept only once, in order of
# first appearance.
#
# The original expression `proxylist+proxylist|...` parsed as
# `(proxylist + proxylist) | ...`: it doubled the accumulated list on every
# iteration only for the union to drop the copies again. The result is the
# same as a plain union, which is what is used here.
#
# page - number of listing pages to fetch (passed on to get_url_list).
#
# Returns an Array of the hashes produced by _getproxylist, without duplicates.
def get_proxylist(page)
  get_url_list(page).reduce([]) do |proxies, url|
    proxies | _getproxylist(url)
  end
end
get_url_list(page) click to toggle source

Builds the list of listing-page URLs for the requested number of pages. page: the number of pages to fetch.

# File lib/get_proxy_list/from_proxy_cn.rb, line 60
# Builds the addresses of the first `page` listing pages.
#
# Pages are numbered from 1, and each address is @site followed by
# "/proxy<N>.html". `page` is converted with to_i, so a value below 1
# yields an empty list.
#
# page - how many pages to generate addresses for.
#
# Returns an Array of String URLs.
def get_url_list(page)
  (1..page.to_i).map { |number| @site + "/proxy#{number}.html" }
end