class Baiduserp::Client
Constants
- AllUserAgents
Public Class Methods
get_rank_url(url)
click to toggle source
# File lib/baiduserp/client.rb, line 20 def self.get_rank_url(url) self.new.get_rank_url(url) end
get_serp(url,retries = 3)
click to toggle source
# File lib/baiduserp/client.rb, line 16 def self.get_serp(url,retries = 3) self.new.get_serp(url,retries) end
rand_ua()
click to toggle source
# File lib/baiduserp/client.rb, line 7 def self.rand_ua AllUserAgents[rand(AllUserAgents.size)] end
Public Instance Methods
get_rank_url(url)
click to toggle source
# File lib/baiduserp/client.rb, line 24 def get_rank_url(url) begin response = self.class.get(url) rescue StandardError => e puts e.class puts e.message sleep(10) retry end response end
get_serp(url, retries = 3)
click to toggle source
# File lib/baiduserp/client.rb, line 36 def get_serp(url, retries = 3) if retries > 0 begin response = self.class.get(url) rescue StandardError => e puts e.class puts e.message sleep(10) retry end if response.code != 200 puts response puts "Retry on URL: #{url}" sleep(rand(60)+1200) response = self.class.get_serp(url,retries - 1) end if response.nil? puts "Still error after 3 tries, sleep 3600s now." sleep(3600) response = self.class.get_serp(url) end ##Baidu Stopped response Content-Length in headers... #if response.headers['Content-Length'].nil? # puts "Can't read Content-Length from response, retry." # response = self.class.get_serp(url,retries-1) #end # #if response.headers['Content-Length'].to_i != response.body.bytesize # issue_file = "/tmp/baiduserp_crawler_issue_#{Time.now.strftime("%Y%m%d%H%M%S")}.html" # open(issue_file,'w').puts(response.body) # puts "Notice:" # puts "Baiduserp get an error when crawl SERP: response size (#{response.headers['Content-Length']}) not match body size." # puts "Please see file #{issue_file} for body content." # puts "Sleep 10s and retry" # sleep(10) # response = self.class.get_serp(url) #end response else nil end end