class Aquatone::Collectors::Netcraft
Constants
- BASE_URI
- DEFAULT_PAGES_TO_PROCESS
- HOSTNAME_REGEX
- RESULTS_PER_PAGE
Public Instance Methods
run()
click to toggle source
# File lib/aquatone/collectors/netcraft.rb, line 18
#
# Walks the Netcraft search result pages for the current domain and hands
# every hostname found to add_host.
#
# At most pages_to_process pages are requested. The first page uses the
# initial search URI; later pages continue from the last hostname of the
# previous page and the running count of hostnames seen so far. Paging stops
# early as soon as a page yields a number of hostnames different from
# RESULTS_PER_PAGE.
#
# The return value is not meaningful.
def run
  last = nil
  count = 0
  total_pages = pages_to_process

  1.upto(total_pages) do |page|
    uri =
      if page == 1
        "#{BASE_URI}/?restriction=site+contains&host=*.#{url_escape(domain.name)}&lookup=wait..&position=limited"
      else
        "#{BASE_URI}/?host=*.#{url_escape(domain.name)}&last=#{url_escape(last)}&from=#{count + 1}&restriction=site%20contains&position=limited"
      end

    response = get_request(uri, { :headers => { "Referer" => "http://searchdns.netcraft.com/" } })
    hosts = extract_hostnames_from_response(response.body)

    last = hosts.last
    count += hosts.count
    hosts.each { |host| add_host(host) }

    # Stop when the page count differs from RESULTS_PER_PAGE, or when this was
    # the final configured page: sleeping is only useful before another request.
    break if hosts.count != RESULTS_PER_PAGE || page == total_pages

    random_sleep(5)
  end
end
Private Instance Methods
extract_hostnames_from_response(body)
click to toggle source
# File lib/aquatone/collectors/netcraft.rb, line 42
#
# Builds the list of hostnames contained in a response body.
#
# The body is scanned with HOSTNAME_REGEX; for every match the last element
# is converted to a string, stripped of surrounding whitespace and
# lowercased. Returns the resulting array (empty when nothing matches).
#
# NOTE(review): HOSTNAME_REGEX is defined outside this view; the use of
# `last` presumably picks its final capture group - confirm.
def extract_hostnames_from_response(body)
  body.scan(HOSTNAME_REGEX).map do |captures|
    captures.last.to_s.strip.downcase
  end
end
pages_to_process()
click to toggle source
# File lib/aquatone/collectors/netcraft.rb, line 50
#
# Number of result pages the collector should request.
#
# Returns the "netcraft-pages" CLI option converted with to_i when that
# option is present; otherwise DEFAULT_PAGES_TO_PROCESS.
def pages_to_process
  return DEFAULT_PAGES_TO_PROCESS unless has_cli_option?("netcraft-pages")

  get_cli_option("netcraft-pages").to_i
end