class KleosTest::LinksCollector
Public Class Methods
inside_links()
click to toggle source
# File lib/kleos_test/links_collector.rb, line 104
#
# Combines the :valid and :invalid entries of the class-level
# @@inside_links store with `|`.
# NOTE(review): both entries are presumably Arrays, which would make this a
# duplicate-free union - confirm where @@inside_links is initialised.
def inside_links
  valid = @@inside_links[:valid]
  invalid = @@inside_links[:invalid]
  valid | invalid
end
invalid_inside_links()
click to toggle source
# File lib/kleos_test/links_collector.rb, line 112
#
# Returns whatever the class-level @@inside_links store holds under
# the :invalid key (the same object, not a copy).
def invalid_inside_links
  @@inside_links[:invalid]
end
invalid_outside_links()
click to toggle source
# File lib/kleos_test/links_collector.rb, line 124
#
# Returns whatever the class-level @@outside_links store holds under
# the :invalid key (the same object, not a copy).
def invalid_outside_links
  @@outside_links[:invalid]
end
new()
click to toggle source
# File lib/kleos_test/links_collector.rb, line 9
#
# Builds a collector and stores the result of get_target_link in @target.
def initialize
  @target = get_target_link
end
outside_links()
click to toggle source
# File lib/kleos_test/links_collector.rb, line 116
#
# Combines the :valid and :invalid entries of the class-level
# @@outside_links store with `|`.
# NOTE(review): both entries are presumably Arrays, which would make this a
# duplicate-free union - confirm where @@outside_links is initialised.
def outside_links
  valid = @@outside_links[:valid]
  invalid = @@outside_links[:invalid]
  valid | invalid
end
unverified_inside_links()
click to toggle source
# File lib/kleos_test/links_collector.rb, line 100
#
# Exposes the class-level @@unverified_inside_links collection
# (the same object, not a copy).
def unverified_inside_links
  @@unverified_inside_links
end
valid_inside_links()
click to toggle source
# File lib/kleos_test/links_collector.rb, line 108
#
# Returns whatever the class-level @@inside_links store holds under
# the :valid key (the same object, not a copy).
def valid_inside_links
  @@inside_links[:valid]
end
valid_outside_links()
click to toggle source
# File lib/kleos_test/links_collector.rb, line 120
#
# Returns whatever the class-level @@outside_links store holds under
# the :valid key (the same object, not a copy).
def valid_outside_links
  @@outside_links[:valid]
end
Public Instance Methods
download_inside_webpage()
click to toggle source
# File lib/kleos_test/links_collector.rb, line 41
#
# Fetches the page for @target with RestClient and reports progress on
# standard output.
#
# Returns a two-element array [body, status]:
# * on success, the response body and response code;
# * on an HTTP error that carries a response, that response's body and code;
# * on any other failure (bad URI, no response, unknown error),
#   ['fake body', 1].
def download_inside_webpage
  # Only a leading "http" marks an absolute URL. A relative link that merely
  # contains "http" (for example inside its query string) still needs the
  # base address prepended.
  address = @target.start_with?('http') ? @target : KleosTest.base_address + @target
  @@inside_downloads += 1
  # URI.decode was removed in Ruby 3.0; URI::DEFAULT_PARSER.unescape is the
  # same operation and is also available on older Rubies. Without this the
  # NoMethodError would fall into the bare rescue below and every page
  # would be reported as failed.
  readable_address = URI::DEFAULT_PARSER.unescape(address)
  print "Downloading (#{@@inside_downloads}|#{@@unverified_inside_links.size}) #{readable_address}..."
  response = RestClient.get(address)
  puts "OK"
  [response.body, response.code]
rescue RestClient::ExceptionWithResponse => e
  puts "ERROR"
  # The exception does not always carry a response (e.g. timeouts); an
  # error raised inside this clause would escape the method entirely.
  failed = e.response
  failed ? [failed.body, failed.code] : ['fake body', 1]
rescue URI::InvalidURIError
  puts "ERROR"
  puts "BAD URI"
  ['fake body', 1]
rescue
  puts "UNKNOWN ERROR"
  ['fake body', 1]
end
download_outside_webpage(address, counter)
click to toggle source
# File lib/kleos_test/links_collector.rb, line 61
#
# Requests +address+ with RestClient and reports progress on standard output.
#
# address - the URL to request.
# counter - position of this link in the run; printed together with
#           the size of @@unverified_outside_links minus counter.
#
# Returns the response code on success, the code of the error response on
# an HTTP error that carries one, and 1 for every other failure.
def download_outside_webpage(address, counter)
  # URI.decode was removed in Ruby 3.0; URI::DEFAULT_PARSER.unescape is the
  # same operation and is also available on older Rubies. Without this the
  # NoMethodError would fall into the bare rescue below and every link
  # would be reported as failed.
  readable_address = URI::DEFAULT_PARSER.unescape(address)
  print "Downloading (#{counter}|#{@@unverified_outside_links.size - counter}) #{readable_address}..."
  response = RestClient.get(address)
  puts "OK"
  response.code
rescue RestClient::ExceptionWithResponse => e
  puts "ERROR"
  # The exception does not always carry a response (e.g. timeouts); an
  # error raised inside this clause would escape the method entirely.
  failed = e.response
  failed ? failed.code : 1
rescue URI::InvalidURIError
  puts "ERROR"
  puts "BAD URI"
  1
rescue
  puts "UNKNOWN ERROR"
  1
end
extract_links(page)
click to toggle source
# File lib/kleos_test/links_collector.rb, line 79
#
# Parses +page+ as HTML with Nokogiri and returns the href value of every
# <a> element whose href is non-empty and does not start with
# "javascript:void(0)", "#" or "mailto".
def extract_links(page)
  xpath = [
    '//a[@href!=""]',
    '[not(starts-with(@href, "javascript:void(0)"))]',
    '[not(starts-with(@href, "#"))]',
    '[not(starts-with(@href, "mailto"))]'
  ].join
  anchors = Nokogiri::HTML(page).xpath(xpath)
  anchors.map { |anchor| anchor['href'] }
end
get_new_links()
click to toggle source
# File lib/kleos_test/links_collector.rb, line 17
#
# Downloads the page for @target and files @target under the :valid or
# :invalid entry of @@inside_links depending on whether the status is 200.
# In both cases @target is appended to @@verified_links. For a 200 response
# the links extracted from the page are passed to refill_unverified_links.
def get_new_links
  body, status = download_inside_webpage

  unless status == 200
    @@inside_links[:invalid] << @target
    return @@verified_links << @target
  end

  @@inside_links[:valid] << @target
  # Record the target as verified before refilling, so that a page linking
  # to itself is not queued again.
  @@verified_links << @target
  refill_unverified_links(extract_links(body))
end
get_target_link()
click to toggle source
# File lib/kleos_test/links_collector.rb, line 13
#
# Picks the next link to process: removes and returns the first element of
# @@unverified_inside_links, or returns '/' when that collection is empty.
def get_target_link
  return '/' if @@unverified_inside_links.empty?

  @@unverified_inside_links.shift
end
refill_unverified_links(links)
click to toggle source
# File lib/kleos_test/links_collector.rb, line 87
#
# Sorts +links+ into the two class-level work queues.
#
# Links matching the pattern below (starting with "/", "?" or an
# http(s)://[www.]kleos.ru prefix, subject to the "/forum" lookahead) go to
# @@unverified_inside_links, unless they are already queued there or already
# present in @@verified_links; duplicates within +links+ are dropped.
# All other links are appended to @@unverified_outside_links, which is then
# de-duplicated in place.
#
# Returns the result of that final uniq! call.
def refill_unverified_links(links)
  inside_pattern = /^(\/|\?|https?:\/\/(www\.)?kleos\.ru)(?!\/forum)/
  inside, outside = links.partition { |link| link.match(inside_pattern) }

  fresh_inside = inside.reject do |link|
    @@unverified_inside_links.include?(link) || @@verified_links.include?(link)
  end

  @@unverified_inside_links.concat(fresh_inside.uniq)
  @@unverified_outside_links.concat(outside).uniq!
end
verify_outside_links()
click to toggle source
# File lib/kleos_test/links_collector.rb, line 30
#
# Requests every link in @@unverified_outside_links in order, passing a
# 1-based position to download_outside_webpage, and appends the link to the
# :valid entry of @@outside_links when the returned code is 200, otherwise
# to the :invalid entry.
def verify_outside_links
  @@unverified_outside_links.each.with_index(1) do |link, position|
    status = download_outside_webpage(link, position)
    bucket = status == 200 ? :valid : :invalid
    @@outside_links[bucket] << link
  end
end