class Crabbs::Crawler
Attributes
site_map[R]
visited[R]
Public Class Methods
new(options={})
click to toggle source
# File lib/crabbs/crawler.rb, line 7
# Builds a crawler that has visited nothing yet and holds an empty site map.
#
# options:: Hash of settings, stored as given (not copied). Defaults to an
#           empty Hash.
def initialize(options = {})
  @site_map = {}
  @visited = []
  @options = options
end
Public Instance Methods
crawl(uri_string)
click to toggle source
# File lib/crabbs/crawler.rb, line 13
# Starts a crawl at +uri_string+, recording what is found in the crawler's
# site map.
#
# uri_string:: String address of the page to start from.
#
# Returns whatever recursively_crawl returns for the starting page.
def crawl(uri_string)
  recursively_crawl(uri_string, @site_map)
end
Private Instance Methods
extract_links(uri_string)
click to toggle source
# File lib/crabbs/crawler.rb, line 32
# Fetches the page at +uri_string+ and returns the links found on it.
#
# Before each fetch, progress is written to STDOUT: a "Visiting: ..." line
# when the :verbose option is set, a single '.' otherwise.
#
# Addresses that cannot be fetched over HTTP (malformed URIs, relative paths
# without a host, non-HTTP schemes such as mailto:) yield an empty list and
# are not reported as visited, so a single such link does not abort the
# whole crawl with an error from Net::HTTP.
#
# uri_string:: String address of the page to fetch.
#
# Returns the result of Crabbs::Page#links for the fetched page, or [] when
# the address is malformed or not an HTTP(S) URI with a host.
def extract_links(uri_string)
  uri = URI.parse(uri_string)

  # Net::HTTP.get needs an http/https URI with a host to connect to.
  # URI::HTTPS is a subclass of URI::HTTP, so one check covers both.
  return [] unless uri.is_a?(URI::HTTP) && !uri.host.to_s.empty?

  if @options[:verbose]
    STDOUT.puts "Visiting: #{uri_string}"
  else
    STDOUT.putc '.'
  end

  Crabbs::Page.new(Net::HTTP.get(uri), uri.to_s).links
rescue URI::InvalidURIError
  []
end
recursively_crawl(uri_string, hash)
click to toggle source
# File lib/crabbs/crawler.rb, line 19
# Crawls +uri_string+ depth-first and records the result in +hash+.
#
# +hash+ always gets an entry keyed by +uri_string+. The first time a URI is
# seen, its links are extracted and crawled into that entry; a URI that has
# already been visited is left as it is and not fetched again.
#
# An existing entry for +uri_string+ is kept rather than replaced. Otherwise
# a page that lists the same link twice would have the subtree built for the
# first occurrence wiped by the second.
#
# uri_string:: String address of the page to crawl.
# hash::       Hash that receives the entry for this page.
#
# Returns nil when the URI was already visited, otherwise the list of links
# extracted from the page.
def recursively_crawl(uri_string, hash)
  subtree = (hash[uri_string] ||= {})
  return if @visited.include?(uri_string)

  links = extract_links(uri_string)
  # Marked only after a successful fetch, as in the original ordering.
  @visited << uri_string
  links.each { |link| recursively_crawl(link, subtree) }
end