class SiteCrawler
Attributes
current_path[RW]
current_path_bang[RW]
queued_paths[RW]
saved_paths[RW]
start_paths[RW]
Public Class Methods
new(start_paths=['/'])
click to toggle source
# File lib/site_crawler.rb, line 10
# Creates a crawler and configures Capybara.
#
# start_paths - Array of paths handed to #crawl by #start_crawl
#               (defaults to ['/']).
#
# Both bookkeeping lists (saved and queued paths) start out empty.
def initialize(start_paths=['/'])
  self.queued_paths = []
  self.saved_paths = []
  self.start_paths = start_paths
  capybara_setup
end
Public Instance Methods
crawl(paths)
click to toggle source
# File lib/site_crawler.rb, line 22
# Visits and saves every path in +paths+, then keeps working through
# whatever ends up in queued_paths until nothing unsaved is left.
#
# paths - Array of path strings to start from. The array itself is not
#         modified, even when the caller passes queued_paths.
#
# Returns the result of done_message.
def crawl(paths)
  # Work on a copy: visit_page/save_page add to and delete from
  # queued_paths, and mutating an array while iterating it skips entries.
  batch = paths.dup

  until batch.empty?
    batch.each do |path|
      next if saved_paths.include?(path)

      @current_path = path
      @current_path_bang = "#!#{@current_path}"
      visit_page
      save_page
    end

    # Drain the queue into the next batch. Paths that are already saved are
    # dropped here; leaving them queued would make the crawl loop forever,
    # because a saved path is never visited and so never dequeued.
    batch = queued_paths - saved_paths
    queued_paths.clear
  end

  done_message
end
start_crawl()
click to toggle source
# File lib/site_crawler.rb, line 17
# Entry point: prints the start banner, then crawls beginning at the
# configured start paths. Returns whatever #crawl returns.
def start_crawl
  start_message

  crawl(@start_paths)
end
Private Instance Methods
capybara_setup()
click to toggle source
# File lib/site_crawler.rb, line 50
# Configures Capybara globally: pages are requested from the server at
# http://localhost:3000/ using the :poltergeist driver.
def capybara_setup
  Capybara.app_host = 'http://localhost:3000/'
  Capybara.current_driver = :poltergeist
end
done_message()
click to toggle source
# File lib/site_crawler.rb, line 68
# Prints the end-of-crawl notice, preceded by a blank line, to standard output.
def done_message
  $stdout.puts("\nDone crawling.")
end
queue_links()
click to toggle source
# File lib/site_crawler.rb, line 55
# Adds the target of every <a> element in @document to queued_paths.
#
# The "#!" marker is stripped from each href before queueing. Anchors
# without an href attribute, links containing "mailto:" and paths that are
# already queued are skipped.
def queue_links
  @document.css('a').each do |link|
    href = link['href']
    # Anchors such as <a name="top"> have no href; calling gsub on nil
    # would abort the whole crawl with NoMethodError.
    next if href.nil?

    path = href.gsub('#!', '')
    next if path.include?('mailto:') || queued_paths.include?(path)

    queued_paths << path
  end
end
save_page()
click to toggle source
# File lib/site_crawler.rb, line 43
# Persists the page currently held in @document.
#
# All <script> elements are removed from the document first, then the
# document is handed to StaticFileSaver under the "#!" path. Afterwards the
# current path is recorded as saved and taken out of the queue.
def save_page
  script_nodes = @document.css('script')
  script_nodes.remove

  saver = StaticFileSaver.new(@current_path_bang, @document)
  saver.save

  saved_paths << current_path
  queued_paths.delete(current_path)
end
start_message()
click to toggle source
# File lib/site_crawler.rb, line 64
# Prints the start-of-crawl banner, followed by a blank line, to standard output.
def start_message
  $stdout.puts("Ember SEO Crawling...\n\n")
end
visit_page()
click to toggle source
# File lib/site_crawler.rb, line 36
# Loads the current "#!" path, parses the resulting markup into @document
# and queues the links found on it.
#
# NOTE(review): visit and source are presumably provided by Capybara's DSL,
# which is not visible in this file - confirm.
def visit_page
  puts "visiting #{current_path}..."
  visit(@current_path_bang)

  markup = source
  @document = Nokogiri::HTML.parse(markup)

  queue_links
end