class Spieker::Crawler

Public Class Methods

new(url, verbose: false, lang: 'en') click to toggle source
# File lib/spieker/crawler.rb, line 3
# Sets up a crawler for the given start URL.
#
# @param url [String] the URL crawling starts from (stored as-is in @url)
# @param verbose [Boolean] when truthy, progress is printed via #report
#   and results are printed at the end of #crawl!
# @param lang [String] language code handed to every LinkScraper
def initialize(url, verbose: false, lang: 'en')
  # Crawl state: starts out empty for every new crawler.
  @tracked_links = []

  # Configuration supplied by the caller.
  @url = url
  @verbose = verbose
  @lang = lang
end

Public Instance Methods

crawl!() click to toggle source
# File lib/spieker/crawler.rb, line 10
# Crawls starting at @url: scrapes the start page, marks it as tracked,
# then hands the links found there to #recursively_crawl.
#
# Progress messages go through #report. When @verbose is truthy the
# results are printed afterwards via #print_results.
#
# @return whatever #print_results returns when verbose, otherwise nil
def crawl!
  report "Starting to crawl on #{@url}"

  # The scraper is built before the start URL is tracked and queried
  # afterwards; this ordering is kept deliberately.
  root_scraper = LinkScraper.new(@url, lang: @lang)
  track_link(@url)
  recursively_crawl(root_scraper.result)

  return unless @verbose

  print_results
end
current_path() click to toggle source
# File lib/spieker/crawler.rb, line 21
# Path component of the crawler's start URL.
#
# @return [String] the +path+ of @url as parsed by URI.parse
def current_path
  parsed_url = URI.parse(@url)
  parsed_url.path
end

Private Instance Methods

print_results() click to toggle source
recursively_crawl(links) click to toggle source
# File lib/spieker/crawler.rb, line 26
# Scrapes every link in +links+, marks each one as tracked, then calls
# itself with the links found on those pages that
# #select_untracked_links lets through. Stops once that selection
# contains no (truthy) entries.
#
# Progress is emitted through #report.
#
# @param links [Array] links to scrape in this pass
# @return [nil] once there is nothing left to crawl
def recursively_crawl(links)
  # Results are keyed by page, so a page listed twice keeps only the
  # result of its last scrape.
  found_by_page = links.each_with_object({}) do |page, found|
    report "Crawling page #{page}"

    found[page] = LinkScraper.new(page, lang: @lang).result
    track_link(page)

    report "Finished page #{page}, #{found[page].length} links found"
  end

  candidates = found_by_page.values.flatten.uniq
  untracked = select_untracked_links(candidates)

  report "Recursively crawling #{untracked.length} links ..."
  report "NEW LINKS FOUND: \n#{untracked.join("\n")}"
  report "TRACKED LINKS: \n#{@tracked_links.join("\n")}"

  return unless untracked.any?

  recursively_crawl(untracked)
end
report(text) click to toggle source
# File lib/spieker/crawler.rb, line 53
# Prints +text+ with +puts+, but only when @verbose is truthy.
#
# @param text [String] the progress message
# @return [nil]
def report(text)
  return unless @verbose

  puts text
end