class Biblionet::Crawlers::PublisherCrawler
Public Class Methods
new(options = {})
click to toggle source
Calls superclass method
Biblionet::Crawlers::Base::new
# File lib/bookshark/crawlers/publisher_crawler.rb, line 7
#
# Builds a publisher crawler, supplying a default for every recognised option
# whose value is missing, nil or false, and then delegates to the superclass
# constructor with the completed hash.
#
# The caller's hash is left untouched (so a frozen hash is accepted); the
# superclass receives a new hash containing the defaults plus every key the
# caller supplied.
#
# @param options [Hash] crawler settings keyed by symbol. Keys that receive a
#   default: :folder, :base_url, :page_type, :extension, :start, :finish,
#   :step. Any other keys are passed through unchanged.
def initialize(options = {})
  defaults = {
    folder: 'lib/bookshark/storage/html_publisher_pages',
    base_url: 'http://www.biblionet.gr/com/',
    page_type: 'publisher',
    extension: '.html',
    start: 1,
    finish: 800,
    step: 100
  }

  # Same rule as `options[key] ||= default`: the default wins whenever the
  # supplied value is nil or false, otherwise the caller's value is kept.
  super(defaults.merge(options) { |_key, default, given| given || default })
end
Public Instance Methods
crawl_and_save()
click to toggle source
# File lib/bookshark/crawlers/publisher_crawler.rb, line 18
#
# Downloads every page yielded by +spider+ and writes it to disk.
#
# For each (url, file path) pair the page is loaded through a single shared
# Extractors::Base instance, the target directory is created, and the page is
# saved only when it is present and at least 1024 units long (as measured by
# +page.length+).
# NOTE(review): shorter pages are presumably error or placeholder responses —
# confirm the intent of the 1024 threshold.
def crawl_and_save
  downloader = Extractors::Base.new

  spider do |url_to_download, file_to_save|
    downloader.load_page(url_to_download)

    # mkdir_p does nothing when the directory already exists, so no separate
    # existence check is needed. The directory is created even if the page
    # ends up not being saved.
    FileUtils.mkdir_p(File.dirname(file_to_save))

    page = downloader.page
    downloader.save_page(file_to_save) if page && page.length >= 1024
  end
end