class DaimonSkycrawlers::Commands::Enqueue
@private
Public Instance Methods
list(path)
click to toggle source
# File lib/daimon_skycrawlers/commands/enqueue.rb, line 55
# Enqueue every URL listed in a text file, one URL per line.
#
# Lines starting with "#" are treated as comments. Blank or
# whitespace-only lines are skipped so that an empty string is never
# enqueued as a URL. The queue is selected by options["type"]:
# "response" goes to the processor, "url" goes to the crawler.
#
# @param path [String] path of the file that lists the URLs
# @raise [ArgumentError] if options["type"] is neither "response" nor "url"
def list(path)
  load_init
  File.foreach(path) do |line|
    # Non-destructive chomp: works even if the line is frozen and also
    # drops a trailing "\r\n".
    entry = line.chomp
    next if entry.strip.empty? || entry.start_with?("#")

    case options["type"]
    when "response"
      DaimonSkycrawlers::Processor.enqueue_http_response(entry)
    when "url"
      DaimonSkycrawlers::Crawler.enqueue_url(entry)
    else
      raise ArgumentError, "Unknown type: #{options["type"]}"
    end
  end
end
response(url, *rest)
click to toggle source
# File lib/daimon_skycrawlers/commands/enqueue.rb, line 23
# Enqueue a URL for the processor, with optional message attributes.
#
# Each extra argument is a "key:value" pair. Only the first colon
# separates key from value, so values may themselves contain colons
# (for example "referer:http://example.com/"). An argument without any
# colon still makes the pair list invalid and to_h raises.
#
# @param url [String] URL to enqueue
# @param rest [Array<String>] "key:value" pairs merged into the message
def response(url, *rest)
  load_init
  # Limit of 2 keeps everything after the first colon in the value.
  message = rest.map { |arg| arg.split(":", 2) }.to_h
  log.debug("Enqueue URL for processor: #{url} : #{message}")
  DaimonSkycrawlers::Processor.enqueue_http_response(url, message)
end
sitemap(url)
click to toggle source
# File lib/daimon_skycrawlers/commands/enqueue.rb, line 34
# Enqueue every URL found in a sitemap for the crawler.
#
# With options["robots-txt"] set, the given URL is handed to WebRobots
# to look up the sitemap locations; otherwise the URL itself is used as
# the only sitemap. With options["dump"] set, the parsed URLs are
# printed one per line and nothing is enqueued.
#
# @param url [String] sitemap URL, or a URL passed to WebRobots#sitemaps
#   when options["robots-txt"] is set
def sitemap(url)
  load_init
  sitemaps =
    if options["robots-txt"]
      user_agent = "DaimonSkycrawlers/#{DaimonSkycrawlers::VERSION}"
      WebRobots.new(user_agent).sitemaps(url).uniq
    else
      [url]
    end
  urls = DaimonSkycrawlers::SitemapParser.new(sitemaps).parse
  if options["dump"]
    puts urls.join("\n")
    return
  end
  urls.each { |location| DaimonSkycrawlers::Crawler.enqueue_url(location) }
end
url(url, *rest)
click to toggle source
# File lib/daimon_skycrawlers/commands/enqueue.rb, line 15
# Enqueue a URL for the crawler, with optional message attributes.
#
# Each extra argument is a "key:value" pair. Only the first colon
# separates key from value, so values may themselves contain colons
# (for example "referer:http://example.com/"). An argument without any
# colon still makes the pair list invalid and to_h raises.
#
# @param url [String] URL to enqueue
# @param rest [Array<String>] "key:value" pairs merged into the message
def url(url, *rest)
  load_init
  # Limit of 2 keeps everything after the first colon in the value.
  message = rest.map { |arg| arg.split(":", 2) }.to_h
  log.debug("Enqueue URL for crawler: #{url} : #{message}")
  DaimonSkycrawlers::Crawler.enqueue_url(url, message)
end
yaml(path)
click to toggle source
# File lib/daimon_skycrawlers/commands/enqueue.rb, line 75
# Enqueue URLs described by a YAML file that is first rendered with ERB.
#
# The rendered document is iterated entry by entry; each entry is read
# through its "url" key and its optional "message" key (defaulting to an
# empty hash). The queue is selected by options["type"]: "response" goes
# to the processor, "url" goes to the crawler. An empty document
# enqueues nothing.
#
# @param path [String] path of the ERB/YAML file
# @raise [RuntimeError] if an entry has no "url"
# @raise [ArgumentError] if options["type"] is neither "response" nor "url"
def yaml(path)
  load_init
  template = File.read(path)
  # Positional safe_level/trim_mode arguments are deprecated since
  # Ruby 2.6; use the keyword when this ERB supports it and fall back to
  # the positional form on older Rubies.
  erb =
    if ERB.instance_method(:initialize).parameters.assoc(:key)
      ERB.new(template, trim_mode: "-")
    else
      ERB.new(template, nil, "-")
    end
  yaml_text = erb.result(binding)
  # NOTE(review): YAML.load can instantiate arbitrary objects on older
  # Psych versions; only feed this command files you trust.
  # An empty document loads as a falsy value, so treat it as "no entries".
  entries = YAML.load(yaml_text) || []
  entries.each do |entry|
    url = entry["url"]
    message = entry["message"] || {}
    raise "Could not find URL: #{entry}" unless url

    case options["type"]
    when "response"
      DaimonSkycrawlers::Processor.enqueue_http_response(url, message)
    when "url"
      DaimonSkycrawlers::Crawler.enqueue_url(url, message)
    else
      raise ArgumentError, "Unknown type: #{options["type"]}"
    end
  end
end
Private Instance Methods
load_init()
click to toggle source
# File lib/daimon_skycrawlers/commands/enqueue.rb, line 95
# Delegate to DaimonSkycrawlers.load_init; every command in this class
# calls this first.
def load_init
  DaimonSkycrawlers.load_init
end
log()
click to toggle source
# File lib/daimon_skycrawlers/commands/enqueue.rb, line 99
# Return the logger held by the DaimonSkycrawlers configuration.
def log
  configuration = DaimonSkycrawlers.configuration
  configuration.logger
end