class DaimonSkycrawlers::Commands::Enqueue

@private

Public Instance Methods

list(path) click to toggle source
# File lib/daimon_skycrawlers/commands/enqueue.rb, line 55
# Enqueue every entry of a plain-text list file, one entry per line.
#
# Lines starting with "#" are comments. Blank (or whitespace-only) lines
# are ignored as well, so an empty string is never enqueued.
#
# options["type"] selects the destination for each remaining line:
# "response" sends it to DaimonSkycrawlers::Processor.enqueue_http_response,
# "url" sends it to DaimonSkycrawlers::Crawler.enqueue_url.
#
# @param path [String] path of the list file to read
# @raise [ArgumentError] when an entry is reached and options["type"] is
#   neither "response" nor "url"
def list(path)
  load_init
  File.open(path, "r") do |file|
    file.each_line do |raw_line|
      line = raw_line.chomp
      # Skip separators and comments; only real entries are enqueued.
      next if line.strip.empty? || line.start_with?("#")

      case options["type"]
      when "response"
        DaimonSkycrawlers::Processor.enqueue_http_response(line)
      when "url"
        DaimonSkycrawlers::Crawler.enqueue_url(line)
      else
        raise ArgumentError, "Unknown type: #{options["type"]}"
      end
    end
  end
end
response(url, *rest) click to toggle source
# File lib/daimon_skycrawlers/commands/enqueue.rb, line 23
# Enqueue a URL for the processor together with an optional message.
#
# Each extra argument is a "key:value" pair. Only the first colon separates
# key from value, so values that themselves contain colons (for example
# "referer:http://example.com/") are kept intact.
#
# @param url [String] URL handed to the processor
# @param rest [Array<String>] "key:value" pairs that make up the message
# @raise [ArgumentError] when an argument contains no colon
def response(url, *rest)
  load_init
  message = rest.each_with_object({}) do |arg, pairs|
    key, value = arg.split(":", 2)
    raise ArgumentError, "Invalid message argument (expected key:value): #{arg}" if value.nil?

    pairs[key] = value
  end
  log.debug("Enqueue URL for processor: #{url} : #{message}")
  DaimonSkycrawlers::Processor.enqueue_http_response(url, message)
end
sitemap(url) click to toggle source
# File lib/daimon_skycrawlers/commands/enqueue.rb, line 34
# Enqueue (or just print) every URL found through a sitemap.
#
# With options["robots-txt"] set, the given URL is passed to
# WebRobots#sitemaps and the de-duplicated result is used as the list of
# sitemaps; otherwise the URL itself is the only sitemap.
#
# With options["dump"] set, the parsed URLs are written to standard output,
# one per line, and nothing is enqueued.
#
# @param url [String] sitemap URL, or the URL handed to WebRobots#sitemaps
def sitemap(url)
  load_init
  sitemaps =
    if options["robots-txt"]
      user_agent = "DaimonSkycrawlers/#{DaimonSkycrawlers::VERSION}"
      WebRobots.new(user_agent).sitemaps(url).uniq
    else
      [url]
    end
  urls = DaimonSkycrawlers::SitemapParser.new(sitemaps).parse

  if options["dump"]
    puts urls.join("\n")
    nil
  else
    urls.each { |found_url| DaimonSkycrawlers::Crawler.enqueue_url(found_url) }
  end
end
url(url, *rest) click to toggle source
# File lib/daimon_skycrawlers/commands/enqueue.rb, line 15
# Enqueue a URL for the crawler together with an optional message.
#
# Each extra argument is a "key:value" pair. Only the first colon separates
# key from value, so values that themselves contain colons (for example
# "referer:http://example.com/") are kept intact.
#
# @param url [String] URL handed to the crawler
# @param rest [Array<String>] "key:value" pairs that make up the message
# @raise [ArgumentError] when an argument contains no colon
def url(url, *rest)
  load_init
  message = rest.each_with_object({}) do |arg, pairs|
    key, value = arg.split(":", 2)
    raise ArgumentError, "Invalid message argument (expected key:value): #{arg}" if value.nil?

    pairs[key] = value
  end
  log.debug("Enqueue URL for crawler: #{url} : #{message}")
  DaimonSkycrawlers::Crawler.enqueue_url(url, message)
end
yaml(path) click to toggle source
# File lib/daimon_skycrawlers/commands/enqueue.rb, line 75
# Enqueue every entry described in a YAML file.
#
# The file is first rendered as an ERB template (trim mode "-") against this
# method's binding, then parsed as YAML. The document is expected to be a
# list of mappings with a "url" key and an optional "message" mapping. An
# empty document is treated as an empty list.
#
# NOTE: the file is executed as ERB, so it must come from a trusted source.
#
# options["type"] selects the destination for each entry:
# "response" sends it to DaimonSkycrawlers::Processor.enqueue_http_response,
# "url" sends it to DaimonSkycrawlers::Crawler.enqueue_url.
#
# @param path [String] path of the ERB/YAML file to read
# @raise [RuntimeError] when an entry has no "url"
# @raise [ArgumentError] when an entry is reached and options["type"] is
#   neither "response" nor "url"
def yaml(path)
  load_init
  template = File.read(path)
  # Positional safe_level/trim_mode arguments are deprecated in ERB; use the
  # keyword form whenever the installed ERB offers it, and fall back to the
  # positional form only on old versions.
  erb =
    if ERB.instance_method(:initialize).parameters.include?([:key, :trim_mode])
      ERB.new(template, trim_mode: "-")
    else
      ERB.new(template, nil, "-")
    end
  yaml_text = erb.result(binding)
  # An empty document parses to a falsy value; treat it as "no entries".
  entries = YAML.load(yaml_text) || []
  entries.each do |hash|
    url = hash["url"]
    message = hash["message"] || {}
    raise "Could not find URL: #{hash}" unless url

    case options["type"]
    when "response"
      DaimonSkycrawlers::Processor.enqueue_http_response(url, message)
    when "url"
      DaimonSkycrawlers::Crawler.enqueue_url(url, message)
    else
      raise ArgumentError, "Unknown type: #{options["type"]}"
    end
  end
end

Private Instance Methods

load_init() click to toggle source
# File lib/daimon_skycrawlers/commands/enqueue.rb, line 95
# Delegates to DaimonSkycrawlers.load_init. Every command method in this
# class calls this first, before enqueuing anything.
# NOTE(review): presumably loads the project's init/configuration file —
# confirm against DaimonSkycrawlers.load_init.
def load_init
  DaimonSkycrawlers.load_init
end
log() click to toggle source
# File lib/daimon_skycrawlers/commands/enqueue.rb, line 99
# Logger used by the enqueue commands for their debug output.
#
# @return [Object] the logger held by DaimonSkycrawlers.configuration
def log
  configuration = DaimonSkycrawlers.configuration
  configuration.logger
end