class Apollo::PlatformProgram
Constants
- DEFAULT_OPTIONS
Public Class Methods
new()
click to toggle source
Initializer - Constructor
Calls superclass method
Apollo::BaseProgram::new
# File lib/apollo_crawler/program/platform_program.rb, line 63
#
# Constructor. Runs the superclass initializer first, then merges
# DEFAULT_OPTIONS into this program's options hash in place (keys present
# in DEFAULT_OPTIONS overwrite any same-named keys already set).
def initialize
  super
  defaults = DEFAULT_OPTIONS
  self.options.merge!(defaults)
end
Public Instance Methods
enqueue_crawlers_urls(amqp, crawlers=Apollo::Crawler::BaseCrawler.subclasses, opts={})
click to toggle source
# File lib/apollo_crawler/program/platform_program.rb, line 100
#
# Instantiates every crawler class and hands its +url+, together with the
# class itself, to Apollo::Scheduler::BaseScheduler.schedule.
#
# amqp::     accepted for signature compatibility; not used in this method.
# crawlers:: crawler classes to schedule (defaults to every subclass of
#            Apollo::Crawler::BaseCrawler).
# opts::     accepted for signature compatibility; not used in this method.
#
# Returns the +crawlers+ collection that was iterated.
def enqueue_crawlers_urls(amqp, crawlers=Apollo::Crawler::BaseCrawler.subclasses, opts={})
  crawlers.each do |crawler_class|
    start_url = crawler_class.new.url
    Apollo::Scheduler::BaseScheduler.schedule(start_url, crawler_class)
  end
end
init_agents(amqp, opts={})
click to toggle source
# File lib/apollo_crawler/program/platform_program.rb, line 125
#
# Prints a progress message and then runs the three agent initializers in
# a fixed order: crawlers, domainers, fetchers. Each receives the same
# +amqp+ and +opts+ arguments.
#
# Returns whatever the last step (init_fetchers) returns.
def init_agents(amqp, opts={})
  puts "Initializing agents"

  outcome = nil
  [:init_crawlers, :init_domainers, :init_fetchers].each do |step|
    outcome = send(step, amqp, opts)
  end
  outcome
end
init_crawlers(amqp, opts={})
click to toggle source
# File lib/apollo_crawler/program/platform_program.rb, line 107
#
# Builds a single Apollo::Agent::CrawlerAgent bound to +amqp+.
#
# Note that the agent is configured from the program-wide +self.options+;
# the +opts+ argument is accepted but not consulted here.
#
# Returns a one-element Array holding the new agent.
def init_crawlers(amqp, opts={})
  [Apollo::Agent::CrawlerAgent.new(amqp, self.options)]
end
init_domainers(amqp, opts={})
click to toggle source
# File lib/apollo_crawler/program/platform_program.rb, line 112
#
# Builds a single Apollo::Agent::DomainerAgent bound to +amqp+.
#
# Note that the agent is configured from the program-wide +self.options+;
# the +opts+ argument is accepted but not consulted here.
#
# Returns a one-element Array holding the new agent.
def init_domainers(amqp, opts={})
  [Apollo::Agent::DomainerAgent.new(amqp, self.options)]
end
init_domains(opts={})
click to toggle source
# File lib/apollo_crawler/program/platform_program.rb, line 133
#
# Seeds Apollo::Model::Domain from the CSV at tmp/top-1m.csv (resolved
# relative to this source file). The resolved path is always printed.
#
# opts:: accepted for signature compatibility; not used in this method.
#
# Returns 0 when the CSV file is missing; otherwise returns the Thread that
# performs the import in the background (the thread is not joined here).
def init_domains(opts={})
  path = File.join(File.dirname(__FILE__), "../../../tmp/top-1m.csv")
  puts path

  # File.exists? was deprecated and later removed from Ruby (3.2), where it
  # raises NoMethodError; File.exist? works on every Ruby version.
  return 0 unless File.exist?(path)

  Thread.new do
    # NOTE(review): 1000 and false are passed through unchanged — presumably
    # a batch size and a validation flag; confirm against csv_bulk_insert.
    Apollo::Helper::Mongo.csv_bulk_insert(path, Apollo::Model::Domain, 1000, false) do |row|
      # Column 0 is stored as the integer :rank, column 1 as the :name.
      { :rank => row[0].to_i, :name => row[1] }
    end
  end
end
init_fetchers(amqp, opts={})
click to toggle source
# File lib/apollo_crawler/program/platform_program.rb, line 117
#
# Constructs an Apollo::Agent::FetcherAgent bound to +amqp+ (configured from
# +self.options+) and then enqueues the start URL of every crawler class.
#
# The agent reference is not kept by this method. The return value is the
# result of enqueue_crawlers_urls, not the agent.
def init_fetchers(amqp, opts={})
  # Instantiated only for whatever the constructor itself sets up.
  Apollo::Agent::FetcherAgent.new(amqp, self.options)

  # TODO: This should not be here!
  all_crawlers = Apollo::Crawler::BaseCrawler.subclasses
  enqueue_crawlers_urls(amqp, all_crawlers, opts)
end
init_options()
click to toggle source
# File lib/apollo_crawler/program/platform_program.rb, line 69
#
# Builds the command-line OptionParser for apollo-platform and stores it in
# +self.optparser+. Every switch handler writes into +self.options+:
#
# -h / --help::               sets :show_help to true
# -e / --environment [NAME]:: stores NAME under :env
# -d / --daemon::             sets :daemon to true
# -v / --verbose::            sets :verbose to true
# -V / --version::            sets :version to true
#
# Returns the parser that was assigned.
def init_options()
  parser = OptionParser.new

  parser.banner = "Usage: apollo-platform [OPTIONS]"
  parser.separator ""
  parser.separator "Specific options:"

  # Help screen switch; every program is assumed to offer it.
  parser.on('-h', '--help', 'Display this screen') { self.options[:show_help] = true }

  # The description embeds the environment configured at the time the
  # parser is built.
  parser.on('-e', '--environment [NAME]', "Environment used, default '#{options[:env]}'") do |env_name|
    self.options[:env] = env_name
  end

  parser.on('-d', '--daemon', 'Run Apollo Platform daemon') { self.options[:daemon] = true }
  parser.on('-v', '--verbose', 'Enable verbose output') { self.options[:verbose] = true }
  parser.on('-V', '--version', 'Show version info') { self.options[:version] = true }

  self.optparser = parser
end
init_program(args)
click to toggle source
Calls superclass method
Apollo::BaseProgram#init_program
# File lib/apollo_crawler/program/platform_program.rb, line 155
#
# Runs the superclass initialization and, only when that yields nil,
# initializes the agents using this program's AMQP handle and options.
#
# Returns the superclass result unchanged when it is non-nil; otherwise
# returns nil after the agents have been initialized.
def init_program(args)
  parent_result = super(args)

  if parent_result.nil?
    init_agents(self.amqp, self.options)
    nil
  else
    parent_result
  end
end
process_options(args)
click to toggle source
# File lib/apollo_crawler/program/platform_program.rb, line 163
#
# Handles the informational switches. :version takes precedence over
# :show_help when both are set.
#
# args:: accepted for signature compatibility; not used in this method.
#
# Returns 0 after printing the version or the help text, or nil to signal
# that the program can freely continue.
def process_options(args)
  settings = self.options

  if settings[:version]
    puts Apollo::VERSION
    return 0
  elsif settings[:show_help]
    puts optparser
    return 0
  end

  # Nothing informational was requested.
  nil
end
requeue_fetching_urls(opts={})
click to toggle source
# File lib/apollo_crawler/program/platform_program.rb, line 178
#
# Looks up every Apollo::Model::QueuedUrl whose state is :fetching, sets
# its state to :queued, and saves it.
#
# opts:: when opts[:verbose] is truthy, a line is printed for each record
#        before its state is changed.
#
# Returns the collection produced by the :fetching query.
def requeue_fetching_urls(opts={})
  Apollo::Model::QueuedUrl.where(:state => :fetching).each do |stuck|
    if opts[:verbose]
      puts "Requeing '#{stuck.inspect}'"
    end

    stuck.state = :queued
    stuck.save
  end
end
run(args = ARGV)
click to toggle source
Run Program
Calls superclass method
Apollo::BaseProgram#run
# File lib/apollo_crawler/program/platform_program.rb, line 189
#
# Program entry point. Runs the superclass +run+ first and returns its
# result immediately when it is non-nil. Otherwise it starts the domain
# import, requeues URLs left in the :fetching state, and — only when the
# :daemon option is set — runs an Apollo::Planner::SmartPlanner.
#
# Returns the result of request_exit, called with the planner's result in
# daemon mode or with 0 otherwise.
def run(args = ARGV)
  parent_result = super(args)
  return parent_result unless parent_result.nil?

  init_domains()
  requeue_fetching_urls(self.options)

  # A disabled check used to sit here: print the usage text and return 0
  # when ARGV was empty.

  exit_code =
    if self.options[:daemon]
      scheduler = Apollo::Planner::SmartPlanner.new(self.amqp, self.mongo, self.options)
      scheduler.run(self.options)
    else
      0
    end

  request_exit(exit_code)
end