class Apollo::PlatformProgram

Constants

DEFAULT_OPTIONS

Public Class Methods

new() click to toggle source

Constructor: runs the superclass initializer, then merges DEFAULT_OPTIONS into the program's options.

Calls superclass method Apollo::BaseProgram::new
# File lib/apollo_crawler/program/platform_program.rb, line 63
# Builds the platform program.
#
# Runs the superclass initializer first, then merges DEFAULT_OPTIONS into the
# options hash in place. Keys present in DEFAULT_OPTIONS overwrite any value
# that is already stored under the same key.
def initialize
  super

  options.merge!(DEFAULT_OPTIONS)
end

Public Instance Methods

enqueue_crawlers_urls(amqp, crawlers=Apollo::Crawler::BaseCrawler.subclasses, opts={}) click to toggle source
# File lib/apollo_crawler/program/platform_program.rb, line 100
# Schedules the start URL of every given crawler class.
#
# Each entry of +crawlers+ is instantiated and the instance's +url+ is handed
# to Apollo::Scheduler::BaseScheduler.schedule together with the crawler
# class itself. +amqp+ and +opts+ are accepted but not used here.
#
# Returns the +crawlers+ collection that was iterated.
def enqueue_crawlers_urls(amqp, crawlers = Apollo::Crawler::BaseCrawler.subclasses, opts = {})
  crawlers.each do |crawler_class|
    Apollo::Scheduler::BaseScheduler.schedule(crawler_class.new.url, crawler_class)
  end
end
init_agents(amqp, opts={}) click to toggle source
# File lib/apollo_crawler/program/platform_program.rb, line 125
# Announces agent start-up on stdout and runs the three agent initializers
# in a fixed order: crawlers, domainers, fetchers.
#
# Every initializer receives the same +amqp+ and +opts+ arguments.
# Returns whatever the last initializer (init_fetchers) returned.
def init_agents(amqp, opts = {})
  puts "Initializing agents"

  [:init_crawlers, :init_domainers, :init_fetchers].map do |initializer|
    __send__(initializer, amqp, opts)
  end.last
end
init_crawlers(amqp, opts={}) click to toggle source
# File lib/apollo_crawler/program/platform_program.rb, line 107
# Creates one Apollo::Agent::CrawlerAgent from +amqp+ and the program's
# options hash.
#
# +opts+ is accepted but not used; the agent receives the program options.
# Returns a one-element array holding the new agent.
def init_crawlers(amqp, opts = {})
  [Apollo::Agent::CrawlerAgent.new(amqp, options)]
end
init_domainers(amqp, opts={}) click to toggle source
# File lib/apollo_crawler/program/platform_program.rb, line 112
# Creates one Apollo::Agent::DomainerAgent from +amqp+ and the program's
# options hash.
#
# +opts+ is accepted but not used; the agent receives the program options.
# Returns a one-element array holding the new agent.
def init_domainers(amqp, opts = {})
  [Apollo::Agent::DomainerAgent.new(amqp, options)]
end
init_domains(opts={}) click to toggle source
# File lib/apollo_crawler/program/platform_program.rb, line 133
# Seeds the domain collection from the bundled top-1m CSV file, if present.
#
# The CSV path is resolved relative to this source file and printed to
# stdout. When the file is missing the method returns 0 and does nothing
# else. Otherwise the import runs on a new thread and that Thread is
# returned without being joined.
#
# Each CSV row is mapped to a hash with an integer :rank (column 0) and a
# :name (column 1). +opts+ is accepted but not used.
def init_domains(opts = {})
  path = File.join(File.dirname(__FILE__), "../../../tmp/top-1m.csv")
  puts path

  # File.exists? was deprecated and then removed in Ruby 3.2; File.exist? is
  # the supported spelling on every Ruby version.
  return 0 unless File.exist?(path)

  Thread.new do
    # NOTE(review): 1000 and false are passed through to csv_bulk_insert
    # unchanged; presumably a batch size and a flag - confirm against the helper.
    Apollo::Helper::Mongo.csv_bulk_insert(path, Apollo::Model::Domain, 1000, false) do |row|
      { :rank => row[0].to_i, :name => row[1] }
    end
  end
end
init_fetchers(amqp, opts={}) click to toggle source
# File lib/apollo_crawler/program/platform_program.rb, line 117
# Creates one Apollo::Agent::FetcherAgent from +amqp+ and the program's
# options hash, then enqueues the URLs of all BaseCrawler subclasses.
#
# The agent instance is not kept by this method. Returns the result of
# enqueue_crawlers_urls.
def init_fetchers(amqp, opts = {})
  Apollo::Agent::FetcherAgent.new(amqp, options)

  # TODO: This should not be here!
  enqueue_crawlers_urls(amqp, Apollo::Crawler::BaseCrawler.subclasses, opts)
end
init_options() click to toggle source
# File lib/apollo_crawler/program/platform_program.rb, line 69
# Builds the command-line option parser and stores it in +optparser+.
#
# Every switch only records its value in the program's options hash; nothing
# is acted upon here. The default environment shown in the help text is read
# from options[:env] at the moment the parser is built.
#
# Returns the newly created OptionParser.
def init_options
  self.optparser = OptionParser.new do |parser|
    parser.banner = "Usage: apollo-platform [OPTIONS]"

    parser.separator ""
    parser.separator "Specific options:"

    # Help switch; all programs are assumed to offer it.
    parser.on('-h', '--help', 'Display this screen') { options[:show_help] = true }

    # NAME is optional, so the stored value is nil when it is omitted.
    parser.on('-e', '--environment [NAME]', "Environment used, default '#{options[:env]}'") { |name| options[:env] = name }

    parser.on('-d', '--daemon', 'Run Apollo Platform daemon') { options[:daemon] = true }

    parser.on('-v', '--verbose', 'Enable verbose output') { options[:verbose] = true }

    parser.on('-V', '--version', 'Show version info') { options[:version] = true }
  end
end
init_program(args) click to toggle source
Calls superclass method Apollo::BaseProgram#init_program
# File lib/apollo_crawler/program/platform_program.rb, line 155
# Initializes the program and, when the superclass does not object, the
# agents.
#
# A non-nil result from the superclass is returned unchanged and no agent is
# initialized. Otherwise init_agents is called with the program's AMQP handle
# and options, and nil is returned.
def init_program(args)
  parent_result = super(args)

  if parent_result.nil?
    init_agents(amqp, options)
    nil
  else
    parent_result
  end
end
process_options(args) click to toggle source
# File lib/apollo_crawler/program/platform_program.rb, line 163
# Handles the informational switches that have already been parsed.
#
# The version switch takes precedence: it prints Apollo::VERSION. Otherwise
# the help switch prints the option parser. In both cases 0 is returned.
# When neither switch is set, nil is returned, meaning the program can
# freely continue. +args+ is accepted but not used.
def process_options(args)
  if options[:version]
    puts Apollo::VERSION
    0
  elsif options[:show_help]
    puts optparser
    0
  else
    nil
  end
end
requeue_fetching_urls(opts={}) click to toggle source
# File lib/apollo_crawler/program/platform_program.rb, line 178
# Puts every queued URL whose state is :fetching back into the :queued state.
#
# Each matching record is saved after its state is changed. When
# opts[:verbose] is truthy, a line is printed for each record before it is
# modified.
#
# Returns the collection produced by the query.
def requeue_fetching_urls(opts = {})
  Apollo::Model::QueuedUrl.where(:state => :fetching).each do |queued_url|
    puts "Requeing '#{queued_url.inspect}'" if opts[:verbose]

    queued_url.state = :queued
    queued_url.save
  end
end
run(args = ARGV) click to toggle source

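Runs the program: delegates to the superclass, then seeds domains, requeues URLs left in the fetching state and, in daemon mode, runs the planner.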

Calls superclass method Apollo::BaseProgram#run
# File lib/apollo_crawler/program/platform_program.rb, line 189
# Runs the platform program.
#
# A non-nil result from the superclass is returned unchanged and nothing else
# happens. Otherwise domains are seeded, URLs left in the :fetching state are
# requeued, and, when options[:daemon] is set, a SmartPlanner is built and
# run. The planner's result (or 0 without daemon mode) is handed to
# request_exit, whose result is returned.
def run(args = ARGV)
  parent_result = super(args)
  return parent_result unless parent_result.nil?

  init_domains
  requeue_fetching_urls(options)

  exit_code =
    if options[:daemon]
      Apollo::Planner::SmartPlanner.new(amqp, mongo, options).run(options)
    else
      0
    end

  request_exit(exit_code)
end