class Apollo::Agent::CrawlerAgent

Attributes

declarations[RW]

Public Class Methods

new(amqp, opts={})
# File lib/apollo_crawler/agent/crawler_agent.rb, line 33
# Sets up the crawler agent: declares the AMQP entities, binds
# "crawler.queue" to the "crawler" exchange and subscribes to it.
#
# For every delivery the JSON payload is parsed, the response body is turned
# into a Nokogiri HTML document, and an instance of the class named by
# request['crawler_name'] extracts data and links from that document. When
# the delivery carries a reply_to, the request, response, data and links are
# published as JSON to the declared exchange registered under that name.
#
# @param amqp [#create_channel] object used to open the channel
#   (presumably an AMQP connection/session -- confirm against the caller)
# @param opts [Hash] options; a truthy :verbose enables progress output, and
#   the whole hash is forwarded to Apollo::Agent.declare_entities
def initialize(amqp, opts={})
  puts "Initializing crawler agent..." if opts[:verbose]

  # Declarations
  channel = amqp.create_channel
  self.declarations = Apollo::Agent.declare_entities(channel, opts)

  # Binding
  queue = declarations[:queues]["crawler.queue"]
  exchange = declarations[:exchanges]["crawler"]

  queue.bind(exchange).subscribe do |_delivery_info, metadata, payload|
    msg = JSON.parse(payload)

    request = msg['request']
    response = msg['response']
    url = request["url"]

    puts "CrawlerAgent: Received - '#{url}', metadata #{metadata.inspect}" if opts[:verbose]

    doc = Nokogiri::HTML(response['body'])

    # NOTE(review): the class name is taken from the message payload, so
    # constantize resolves whatever constant the sender names. Consider
    # restricting it to a known set of crawler classes.
    crawler = request['crawler_name'].constantize.new
    data = crawler.extract_data(doc)
    links = crawler.extract_links(doc)

    # Nothing to publish when the sender did not ask for a reply.
    reply_to = metadata[:reply_to]
    next if reply_to.nil?

    # Only exchanges known to the declarations can be replied to; an
    # unknown name used to crash the handler with NoMethodError on nil.
    reply_exchange = declarations[:exchanges][reply_to]
    if reply_exchange.nil?
      warn "CrawlerAgent: No exchange declared for reply_to '#{reply_to}', dropping result for '#{url}'"
      next
    end

    reply = {
      :request => request,
      :response => response,
      :data => data,
      :links => links
    }

    reply_exchange.publish(reply.to_json)
  end
end