class SpiderBot::CLI

Public Instance Methods

crawl() click to toggle source
# File lib/spider_bot/cli.rb, line 46
# Loads bot scripts once, in the foreground.
#
# Keys read from +options+:
# * +:expire+ - when present, converted with +to_i+ and stored in the global
#   +$expire_num+.
# * +:bot+    - path of a single bot file, loaded in the calling thread.
# * +:dir+    - directory whose <tt>*_bot.rb</tt> files are each loaded in
#   their own thread; the method waits for all of them to finish.
#
# Raises RuntimeError when the +:bot+ file or the +:dir+ directory is missing.
# Errors raised while loading a file from +:dir+ are logged, not re-raised.
def crawl
  $expire_num = options[:expire].to_i if options[:expire]
  require File.join(File.expand_path('../..', __FILE__), "spider_bot/load")

  if options[:bot]
    bot_file = File.expand_path(options[:bot])
    # File.exists? / Dir.exists? were removed in Ruby 3.2; the singular
    # forms are available on every Ruby version.
    raise "Bot file not found" unless File.exist?(bot_file)
    load bot_file
  end

  if options[:dir]
    bot_dir = File.expand_path(options[:dir])
    raise "Dir is not found" unless Dir.exist?(bot_dir)

    threads = Dir.glob("#{bot_dir}/*_bot.rb").map do |file|
      Thread.new do
        begin
          SpiderBot.logger.info "loading bot file with #{file}."
          load file
        rescue Exception => e
          # Exception (not just StandardError) is rescued on purpose: `load`
          # raises ScriptError subclasses such as SyntaxError, and one broken
          # bot file must not prevent the others from being loaded.
          SpiderBot.logger.error "has errors with loading bot file #{ file }"
          SpiderBot.logger.error e.to_s
        end
      end
    end
    threads.each(&:join)
  end
end
start() click to toggle source
# File lib/spider_bot/cli.rb, line 99
# Repeatedly loads every entry of BOTDIR, each in its own thread, pausing
# between rounds. Never returns on its own.
#
# Keys read from +options+:
# * +:expire+ - when present, converted with +to_i+ and stored in the global
#   +$expire_num+.
# * +:env+    - value assigned to ENV['RACK_ENV']; 'development' when absent.
# * +:daemon+ - when set, Daemons is asked not to stay on top
#   (<tt>ontop: false</tt>); otherwise a ctrl-c hint is printed.
# * +:time+   - pause between rounds: a number optionally tagged with +d+
#   (days), +h+ (hours) or +m+ (minutes); an untagged number is seconds.
#   Defaults to 10 seconds.
# * +:random+ - when set, each pause is a random whole number of seconds
#   below the configured pause.
#
# If a pid file from a previous run exists, that run is stopped first.
def start
  puts "start....."

  $expire_num = options[:expire].to_i if options[:expire]

  ENV['RACK_ENV'] = options[:env] || 'development'

  require File.join(File.expand_path('../..', __FILE__), "spider_bot/load")

  # File.exists? was removed in Ruby 3.2; File.exist? works everywhere.
  FileUtils.mkdir_p("tmp/pids") unless File.exist?("tmp/pids")

  daemon_options = {
    app_name: 'spider',
    ontop: !options[:daemon],
    dir: 'tmp/pids',
  }

  puts "press ctrl-c exit" unless options[:daemon]

  # Must be the same path `stop` reads, otherwise a previous instance is
  # never detected.
  stop if File.exist?("tmp/pids/spider.pid")

  sleep_time = 10

  if options[:time]
    time_text = options[:time].to_s
    unit_seconds = { "d" => 60 * 60 * 24, "h" => 60 * 60, "m" => 60 }
    # /[dhm]/ rather than /[d|h|m]/: inside a character class "|" is a
    # literal, which used to match and leave the interval nil.
    unit = time_text[/[dhm]/]
    sleep_time = time_text.to_i * unit_seconds.fetch(unit, 1)
  end

  Daemons.daemonize(daemon_options)

  loop do
    threads = BOTDIR.map do |file|
      Thread.new do
        begin
          SpiderBot.logger.info "loading bot file with #{file}."
          load file
        rescue Exception => e
          # Exception (not just StandardError) is rescued on purpose: `load`
          # raises ScriptError subclasses such as SyntaxError, and one broken
          # bot file must not stop the loop.
          SpiderBot.logger.error "has errors with loading bot file #{ file }"
          SpiderBot.logger.error e.to_s
        end
        # Each worker thread stays alive for 10 more seconds after its load.
        sleep(10)
      end
    end

    threads.each(&:join)

    pause =
      if options[:random] && sleep_time > 0
        # Random#rand raises ArgumentError for 0, hence the guard above.
        Random.new.rand(sleep_time)
      else
        sleep_time
      end
    sleep(pause.to_i)
  end
end
stop() click to toggle source
# File lib/spider_bot/cli.rb, line 177
# Kills the process recorded in tmp/pids/spider.pid (relative to the current
# working directory) with signal 9 and removes the pid file.
#
# A pid file that holds no positive number, or that names a process which no
# longer exists, is treated as stale: it is removed without raising.
#
# Raises Errno::ENOENT when the pid file does not exist. Any other error from
# Process.kill (for example Errno::EPERM) propagates and leaves the pid file
# in place.
def stop
  pid_file = "tmp/pids/spider.pid"
  pid = File.read(pid_file).to_i

  begin
    # An empty or garbled pid file yields 0, and kill(9, 0) would signal the
    # caller's own process group, so only positive pids are signalled.
    Process.kill(9, pid) if pid > 0
  rescue Errno::ESRCH
    # The process is already gone; only the stale pid file remains.
  end

  File.delete(pid_file)
end
url(arg) click to toggle source
# File lib/spider_bot/cli.rb, line 26
# Crawls a single target and outputs the result.
#
# Builds <tt>Crawl.new(arg, options)</tt> and takes its +crawl_data+. When
# options[:out] is set, the data is written to that file (opened with mode
# "w"); otherwise it is printed to standard output.
#
# arg - passed unchanged as the first argument of Crawl.new.
def url(arg)
  data = Crawl.new(arg, options).crawl_data

  if options[:out]
    # The block must declare the file handle it writes to; without the
    # parameter, `file` is an undefined name.
    File.open(options[:out], "w") { |file| file.puts data }
  else
    puts data
  end
end