class SpiderBot::CLI
Public Instance Methods
crawl()
click to toggle source
# File lib/spider_bot/cli.rb, line 46
# Runs bot files once: the single file named by options[:bot] and/or every
# "*_bot.rb" file found directly inside the directory named by options[:dir].
#
# Options read:
#   :expire - when present, stored as an Integer in the global $expire_num
#   :bot    - path of one bot file to load
#   :dir    - directory whose "*_bot.rb" files are each loaded in its own thread
#
# Raises RuntimeError when the bot file or the directory does not exist.
def crawl
  $expire_num = options[:expire].to_i if options[:expire]
  require File.join(File.expand_path('../..',__FILE__), "spider_bot/load")

  if options[:bot]
    bot_file = File.expand_path(options[:bot])
    # File.exists? was removed in Ruby 3.2; File.exist? is the supported name.
    raise "Bot file not found" unless File.exist?(bot_file)
    load bot_file
  end

  if options[:dir]
    bot_dir = File.expand_path(options[:dir])
    raise "Dir is not found" unless Dir.exist?(bot_dir)

    threads = Dir.glob("#{bot_dir}/*_bot.rb").map do |file|
      Thread.new do
        begin
          SpiderBot.logger.info "loading bot file with #{file}."
          load file
        # Exception (not StandardError) is rescued on purpose: `load` reports a
        # broken bot file through ScriptError subclasses (SyntaxError,
        # LoadError), which a bare `rescue` would not catch. One bad bot is
        # logged and the remaining bots keep running.
        rescue Exception => e
          SpiderBot.logger.error "has errors with loading bot file #{ file }"
          SpiderBot.logger.error e.to_s
        end
      end
    end

    # Wait until every bot file has finished before returning.
    threads.each { |t| t.join }
  end
end
start()
click to toggle source
# File lib/spider_bot/cli.rb, line 99
# Starts the spider loop: every file in BOTDIR is loaded in its own thread,
# then the method sleeps for the configured interval and repeats forever.
#
# Options read:
#   :expire - when present, stored as an Integer in the global $expire_num
#   :env    - value for ENV['RACK_ENV'] ('development' when absent)
#   :daemon - when set, :ontop is false in the options given to Daemons.daemonize
#   :time   - pause between rounds; an Integer string optionally containing a
#             unit letter: "d" (days), "h" (hours) or "m" (minutes). Without a
#             unit the number is taken as seconds. Defaults to 10 seconds.
#   :random - when set, each pause is a random number of seconds below the
#             configured interval
#
# This method does not return; it loops until the process is terminated.
def start
  puts "start....."
  $expire_num = options[:expire].to_i if options[:expire]
  ENV['RACK_ENV'] = options[:env] || 'development'

  require File.join(File.expand_path('../..',__FILE__), "spider_bot/load")

  # File.exists? was removed in Ruby 3.2; File.exist? is the supported name.
  FileUtils.mkdir_p("tmp/pids") unless File.exist?("tmp/pids")

  daemon_options = {
    app_name: 'spider',
    ontop: true,
    dir: 'tmp/pids',
  }

  if options[:daemon]
    daemon_options[:ontop] = false
  else
    puts "press ctrl-c exit"
  end

  # The original check looked at "tmp/spider.pid", a path nothing else in this
  # class uses, so a previous instance was never stopped. Use the same pid
  # file that `stop` reads.
  pid_file = "tmp/pids/spider.pid"
  if File.exist?(pid_file)
    begin
      stop
    rescue Errno::ESRCH
      # The recorded process is already gone; drop the stale pid file so the
      # new instance can start cleanly.
      File.delete(pid_file) if File.exist?(pid_file)
    end
  end

  sleep_time = 10
  if option_time = options[:time]
    # /[d|h|m]/ also matched a literal "|", which selected no unit and turned
    # the interval into nil; the character class needs only the unit letters.
    unit = option_time.match(/[dhm]/)
    seconds_per_unit =
      case unit && unit[0]
      when "d" then 60 * 60 * 24
      when "h" then 60 * 60
      when "m" then 60
      else 1
      end
    sleep_time = option_time.to_i * seconds_per_unit
  end

  Daemons.daemonize(daemon_options)

  loop do
    threads = BOTDIR.map do |file|
      Thread.new do
        begin
          SpiderBot.logger.info "loading bot file with #{file}."
          load file
        # Exception (not StandardError) is rescued on purpose: `load` reports a
        # broken bot file through ScriptError subclasses (SyntaxError,
        # LoadError), which a bare `rescue` would not catch. One bad bot must
        # not take the whole loop down.
        rescue Exception => e
          SpiderBot.logger.error "has errors with loading bot file #{ file }"
          SpiderBot.logger.error e.to_s
        end
        sleep(10)
      end
    end

    threads.each { |t| t.join }

    if options[:random] && sleep_time > 0
      # Random#rand raises ArgumentError for a zero or negative bound, hence
      # the sleep_time > 0 guard above.
      sleep(Random.new.rand(sleep_time).to_i)
    else
      sleep(sleep_time.to_i)
    end
  end
end
stop()
click to toggle source
# File lib/spider_bot/cli.rb, line 177
# Stops the spider process recorded in tmp/pids/spider.pid by sending it
# signal 9 (SIGKILL), then removes the pid file.
#
# When the pid file does not exist a short notice is printed and nothing else
# happens. A pid file that points at a process which no longer exists is
# simply removed.
def stop
  pid_file = "tmp/pids/spider.pid"

  unless File.exist?(pid_file)
    puts "spider is not running (#{pid_file} not found)"
    return
  end

  pid = File.read(pid_file).to_i

  begin
    # An empty or corrupt pid file yields 0 from to_i, and signalling pid 0
    # targets the caller's whole process group, so only positive pids are
    # ever signalled.
    Process.kill(9, pid) if pid > 0
  rescue Errno::ESRCH
    # The process is already gone; the pid file is stale and is removed below.
  end

  File.delete(pid_file)
end
url(arg)
click to toggle source
# File lib/spider_bot/cli.rb, line 26
# Crawls a single address and emits the crawled data.
#
# arg - value handed to Crawl.new together with the command options.
#
# The result of Crawl#crawl_data is written to the file named by
# options[:out] when that option is given (the file is opened in "w" mode, so
# existing content is replaced); otherwise it is printed to standard output.
def url(arg)
  data = Crawl.new(arg, options).crawl_data

  if options[:out]
    # The block must receive the opened file; the original block had no
    # parameter, so `file` was undefined and raised NameError.
    File.open(options[:out], "w") { |file| file.puts data }
  else
    puts data
  end
end