class Digger::Index

Public Class Methods

batch(entities, cocurrence = 1, &block) click to toggle source
# File lib/digger/index.rb, line 49
# Applies the block to every entity and returns the results in the same
# order as +entities+.
#
# When +cocurrence+ is greater than 1 the entities are handled in slices of
# that size: one thread is started per entity in the slice, and all threads
# of a slice are joined before the next slice begins. Otherwise the block is
# simply mapped over the entities one after another.
#
# @param entities [#each_slice, #size, #map] items to hand to the block
# @param cocurrence [Integer] number of threads to run per slice
# @yield [entity] called once for each entity
# @return [Array] the block's return values, positioned like their entities
# @raise [NoBlockError] when no block is given
def self.batch(entities, cocurrence = 1, &block)
  raise NoBlockError, 'No block given' unless block

  return entities.map { |ent| block.call(ent) } unless cocurrence > 1

  results = Array.new(entities.size)
  offset = 0
  entities.each_slice(cocurrence) do |group|
    workers = group.each_with_index.map do |entity, position|
      # Each thread writes to its own pre-computed slot, so results keep the
      # input order no matter which thread finishes first.
      slot = offset + position
      Thread.new(entity) { |ent| results[slot] = block.call(ent) }
    end
    workers.each(&:join)
    offset += cocurrence
  end
  results
end
slow_down(entities, conf = {}, &block) click to toggle source
# File lib/digger/index.rb, line 20
# Calls the block for each entity in turn, pausing between calls and backing
# off when the block raises.
#
# After a successful call the method sleeps for a random duration drawn from
# +:sleep_range_seconds+ and moves on to the next entity. When the block
# raises a StandardError the same entity is retried: the failure counter
# (shared across all entities, never reset) is incremented, +:when_fail+ is
# invoked, and the method sleeps for counter * +:fail_unit_seconds+. Once the
# counter reaches +:fail_max_cnt+ processing stops and the results gathered
# so far are returned.
#
# @param entities [#length, #[]] items to hand to the block, indexed from 0
# @param conf [Hash] overrides for the defaults listed in the method body
# @yield [entity] called for each entity (again after a failure)
# @return [Array] return values of the successful block calls, in order
# @raise [NoBlockError] when no block is given
def self.slow_down(entities, conf = {}, &block)
  raise NoBlockError, 'No block given' unless block

  defaults = {
    sleep_range_seconds: 4...10, # range for the random wait after a success
    fail_max_cnt: 10, # maximum number of failures before giving up
    fail_unit_seconds: 10 * 60, # wait unit after a failure (times the failure count)
    when_fail: ->(ent, e, failed_cnt) {}
  }
  settings = defaults.merge(conf)
  failures = 0
  position = 0
  collected = []
  until position >= entities.length
    begin
      collected << block.call(entities[position])
    rescue StandardError => e
      failures += 1
      settings[:when_fail].call(entities[position], e, failures)
      break if failures >= settings[:fail_max_cnt]

      # Linear back-off; position is left untouched so this entity is retried.
      sleep(failures * settings[:fail_unit_seconds])
    else
      position += 1
      sleep(rand(settings[:sleep_range_seconds]))
    end
  end
  collected
end

Public Instance Methods

pattern_applied_url(arg) click to toggle source
# File lib/digger/index.rb, line 16
# Fills the wildcards of +pattern+ with the given values.
#
# The n-th literal '*' in the pattern (counting from 0) is replaced by
# +arg[n]+.
#
# @param arg [#[]] values looked up by wildcard position
# @return the pattern with every '*' substituted
def pattern_applied_url(arg)
  wildcards = pattern.gsub('*')
  wildcards.with_index { |_star, position| arg[position] }
end
process(cocurrence = 1, &block) click to toggle source
# File lib/digger/index.rb, line 5
# Runs the block against every URL of this index by delegating to
# Index.batch.
#
# @param cocurrence [Integer] forwarded unchanged to Index.batch
# @yield [url] forwarded unchanged to Index.batch
# @return whatever Index.batch returns
def process(cocurrence = 1, &block)
  targets = urls
  Index.batch(targets, cocurrence, &block)
end
urls() click to toggle source
# File lib/digger/index.rb, line 9
# Builds, and memoizes in @urls, one URL per combination of argument values.
#
# Every entry of +args+ that responds to +each+ is expanded with +to_a+; any
# other entry is treated as a one-element list. The combinations are the
# Cartesian product of those lists, and each combination is passed to
# +pattern_applied_url+.
#
# With no args at all there is exactly one (empty) combination, so the
# result holds a single URL built from the pattern alone instead of failing
# with NoMethodError on nil.
#
# @return [Array] the results of pattern_applied_url, one per combination
def urls
  @urls ||= begin
    first, *rest = args.map { |a| a.respond_to?(:each) ? a.to_a : [a] }
    # The product of zero lists is a single empty combination.
    combinations = first ? first.product(*rest) : [[]]
    combinations.map { |arg| pattern_applied_url(arg) }
  end
end