module SharedCount::Cli

Constants

JOIN_TIMEOUT
LINES_PER_ITERATION
MAX_CONCURRENCY
MAX_RETRIES
SLEEP_TIME
VERSION

Attributes

concurrency[W]
iteration_size[W]

Public Class Methods

concurrency() click to toggle source
# File lib/shared_count/cli.rb, line 119
# Returns the configured concurrency value, falling back to (and remembering)
# MAX_CONCURRENCY when nothing truthy has been assigned yet.
def concurrency
  @concurrency = MAX_CONCURRENCY unless @concurrency
  @concurrency
end
configure() { |self| ... } click to toggle source
# File lib/shared_count/cli.rb, line 115
# Yields the receiver to the given block so that its writable settings can be
# assigned in one place.
#
# Returns whatever the block returns.
def configure
  yield(self)
end
iteration_size() click to toggle source
# File lib/shared_count/cli.rb, line 123
# Returns the configured iteration size, falling back to (and remembering)
# LINES_PER_ITERATION when nothing truthy has been assigned yet.
def iteration_size
  @iteration_size = LINES_PER_ITERATION unless @iteration_size
  @iteration_size
end
run(lines) click to toggle source
# File lib/shared_count/cli.rb, line 23
# Looks up the share counts of every URL in +lines+ and returns them as CSV.
#
# The URLs are handled in batches of +iteration_size+ entries. For each batch
# up to +concurrency+ worker threads drain a shared queue; every URL is reduced
# to "scheme://host" before it is handed to SharedCountApi::Client.
#
# lines - Array of URL Strings. A trailing newline on an entry is ignored and
#         the entries themselves are never modified.
#
# Returns a CSV String made of the header row, one empty row, and then one row
# per successful lookup (URL, the non-Facebook response values, then the
# Facebook values). Rows appear in the order the workers finished them.
def run(lines)
  configure_shared_count_client
  logger.info "Using #{concurrency} threads"
  logger.info "The iteration size is #{iteration_size} URLs"

  results = Queue.new

  lines.each_slice(iteration_size).each_with_index do |batch, iteration|
    # NOTE(review): progress is logged at error level, presumably so that it
    # shows up with the default log level - confirm before changing.
    logger.error "Iteration ##{iteration + 1}"
    queue = Queue.new
    batch.each { |url| queue.push(url) }

    # Honour the configured concurrency (the value announced above) and never
    # start more workers than there are URLs in this batch.
    thread_count = [concurrency, batch.length].min

    threads = Array.new(thread_count) do |thread_id|
      Thread.new(thread_id) do |id|
        loop do
          # Non-blocking pop: an empty queue means this worker is done.
          raw = begin
            queue.pop(true)
          rescue ThreadError
            nil
          end
          break unless raw

          # Work on a copy so the caller's (possibly frozen) string is untouched.
          url = raw.chomp

          begin
            uri = URI(url)
          rescue URI::InvalidURIError => err
            # One unparsable line must not take the whole worker down.
            logger.error "[Thread ##{id}] - skipping invalid URL '#{url}' - #{err.inspect}"
            next
          end

          host = uri.host || url[/\Ahttps?:\/\/([^\/]+)/, 1]
          url = "#{uri.scheme}://#{host}"

          # The retry budget is counted per URL.
          attempts = 0
          response = nil
          begin
            response = SharedCountApi::Client.new(url).response
          rescue SharedCountApi::Error
            logger.error "[Thread ##{id}] - error while processing '#{url}'"
          rescue => err
            logger.error "[Thread ##{id}] - error while processing '#{url}', retry: ##{attempts} - #{err.inspect}"
            attempts += 1
            sleep(SLEEP_TIME)
            retry if attempts <= MAX_RETRIES

            # Out of retries: give the URL back to the queue for another worker
            # and stop this one.
            queue.push(url)
            break
          end

          if response
            logger.debug "[Thread ##{id}] - #{url}"

            facebook_metrics = response.delete("Facebook")
            facebook_metrics = {} unless facebook_metrics.is_a?(Hash)
            values = response.values.unshift(url)
            results.push(values.concat(facebook_metrics.values))
          else
            logger.warn "[Thread ##{id}] - no response for '#{url}'"
          end
        end
      end
    end

    threads.each_with_index do |worker, id|
      begin
        # Joining re-raises an exception that terminated the worker.
        worker.join(JOIN_TIMEOUT)
      rescue => err
        logger.error "[Thread ##{id}] - error while joining main thread: #{err.inspect}"
        logger.error "[Thread ##{id}] - #{err.backtrace.join("\n")}"
      end
    end
  end

  CSV.generate do |csv|
    csv << %w(URL StumbleUpon Reddit Delicious GooglePlusOne Buzz Twitter Diggs Pinterest LinkedIn commentsbox_count click_count total_count comment_count like_count share_count)
    csv << []
    # This is the only consumer, so a non-empty queue never blocks on pop.
    csv << results.pop until results.empty?
  end
end

Private Class Methods

configure_shared_count_client() click to toggle source
# File lib/shared_count/cli.rb, line 135
# Hands the API key found in the SHARED_COUNT_APIKEY environment variable to
# the SharedCountApi configuration (nil when the variable is not set).
def configure_shared_count_client
  SharedCountApi.configure { |settings| settings.apikey = ENV["SHARED_COUNT_APIKEY"] }
end
logger() click to toggle source
# File lib/shared_count/cli.rb, line 129
# Returns the memoized Logger writing to "shared_count-cli.log".
#
# On first use the level is set to DEBUG when the DEBUG environment variable
# is present, and to ERROR otherwise.
def logger
  return @logger if @logger

  log = Logger.new("shared_count-cli.log")
  log.level = ENV["DEBUG"] ? Logger::DEBUG : Logger::ERROR
  @logger = log
end