module Croque::Aggregator

Public Class Methods

aggregate(date) click to toggle source
# File lib/croque/aggregator.rb, line 4
# Runs the whole aggregation pipeline for +date+.
#
# The three steps run strictly in this order: previously stored output for
# the date is removed, the per-hour CSV files are written, and finally the
# ranking CSV is generated.
#
# @param date the day to aggregate; handed unchanged to every step
# @return the return value of generate_ranking
def aggregate(date)
  # 1. drop output stored for this date
  remove_files(date)
  # 2. write one CSV per hour
  aggregate_per_hour(date)
  # 3. build the ranking from the hourly CSVs
  generate_ranking(date)
end
aggregate_per_hour(date) click to toggle source
# File lib/croque/aggregator.rb, line 13
# Scans every log file and writes one CSV per hour for +date+.
#
# For each file returned by log_files that is not skippable?, the lines
# matching date_matcher(date) are collected and handed to create_csv once
# for every entry of hours.
#
# The file is streamed with File.foreach in slices of +linage+ lines rather
# than through `head -n ... | tail -n ...`: with the shell pipeline a short
# final chunk made `tail` return lines that already belonged to the previous
# chunk (duplicating them), every chunk re-read the file from the start, and
# the file name was interpolated into a shell command unescaped.
#
# @param date the day whose log lines are aggregated
# @return the return value of the final log call
def aggregate_per_hour(date)
  log("aggregate logs per hour on #{date} start")
  # number of lines handled (and logged) per slice
  linage = 1000
  log_files.each do |file|
    log("check skippable of #{file}")
    next if skippable?(date, file)
    log("aggregate logs of #{file}")
    # build the date regexp once per file instead of once per line
    matcher = date_matcher(date)
    # first pass: total number of lines, only used in the progress message
    line_count = File.foreach(file).count
    lines = []
    # second pass: keep the lines belonging to the requested date
    File.foreach(file).each_slice(linage).with_index do |fragment_lines, chunk|
      log("aggregate logs for #{chunk * linage}-#{(chunk + 1) * linage} in #{line_count} on #{date}")
      lines.concat(fragment_lines.select { |line| line.match(matcher) })
    end
    hours.each do |hour|
      # create csv file
      log("create csv for #{date} #{hour} hour")
      create_csv(date, hour, lines)
    end
  end
  log("aggregate logs per hour on #{date} end")
end
all() click to toggle source
# File lib/croque/aggregator.rb, line 77
# Lists the dates found under store_path.
#
# Every entry globbed from store_path whose path contains a
# YYYY-MM-DD pattern is kept, and its basename is parsed with Date.parse.
#
# @return [Array<Date>] one Date per matching entry
def all
  dated_paths = Dir.glob(store_path + '*').select do |entry|
    entry =~ /\d{4}\-\d{2}\-\d{2}/
  end
  dated_paths.map { |entry| Date.parse(File.basename(entry)) }
end
generate_ranking(date) click to toggle source
# File lib/croque/aggregator.rb, line 47
# Builds the ranking CSV for +date+ from the hourly CSV files.
#
# Each existing hourly CSV is read; column 0 is taken as the line id and
# column 1 as the processing time. Rows for which low? is true are dropped.
# The remaining rows are sorted by processing time, highest first, and
# written as [date, hour, uuid, processing_time] through store_csv to
# ranking_path(date).
#
# @param date the day whose ranking is generated
# @return the return value of the final log call
def generate_ranking(date)
  log("generate ranking on #{date} start")
  array = []
  hours.each do |hour|
    log("generate array for ranking in #{date} #{hour} hour")
    path = csv_path(date, hour)
    # hours without a CSV contribute nothing
    next unless File.exist?(path)
    # File.read closes the handle; carriage returns are stripped before parsing
    csv_data = File.read(path).gsub(/\r/, "")
    CSV.parse(csv_data).each do |line|
      uuid = line[0]
      processing_time = line[1].to_f
      next if low?(processing_time)
      array << [date, hour, uuid, processing_time]
    end
  end
  log("sort array for ranking on #{date}")
  # Processing Time Desc
  array = array.sort { |a, b| b[3] <=> a[3] }
  log("generate ranking csv on #{date}")
  # Options are splatted as keywords: CSV.generate takes keyword options and
  # rejects a positional Hash on Ruby 3.
  data = CSV.generate("", **csv_option) do |csv|
    array.each { |line| csv << line }
  end
  store_csv(ranking_path(date), data)
  log("generate ranking on #{date} end")
end

Private Class Methods

convert_matcher(matcher:, date: nil, hour: nil, severity: nil) click to toggle source
# File lib/croque/aggregator.rb, line 186
# Fills the placeholders of a matcher template and returns a new Regexp.
#
# The literal words "severity", "hour" and "date" in the source of +matcher+
# are replaced (in that order) by either the concrete value given or a
# generic pattern:
#   date     -> date.to_s with "-" escaped, or \d{4}\-\d{2}\-\d{2}
#   hour     -> hour formatted as two digits, or \d{2}
#   severity -> severity as given, or #\d+
#
# @param matcher [Regexp] template whose source holds the placeholders
# @param date value converted with to_s, or nil for "any date"
# @param hour value formatted with "%02d", or nil for "any hour"
# @param severity [String, nil] replacement text, or nil for "any severity"
# @return [Regexp] built from the substituted source (the options of the
#   original Regexp are not carried over)
def convert_matcher(matcher:, date: nil, hour: nil, severity: nil)
  # Regexp => String
  source = matcher.source
  date_source = (date ? date.to_s : "\\d{4}-\\d{2}-\\d{2}").gsub(/\-/, "\\-")
  hour_source = hour ? format("%02d", hour) : "\\d{2}"
  severity_source = severity || "#\\d+"
  # substitute the placeholders, then String => Regexp
  substituted = source
    .gsub(/severity/, severity_source)
    .gsub(/hour/, hour_source)
    .gsub(/date/, date_source)
  Regexp.new(substituted)
end
create_csv(date, hour, lines) click to toggle source
# File lib/croque/aggregator.rb, line 244
# Writes the CSV for one hour of +date+.
#
# From +lines+, the ones matching hour_matcher(hour) are kept. For every
# request start found by get_start_indexes, the matching end line is looked
# up with get_end_index; starts without an end line are skipped. Each
# completed request becomes one row:
#   uuid, processing time, views time, ActiveRecord time,
#   full path, path info, params, body (request lines joined by tabs)
# The generated text is handed to store_csv for csv_path(date, hour).
#
# @param date the day being aggregated
# @param hour the hour being aggregated
# @param lines [Array<String>] log lines already filtered by date
# @return the return value of store_csv
def create_csv(date, hour, lines)
  path = csv_path(date, hour)
  # build the hour regexp once instead of once per line
  per_hour = hour_matcher(hour)
  lines_per_hour = lines.select { |line| line.match(per_hour) }
  # get start line of request
  start_indexes = get_start_indexes(lines_per_hour)
  # Options are splatted as keywords: CSV.generate takes keyword options and
  # rejects a positional Hash on Ruby 3.
  data = CSV.generate("", **csv_option) do |csv|
    start_indexes.each do |start_index|
      start_line = lines_per_hour[start_index]
      severity = get_severity(start_line)
      end_index = get_end_index(date, severity, start_index, lines_per_hour)
      # a request without an end line yields no row
      next unless end_index
      end_line = lines_per_hour[end_index]
      full_path = get_full_path(start_line)
      lines_per_severity = get_lines_per_severity(date, start_index, end_index, severity, lines_per_hour)
      csv << [
        SecureRandom.uuid,                # Line ID
        get_processing_time(end_line),    # Processing Time
        get_views_time(end_line),         # Views Time
        get_active_record_time(end_line), # ActiveRecord Time
        full_path,                        # Full path
        get_path_info(full_path),         # Path Info
        get_params(full_path),            # Params
        lines_per_severity.join("\t")     # Body
      ]
    end
  end
  store_csv(path, data)
end
csv_option() click to toggle source
# File lib/croque/aggregator.rb, line 222
# Options used for every CSV this module generates: CRLF row separator,
# no header row, and every field quoted.
#
# @return [Hash] a fresh options hash on each call
def csv_option
  { row_sep: "\r\n", headers: false, write_headers: true, force_quotes: true }
end
csv_path(date, hour) click to toggle source
# File lib/croque/aggregator.rb, line 218
# Location of the hourly CSV: <store_path>/<date>/<hour>.csv
#
# @param date converted with to_s to name the directory
# @param hour interpolated into the file name
# @return the result of store_path.join
def csv_path(date, hour)
  day_dir = date.to_s
  file_name = "#{hour}.csv"
  store_path.join(day_dir, file_name)
end
date_matcher(date) click to toggle source
# File lib/croque/aggregator.rb, line 153
# Regexp for log lines of one specific date, built from the configured
# matcher template with only the date filled in.
#
# @param date the day to match
# @return the result of convert_matcher
def date_matcher(date)
  template = Croque.config.matcher
  convert_matcher(matcher: template, date: date)
end
dir_path() click to toggle source
# File lib/croque/aggregator.rb, line 94
# Directory that holds the log files, as configured in
# Croque.config.log_dir_path.
def dir_path
  config = Croque.config
  config.log_dir_path
end
end_matcher() click to toggle source
# File lib/croque/aggregator.rb, line 178
# Matcher identifying the line that ends a request, as configured in
# Croque.config.end_matcher.
def end_matcher
  config = Croque.config
  config.end_matcher
end
except_path_matcher() click to toggle source
# File lib/croque/aggregator.rb, line 182
# Matcher for start lines that must be ignored, as configured in
# Croque.config.except_path_matcher.
def except_path_matcher
  config = Croque.config
  config.except_path_matcher
end
get_active_record_time(line) click to toggle source
# File lib/croque/aggregator.rb, line 338
# Reads the number from an "ActiveRecord: <number>ms" token in +line+.
#
# @param line [String] log line to inspect
# @return [Float, Integer] the number rounded to one decimal, or the
#   Integer 0 when the token is absent
def get_active_record_time(line)
  found = line.match(/ActiveRecord: ([1-9]\d*|0)(\.\d+)?ms/)
  return 0 unless found
  # group 1 is the integer part, group 2 the optional fraction
  "#{found[1]}#{found[2]}".to_f.round(1)
end
get_date_from_line(line) click to toggle source
# File lib/croque/aggregator.rb, line 140
# Extracts the first YYYY-MM-DD pattern of +line+ as a Date.
#
# Relies on +present?+, which is not core Ruby (presumably supplied by
# ActiveSupport — confirm against the gem's dependencies).
#
# @param line [String, nil] log line to inspect
# @return [Date, nil] nil when the line is not present, holds no date
#   pattern, or the pattern cannot be parsed
def get_date_from_line(line)
  return unless line.present?
  found = line.match(/\d{4}\-\d{2}\-\d{2}/)
  return unless found
  begin
    Date.parse(found[0])
  rescue
    # the digits looked like a date but are not a valid one
    nil
  end
end
get_end_index(date, severity, start_index, lines) click to toggle source
# File lib/croque/aggregator.rb, line 306
# Finds the end line of the request that starts at +start_index+.
#
# The end line is the first line after +start_index+ that matches both
# end_matcher and severity_matcher(date, severity). Both matchers are built
# once up front (they do not depend on the line) and the scan stops at the
# first hit.
#
# @param date passed through to severity_matcher
# @param severity passed through to severity_matcher
# @param start_index [Integer] index of the request's start line
# @param lines [Array<String>] lines to search
# @return [Integer, nil] index of the end line, or nil when there is none
def get_end_index(date, severity, start_index, lines)
  finish = end_matcher
  same_request = severity_matcher(date, severity)
  lines.each_index.find do |index|
    start_index < index && lines[index].match(finish) && lines[index].match(same_request)
  end
end
get_full_path(line) click to toggle source
# File lib/croque/aggregator.rb, line 347
# Returns the text between the first and the last double quote of +line+,
# with every double quote removed.
#
# @param line [String] log line holding a quoted path
# @return [String]
# @raise [NoMethodError] when the line holds no quoted section
def get_full_path(line)
  quoted = line.match(/\".*\"/)[0]
  quoted.delete('"')
end
get_lines_per_severity(date, start_index, end_index, severity, lines) click to toggle source
# File lib/croque/aggregator.rb, line 314
# Returns the lines between +start_index+ and +end_index+ (inclusive) that
# match severity_matcher(date, severity).
#
# The matcher does not depend on the line, so it is built once before the
# selection.
#
# @param date passed through to severity_matcher
# @param start_index [Integer] first index of the slice
# @param end_index [Integer] last index of the slice
# @param severity passed through to severity_matcher
# @param lines [Array<String>] lines to slice
# @return [Array<String>] matching lines in their original order
def get_lines_per_severity(date, start_index, end_index, severity, lines)
  same_request = severity_matcher(date, severity)
  lines[start_index..end_index].select { |line| line.match(same_request) }
end
get_params(full_path) click to toggle source
# File lib/croque/aggregator.rb, line 355
# Returns the query string of +full_path+.
#
# The path is appended to a dummy host so URI.parse can split it.
#
# @param full_path [String] path, optionally followed by "?query"
# @return [String, nil] the query part, or nil when there is none
def get_params(full_path)
  uri = URI.parse("http://example.com#{full_path}")
  uri.query
end
get_path_info(full_path) click to toggle source
# File lib/croque/aggregator.rb, line 351
# Returns +full_path+ without its query string.
#
# The path is appended to a dummy host so URI.parse can split it.
#
# @param full_path [String] path, optionally followed by "?query"
# @return [String] the path part
def get_path_info(full_path)
  uri = URI.parse("http://example.com#{full_path}")
  uri.path
end
get_processing_time(line) click to toggle source
# File lib/croque/aggregator.rb, line 320
# Reads the number of the first "<number>ms" token in +line+.
#
# @param line [String] log line to inspect
# @return [Float, Integer] the number rounded to one decimal, or the
#   Integer 0 when no such token exists
def get_processing_time(line)
  found = line.match(/([1-9]\d*|0)(\.\d+)?ms/)
  return 0 unless found
  # group 1 is the integer part, group 2 the optional fraction
  "#{found[1]}#{found[2]}".to_f.round(1)
end
get_severity(line) click to toggle source
# File lib/croque/aggregator.rb, line 302
# Returns the first "#<digits>" token of +line+.
#
# @param line [String] log line to inspect
# @return [String] the token including the leading "#"
# @raise [NoMethodError] when the line holds no such token
def get_severity(line)
  token = line.match(/#\d+/)
  token[0]
end
get_start_indexes(lines) click to toggle source
# File lib/croque/aggregator.rb, line 295
# Returns the indexes of the lines that start a request: lines matching
# start_matcher but not except_path_matcher.
#
# @param lines [Array<String>] lines to inspect
# @return [Array<Integer>] indexes in ascending order
def get_start_indexes(lines)
  lines.each_index.select do |index|
    candidate = lines[index]
    candidate.match(start_matcher) && !candidate.match(except_path_matcher)
  end
end
get_views_time(line) click to toggle source
# File lib/croque/aggregator.rb, line 329
# Reads the number from a "Views: <number>ms" token in +line+.
#
# @param line [String] log line to inspect
# @return [Float, Integer] the number rounded to one decimal, or the
#   Integer 0 when the token is absent
def get_views_time(line)
  found = line.match(/Views: ([1-9]\d*|0)(\.\d+)?ms/)
  return 0 unless found
  # group 1 is the integer part, group 2 the optional fraction
  "#{found[1]}#{found[2]}".to_f.round(1)
end
headers() click to toggle source
# File lib/croque/aggregator.rb, line 231
# Column titles for the hourly CSV, listed in column order (index 0 to 7).
#
# @return [Array<String>]
def headers
  titles = []
  titles << "Line ID"                # 0
  titles << "Processing Time (ms)"   # 1
  titles << "Views Time (ms)"        # 2
  titles << "ActiveRecord Time (ms)" # 3
  titles << "Full Path"              # 4
  titles << "Path Info"              # 5
  titles << "Params"                 # 6
  titles << "Body"                   # 7
  titles
end
hour_matcher(hour) click to toggle source
# File lib/croque/aggregator.rb, line 160
# Regexp for log lines of one specific hour, built from the configured
# hour matcher template with only the hour filled in.
#
# @param hour the hour to match
# @return the result of convert_matcher
def hour_matcher(hour)
  template = Croque.config.hour_matcher
  convert_matcher(matcher: template, hour: hour)
end
hours() click to toggle source
# File lib/croque/aggregator.rb, line 214
# Every hour of a day.
#
# @return [Array<Integer>] 0 through 23
def hours
  Array(0..23)
end
log(message) click to toggle source
# File lib/croque/aggregator.rb, line 363
# Sends +message+ to the configured logger at info level.
#
# The call goes through +try+, which is not core Ruby (presumably from
# ActiveSupport — confirm against the gem's dependencies).
#
# @param message [String] text to log
# @return the return value of logger.try(:info, message)
def log(message)
  logger = Croque.config.logger
  logger.try(:info, message)
end
log_file_matcher() click to toggle source
# File lib/croque/aggregator.rb, line 106
# Matcher selecting which files of the log directory are read, as
# configured in Croque.config.log_file_matcher.
def log_file_matcher
  config = Croque.config
  config.log_file_matcher
end
log_files() click to toggle source
# File lib/croque/aggregator.rb, line 88
# Lists the entries directly under dir_path whose path matches
# log_file_matcher.
#
# @return [Array<String>] matching paths as returned by Dir.glob
def log_files
  candidates = Dir.glob(dir_path + '*')
  candidates.select { |candidate| candidate.match(log_file_matcher) }
end
low?(time) click to toggle source
# File lib/croque/aggregator.rb, line 359
# Tells whether +time+ is below the configured Croque.config.lower_time.
#
# @param time [Numeric] processing time to test
# @return [Boolean] true when time < lower_time
def low?(time)
  threshold = Croque.config.lower_time
  time < threshold
end
ranking_path(date) click to toggle source
# File lib/croque/aggregator.rb, line 102
# Location of the ranking CSV: <store_path>/<date>/ranking.csv
#
# @param date converted with to_s to name the directory
# @return the result of store_path.join
def ranking_path(date)
  day_dir = date.to_s
  store_path.join(day_dir, "ranking.csv")
end
remove_files(date) click to toggle source
# File lib/croque/aggregator.rb, line 110
# Deletes the directory <store_path>/<date> when it exists.
#
# @param date converted with to_s to name the directory
# @return nil when the directory is absent, otherwise whatever
#   FileUtils.remove_dir returns
def remove_files(date)
  target = store_path.join(date.to_s)
  FileUtils.remove_dir(target) if Dir.exist?(target)
end
severity_matcher(date, severity) click to toggle source
# File lib/croque/aggregator.rb, line 167
# Regexp for log lines carrying one specific severity token, built from the
# configured matcher template with only the severity filled in.
#
# @param date accepted but not forwarded to convert_matcher
# @param severity the token to match
# @return the result of convert_matcher
def severity_matcher(date, severity)
  template = Croque.config.matcher
  convert_matcher(matcher: template, severity: severity)
end
skippable?(date, file) click to toggle source
# File lib/croque/aggregator.rb, line 117
# Tells whether +file+ can be skipped when aggregating +date+.
#
# The date of the first matching line among the first 100 lines and the date
# of the last matching line among the last 100 lines are read; the file is
# skippable unless both dates were found and +date+ lies in that range.
#
# head/tail are started with an argument vector (no shell), so a file name
# containing spaces or shell metacharacters is passed through verbatim.
#
# @param date the day being aggregated
# @param file [String] path of the log file
# @return [Boolean] true when the file should not be read
def skippable?(date, file)
  # generic matcher: any date, any hour, any severity
  matcher = convert_matcher(matcher: Croque.config.matcher)
  # head: first matching line of the first 100 lines
  head_lines = IO.popen(['head', '-n', '100', file.to_s], &:read).lines
  head_date = get_date_from_line(head_lines.find { |line| line.match(matcher) })
  # tail: last matching line of the last 100 lines
  tail_lines = IO.popen(['tail', '-n', '100', file.to_s], &:read).lines
  tail_date = get_date_from_line(tail_lines.reverse_each.find { |line| line.match(matcher) })
  # skippable unless the requested date falls inside the file's date range
  !(head_date && tail_date && (head_date..tail_date).include?(date))
end
start_matcher() click to toggle source
# File lib/croque/aggregator.rb, line 174
# Matcher identifying the line that starts a request, as configured in
# Croque.config.start_matcher.
def start_matcher
  config = Croque.config
  config.start_matcher
end
store_csv(path, data) click to toggle source
# File lib/croque/aggregator.rb, line 285
# Appends +data+ to the file at +path+, creating the parent directory first
# when it does not exist yet.
#
# @param path location of the CSV file
# @param data [String] text to append
# @return the return value of the write call
def store_csv(path, data)
  parent = File.dirname(path)
  # make directory
  FileUtils.mkdir_p(parent) unless Dir.exist?(parent)
  File.open(path, 'a') { |io| io.write data }
end
store_path() click to toggle source
# File lib/croque/aggregator.rb, line 98
# Root location of the generated CSV files, as configured in
# Croque.config.store_path.
def store_path
  config = Croque.config
  config.store_path
end