class Splog::LogParser

Attributes

client[R]

Reader for the Mongo client, which stays nil until the first log entry is persisted.

coll[R]

Reader for the Mongo collection; all database writes go to the configured @coll.

config[RW]
options[RW]
pattern_name[RW]

Public Class Methods

new() click to toggle source
# File lib/splog.rb, line 23
# Sets every instance variable the parser relies on to an empty default so
# that readers such as +coll+ and the lazily-built patterns are always defined.
def initialize
  # Yaml config options
  @config = {}

  # Progress bar to create if in verbose mode
  @progress_bar = nil
  @line_count = nil

  # Command line options
  @options = {
    :append => true
  }

  # Defines how each line is split apart with the array of regex
  @pattern_name = nil
  @pattern = nil
  # Shell command string built by set_pattern, used for cheap match tests
  @pattern_egrep = nil

  # Optional look-ahead pattern and its egrep command, built by set_pattern_match_forward
  @pattern_match_forward = nil
  @pattern_match_forward_egrep = nil

  # Defines how each regex group is mapped to a data type
  @mapping_name = nil
  @mapping = nil

  # Mongo client and collection, nil by default until the first log entry is persisted
  @client = nil
  @coll = nil
end

Public Instance Methods

cli(args=nil) click to toggle source
# File lib/splog.rb, line 426
# Command line entry point.
#
# Parses the switches in +args+ (or ARGV when +args+ is nil or empty), loads
# the dot file, compiles the pattern and mapping, and then streams the parsed
# lines to the selected output, optionally persisting each one.
#
# args - optional Array of command line arguments; it is consumed by
#        OptionParser#parse!.
#
# Returns the raw line enumerator from read_log_file when --output names
# something other than stdout/json/nothing; otherwise the return value is not
# meaningful.  Calls +exit+ on option errors and after printing the help.
def cli(args=nil)
  options = {
    :append => true,
    :output => 'stdout',
    :md5 => true  # By default md5 the hash as the unique identifier
  }
  opts = OptionParser.new do |parser|
    parser.banner = 'Usage: splog [options]'

    parser.separator ''
    parser.separator 'Parse logs in arbitrary formats defined in ~/.splog.yml:'

    parser.on('-p', '--pattern STR', 'Mapping name defined in ~/.splog.yml') do |setting|
      options[:pattern_name] = setting
    end

    parser.on('-f', '--file PATH', 'File to parse') do |setting|
      options[:file_name] = setting ? File.expand_path(setting) : setting
    end

    parser.on('-c', '--config PATH', 'Optional dot file path.  Defaults to ~/.splog.yml') do |setting|
      options[:dot_file_name] = setting ? File.expand_path(setting) : setting
    end

    parser.on('-o', '--output [stdout|filename]', 'Defaults to stdout, if specifying just -o then defaults to no standard output.') do |setting|
      options[:output] = setting ? setting : nil
    end

    parser.on('--no-append', "When a line doesn't match the regex, don't append it to the previously matched line. The default is to append.") do |setting|
      options[:append] = setting.nil?
    end

    parser.on('-k', '--key STR', 'The unique business key to use as the database id.  If none specified an automatic id will be generated.') do |setting|
      options[:key] = setting
    end

    parser.on('-d', '--database STR', 'Specify a database reference defined in ~/.splog.yml to write to') do |ext|
      options[:db_ref_name] = ext || nil
    end

    parser.on('--db STR', 'Override the Mongo database defined in ~/.splog.yml') do |ext|
      options[:mongo_db] = ext || nil
    end

    parser.on('--coll STR', 'Override the Mongo collection defined in ~/.splog.yml') do |ext|
      options[:mongo_coll] = ext || nil
    end

    parser.on('--line-count N', Integer, 'If reading from STDIN (using pipes) Setting the line count will allow splog to better log th progress in verbose mode.  Ex. --line-count `wc -l some.log`') do |ext|
      options[:line_count] = ext || nil
    end

    parser.on('--[no-]md5', 'When saving to mongo md5 the hash and set that to the _id.  This means repeated parses of the same log file should be idempotent.  Otherwise there will be duplicated lines in the database.') do |ext|
      options[:md5] = ext  # true for --md5, false for --no-md5
    end

    parser.on('-v', 'Verbose logging, recommended in conjunction with -o without any arguments.') do |ext|
      options[:verbose] = ext  # true when -v is given
    end

    parser.on_tail('-h', '--help', '--usage', 'Show this usage message and quit.') do |setting|
      puts parser.help
      exit
    end
  end

  begin
    if args && !args.empty?
      opts.parse!(args)
    else
      # With no arguments at all, fall back to showing the usage text
      ARGV << '-h' if ARGV.size == 0
      opts.parse!(ARGV)
    end
  rescue OptionParser::ParseError
    $stderr.print "Error: #{$!}\n"
    exit
  end

  if (options[:file_name] and options[:pattern_name]) or not $stdin.tty?
    @options = options

    # At this point the options are loaded so load the dot file before continuing so the config can be properly
    # loaded from the dot file and further options determined
    load_dot_file

    set_pattern(options)
    set_pattern_match_forward
    set_mapping(options)

    # Total line count: for file input it comes from `wc -l`.  For $stdin the user may supply it with
    # --line-count `wc -l <filename>`

    # Get the enum from the file
    e = nil
    if options[:file_name] and options[:pattern_name]
      e = read_log_file(options[:file_name])
      # Run wc with an argv array (no shell) so paths containing spaces or shell
      # metacharacters are passed through verbatim instead of being interpreted.
      @line_count = IO.popen(['wc', '-l', options[:file_name]]) { |io| io.read }.split.first.to_i

    # Or stdin otherwise
    elsif not $stdin.tty?
      e = $stdin.to_enum
      @line_count = options[:line_count]
    else
      $stderr.print 'Please either specify a -f FILENAME or pipe content to splog.'
      exit
    end

    # outputting to stdout simply prints 1 parsed line per line
    if options[:output] == 'stdout'
      # Parse each line of the file through the log parser
      parse(e).each do |parsed_line|
        if options[:db_ref_name]
          persist_log_entry(parsed_line)
        end

        # Then write to stdout
        $stdout.write parsed_line.to_s
        $stdout.write "\n"
      end

    # outputting to json will construct a valid json array so you can do something like splog ... | prettyjson
    elsif options[:output] == 'json'
      # Parse each line of the file through the log parser
      $stdout.write '['
      pe = parse(e)
      begin
        while true
          parsed_line = pe.next

          if options[:db_ref_name]
            persist_log_entry(parsed_line)
          end

          # Then write to stdout
          $stdout.write parsed_line.to_json
          # peek raises StopIteration after the last element, which ends the loop
          # via the rescue below before a trailing comma can be written
          $stdout.write ',' unless pe.peek.nil?
        end
      rescue StandardError
        # Best effort: StopIteration (end of input) and any other error end the array
        nil
      end
      # If a \n is not written a % shows on the console output thus breaking the json array
      $stdout.write "]\n"

    # outputting nothing if -o given with no value.  Useful for perf testing mainly
    elsif options[:output] == nil
      pe = parse(e)
      begin
        while true
          parsed_line = pe.next
          if options[:db_ref_name]
            persist_log_entry(parsed_line)
          end

          update_progress_bar
        end
      rescue StandardError
        # Best effort: StopIteration (end of input) and any other error stop the loop
        nil
      end
    # Otherwise return the enumerator back up to be iterated over either in testing or in a program requiring this code
    else
      return read_log_file(options[:file_name])
    end
  else
    $stderr.print "Please either specify a -f FILENAME or pipe in content\n"
  end
end
createsig(body) click to toggle source

stackoverflow.com/questions/6461812/creating-an-md5-hash-of-a-number-string-array-or-hash-in-ruby

# File lib/splog.rb, line 50
# Returns the hexadecimal MD5 digest of the flattened signature string that
# sigflat produces for +body+.
def createsig(body)
  flattened = sigflat(body)
  Digest::MD5.hexdigest(flattened)
end
load_dot_file() click to toggle source
# File lib/splog.rb, line 112
# Reads the YAML dot file and copies each top-level entry into @config.
#
# The path comes from @options[:dot_file_name], falling back to ~/.splog.yml.
# If the file cannot be loaded or iterated, the error is reported on stderr
# and the process exits.
def load_dot_file
  dot_file = @options[:dot_file_name] || '~/.splog.yml'
  begin
    settings = YAML.load_file(File.expand_path(dot_file))
    settings.each { |name, setting| @config[name] = setting }
  rescue => err
    $stderr.puts "Unable to find or read #{dot_file}\n"
    $stderr.puts err
    exit
  end
end
parse(enum_ref) click to toggle source

Takes an enum and iterates over it with logic to parse the log lines based on the configuration

# File lib/splog.rb, line 269
# Wraps +enum_ref+ (an enumerator of raw log lines) in a new Enumerator that
# yields the hashes produced by parse_line, folding follow-on lines into the
# preceding entry according to the pattern's configuration.
#
# For every line the next line is peeked at and tested with an external egrep
# process (the command strings built by set_pattern / set_pattern_match_forward):
#
# * if the next line matches and 'match_forward_regex' is configured, its
#   'match_forward_keyname_source' value (and that of each following line that
#   matches the forward regex) is appended to the current entry;
# * otherwise, when appending is enabled, following lines that do NOT match
#   the main pattern are appended to the entry's 'unmatched_append_key_name'
#   value;
# * otherwise the parsed line is yielded as is.
#
# The end of input surfaces as StopIteration from next/peek and is handled in
# the rescue at the bottom, which flushes whatever is still pending.
#
# enum_ref - object responding to #next and #peek that returns one line each.
#
# Returns an Enumerator.
def parse(enum_ref)
  e = Enumerator.new do |y|
    # Defines the current parsed line.  Next lines can be added to this one potentially based on a key
    current_working_line = nil
    parsed_line = nil
    begin
      while enum_ref

        line = enum_ref.next
        parsed_line = parse_line(line)

        # Raises StopIteration when +line+ was the last one; handled by the rescue below
        next_line = enum_ref.peek
        # Pass in the 'match_forward_regex' if it exists so the next line can be evaluated in this context
        #parsed_next_line = @pattern_match_forward.nil? ? parse_line(next_line) : parse_line(next_line, {:regex => @pattern_match_forward})
        #parsed_next_line_test = @pattern_match_forward.nil? ? parse_line(next_line) : parse_line(next_line, {:regex => @pattern_match_forward})

        # Performance optimization here, don't do a full #match only =~ since not all next lines need to be parsed period
        #parsed_next_line_test = @pattern_match_forward.nil? ? next_line =~ @pattern : next_line =~ @pattern_match_forward
        #egrep = "echo \"#{next_line}\" | egrep \"#{@pattern_egrep}\""
        #egrep_fwd = 'echo ' + next_line + ' | egrep ' + @pattern_match_forward_egrep
        #p egrep
        #p egrep_fwd
        #parsed_next_line_test = @pattern_match_forward.nil? ? `#{egrep}` : `#{egrep_fwd}`

        # Test the next line by feeding it to the egrep command on stdin; only the
        # exit status +s+ is consulted afterwards.
        # NOTE(review): this starts one external process per input line, and the
        # command is a single string with the regex inside double quotes — confirm
        # that patterns containing $ or backticks behave as intended.
        o, e, s = nil
        begin
          o, e, s = Open3.capture3(@pattern_match_forward.nil? ? @pattern_egrep : @pattern_match_forward_egrep, :stdin_data=>next_line)
        rescue Errno::EPIPE
          #puts "Connection broke!"
          nil
        end

        ############################################################################################################
        # If the next line matches the match_forward_regex
        ############################################################################################################
        #if parsed_next_line and @config[@pattern_name]['match_forward_regex']
        #if not parsed_next_line_test.nil? and @config[@pattern_name]['match_forward_regex']
        if s && s.success? and @config[@pattern_name]['match_forward_regex']

          # Do the actual match now that we know it matches
          parsed_next_line = @pattern_match_forward.nil? ? parse_line(next_line) : parse_line(next_line, {:regex => @pattern_match_forward})

          # If the current_working_line does not yet exist, set it to the latest parsed line
          if current_working_line.nil? and parsed_line
            current_working_line = parsed_line
          end

          # Add to the match_forward_keyname_source from the match_forward_keyname_dest
          # NOTE(review): if +line+ itself did not parse, current_working_line is still
          # nil here and this raises NoMethodError — confirm whether that can happen.
          current_working_line[@config[@pattern_name]['match_forward_keyname_source']] << parsed_next_line[@config[@pattern_name]['match_forward_keyname_source']]

          # fast forward the enum one click to account for the peek
          enum_ref.next

          # Read until StopIteration or the match_forward_regex no longer matches
          while true
            # Only peek here to not advance the enum unnecessarily
            sub_line = enum_ref.peek
            #parsed_sub_line = @config[@pattern_name]['match_forward_regex'].nil? ? nil : parse_line(sub_line, {:regex => @pattern_match_forward})
            parsed_sub_line = @pattern_match_forward.nil? ? nil : parse_line(sub_line, {:regex => @pattern_match_forward})
            if parsed_sub_line
              # if matched advance the enum and add the data to the current working line
              enum_ref.next
              current_working_line[@config[@pattern_name]['match_forward_keyname_source']] << parsed_sub_line[@config[@pattern_name]['match_forward_keyname_source']]
            else
              # Otherwise we've reached the end of the matched pattern yield this match out
              y << current_working_line

              # Since that is yielded, set the current_working_line to nil so it has a fresh start for the next iter
              current_working_line = nil
              break
            end
          end
        ############################################################################################################
        # Otherwise if the next line is nil but the parsed line matched and we are appending
        ############################################################################################################
        # NOTE(review): parsed_next_line is only assigned inside the match-forward
        # branch above and is never reset, so on later iterations it may still hold
        # the value from an earlier match and cause this branch to be skipped —
        # confirm whether that is intended.
        elsif parsed_line and parsed_next_line.nil? and @options[:append]
          # If the current_working_line does not yet exist, set it to the latest parsed line
          if current_working_line.nil? and parsed_line
            current_working_line = parsed_line
          end

          # Read until StopIteration or a new parsed line is found
          while true
            # Only peek here to not advance the enum unnecessarily
            sub_line = enum_ref.peek

            # TODO this can be optimized too since I'm attempting to not match it!  I don't even read the parsed_sub_line
            #parsed_sub_line = parse_line(sub_line)
            o, e, s = nil
            begin
              o, e, s = Open3.capture3(@pattern_egrep, :stdin_data=>sub_line)
            rescue Errno::EPIPE
              #puts "Connection broke!"
              nil
            end

            #if parsed_sub_line.nil? and @config[@pattern_name]['unmatched_append_key_name']
            # A missing or unsuccessful egrep status means sub_line did not match the main pattern
            if (s.nil? or not s.success?) && @config[@pattern_name]['unmatched_append_key_name']
              # if unmatched advance the enum and add the data to the current working line
              enum_ref.next
              current_working_line[@config[@pattern_name]['unmatched_append_key_name']] << sub_line
            else
              # Otherwise we've reached the end of the matched pattern yield this match out
              y << current_working_line

              # Since that is yielded, set the current_working_line to nil so it has a fresh start for the next iter
              current_working_line = nil
              break
            end
          end
        ############################################################################################################
        # Otherwise just your average joe matched line
        ############################################################################################################
        elsif parsed_line
          y << parsed_line
        end
      end
    rescue StopIteration => e
      # If both current_working_line and parsed_line exist and differ, yield them both, as this situation can
      # happen when peeking forward after an unmatched line
      if current_working_line and parsed_line and current_working_line != parsed_line
        y << current_working_line
        y << parsed_line
      # Yield point for a successfully parsed line
      elsif current_working_line
        y << current_working_line
      else
        # NOTE(review): parsed_line may be nil here (e.g. empty input or an
        # unparsable last line), in which case nil is yielded to the consumer.
        y << parsed_line
      end
    end
  end
end
parse_datetime(the_input, the_format=nil) click to toggle source

Attempt to parse a datetime and convert it to UTC; if parsing fails, the input is returned unchanged.

# File lib/splog.rb, line 203
# Converts +the_input+ into a UTC Time.
#
# the_input  - the text to interpret.
# the_format - optional strptime format; when omitted DateTime.parse is used.
#
# Returns the UTC Time, or +the_input+ unchanged when it cannot be parsed.
def parse_datetime(the_input, the_format=nil)
  result = the_input
  begin
    result =
      if the_format
        DateTime.strptime(the_input, the_format)
      else
        DateTime.parse(the_input)
      end
    # Convert the time to utc for mongo
    result = result.to_time.utc unless result.nil?
  rescue
    # Parsing failed: fall through with whatever result currently holds
  end
  result
end
parse_float(the_input) click to toggle source

Attempt to parse a float or return 0

# File lib/splog.rb, line 192
# Converts +the_input+ with #to_f.
#
# Returns the converted value, or 0 when the conversion raises.
def parse_float(the_input)
  the_input.to_f
rescue
  0
end
parse_int(the_input) click to toggle source

Attempt to parse an int or return 0

# File lib/splog.rb, line 181
# Converts +the_input+ with #to_i.
#
# Returns the converted value, or 0 when the conversion raises.
def parse_int(the_input)
  the_input.to_i
rescue
  0
end
parse_line(line, opts={}) click to toggle source
# File lib/splog.rb, line 215
# Matches +line+ against a regex and turns its named groups into a Hash.
#
# line - the raw log line.
# opts - optional Hash; opts[:regex] overrides the configured @pattern.
#
# Each named group becomes a key.  When @mapping has an entry for the group,
# its 'data_type' selects the conversion (Int/Integer, Float, DateTime with
# the optional 'format'); any other data_type keeps the raw captured string
# instead of dropping the field.  @options[:key] is stored under 'key' and,
# when @options[:md5] is set, an md5 signature of the hash is stored under
# '_id'.
#
# Errors raised while matching or converting are printed to stderr and the
# fields collected so far are kept.
#
# Returns the populated Hash, or nil when nothing was captured.
def parse_line(line, opts={})
  res = {}
  begin
    regex = opts[:regex] || @pattern
    m = regex.match(line)
    if m
      m.names.each do |k|
        v = m[k]
        res[k] =
          if @mapping && @mapping.key?(k)
            case @mapping[k]['data_type']
            when 'Int', 'Integer' then parse_int(v)
            when 'Float' then parse_float(v)
            when 'DateTime' then parse_datetime(v, @mapping[k]['format'])
            else v # unknown or missing data_type: keep the captured text
            end
          else
            v
          end
      end
    end
  rescue => detail
    $stderr.puts $!
    detail.backtrace.each { |e| $stderr.puts e}
  end

  # If a key exists add the key to the parsed_line, This can help differentiate the log if not putting each
  # Log into a unique collection, or even then helps differentiate the logs within a collection.  Ex. if you had
  # access_log and error_log in the same collection you may want a specific key for each of those
  res['key'] = @options[:key] if @options[:key] && !res.empty?

  # The signature is computed after 'key' is added so the key takes part in the _id
  res['_id'] = createsig(res) if @options[:md5] && !res.empty?

  # Return nil if the hash hasn't been populated
  res.empty? ? nil : res
end
persist_log_entry(parsed_line) click to toggle source
# File lib/splog.rb, line 76
# Writes +parsed_line+ to the configured Mongo collection.
#
# On the first call (while @client is nil and @options[:db_ref_name] is set)
# the connection is opened from @config['db_refs'][db_ref_name]; the database
# and collection can be overridden with @options[:mongo_db] and
# @options[:mongo_coll].  Host defaults to 127.0.0.1 and port to 27017, the
# standard MongoDB port.
#
# parsed_line - Hash to store.  When it has an '_id' the document is upserted
#               on that id, otherwise it is inserted.
#
# Any error is printed to stderr and swallowed, so persistence is best effort.
def persist_log_entry(parsed_line)
  begin
    if @client.nil? and @options[:db_ref_name]
      db_ref_name = @options[:db_ref_name]
      db_ref = @config['db_refs'][db_ref_name]
      host = db_ref['host'] || '127.0.0.1'
      port = db_ref['port'] || 27017
      user = db_ref['user'] || nil
      pass = db_ref['pass'] || nil
      db = @options[:mongo_db] || db_ref['db']
      coll = @options[:mongo_coll] || db_ref['collection']

      @client = MongoClient.new(host, port, :pool_size => 1)
      db = @client.db(db)
      # Only authenticate when a non-empty user and a password are configured
      if user && user != '' && pass
        db.authenticate(user, pass)
      end
      @coll = db[coll]
    end

    # Assuming the above is successful write to the collection, otherwise silently do nothing
    if @client and @coll
      # If an _id exists upsert the doc
      if parsed_line.has_key?('_id')
        @coll.update({:_id => parsed_line['_id']}, parsed_line, {:upsert => true})
      # Otherwise insert the parsed_line which will cause a Mongo specific _id to be generated
      else
        @coll.insert(parsed_line)
      end
    end
  rescue => detail
    $stderr.puts $!
  end
end
read_input(the_input) click to toggle source
# File lib/splog.rb, line 402
# Splits +the_input+ into lines (line endings are kept) and returns an
# enumerator over them.
def read_input(the_input)
  line_list = the_input.lines
  line_list.to_enum(:each)
end
read_log_file(file_name) click to toggle source
# File lib/splog.rb, line 408
# Opens +file_name+ for reading and returns an enumerator over its lines.
# The file is opened immediately and is not closed by this method.
def read_log_file(file_name)
  log_file = File.new(file_name)
  log_file.each_line
end
set_mapping(options) click to toggle source
# File lib/splog.rb, line 168
# Builds @mapping, a Hash of mapping entries keyed by their 'name', from the
# 'mapping' list of the pattern named by options[:pattern_name].  A pattern
# without a 'mapping' list yields an empty Hash.
#
# If the configuration cannot be read, a hint is printed and the process exits.
def set_mapping(options)
  begin
    entries = @config[options[:pattern_name]]['mapping']
    by_name = {}
    unless entries.nil?
      entries.each { |entry| by_name[entry['name']] = entry }
    end
    @mapping = by_name
  rescue => err
    puts 'Unable to read the mapping in your .splog.yaml configuration.  Please reference https://github.com/engineersamuel/splog for proper formatting.'
    $stderr.puts err
    exit
  end
end
set_pattern(options) click to toggle source
# File lib/splog.rb, line 128
# Compiles the pattern named by options[:pattern_name].
#
# The pattern's 'regex' list is joined with its 'delim' and compiled as a
# MULTILINE Regexp into @pattern.  The same joined source, with the named
# group markers (?<name>) stripped, is stored as an egrep command string in
# @pattern_egrep.
#
# If the pattern cannot be built, a message is printed and the process exits.
def set_pattern(options)
  @pattern_name = options[:pattern_name]
  begin
    # MULTILINE to match the \n chars
    pattern_config = @config[options[:pattern_name]]
    @pattern = pattern_config['regex']
    separator = @config[@pattern_name].nil? ? "\\s+" : @config[@pattern_name]['delim']
    joined = @pattern.join(separator)
    # Remove the group names
    unnamed = joined.gsub(/\?<.*?>/, '')
    @pattern_egrep = "egrep \"#{unnamed}\""
    @pattern = Regexp.new(joined, Regexp::MULTILINE)
  rescue
    puts "No pattern matching '#{options[:pattern_name]}' found.  Please choose another name or define this pattern in the your .splog.yaml"
    exit
  end
end
set_pattern_match_forward() click to toggle source
# File lib/splog.rb, line 148
# Compiles the optional 'match_forward_regex' of the selected pattern.
#
# When the pattern defines it, the list is joined with the pattern's 'delim'
# and compiled as a MULTILINE Regexp into @pattern_match_forward; the joined
# source with the named group markers stripped is stored as an egrep command
# string in @pattern_match_forward_egrep.  When it is not defined,
# @pattern_match_forward is left nil.
#
# On any error the message is printed to stderr and the process exits.
def set_pattern_match_forward
  begin
    @pattern_match_forward = @config[options[:pattern_name]]['match_forward_regex']
    # since this is optional only compile if set
    if @pattern_match_forward
      separator = @config[@pattern_name].nil? ? "\\s+" : @config[@pattern_name]['delim']
      joined = @pattern_match_forward.join(separator)
      compiled = Regexp.new(joined, Regexp::MULTILINE)
      # Remove the group names
      @pattern_match_forward_egrep = "egrep \"#{joined.gsub(/\?<.*?>/, '')}\""
      @pattern_match_forward = compiled
    end
  rescue => err
    $stderr.puts err
    exit
  end
end
sigflat(body) click to toggle source
# File lib/splog.rb, line 54
# Flattens +body+ into a deterministic String used as the input of createsig.
#
# * A Hash becomes an Array of "key=>value" strings (both sides flattened).
# * An Array has each element flattened and is then sorted, so element and
#   key order do not influence the signature.
# * Anything that is not a String is rendered as its #to_s followed by its
#   class name (an Array therefore ends in "Array").
# * A String is returned as is.
#
# The argument is never modified: arrays are mapped and sorted into new
# objects, and the suffix is added by interpolation so frozen #to_s results
# (nil, true and false on Ruby 2.7+) do not raise.  The produced strings are
# identical to the ones the in-place version generated, so existing
# signatures remain valid.
def sigflat(body)
  if body.class == Hash
    body = body.map { |key, value| "#{sigflat key}=>#{sigflat value}" }
  end
  if body.class == Array
    body = body.map { |value| sigflat value }.sort
  end
  body = "#{body}#{body.class}" if body.class != String
  body
end
update_progress_bar() click to toggle source
# File lib/splog.rb, line 412
# Advances the verbose-mode progress bar by one step, creating it on first use.
#
# Nothing happens unless the :verbose option is set.  Without a known
# @line_count the bar is created with a nil total; otherwise it is created
# with a title, a format string and the line count as total.
def update_progress_bar
  if options[:verbose] && !@progress_bar
    @progress_bar =
      if @line_count.nil?
        ProgressBar.create(:starting_at => 0, :total => @line_count)
      else
        ProgressBar.create(:title => 'Lines Read', :format => '[%a] %c Completed |%b>>%i| %p%% %t [%e]', :total => @line_count)
      end
  end

  @progress_bar.increment if @options[:verbose] && @progress_bar
end