class ServerLogParser::Parser

Attributes

names[R]

The list of field names extracted from the log format.

regexp[R]

Regexp instance used for parsing a log line.

Public Class Methods

new(format) click to toggle source

Initializes a new parser instance with given log format.

# File lib/server_log_parser/parser.rb, line 13
# Builds a parser for the given log format.
#
# format - the log format description; it is handed to parse_format and
#          the value parse_format returns is kept in @format.
def initialize(format)
  # parse_format appends to the names list and assigns @regexp, so both
  # must be initialized before it runs.
  @names  = []
  @regexp = nil
  @format = parse_format(format)
end

Public Instance Methods

handle(line) click to toggle source

Parses line according to the current log format and returns a hash of log field => typed value on success. Returns nil if line doesn’t match the current log format.

# File lib/server_log_parser/parser.rb, line 47
# Parses +line+ and converts the captured strings into typed values.
#
# line - the raw log line, passed unchanged to parse.
#
# Returns whatever handle_parsed produces for the parsed fields, or nil
# when parse reports no match.
def handle(line)
  fields = parse(line)
  handle_parsed(fields) if fields
end
handle!(line) click to toggle source

Same as #handle but raises a ParseError if line doesn’t match the current format.

Raises

ParseError

if line doesn’t match current format

# File lib/server_log_parser/parser.rb, line 72
# Strict variant of handle: parses +line+ with parse! and converts the
# captured strings into typed values.
#
# line - the raw log line, passed unchanged to parse!.
#
# Returns the result of handle_parsed. Any error raised by parse! (a
# ParseError when the line does not match) propagates to the caller.
def handle!(line)
  handle_parsed(parse!(line))
end
parse(line) click to toggle source

Parses line according to the current log format and returns a hash of log field => value on success. Returns nil if line doesn’t match the current log format.

# File lib/server_log_parser/parser.rb, line 22
# Parses +line+ against the current log format.
#
# line - the raw log line. It is coerced with to_s and is never modified.
#
# Returns a Hash mapping each entry of names to the String captured for
# it, or nil when the line does not match regexp.
def parse(line)
  # to_s returns the receiver itself for a String, so the destructive
  # chomp!/strip! would edit the caller's object and raise FrozenError on
  # frozen input (frozen literals, or nil.to_s on Ruby 2.7+). The
  # non-destructive forms work on a copy instead.
  row = line.to_s.chomp.strip

  match = regexp.match(row)
  return unless match

  data = {}
  # Capture 0 is the whole line, so field captures start at index 1.
  names.each_with_index { |field, index| data[field] = match[index + 1] }
  data
end
parse!(line) click to toggle source

Same as #parse but raises a ParseError if line doesn’t match the current format.

Raises

ParseError

if line doesn’t match current format

# File lib/server_log_parser/parser.rb, line 40
# Strict variant of parse.
#
# line - the raw log line, passed unchanged to parse.
#
# Returns the Hash produced by parse.
# Raises ParseError, with a message naming @format and the offending
# line, when parse yields nothing.
def parse!(line)
  result = parse(line)
  return result if result

  raise ParseError, "Invalid format `%s` for line `%s`" % [@format, line]
end

Protected Instance Methods

handle_parsed(parsed) click to toggle source
# File lib/server_log_parser/parser.rb, line 120
# Converts the raw String captures in +parsed+ into typed values.
#
# parsed - Hash of field name => captured String (as returned by parse).
#
# Conversion rules, chosen by field name:
# * a value of '-' becomes nil, whatever the field
# * byte counts, ports, process ids, status codes and similar fields go
#   through Integer()
# * '%D' and '%T' go through Float()
# * '%t' is parsed into a DateTime
# * '%r' is split into a Hash with 'method', 'resource' and 'protocol'
# * any other field keeps its String value
#
# Returns a new Hash with the same keys in the same order. Errors raised
# by Integer(), Float() or DateTime.strptime on malformed values are not
# rescued here.
def handle_parsed(parsed)
  parsed.each_with_object({}) do |(field, raw), typed|
    if raw == '-'
      typed[field] = nil
      next
    end

    typed[field] =
      case field
      when '%B', '%b', '%k', '%p', /%{\S+}p/, '%P', /%{\S+}P/, '%s', '%>s', '%I', '%O'
        Integer(raw)
      when '%D', '%T'
        Float(raw)
      when '%t'
        DateTime.strptime(raw, '[%d/%b/%Y:%H:%M:%S %Z]')
      when '%r'
        # Request line: leading word, first "/..." token followed by a
        # space, and whatever follows the last space.
        verb     = raw[/^(\w*)/, 1]
        resource = raw[/(\/\S*) /, 1]
        protocol = raw[/.* (.*)$/, 1]
        { 'method' => verb, 'resource' => resource, 'protocol' => protocol }
      else
        raw
      end
  end
end
parse_format(format) click to toggle source

Parse log format into a suitable Regexp instance.

# File lib/server_log_parser/parser.rb, line 88
# Parse log format into a suitable Regexp instance.
#
# Normalizes +format+ (trailing line break and surrounding whitespace
# removed, runs of spaces/tabs collapsed to one space), then walks its
# space-separated elements. For each element it appends one entry to
# names (after passing it through rename_this_name) and contributes one
# capture group to the pattern. The compiled Regexp is stored in @regexp.
#
# format - the log format; coerced with to_s and never modified.
#
# Returns the normalized format String.
def parse_format(format)
  # to_s returns the receiver itself for a String, so the destructive
  # chomp!/strip!/gsub! would edit the caller's object and raise
  # FrozenError on frozen input (e.g. a frozen format constant). Build a
  # normalized copy instead.
  format = format.to_s.chomp.strip.gsub(/[ \t]+/, ' ')

  pattern = format.split(' ').map do |element|
    # An element written as \"...\" in the format is wrapped in double
    # quotes in the log line; drop the markers to get the bare field name.
    has_quotes = element.start_with?('\"')
    element = element.sub(/^\\"/, '').sub(/\\"$/, '') if has_quotes

    names << rename_this_name(element)

    if has_quotes
      if element == '%r' || element.start_with?('%{Referer}', '%{User-agent}')
        # Quoted value that may itself contain backslash-escaped characters.
        /"([^"\\]*(?:\\.[^"\\]*)*)"/
      else
        '\"([^\"]*)\"'
      end
    elsif element =~ /^%.*t$/
      # Time fields are matched as a bracketed group.
      '(\[[^\]]+\])'
    elsif element == '%U'
      '(.+?)'
    else
      '(\S*)'
    end
  end.join(' ')

  @regexp = Regexp.new("^#{pattern}$")
  format
end
rename_this_name(name) click to toggle source

Override this method if you want to use human-readable names for log fields. This method is called only once per field, at parse_format time.

# File lib/server_log_parser/parser.rb, line 83
# Hook for mapping a raw format element to the key stored in names.
#
# name - the format element, as passed in by parse_format.
#
# Returns +name+ unchanged in this default implementation.
def rename_this_name(name)
  name
end