class RXRawLineParser

Public Class Methods

new(format_mask)
# File lib/rxraw-lineparser.rb, line 27
# Creates a parser bound to one format mask.
#
# format_mask - kept as-is in @format_mask; #parse reads it back on every
#               call. #parse treats a mask containing '(?<' as a regular
#               expression with named captures and otherwise looks for
#               [!name] placeholders in it.
def initialize(format_mask)
  @format_mask = format_mask
end

Public Instance Methods

parse(line)
# File lib/rxraw-lineparser.rb, line 31
# Splits +line+ into field names and field values using @format_mask.
#
# Two kinds of mask are handled:
#
# * A mask whose text contains '(?<' is used as a regular expression with
#   named captures. The names are the capture names (Strings) and the
#   values are the captures.
# * Any other mask takes its names from the [!name] placeholders (Symbols).
#   If the mask starts with a backslash, every placeholder is replaced by
#   '(.*)' and the result is used as the regex. Otherwise the candidate
#   regexes from possible_patterns are tried in order and the first one
#   matching the line is used.
#
# line - the String to parse.
#
# Returns a two element Array: [field_names, field_values]. Missing values
# are padded with '' so both arrays have the same length. When the line does
# not match the mask at all, every value is '' (earlier versions raised
# NoMethodError on nil in that situation).
def parse(line)
  if @format_mask.to_s.include?('(?<')
    # NOTE(review): '(?<' also occurs in look-behind groups ('(?<=', '(?<!'),
    # so such masks take this branch as well - confirm that is intended.
    #
    # The regex is built first so that its capture names are still available
    # when the line does not match.
    regex = /#{@format_mask}/
    matched = line.match(regex)
    field_names = regex.names
    field_values = matched ? matched.captures : []
  else
    field_names = @format_mask.to_s.scan(/\[!(\w+)\]/).flatten.map(&:to_sym)

    # only perform possible patterns to match when a custom
    # format_mask is detected
    pattern =
      if @format_mask[0] != '\\'
        patterns = possible_patterns(@format_mask)

        # A mask made only of placeholders separated by single spaces also
        # gets the "two or more spaces" candidates (inserted in place).
        plain_mask = field_names.map { |x| "[!%s]" % x }.join(' ')
        if plain_mask == @format_mask && field_names.length > 1
          insert2space_patterns(field_names.length, patterns)
        end

        found = patterns.detect { |x| line.match(/#{x.join}/) }

        if found.nil?
          nil # no candidate matched the line
        elsif patterns.length > 1
          # re-attach any literal text that follows the last placeholder
          found.join + @format_mask[/[^\]]+$/].to_s
        else
          found.join
        end
      else
        @format_mask.gsub(/\[!\w+\]/, '(.*)')
      end

    matched = pattern ? line.match(/#{pattern}/) : nil

    if matched
      field_values = matched.captures.map(&:strip)

      # Strip the surrounding quote characters from quoted values.
      # NOTE(review): find_qpattern returns positions of pattern segments;
      # this assumes they line up with capture positions - confirm.
      find_qpattern(pattern).each do |i|
        field_values[i] = field_values[i][1..-2]
      end
    else
      field_values = []
    end
  end

  field_values += [''] * (field_names.length - field_values.length)

  [field_names, field_values]
end

Private Instance Methods

diminishing_permutation(max_fields)
# File lib/rxraw-lineparser.rb, line 166
# Lists every combination of 1s and 0s of length max_fields, followed by
# every combination of length max_fields - 1, and so on down to length 1.
#
# max_fields - Integer length of the longest combinations.
#
# Returns an Array of Arrays, e.g. for 2:
#   [[1, 1], [1, 0], [0, 1], [0, 0], [1], [0]]
def diminishing_permutation(max_fields)
  max_fields.times.flat_map do |dropped|
    [1, 0].repeated_permutation(max_fields - dropped).to_a
  end
end
find_qpattern(s)
# File lib/rxraw-lineparser.rb, line 172
# Locates the quoted-value groups inside a regex source string.
#
# The string is cut in front of every '(?=' and every '(["'; the result is
# the list of positions of the pieces that contain the quoted-value
# expression ["'][^"']+["'].
#
# s - regex source String.
#
# Returns an Array of Integer piece positions (empty when none are found).
def find_qpattern(s)
  segments = s.split(/(?=\(\?=)|(?=\(\[")/)
  quoted_group = /\["'\]\[\^"'\]\+\["'\]/

  segments.each_index.select { |idx| segments[idx][quoted_group] }
end
fmask_delimiters(f)
# File lib/rxraw-lineparser.rb, line 177
# Collects the text that separates consecutive [!name] placeholders.
#
# The mask is cut in front of every placeholder; for every piece except the
# last one, whatever follows its first ']' is kept. Pieces without a ']'
# contribute nothing.
#
# f - format mask String.
#
# Returns an Array of delimiter Strings, e.g. "[!a] [!b], [!c]" => [" ", ", "].
def fmask_delimiters(f)
  pieces = f.split(/(?=\[!\w+\])/)

  pieces[0...-1].each_with_object([]) do |piece, delimiters|
    tail = piece[/\](.*)/, 1]
    delimiters << tail unless tail.nil?
  end
end
insert2space_patterns(field_count, patterns)
# File lib/rxraw-lineparser.rb, line 84
# Adds "fields separated by two or more spaces" candidates to +patterns+.
#
# One candidate is built per entry of an offset list that starts with 2 and
# gains one entry per field beyond the second. Candidate number i (from 0)
# is '^', then i + 1 copies of '([^\s]+)\s{2,}', then '(.*)'. Each candidate
# is inserted at patterns.length - offset - i - 2, counted at the moment of
# insertion.
#
# field_count - Integer number of fields in the mask.
# patterns    - Array of candidate patterns; modified in place.
#
# Returns the Array of [offset, candidate] pairs that were inserted.
def insert2space_patterns(field_count, patterns)
  extra_fields = field_count - 2

  offsets = [2]
  extra_fields.times do |step|
    power = 2 ** (step + 3)
    # step + 1 never exceeds extra_fields inside this loop, so the first
    # arm is the one taken.
    offsets << (extra_fields >= step + 1 ? power - 1 : power)
  end

  spaced_field = '([^\s]+)\s{2,}'

  space2_patterns = offsets.each_with_index.map do |offset, idx|
    [offset, ['^', *Array.new(idx + 1, spaced_field), '(.*)']]
  end

  space2_patterns.each_with_index do |(offset, candidate), idx|
    patterns.insert(patterns.length - offset - idx - 2, candidate)
  end
end
possible_patterns(format_mask)
# File lib/rxraw-lineparser.rb, line 104
# Builds the ordered list of candidate regexes for a placeholder mask.
#
# format_mask - String containing [!name] placeholders.
#
# Returns an Array of candidates. Each candidate is an Array of regex source
# fragments that the caller joins into one regex. The last candidate is
# always the leading literal text followed by '(.*)'.
#
# NOTE(review): the early-return guard tests @format_mask while everything
# else uses the format_mask argument - confirm the two are always the same.
# NOTE(review): delimiters are interpolated into the regex source without
# escaping - confirm masks never use regex metacharacters as delimiters.
def possible_patterns(format_mask)

  # literal text in front of the first '[' ('' when the mask starts with '[')
  part1 = format_mask[/^[^\[]+/].to_s
  # the mask with every [!name] turned into a named capture (?<name>.*)
  pure_regex = format_mask.gsub(/\[!(\w+)\]/,'(?<\1>.*)')
  tot_fields = format_mask.scan(/\[!\w+\]/).length

  # with at most one placeholder, or a mask not starting with '[', the
  # named-capture regex is the only candidate
  return [[pure_regex]] if tot_fields <= 1 or @format_mask[0] != '['

  main_fields = tot_fields - 1
  # capture group for a value wrapped in quote characters
  qpattern = %q{(["'][^"']+["'])}
  
  # the text found between consecutive placeholders
  a = fmask_delimiters(format_mask)                                                       

  # every 1/0 combination for main_fields fields, then for fewer fields;
  # below, 1 selects a quoted field and 0 a delimiter-terminated field
  r = diminishing_permutation(main_fields)

  # with more than two combinations, the second-to-last one is taken out and
  # put back one position earlier (r itself is modified by slice!)
  if r.length > 2 then
    itemx  = [r.slice!(-2)]
    r2 = r[0..-3] + itemx + r[-2..-1]
  else
    r2 = r
  end
                                 
  # turn every combination into a list of regex fragments, one per field
  rr = r2.map do |x|
    x2 = x.each_with_index.map do |item, i|

      d = a[i]
      case item
        when  1
          qpattern #+ d
        when 0
          if d.length == 1 then
            # single-character delimiter: the field is anything but that
            # character, followed by one or more delimiters unless it is
            # the last field of this combination
            if i < x.length - 1 then
              s = "([^%s]+)%s+" % ([d]*2)
            else
              s = "([^%s]+)" % d
            end
          else
            # any other delimiter: the field is whatever precedes the
            # delimiter text, or simply the rest for the last field
            s = i < x.length - 1 ? "(.*)(?=#{d})#{d}" : "(.*)"
          end
        s
      end
      
    end

    # x2.shift removes the first fragment, which is put back at the front
    # prefixed with '^' and part1; the value assigned to r is not read again
    r = x2.unshift '^' + part1 + x2.shift #+ end_part
    
  end


  count = 2**main_fields

  # the first count + 1 candidates additionally get the last delimiter
  # followed by a catch-all group for the final field
  rr2 = rr.take(count+1).map {|x| x + [a[-1] +'(.*)']} 
  
  # final order: the extended candidates, then the first count plain
  # candidates, then the remaining plain ones; with more than two candidates
  # the last extended one is moved between those two plain groups
  if rr.length > 2 then
    wild_r = rr2.slice!(-1)     
    rrr = rr2 + rr[0..count-1] + [wild_r] + rr[count..-1]    
  else
    rrr = rr2 + rr[0..count-1] +  rr[count..-1]          
  end
  # fallback candidate: leading literal text followed by a catch-all group
  rrr  + [[part1 + '(.*)']]
end