class Parceira::Reader

Constants

DEFAULT_OPTIONS

Attributes

options[R]

Public Class Methods

new(input, options)
# File lib/parceira/reader.rb, line 23
# Builds a reader for the given input.
#
# input   - the CSV source. Other methods of this class handle a File, a
#           String path to an existing file, or a String of CSV content.
# options - Hash of settings merged on top of DEFAULT_OPTIONS; keys given
#           here take precedence over the defaults.
def initialize(input, options)
  @options = DEFAULT_OPTIONS.merge(options)
  @input   = input
end

Public Instance Methods

process!()
# File lib/parceira/reader.rb, line 29
# Parses the input and returns its rows.
#
# * When the input is a String that does not name an existing file, it is
#   treated as CSV content already in memory and parsed in one go.
# * When the input is a File (or a String path to an existing file), the file
#   is read one record at a time so it is never loaded into memory as a whole.
#   Records that raise CSV::MalformedCSVError are skipped.
#
# Each row is a Hash keyed by the header when header keys are available
# (options[:headers] is true or an Array of keys); otherwise it is an Array of
# values. When options[:headers_included] is set, the first record is consumed
# as the header row.
#
# Returns an Array of rows, or nil when the input is neither of the above.
def process!
  if input_file.nil? && @input.is_a?(String)
    # CSV options are passed as keywords: a positional Hash is rejected by
    # CSV.parse on Ruby 3+.
    data = CSV.parse(@input, **csv_options)
    header_data = data.shift if options[:headers_included] # Remove header row
    header_keys =
      if options[:headers] == true
        parse_header(header_data)
      elsif options[:headers].is_a?(Array)
        options[:headers]
      end
    data.map do |arr|
      values = parse_values(arr)
      header_keys ? convert_to_hash(header_keys, values) : values
    end
  elsif input_file.is_a?(File)
    # The record separator is handed to readline/chomp directly instead of
    # being written to the process-wide $/, which used to stay modified after
    # this method returned.
    row_sep = options[:row_sep]
    output  = []
    begin
      # Build header
      header_data = input_file.readline(row_sep).to_s.chomp(row_sep) if options[:headers_included]
      header_keys =
        if options[:headers] == true
          begin
            parse_header(CSV.parse(header_data, **csv_options))
          rescue CSV::MalformedCSVError
            nil # unusable header row: fall back to plain Array rows
          end
        elsif options[:headers].is_a?(Array)
          options[:headers]
        end

      # Read record by record; readlines would pull the whole file into memory.
      until input_file.eof?
        values =
          begin
            parse_values(CSV.parse(input_file.readline(row_sep).chomp(row_sep), **csv_options))
          rescue CSV::MalformedCSVError
            nil # skip records the CSV parser rejects
          end
        next unless values

        output << (header_keys ? convert_to_hash(header_keys, values) : values)
      end
    ensure
      # Close only a handle this reader opened itself (String path input); a
      # File supplied by the caller is left open. Clearing the memo lets a
      # later call reopen the file instead of reusing a closed handle.
      if @input.is_a?(String) && @input_file
        @input_file.close unless @input_file.closed?
        @input_file = nil
      end
    end
    output
  end
end

Protected Instance Methods

charset()
# File lib/parceira/reader.rb, line 143
# Returns the charset name to use when opening the input.
#
# options[:file_encoding] wins when set. Otherwise, if the input is a File or a
# String path to an existing file, the external `file --brief --mime` command
# is run and the value of its `charset=` token is returned. default_charset is
# returned when there is no file to inspect, when the command output carries
# no charset, or when anything raises along the way (for example the `file`
# command is not installed).
def charset
  options[:file_encoding] || begin
    filename =
      case @input
      when String
        # File.exist? rather than File.exists?: the latter was removed in Ruby 3.2.
        @input if File.exist?(@input)
      when File
        @input.path
      end
    detected =
      if filename
        # Block form closes the pipe as soon as the output has been read.
        mime = IO.popen(['file', '--brief', '--mime', filename], &:read).to_s
        mime[/charset=(\S+)/, 1]
      end
    detected || default_charset
  rescue StandardError
    default_charset
  end
end
convert_to_hash(header, values)
# File lib/parceira/reader.rb, line 84
# Pairs each header key with the value at the same position.
#
# header - Array of keys.
# values - Array of values, looked up by the key's index in header.
#
# With options[:reject_nil] set, keys whose value is nil are left out.
#
# Returns a Hash.
def convert_to_hash(header, values)
  header.each_with_index.each_with_object({}) do |(key, index), row|
    value = values[index]
    next if options[:reject_nil] && value.nil?

    row[key] = value
  end
end
csv_options()
# File lib/parceira/reader.rb, line 163
# Returns the subset of options whose keys are in the list below; this is the
# Hash handed to CSV.parse. All other reader options are dropped.
def csv_options
  allowed_keys = %i[
    col_sep
    row_sep
    quote_char
    field_size_limit
    converters
    unconverted_fields
    skip_blanks
    force_quotes
  ]
  options.select { |key, _value| allowed_keys.include?(key) }
end
default_charset()
# File lib/parceira/reader.rb, line 179
# Returns the charset name that #charset falls back to when no encoding is
# configured or detected.
def default_charset
  'utf-8'
end
input_file()
# File lib/parceira/reader.rb, line 130
# Returns the File to read from, memoized in @input_file.
#
# * A File input is returned as-is.
# * A String input that names an existing file is opened read-only with the
#   external encoding reported by #charset.
# * Anything else (including a String of CSV content) yields nil; because nil
#   is not memoized by ||=, the check is repeated on the next call.
def input_file
  @input_file ||=
    case @input
    when File
      @input
    when String
      # File.exist? rather than File.exists?: the latter was removed in Ruby 3.2
      # and raised NoMethodError for every String input.
      File.open(@input, "r:#{charset}") if File.exist?(@input)
    end
end
parse_header(arr)
# File lib/parceira/reader.rb, line 117
# Turns a header row into an Array of keys.
#
# arr - the header cells; nested Arrays are flattened first.
#
# Each cell is converted with String#parameterize('_') and turned into a
# Symbol. A cell whose parameterized form is not present? gets the positional
# name :field_N (N counted from 1). When options[:key_mapping] is a Hash
# containing the resulting key, the mapped value is used instead.
#
# NOTE(review): parameterize and present? are not core Ruby - presumably they
# come from ActiveSupport; the positional separator argument should be checked
# against the ActiveSupport version in use.
#
# Returns an Array with one key per header cell.
def parse_header(arr)
  mapping = options[:key_mapping]
  arr.flatten.each_with_index.map do |value, index|
    slug = value.to_s.parameterize('_')
    key  = slug.present? ? slug.to_sym : "field_#{index.next}".to_sym
    if mapping.is_a?(Hash) && mapping.has_key?(key)
      mapping[key]
    else
      key
    end
  end
end
parse_values(arr)
# File lib/parceira/reader.rb, line 95
# Normalizes the cells of one row.
#
# arr - the row's cells; nested Arrays are flattened first.
#
# Every cell is converted to a stripped String. With
# options[:convert_to_numeric] set, a cell that consists entirely of a decimal
# number becomes a Float and one that consists entirely of an integer becomes
# an Integer (an optional sign is allowed in both cases).
#
# A value is then replaced by nil when:
# * options[:reject_blank] is set and the value is blank?,
# * options[:reject_zero] is set and the value responds to zero? and is zero,
# * options[:reject_matching] is set and the (String) value matches it.
#
# Returns an Array with one entry per cell.
def parse_values(arr)
  arr.flatten.map do |v|
    value =
      if options[:convert_to_numeric]
        case v
        # \A...\Z anchors the whole cell. The former ^...$ matched any single
        # line, so a multi-line cell such as "note\n42" was turned into 0.
        # \Z still tolerates one trailing newline, as $ did.
        when /\A[+-]?\d+\.\d+\Z/
          v.to_f
        when /\A[+-]?\d+\Z/
          v.to_i
        else
          v.to_s.strip
        end
      else
        v.to_s.strip
      end
    value = nil if options[:reject_blank] && value.blank?
    value = nil if options[:reject_zero] && value.respond_to?(:zero?) && value.zero?
    # Only Strings are matched: Integer/Float no longer respond to =~ on
    # Ruby 3.2+, and on older versions the match simply returned nil.
    value = nil if options[:reject_matching] && value.is_a?(String) && value =~ options[:reject_matching]
    value
  end
end