class CsvHuman

Constants

HEADER_CONVERTERS
MAJOR
MINOR
PATCH
TYPE_CONVERTERS
TYPE_MAPPINGS

convert guess_type to proc (is there a better/idiomatic way)?

->(name, attributes) { guess_type( name, attributes ) }

# Method object for +guess_type+, so the guesser can be handed around and invoked
# via #call( name, attributes ) just like a proc/lambda.
# NOTE(review): the method is looked up on Kernel here - confirm that guess_type
#   is actually reachable through that receiver.
TYPE_MAPPING_GUESS = Kernel.method( :guess_type )

VERSION

Attributes

header[R]
tags[R]

Public Class Methods

banner() click to toggle source
convert_to_date( value ) click to toggle source
# File lib/csvhuman/converter.rb, line 103
# Converts a string value into a Date.
#
# Supported notations (the first pattern found in the value wins):
#   2014-11-09   (year-month-day)
#   09/11/2014   (day/month/year)
#
# Returns nil for nil/empty values and for values matching neither notation.
# Whatever Date.strptime raises for a value it cannot parse is passed through.
def self.convert_to_date( value )
  return nil if value.nil? || value.empty?

  ## todo/fix: add support for more formats
  ##    how to deal with conversion errors (throw exception? ignore? why? why not?)
  known_formats = [
    [/\d{4}-\d{1,2}-\d{1,2}/,   "%Y-%m-%d"],    # 2014-11-09
    [/\d{1,2}\/\d{1,2}\/\d{4}/, "%d/%m/%Y"],    # 09/11/2014
  ]

  hit = known_formats.find { |pattern, _format| value =~ pattern }

  ## todo/fix: throw argument/value error if nothing matches - why? why not
  return nil if hit.nil?

  Date.strptime( value, hit.last )
end
convert_to_f( value ) click to toggle source
# File lib/csvhuman/converter.rb, line 93
# Converts a string value into a Float.
#
# Returns nil for nil/empty values; everything else is handed to Kernel#Float,
# which raises ArgumentError for input it cannot parse.
def self.convert_to_f( value )
  ## blank in, nil out - why? why not?
  return nil if value.nil? || value.empty?

  ## todo/fix: add support for NaN, Inf, -Inf etc.
  ##    how to deal with conversion errors (throw exception? ignore? NaN? why? why not?)
  Float( value )
end
convert_to_i( value ) click to toggle source
# File lib/csvhuman/converter.rb, line 85
# Converts a string value into an Integer.
#
# Returns nil for nil/empty values. All other values are parsed as decimal
# numbers; Kernel#Integer raises ArgumentError for input it cannot parse.
#
# note: the base (10) is passed in explicitly - without it Kernel#Integer
#   honors radix prefixes, that is, a leading zero means octal
#   ("010" turns into 8 and "08" raises) - not what you want for csv data.
#   As a consequence "0x1A"-style values are no longer read as hex but raise.
def self.convert_to_i( value )
  return nil if value.nil? || value.empty?   ## return nil - why? why not?

  Integer( value, 10 )
end
foreach( path, sep: nil, header_converter: nil, &block ) click to toggle source
# File lib/csvhuman/reader.rb, line 39
# Opens the file at +path+ and walks through its records.
#
# With a block every record is passed to the block and the underlying
# csv reader gets closed afterwards (even if the block raises).
# Without a block an enumerator is returned and the reader stays open.
def self.foreach( path, sep: nil,
                        header_converter: nil, &block )
  csv   = CsvReader.open( path, sep: sep, parser: CsvReader::Parser.human )     ## note: returns an enumerator-like object
  human = new( csv, header_converter: header_converter )

  ## note: caller (responsible) must close file!!!
  ## remove version without block given - why? why not?
  ## use Csv.open().to_enum  or Csv.open().each
  ##   or Csv.new( File.new() ).to_enum or Csv.new( File.new() ).each ???
  return human.to_enum   unless block_given?

  begin
    human.each( &block )
  ensure
    csv.close
  end
end
guess_type( name, attributes ) click to toggle source
# File lib/csvhuman/converter.rb, line 24
# Guesses the type (class) of a column from its tag name and attributes.
#
# name       - tag name (string) e.g. 'date', 'geo', 'affected'
# attributes - list of attribute names (strings) e.g. ['num'] or ['lat'];
#              nil is treated like an empty list
#
# Returns one of Integer, Float, Date or String (String is the default).
#
# Rules (first match wins):
#   1. name 'date'             => Integer if attribute 'year' is present, otherwise Date
#   2. well-known number names => Integer
#   3. no attributes           => String
#   4. attribute 'num' or 'id' => Integer
#   5. attribute 'date'        => Date
#   6. name 'geo' with 'lat', 'lon' or 'elevation' => Float
#   7. well-known count attributes (killed, injured, ...) => Integer
#   8. anything else           => String
def self.guess_type( name, attributes )
  ## note: nil-safe - all checks below (incl. the 'date' branch) work on a list
  attribs = attributes || []

  if name == 'date'
    ##  just the year (e.g. 2011)? use an integer number
    return attribs.include?( 'year' ) ? Integer : Date
  end

  ## todo/fix: add more well-known names with num required!!!
  return Integer  if ['affected', 'inneed', 'targeted', 'reached', 'population'].include?( name )

  return String   if attribs.empty?     ## assume (default to) string

  ## assume id is (always) a rowid - why? why not?
  return Integer  if attribs.include?( 'num' ) || attribs.include?( 'id' )

  return Date     if attribs.include?( 'date' )   ### todo/check: exists +date?

  if name == 'geo' && ['lat', 'lon', 'elevation'].any? { |attrib| attribs.include?( attrib ) }
    return Float
  end

  counts = ['killed', 'injured', 'infected', 'displaced', 'idps', 'refugees',
            'abducted', 'threatened', 'affected', 'inneed', 'targeted', 'reached']
  return Integer  if counts.any? { |attrib| attribs.include?( attrib ) }

  String   ## assume (default to) string
end
new( recs_or_stream, sep: nil, header_converter: nil ) click to toggle source
# File lib/csvhuman/reader.rb, line 75
# Sets up a new reader.
#
# recs_or_stream   - a String gets wrapped in a CsvReader (using +sep+ and the
#                    "human" parser); anything else is used as-is as the record source
# sep              - separator passed on to CsvReader (only used for String input)
# header_converter - nil (use HEADER_CONVERTERS[:default]),
#                    a Symbol (looked up in HEADER_CONVERTERS) or
#                    anything else (used as-is; assumed to be a proc)
def initialize( recs_or_stream, sep: nil,
                                header_converter: nil )
  ## todo/check:  check if arg is a stream/enumerator - why? why not??
  @recs = if recs_or_stream.is_a?( String )
            ## note: returns an enumerator-like object
            CsvReader.new( recs_or_stream, sep: sep, parser: CsvReader::Parser.human )
          else
            recs_or_stream
          end

  @header = []
  @tags   = nil   ## add tags = nil  -- why? why not?
  @cols   = nil   ## column mappings (used "internally")

  @header_converter = case header_converter
                      when nil
                        HEADER_CONVERTERS[:default]
                      when Symbol
                        ## todo/fix: check if converter is nil (not found) - raise except!!!!
                        HEADER_CONVERTERS[header_converter]
                      else
                        ## assume proc; todo/fix: check it's a proc!!!
                        header_converter
                      end
end
open( path, mode=nil, sep: nil, header_converter: nil, &block ) click to toggle source

Q: how to deal with un-tagged fields?

- skip / ignore

Q: how to deal with duplicate fields (e.g. sex+#targeted,#sex+#targeted)?

- value (auto-magically) turned into an array / list
# File lib/csvhuman/reader.rb, line 12
# Opens the file at +path+ (via CsvReader.open using the "human" parser)
# and wraps it in a new reader.
#
# With a block the reader is passed to the block, the underlying csv reader
# gets closed afterwards (even if the block raises) and the block's result is returned.
# Without a block the (still open) reader is returned.
def self.open( path, mode=nil, sep: nil,
                               header_converter: nil, &block )   ## rename path to filename or name - why? why not?
  ## note: default mode (if nil/not passed in) to 'r:bom|utf-8'
  ## f = File.open( path, mode ? mode : 'r:bom|utf-8' )
  reader = CsvReader.open( path, mode, sep: sep, parser: CsvReader::Parser.human )     ## note: returns an enumerator-like object
  human  = new( reader, header_converter: header_converter )

  return human   unless block_given?

  # handle blocks like Ruby's open(), not like the (old old) CSV library
  begin
    yield human
  ensure
    reader.close
  end
end
parse( str_or_readable, sep: nil, header_converter: nil, &block ) click to toggle source
# File lib/csvhuman/reader.rb, line 59
# Parses a string (or any record source accepted by +new+).
#
# With a block every record is passed to the block;
# without a block all records get slurped via +read+ and returned.
#
# note: caller (responsible) must close file!!! - add autoclose - why? why not?
def self.parse( str_or_readable, sep: nil,
                                 header_converter: nil, &block )
  human = new( str_or_readable, sep: sep, header_converter: header_converter )

  # slurp contents, if no block is given
  return human.read   unless block_given?

  human.each( &block )
end
read( path, sep: nil, header_converter: nil ) click to toggle source
# File lib/csvhuman/reader.rb, line 33
# Opens the file at +path+, slurps all records (via the reader's +read+)
# and returns them; the file is closed by +open+ once done.
def self.read( path, sep: nil,
                     header_converter: nil )
  open( path, sep: sep, header_converter: header_converter, &:read )
end
root() click to toggle source
# File lib/csvhuman/version.rb, line 20
# Returns the absolute path of the directory three levels up from this source file.
def self.root
  dir = __FILE__
  3.times { dir = File.dirname( dir ) }
  File.expand_path( dir )
end
version() click to toggle source
# File lib/csvhuman/version.rb, line 12
# Returns the value of the VERSION constant.
def self.version
  VERSION
end

Public Instance Methods

each( &block ) click to toggle source
# File lib/csvhuman/reader.rb, line 101
# Walks through all rows of the record source and passes every data row
# as a record (hash) to the block.
#
# - rows before the first row holding a cell starting with '#' are collected in @header
# - the first row holding a cell starting with '#' is the tag row;
#   it is kept in @tags and used to build the column mappings (@cols)
# - every row after the tag row is a data row; only tagged columns end up in the record,
#   the value is typecast by the column's tag and
#   list columns collect their values in an array
#
# Returns an enumerator if no block is given
# (instead of failing with a NoMethodError on the first data row).
def each( &block )
  return to_enum( :each )   unless block

  ## note: reset state - lets you run each more than once
  @header = []
  @tags   = nil   ## add tags = nil  -- why? why not?
  @cols   = nil   ## column mappings (used "internally")

  @recs.each do |values|
    ## pp values
    if @cols.nil?
      ## still in the header section - look out for the tag row
      if values.any? { |value| value && value.strip.start_with?('#') }
        @cols = Columns.build( values, @header_converter )
        @tags = values
      else
        @header << values
      end
    else
      ## data row
      ##  strip non-tagged - how?
      record = {}
      @cols.each_with_index do |col,i|
        next unless col.tagged?    ## skip / ignore un-tagged columns

        key   = col.key
        value = col.tag.typecast( values[i] )
        if col.list?
          record[ key ] ||= []
          record[ key ] << value
        else
          ## add "regular" single value
          record[ key ] = value
        end
      end
      ## pp record
      block.call( record )
    end
  end
end
read() click to toggle source
# File lib/csvhuman/reader.rb, line 139
# Slurps all records; returns whatever +to_a+ returns.
def read
  to_a
end