class CsvHuman
Constants
- HEADER_CONVERTERS
- MAJOR
- MINOR
- PATCH
- TYPE_CONVERTERS
- TYPE_MAPPINGS
convert
guess_type
to proc (is there a better/idiomatic way)? ->(name, attributes) { guess_type( name, attributes ) }
TYPE_MAPPING_GUESS = Kernel.method( :guess_type )
- VERSION
Attributes
header[R]
Public Class Methods
convert_to_date( value )
click to toggle source
# File lib/csvhuman/converter.rb, line 103
##
# Converts a text value into a Date.
#
# value - the raw text (or nil)
#
# Returns nil when value is nil or empty, and also when it matches none of
# the supported formats. Otherwise returns the result of Date.strptime;
# nothing is rescued here, so any error raised by Date.strptime reaches
# the caller.
def self.convert_to_date( value )
  return nil if value.nil? || value.empty?   ## return nil - why? why not?

  ## todo/fix: add support for more formats
  ##   how to deal with conversion errors (throw exception? ignore? why? why not?)
  ## note: both patterns are unanchored - they match anywhere inside value
  if value =~ /\d{4}-\d{1,2}-\d{1,2}/
    ### todo: check if 2014-1-9 works for strptime too (leading zero required)?
    Date.strptime( value, "%Y-%m-%d" )   # 2014-11-09
  elsif value =~ /\d{1,2}\/\d{1,2}\/\d{4}/
    Date.strptime( value, "%d/%m/%Y" )   # 09/11/2014
  else
    ## todo/fix: throw argument/value error - why? why not
    nil
  end
end
convert_to_f( value )
click to toggle source
# File lib/csvhuman/converter.rb, line 93
##
# Converts a text value into a Float.
#
# value - the raw text (or nil)
#
# Returns nil when value is nil or empty; otherwise the result of
# Kernel#Float. Nothing is rescued here, so a conversion error raised by
# Float reaches the caller.
def self.convert_to_f( value )
  return nil if value.nil? || value.empty?   ## return nil - why? why not?

  ## todo/fix: add support for NaN, Inf, -Inf etc.
  ##   how to deal with conversion errors (throw exception? ignore? NaN? why? why not?)
  Float( value )
end
convert_to_i( value )
click to toggle source
# File lib/csvhuman/converter.rb, line 85
##
# Converts a text value into an Integer.
#
# value - the raw text (or nil)
#
# Returns nil when value is nil or empty; otherwise the result of
# Kernel#Integer in base 10. Nothing is rescued here, so a conversion
# error raised by Integer reaches the caller.
def self.convert_to_i( value )
  return nil if value.nil? || value.empty?   ## return nil - why? why not?

  ## note: always parse as decimal - without an explicit base Integer()
  ##   honours radix prefixes, so "010" would become 8 (octal) and
  ##   "08" would raise an ArgumentError
  Integer( value, 10 )
end
foreach( path, sep: nil, header_converter: nil, &block )
click to toggle source
# File lib/csvhuman/reader.rb, line 39
##
# Opens the file at path via CsvReader.open (with the "human" parser),
# wraps it in a new reader and iterates over it.
#
# path             - passed through to CsvReader.open
# sep              - passed through to CsvReader.open
# header_converter - passed through to new
#
# With a block: calls each with the block and closes the underlying csv
# afterwards (also when the block raises).
# Without a block: returns human.to_enum and leaves the csv open.
def self.foreach( path, sep: nil, header_converter: nil, &block )
  csv   = CsvReader.open( path, sep: sep, parser: CsvReader::Parser.human )   ## note: returns an enumerator-like object
  human = new( csv, header_converter: header_converter )

  if block_given?
    begin
      human.each( &block )
    ensure
      csv.close
    end
  else
    human.to_enum    ## note: caller (responsible) must close file!!!
    ## remove version without block given - why? why not?
    ## use Csv.open().to_enum or Csv.open().each
    ##   or Csv.new( File.new() ).to_enum or Csv.new( File.new() ).each ???
  end
end
guess_type( name, attributes )
click to toggle source
# File lib/csvhuman/converter.rb, line 24
##
# Guesses the value type (Integer, Float, Date or String) for a tag.
#
# name       - the tag name (e.g. 'date', 'geo', 'affected')
# attributes - the tag attributes; anything responding to include? and
#              empty?, or nil for "no attributes"
#
# Returns one of the classes Integer, Float, Date or String
# (String is the fallback).
def self.guess_type( name, attributes )
  ## note: treat nil like "no attributes" everywhere - otherwise the
  ##   name == 'date' check below would fail on nil.include?
  attributes ||= []

  if name == 'date'
    ## just the year (e.g. 2011); use an integer number
    return attributes.include?( 'year' ) ? Integer : Date
  end

  ## todo/fix: add more well-known names with num required!!!
  return Integer if ['affected', 'inneed', 'targeted', 'reached', 'population'].include?( name )

  ## check attributes
  if attributes.empty?
    String   ## assume (default to) string
  elsif attributes.include?( 'num' ) ||
        attributes.include?( 'id' )   ## assume id is (always) a rowid - why? why not?
    Integer
  elsif attributes.include?( 'date' )   ### todo/check: exists +date?
    Date
  elsif name == 'geo' &&
        ['lat', 'lon', 'elevation'].any? { |attribute| attributes.include?( attribute ) }
    Float
  elsif ['killed', 'injured', 'infected', 'displaced', 'idps', 'refugees',
         'abducted', 'threatened', 'affected', 'inneed', 'targeted',
         'reached'].any? { |attribute| attributes.include?( attribute ) }
    Integer
  else
    String   ## assume (default to) string
  end
end
new( recs_or_stream, sep: nil, header_converter: nil )
click to toggle source
# File lib/csvhuman/reader.rb, line 75
##
# Sets up a new reader.
#
# recs_or_stream   - a String is wrapped in CsvReader.new (with the
#                    "human" parser); anything else is used as-is as
#                    the record source
# sep              - passed through to CsvReader.new (only used for a String)
# header_converter - nil selects HEADER_CONVERTERS[:default];
#                    a Symbol is looked up in HEADER_CONVERTERS;
#                    anything else is stored as-is (assumed to be a proc)
def initialize( recs_or_stream, sep: nil, header_converter: nil )
  ## todo/check: check if arg is a stream/enumerator - why? why not??
  if recs_or_stream.is_a?( String )
    @recs = CsvReader.new( recs_or_stream, sep: sep, parser: CsvReader::Parser.human )   ## note: returns an enumerator-like object
  else
    @recs = recs_or_stream
  end

  @header = []
  @tags   = nil    ## add tags = nil  -- why? why not?
  @cols   = nil    ## column mappings (used "internally")

  if header_converter.nil?
    @header_converter = HEADER_CONVERTERS[:default]
  elsif header_converter.is_a?( Symbol )
    ## todo/fix: check if converter is nil (not found) - raise except!!!!
    @header_converter = HEADER_CONVERTERS[header_converter]
  else
    ## assume proc; todo/fix: check it's a proc!!!
    @header_converter = header_converter
  end
end
open( path, mode=nil, sep: nil, header_converter: nil, &block )
click to toggle source
Q: how to deal with un-tagged fields?
- skip / ignore
Q: how to deal with duplicate fields (e.g. sex+#targeted,#sex+#targeted)?
- value (auto-magically) turned into an array / list
# File lib/csvhuman/reader.rb, line 12
##
# Opens the file at path via CsvReader.open (with the "human" parser)
# and wraps it in a new reader.
#
# path             - passed through to CsvReader.open
# mode             - passed through to CsvReader.open (may be nil)
# sep              - passed through to CsvReader.open
# header_converter - passed through to new
#
# With a block: calls the block with the reader, closes the underlying
# csv afterwards (also when the block raises) and returns the block's result.
# Without a block: returns the reader and leaves the csv open.
def self.open( path, mode=nil, sep: nil, header_converter: nil, &block )   ## rename path to filename or name - why? why not?
  ## note: default mode (if nil/not passed in) to 'r:bom|utf-8'
  ##   f = File.open( path, mode ? mode : 'r:bom|utf-8' )
  csv   = CsvReader.open( path, mode, sep: sep, parser: CsvReader::Parser.human )   ## note: returns an enumerator-like object
  human = new( csv, header_converter: header_converter )

  # handle blocks like Ruby's open(), not like the (old old) CSV library
  if block_given?
    begin
      block.call( human )
    ensure
      csv.close
    end
  else
    human
  end
end
parse( str_or_readable, sep: nil, header_converter: nil, &block )
click to toggle source
# File lib/csvhuman/reader.rb, line 59
##
# Builds a new reader on top of str_or_readable.
#
# str_or_readable  - passed through to new
# sep              - passed through to new
# header_converter - passed through to new
#
# With a block: calls each with the block.
# Without a block: returns the result of read.
# Nothing is closed here in either case.
def self.parse( str_or_readable, sep: nil, header_converter: nil, &block )
  human = new( str_or_readable, sep: sep, header_converter: header_converter )

  if block_given?
    human.each( &block )   ## note: caller (responsible) must close file!!! - add autoclose - why? why not?
  else
    # slurp contents, if no block is given
    human.read             ## note: caller (responsible) must close file!!! - add autoclose - why? why not?
  end
end
read( path, sep: nil, header_converter: nil )
click to toggle source
# File lib/csvhuman/reader.rb, line 33
##
# Opens the file at path (block form of open) and returns the result of
# calling read on the reader.
#
# path             - passed through to open
# sep              - passed through to open
# header_converter - passed through to open
def self.read( path, sep: nil, header_converter: nil )
  open( path, sep: sep, header_converter: header_converter ) { |human| human.read }
end
root()
click to toggle source
# File lib/csvhuman/version.rb, line 20
##
# Returns the expanded (absolute) path of the directory three
# File.dirname steps up from this source file.
def self.root
  ## note: File.expand_path already returns a string - no interpolation needed
  File.expand_path( File.dirname( File.dirname( File.dirname( __FILE__ ) ) ) )
end
version()
click to toggle source
# File lib/csvhuman/version.rb, line 12
##
# Returns the VERSION constant.
def self.version
  VERSION
end
Public Instance Methods
each( &block )
click to toggle source
# File lib/csvhuman/reader.rb, line 101
##
# Iterates over the records and calls the block with one hash per data row.
#
# Rows are read from @recs. Every row before the first row that has a
# value starting with '#' (after strip) is collected in @header. That
# first '#' row becomes @tags and is turned into the column mappings via
# Columns.build. Every row after it is a data row.
#
# For a data row only tagged columns are used; each value is typecast
# via the column's tag. Values of list columns are collected in an array
# under the column key, other values are stored directly.
#
# Returns an Enumerator when no block is given.
def each( &block )
  ## note: without a block there is nothing to call - hand back an enumerator
  return to_enum( :each ) unless block_given?

  @header = []
  @tags   = nil    ## add tags = nil  -- why? why not?
  @cols   = nil    ## column mappings (used "internally")

  @recs.each do |values|
    ## pp values
    if @cols.nil?
      if values.any? { |value| value && value.strip.start_with?('#') }
        @cols = Columns.build( values, @header_converter )
        @tags = values
      else
        @header << values
      end
    else
      ## data row
      ##  strip non-tagged - how?
      record = {}
      @cols.each_with_index do |col,i|
        next unless col.tagged?

        key   = col.key
        value = col.tag.typecast( values[i] )

        if col.list?
          record[ key ] ||= []
          record[ key ] << value
        else
          ## add "regular" single value
          record[ key ] = value
        end
      end
      ## pp record
      block.call( record )
    end
  end
end
read()
click to toggle source
# File lib/csvhuman/reader.rb, line 139
##
# Returns the result of to_a.
def read() to_a; end