class Reckon::CSVParser
Attributes
csv_data[RW]
date_column[RW]
date_column_index[RW]
description_column_indices[RW]
money_column[RW]
money_column_indices[RW]
options[RW]
Public Class Methods
new(options = {})
click to toggle source
# File lib/reckon/csv_parser.rb, line 7
# Builds a parser from an options hash, then parses, filters and analyses
# the CSV in that order.
#
# The CSV text comes from options[:string] when present, otherwise from the
# file named by options[:file]. options[:currency] is set to '$' on the
# given hash when it has no truthy value.
def initialize(options = {})
  self.options = options
  self.options[:currency] ||= '$'

  raw_text = options[:string] || File.read(options[:file])
  @csv_data = parse(raw_text, options[:file])

  filter_csv
  detect_columns
end
Public Instance Methods
columns()
click to toggle source
# File lib/reckon/csv_parser.rb, line 15
# Returns csv_data transposed into columns; the result is memoized in @columns.
#
# Rows in which every entry is nil or empty are skipped. Every remaining
# entry is stripped, with nil treated as an empty string.
#
# @return [Array<Array<String>>] one array of cell strings per column index
def columns
  @columns ||= csv_data.each_with_object([]) do |row, cols|
    next if row.all? { |entry| entry.nil? || entry.empty? }

    row.each_with_index do |entry, index|
      (cols[index] ||= []) << (entry || '').strip
    end
  end
end
date_for(index)
click to toggle source
# File lib/reckon/csv_parser.rb, line 32
# Returns whatever @date_column.for yields for the given row index.
def date_for(index)
  date_source = @date_column
  date_source.for(index)
end
description_for(index)
click to toggle source
# File lib/reckon/csv_parser.rb, line 55
# Builds the description text for the row at +index+.
#
# Takes the cell from every description column, drops blank cells, joins the
# rest with "; ", collapses repeated spaces, removes runs of two or more
# ";<whitespace>" separators, and strips the result.
def description_for(index)
  cells = description_column_indices.map { |col| columns[col][index].to_s.strip }
  joined = cells.reject(&:empty?).join("; ")
  joined.squeeze(" ").gsub(/(;\s+){2,}/, '').strip
end
money_for(index)
click to toggle source
# File lib/reckon/csv_parser.rb, line 40
# Returns the entry of @money_column at the given row index.
def money_for(index)
  amounts = @money_column
  amounts[index]
end
pretty_date_for(index)
click to toggle source
# File lib/reckon/csv_parser.rb, line 36
# Returns whatever @date_column.pretty_for yields for the given row index.
def pretty_date_for(index)
  date_source = @date_column
  date_source.pretty_for(index)
end
pretty_money(amount, negate = false)
click to toggle source
# File lib/reckon/csv_parser.rb, line 44
# Wraps +amount+ in a Money built with this parser's options and returns its
# pretty form; +negate+ is passed through to Money#pretty.
def pretty_money(amount, negate = false)
  money = Money.new(amount, @options)
  money.pretty(negate)
end
pretty_money_for(index, negate = false)
click to toggle source
# File lib/reckon/csv_parser.rb, line 48
# Returns the pretty form of the money entry for the row at +index+, or the
# integer 0 when that row has no money entry. +negate+ is passed through to
# the entry's pretty method.
def pretty_money_for(index, negate = false)
  money = money_for(index)
  money.nil? ? 0 : money.pretty(negate)
end
row(index)
click to toggle source
# File lib/reckon/csv_parser.rb, line 64
# Returns the raw cells of the csv_data row at +index+ joined with ", ".
def row(index)
  cells = csv_data[index]
  cells.join(", ")
end
Private Instance Methods
detect_columns()
click to toggle source
# File lib/reckon/csv_parser.rb, line 162
# Decides which columns hold money, dates and descriptions, and builds the
# @date_column and @money_column objects from them.
#
# options[:money_column] and options[:date_column] are 1-based overrides;
# without them the scores from evaluate_columns are used. Every column that is
# neither a money column nor the date column becomes a description column.
def detect_columns
  results = evaluate_columns(columns)

  if options[:money_column]
    # The option is 1-based; stored indices are 0-based.
    self.money_column_indices = [options[:money_column] - 1]
  else
    self.money_column_indices = results.select { |n| n[:is_money_column] }.map { |n| n[:index] }
    case money_column_indices.length
    when 1
      puts "Using column #{money_column_indices.first + 1} as the money column. Use --money-column to specify a different one."
    when 2
      found_double_money_column(*money_column_indices)
    else
      puts "Unable to determine a money column, use --money-column to specify the column reckon should use."
    end
  end

  # Money columns are no longer candidates for the date or description roles.
  results.reject! { |i| money_column_indices.include?(i[:index]) }

  if options[:date_column]
    @date_column_index = options[:date_column] - 1
  else
    # Highest date score wins; ties go to the lowest column index.
    @date_column_index = results.max_by { |n| [n[:date_score], -n[:index]] }[:index]
  end
  results.reject! { |i| i[:index] == date_column_index }
  @date_column = DateColumn.new(columns[date_column_index], @options)

  @money_column = MoneyColumn.new(columns[money_column_indices[0]], @options)
  if money_column_indices.length == 1
    # A single all-positive column may have its sign stored in a neighbouring column.
    detect_sign_column if @money_column.positive?
  else
    @money_column.merge! MoneyColumn.new(columns[money_column_indices[1]], @options)
  end

  self.description_column_indices = results.map { |i| i[:index] }
end
detect_sign_column()
click to toggle source
In some CSV files, whether an amount is negative or positive is indicated in a separate column.
# File lib/reckon/csv_parser.rb, line 137
# Looks for a column next to the single money column that holds exactly two
# distinct values, and uses it to negate the matching money entries.
#
# The column to the left of the money column is tried first, then the one to
# its right. If the first distinct value is "Bij" or begins with "cr"
# (case-insensitive), rows carrying the second value are negated; otherwise
# rows carrying the first value are negated.
def detect_sign_column
  # With two rows or fewer, any column trivially has at most two distinct
  # values, so the check would produce false positives.
  return if columns[0].length <= 2

  money_index = @money_column_indices[0]
  signs = []
  if money_index > 0
    column = columns[money_index - 1]
    signs = column.uniq
  end
  if signs.length != 2 && (money_index + 1 < columns.length)
    column = columns[money_index + 1]
    signs = column.uniq
  end
  return unless signs.length == 2

  first_is_positive = signs[0] == "Bij" || signs[0].downcase =~ /^cr/
  negative_sign = first_is_positive ? signs[1] : signs[0]
  @money_column.each_with_index do |money, i|
    @money_column[i] = -money if column[i] == negative_sign
  end
end
evaluate_columns(cols)
click to toggle source
# File lib/reckon/csv_parser.rb, line 80
# Scores every column for how much it looks like a money or a date column and
# flags the likely money column(s).
#
# @param cols [Array<Array<String>>] columns of cell strings
# @return [Array<Hash>] one hash per column, ordered by :index, with keys
#   :index, :money_score, :date_score and, on the chosen column(s),
#   :is_money_column => true
def evaluate_columns(cols)
  results = cols.each_with_index.map do |column, index|
    money_score = date_score = possible_neg_money_count = possible_pos_money_count = 0
    last = nil

    # Walk the column bottom-up, pairing each entry with a csv_data row counted
    # from the end of the data.
    column.reverse.each_with_index do |entry, row_from_bottom|
      row = csv_data[csv_data.length - 1 - row_from_bottom]
      entry = entry.strip
      money_score += Money.likelihood(entry)
      possible_neg_money_count += 1 if entry =~ /^\$?[\-\(]\$?\d+/
      possible_pos_money_count += 1 if entry =~ /^\+?\$?\+?\d+/
      date_score += DateColumn.likelihood(entry)

      # Try to determine if this is a balance column: the previously visited
      # value plus some cell of this row equals this entry.
      # NOTE(review): this is an exact Float comparison, so amounts with
      # fractional parts can miss because of rounding - confirm whether a
      # tolerance is wanted.
      entry_as_num = entry.gsub(/[^\-\d\.]/, '').to_f
      if last && entry_as_num != 0 && last != 0
        looks_like_balance = row.any? do |row_entry|
          value = row_entry.to_s.gsub(/[^\-\d\.]/, '').to_f
          value != 0 && last + value == entry_as_num
        end
        money_score -= 10 if looks_like_balance
      end
      last = entry_as_num
    end

    # A column in which more than a fifth of the entries look negative and more
    # than a fifth look positive gets a large bonus.
    threshold = column.length / 5.0
    if possible_neg_money_count > threshold && possible_pos_money_count > threshold
      money_score += 10 * column.length
    end

    { :index => index, :money_score => money_score, :date_score => date_score }
  end

  results.sort_by! { |n| -n[:money_score] }

  # check if it looks like a 2-column file with a balance field
  if results.length >= 3 && results[1][:money_score] + results[2][:money_score] >= results[0][:money_score]
    results[1][:is_money_column] = true
    results[2][:is_money_column] = true
  else
    results[0][:is_money_column] = true
  end

  results.sort_by { |n| n[:index] }
end
filter_csv()
click to toggle source
# File lib/reckon/csv_parser.rb, line 70
# Drops the columns listed in options[:ignore_columns] (1-based column
# numbers) from @columns. Does nothing when the option is not set.
def filter_csv
  ignored = options[:ignore_columns]
  return unless ignored

  @columns = columns.reject.with_index { |_column, index| ignored.include?(index + 1) }
end
found_double_money_column(id1, id2)
click to toggle source
# File lib/reckon/csv_parser.rb, line 129
# Records the two given column indices as the money columns and prints a
# notice explaining the assumption to the user.
def found_double_money_column(id1, id2)
  self.money_column_indices = [id1, id2]
  puts "It looks like this CSV has two seperate columns for money, one of which shows positive",
       "changes and one of which shows negative changes.  If this is true, great.  Otherwise,",
       "please report this issue to us so we can take a look!\n"
end
parse(data, filename=nil)
click to toggle source
# File lib/reckon/csv_parser.rb, line 198
# Converts raw CSV text to UTF-8 and parses it line by line.
#
# @param data [String] raw CSV text; it is not modified
# @param filename [String, nil] passed to try_encoding as an encoding hint
# @return [Array] one CSV.parse_line result per line, after skipping the
#   first options[:contains_header] lines (default 0)
def parse(data, filename=nil)
  # Use force_encoding to convert the string to utf-8 with as few invalid
  # characters as possible. It mutates its receiver (and raises on a frozen
  # string), so work on a copy rather than on the caller's object.
  text = data.dup.force_encoding(try_encoding(data, filename))
  text = text.encode('UTF-8', invalid: :replace, undef: :replace, replace: '?')
  text.sub!("\xEF\xBB\xBF", '') # strip byte order marker, if it exists

  header_lines = options[:contains_header] || 0
  separator = options[:csv_separator] || ','

  rows = []
  text.each_line.with_index do |line, i|
    next if i < header_lines

    rows << CSV.parse_line(line, col_sep: separator)
  end
  rows
end
try_encoding(data, filename = nil)
click to toggle source
# File lib/reckon/csv_parser.rb, line 214
# Picks the encoding to assume for +data+.
#
# The suggestion comes from try_encoding_from_file when it yields one, then
# from CharDet's detection, then falls back to 'BINARY'. The suggestion is
# logged; options[:encoding], when set, overrides it in the return value.
#
# @param data [String] raw file contents
# @param filename [String, nil] file to inspect for a charset hint
# @return [String] encoding name
def try_encoding(data, filename = nil)
  # Content-based detection only runs when the file-based hint found nothing.
  encoding = try_encoding_from_file(filename) || CharDet.detect(data)['encoding'] || 'BINARY'
  LOGGER.info("suggested file encoding: #{encoding}")
  options[:encoding] || encoding
end
try_encoding_from_file(filename = nil)
click to toggle source
# File lib/reckon/csv_parser.rb, line 227
# Asks the system `file` utility for the charset of +filename+.
#
# Only attempted when the local platform's os is 'linux' (`file -i`) or
# 'darwin' (`file -I`).
#
# @param filename [String, nil] path of the file to inspect
# @return [String, nil] the charset reported by `file`, or nil when no
#   filename is given, the platform is not handled, or the output contains
#   no "charset=" field
def try_encoding_from_file(filename = nil)
  return unless filename

  flag = case Gem::Platform.local.os
         when 'linux' then '-i'
         when 'darwin' then '-I'
         end
  return unless flag

  # Pass the arguments as an array so no shell is involved: spaces and shell
  # metacharacters in the filename stay literal. "--" keeps a name starting
  # with "-" from being read as an option.
  output = IO.popen(['file', flag, '--', filename.to_s], &:read)
  match = output.match(/charset=(\S+)/)
  match && match[1]
end