class Quandl::Babelfish::NumberMaid
Responsible for cleaning numbers: converts raw cell text into numeric values.
Public Class Methods
cell_to_number(num)
click to toggle source
# File lib/quandl/babelfish/number_maid.rb, line 32
#
# Converts one raw cell into a Float.
#
# Understands bracketed annotations, "est." markers, exponent notation
# ("1.5e3", "2 x 10^3", "2*10^3"), the words "million" / "billion", and
# negatives written with a leading minus or (unless
# @settings[:ignore_brackets] is set) an opening parenthesis. The decimal
# mark is whatever @escaped_decimal holds; every other non-digit character
# is dropped.
#
# num - the raw cell. It is converted with to_s and copied, so the caller's
#       object is never modified and frozen strings are accepted.
#
# Returns the parsed Float, or nil when num is nil or no well-formed number
# can be extracted.
def cell_to_number(num)
  return nil if num.nil?

  # Work on a private copy: the in-place substitutions below must not leak
  # into (or fail on a frozen) string owned by the caller.
  num = num.to_s.dup

  # Remove annotations: if there is something in brackets and a number
  # elsewhere, drop the bracketed part. Otherwise keep it, because the
  # number may be the bracketed part itself, e.g. "(56)".
  without_brackets = num.gsub(/[\(\[\{].*[\)\}\]]/, '')
  num = without_brackets if without_brackets =~ /\d/
  num.gsub!("est.", '')

  # Check for exponents by searching for 'e', 'E' or variations of
  # 'x 10', '*10' and 'X10^'. The marker only counts when it is preceded by
  # a digit/decimal mark and followed by a digit or a plus/minus sign.
  exponent_multiplier = 1
  exponent = /\s*(?:[Ee]|[Xx*]\s*10\^?)\s*/.match(num)
  if exponent &&
     exponent.pre_match =~ /[0-9#{@escaped_decimal}]$/ &&
     exponent.post_match =~ /^[\-+0-9]/
    num = exponent.pre_match
    exponent_multiplier = 10**/^[0-9\-+]*/.match(exponent.post_match)[0].to_i
  end

  # These flags must be read before the text is reduced to bare digits.
  million  = num =~ /million/i
  billion  = num =~ /billion/i
  negative = num =~ /-\d/ || (!@settings[:ignore_brackets] && num =~ /\(\d/)

  # Watch out for two numbers, like a range, e.g. "27.3 - 33.9": join digits
  # separated by a single space, then keep only the first numeric run.
  num.gsub!(/(\d) (\d)/, '\1\2')
  first_number = num.match(/-?\s*[,\d\.]+/)
  num = first_number[0] if first_number

  # Only keep digits and the decimal mark, then normalise the mark to '.'.
  num.gsub!(/[^0-9#{@escaped_decimal}]/, '')
  num.gsub!(/[^0-9]/, '.')

  return nil if num !~ /\d/
  return nil if num.end_with?(".")
  return nil if num.count(".") > 1

  cell = Float("%.14g" % num)
  cell *= 1e6 if million
  cell *= 1e9 if billion
  cell *= -1 if negative
  cell * exponent_multiplier
end
clean(dirty_numbers)
click to toggle source
Cleans each number one by one; returns a single value when given one input and an array otherwise.
# File lib/quandl/babelfish/number_maid.rb, line 22
#
# Cleans every entry of dirty_numbers with cell_to_number.
#
# dirty_numbers - a single value or a collection of values; each entry is
#                 converted with to_s before being parsed.
#
# Returns nil for nil input, the lone result when exactly one entry was
# cleaned, and otherwise an Array of results (empty for empty input).
def clean(dirty_numbers)
  return nil if dirty_numbers.nil?

  cleaned = Array(dirty_numbers).map { |raw| cell_to_number(raw.to_s) }
  cleaned.size == 1 ? cleaned.first : cleaned
end
init(user_settings)
click to toggle source
# File lib/quandl/babelfish/number_maid.rb, line 16
#
# Applies user settings on top of @defaults and caches the regexp-escaped
# decimal mark used when parsing cells.
#
# user_settings - Hash of overrides merged over @defaults; nil or omitted
#                 means "no overrides". @defaults itself is not modified.
#
# Returns the escaped decimal mark String.
def init(user_settings = {})
  @settings = @defaults.merge(user_settings || {})
  @escaped_decimal = Regexp.escape(@settings[:decimal_mark])
end