class GoodData::Data::Guesser

Utility class that guesses the data types of a data stream by looking at the first couple of rows

Constants

TYPES_PRIORITY

Attributes

headers[R]

Public Class Methods

new(reader) click to toggle source
# File lib/gooddata/data/guesser.rb, line 29
# Builds a guesser on top of a row reader.
#
# The first row taken from the reader is treated as the header row; every
# header is converted to a String and gets its own empty tally in @pros,
# @cons and @seen.
#
# @param reader [#shift] source of rows; each call to shift yields the next row
# @raise [RuntimeError] 'Empty data set' when the reader yields no first row
def initialize(reader)
  @reader = reader

  # Check for the missing header row before touching it, so an empty reader
  # produces the intended error rather than a NoMethodError on nil.
  header_row = reader.shift
  fail('Empty data set') if header_row.nil?
  @headers = header_row.map(&:to_s)

  @pros = {}
  @cons = {}
  @seen = {}

  @headers.each do |h|
    @cons[h] = {}
    @pros[h] = {}
    @seen[h] = {}
  end
end
sort_types(types) click to toggle source
# File lib/gooddata/data/guesser.rb, line 22
# Returns the given types ordered by their position in TYPES_PRIORITY.
#
# @param types [Array] types to order; each is looked up in TYPES_PRIORITY
# @return [Array] a new array sorted by ascending TYPES_PRIORITY index
def sort_types(types)
  types.sort_by { |type| TYPES_PRIORITY.index(type) }
end

Public Instance Methods

guess(limit) click to toggle source
# File lib/gooddata/data/guesser.rb, line 43
# Reads up to `limit` rows from the reader and tallies type guesses per header.
#
# Every value is run through check_number and check_date; when neither
# reports a positive guess the column is recorded as an :attribute. Reading
# stops at the first missing or empty row, or once `limit` rows were processed.
#
# @param limit [Integer] maximum number of data rows to inspect
# @return whatever guess_result returns for the collected tallies
# @raise [RuntimeError] when a row has a different number of fields than the header row
def guess(limit)
  count = 0
  # Test the limit before shifting so that no row beyond `limit` is pulled
  # off the reader and thrown away.
  while count < limit && (row = @reader.shift)
    break if row.empty?
    unless row.size == @headers.size
      fail format('%i fields in row %i, %i expected', row.size, count + 1, @headers.size)
    end
    row.each_with_index do |value, j|
      header = @headers[j]
      number = check_number(header, value)
      date = check_date(header, value)
      store_guess header, @pros => :attribute unless number || date
      hash_increment @seen[header], value
    end
    count += 1
  end
  # fields with unique values are connection point candidates
  # (note: with zero processed rows every header satisfies this comparison)
  @seen.each do |header, values|
    store_guess header, @pros => :connection_point if values.size == count
  end
  guess_result
end

Private Instance Methods

check_date(header, value) click to toggle source
# File lib/gooddata/data/guesser.rb, line 89
# Records whether a value looks like a date.
#
# nil and the placeholder '0000-00-00' are recorded as compatible with
# :date, :attribute and :fact. A value DateTime.parse accepts is recorded
# as a pro for :date and :attribute; a value it rejects is recorded as a
# con for :date.
#
# @param header [String] header of the column the value belongs to
# @param value [String, nil] raw cell value
# @return the result of the store_guess call that recorded the guess
def check_date(header, value)
  return store_guess(header, @pros => [:date, :attribute, :fact]) if value.nil? || value == '0000-00-00'

  begin
    DateTime.parse value
  rescue ArgumentError
    # An unparseable value is evidence against :date, not a fatal error;
    # re-raising here would abort guessing on the first non-date cell.
    return store_guess(header, @cons => :date)
  end
  store_guess(header, @pros => [:date, :attribute])
end
check_number(header, value) click to toggle source
# File lib/gooddata/data/guesser.rb, line 82
# Records whether a value looks like a number.
#
# nil and strings made of an optional sign, optional digits and an optional
# fractional part are recorded as a pro for :fact and :attribute; anything
# else is recorded as a con for :fact. Every part of the pattern is
# optional, so the empty string and a bare sign or dot are accepted too.
#
# @param header [String] header of the column the value belongs to
# @param value [String, nil] raw cell value
# @return the result of the store_guess call that recorded the guess
def check_number(header, value)
  # \A and \z anchor the whole string; ^ and $ would only anchor a single
  # line and let multi-line text with one numeric (or empty) line through.
  if value.nil? || value =~ /\A[\+-]?\d*(\.\d*)?\z/
    return store_guess(header, @pros => [:fact, :attribute])
  end
  store_guess header, @cons => :fact
end
guess_result() click to toggle source
# File lib/gooddata/data/guesser.rb, line 66
# Builds the final guess for every header.
#
# For each header, keeps the types tallied in @pros that have no entry in
# @cons and orders them with Guesser.sort_types.
#
# @return [Hash] header => ordered list of surviving types
def guess_result
  @headers.each_with_object({}) do |header, summary|
    rejected = @cons[header]
    surviving = @pros[header].keys.select { |type| rejected[type].nil? }
    summary[header] = Guesser.sort_types(surviving)
  end
end
hash_increment(hash, key) click to toggle source
# File lib/gooddata/data/guesser.rb, line 74
# Bumps the counter stored under key, starting at 1 when the hash holds no
# truthy value for that key.
#
# @param hash [Hash] counter hash, modified in place
# @param key [Object] key whose counter is incremented
# @return [Integer] the updated count
def hash_increment(hash, key)
  hash[key] = (hash[key] || 0) + 1
end
store_guess(header, guess) click to toggle source

Stores a guess about given header.

Returns the value given under the @pros key (truthy) if that key is present, nil otherwise

Parameters

  • header - A header name

  • guess - A hash with optional @pros and @cons keys

# File lib/gooddata/data/guesser.rb, line 110
# Stores a guess about the given header.
#
# Each type listed under the @pros key is tallied in @pros[header], and each
# type listed under the @cons key is tallied in @cons[header]. Keys other
# than these two objects are ignored.
#
# @param header [String] header name used to index into the tallies
# @param guess [Hash] maps the @pros and/or @cons hash object itself to a
#   single type or an Array of types
# @return the value supplied under the @pros key, or nil when there is none
def store_guess(header, guess)
  pros_types = nil
  guess.each do |target, types|
    is_pros = target.equal?(@pros)
    # Match keys by object identity. Hash keys are otherwise compared by
    # content, so a lookup of @pros would also find a @cons key whenever
    # both tallies currently hold equal data, turning a con into a pro.
    next unless is_pros || target.equal?(@cons)

    pros_types = types if is_pros
    next unless types

    (types.is_a?(Array) ? types : [types]).each { |type| hash_increment target[header], type }
  end
  pros_types
end