class DataPackage::Interpreter

Constants

DATETIME_PATTERN
DATE_PATTERN
DEFAULT_TYPE_FORMAT
INFER_CONFIDENCE
INFER_THRESHOLD
INTEGER_PATTERN
TIME_PATTERN
YEAR_PATTERN

Attributes

csv[R]
threshold[R]

Public Class Methods

new(csv) click to toggle source
# File lib/datapackage/interpreter.rb, line 14
# Builds an interpreter for the given CSV data.
#
# Stores +csv+ and derives the inspection threshold as the smaller of the
# number of entries in +csv+ (its +length+) and INFER_THRESHOLD.
def initialize(csv)
  row_count = csv.length
  @threshold = [row_count, INFER_THRESHOLD].min
  @csv = csv
end

Public Instance Methods

inspect_value(value) click to toggle source
# File lib/datapackage/interpreter.rb, line 40
# Classifies a single cell value as a type/format descriptor hash.
#
# Non-String values yield DEFAULT_TYPE_FORMAT. Strings are tested in order
# against the year (only when exactly four characters long), datetime, date,
# time and integer patterns; the first match wins. A String matching none of
# them also yields DEFAULT_TYPE_FORMAT.
def inspect_value(value)
  return DEFAULT_TYPE_FORMAT unless value.is_a?(String)

  # Order matters: datetime is checked before date and time so that a more
  # specific match is not shadowed by a looser one.
  detected_type =
    if value.length == 4 && value.match(YEAR_PATTERN)
      'year'
    elsif value.match(DATETIME_PATTERN)
      'datetime'
    elsif value.match(DATE_PATTERN)
      'date'
    elsif value.match(TIME_PATTERN)
      'time'
    elsif value.match(INTEGER_PATTERN)
      'integer'
    end

  return DEFAULT_TYPE_FORMAT unless detected_type

  { 'type' => detected_type, 'format' => 'default' }
end
type_and_format_at(header) click to toggle source
# File lib/datapackage/interpreter.rb, line 19
# Infers the type/format descriptor for the column identified by +header+.
#
# Values of the column are inspected one by one with +inspect_value+ and the
# number of occurrences of each resulting descriptor is tallied. Once at
# least +threshold+ values have been inspected, the descriptor of the value
# just inspected is returned as soon as its share of all values inspected so
# far reaches INFER_CONFIDENCE. If that never happens (including for an empty
# column), DEFAULT_TYPE_FORMAT is returned.
def type_and_format_at(header)
  values = csv.values_at(header).flatten
  tally = Hash.new(0)

  values.each.with_index(1) do |value, inspected|
    inspection = inspect_value(value)
    tally[inspection] += 1
    next if inspected < threshold

    # Use fdiv rather than Integer#/: integer division truncates the share to
    # 0 unless every inspected value agrees, which would silently turn any
    # fractional confidence level into a requirement of 100%.
    # NOTE(review): assumes INFER_CONFIDENCE is a ratio in (0, 1] - confirm.
    return inspection if tally[inspection].fdiv(inspected) >= INFER_CONFIDENCE
  end

  DEFAULT_TYPE_FORMAT
end