class DataPackage::Interpreter
Constants
- DATETIME_PATTERN
- DATE_PATTERN
- DEFAULT_TYPE_FORMAT
- INFER_CONFIDENCE
- INFER_THRESHOLD
- INTEGER_PATTERN
- TIME_PATTERN
- YEAR_PATTERN
Attributes
csv[R]
threshold[R]
Public Class Methods
new(csv)
click to toggle source
# File lib/datapackage/interpreter.rb, line 14
#
# Builds an interpreter around an already-loaded CSV object.
#
# Stores +csv+ as-is and sets the sampling threshold to the smaller of
# +csv.length+ and INFER_THRESHOLD, so the threshold never exceeds what
# +csv.length+ reports.
#
# @param csv [#length] the tabular data to inspect
#   (presumably a CSV::Table -- confirm against callers)
def initialize(csv)
  @csv = csv

  available = csv.length
  # On a tie keep the csv-derived value, matching Array#min's choice.
  @threshold = available <= INFER_THRESHOLD ? available : INFER_THRESHOLD
end
Public Instance Methods
inspect_value(value)
click to toggle source
# File lib/datapackage/interpreter.rb, line 40
#
# Classifies a single cell value into a type/format descriptor.
#
# Non-String values, and Strings that match none of the patterns, yield
# DEFAULT_TYPE_FORMAT. Otherwise the first matching rule wins, checked in
# this order: year (only for 4-character strings), datetime, date, time,
# integer.
#
# @param value [Object] the raw cell value
# @return [Hash] a hash with 'type' and 'format' keys for a matched rule,
#   or DEFAULT_TYPE_FORMAT
def inspect_value(value)
  return DEFAULT_TYPE_FORMAT unless value.is_a?(String)

  # The year rule carries an extra length guard, so it is handled apart
  # from the plain pattern table below.
  year_like = value.length == 4 && value.match(YEAR_PATTERN)

  type_name =
    if year_like
      'year'
    else
      # Order matters: earlier entries take precedence over later ones.
      ordered_rules = [
        ['datetime', DATETIME_PATTERN],
        ['date', DATE_PATTERN],
        ['time', TIME_PATTERN],
        ['integer', INTEGER_PATTERN]
      ]
      hit = ordered_rules.find { |_label, pattern| value.match(pattern) }
      hit && hit.first
    end

  return DEFAULT_TYPE_FORMAT unless type_name

  { 'type' => type_name, 'format' => 'default' }
end
type_and_format_at(header)
click to toggle source
# File lib/datapackage/interpreter.rb, line 19
#
# Infers a type/format descriptor for the column identified by +header+.
#
# Values are inspected one at a time with +inspect_value+ and tallied per
# descriptor. Once at least +threshold+ values have been seen, the first
# descriptor whose share of the values inspected so far reaches
# INFER_CONFIDENCE is returned. If no descriptor ever qualifies (including
# when the column is empty), DEFAULT_TYPE_FORMAT is returned.
#
# @param header [Object] column identifier passed through to +csv.values_at+
# @return [Hash] the inferred descriptor, or DEFAULT_TYPE_FORMAT
def type_and_format_at(header)
  values = csv.values_at(header).flatten
  counter = Hash.new(0)

  values.each_with_index do |value, i|
    inspection_count = i + 1
    inspection = inspect_value(value)
    counter[inspection] += 1

    # Do not draw conclusions until enough values have been sampled.
    next if inspection_count < threshold

    # Integer#/ truncates, so the ratio used to collapse to 0 or 1 and a
    # fractional confidence level could only be met by a unanimous column.
    # fdiv keeps the real proportion.
    share = counter[inspection].fdiv(inspection_count)
    return inspection if share >= INFER_CONFIDENCE
  end

  DEFAULT_TYPE_FORMAT
end