class JsonTableSchema::Infer

Attributes

schema[R]

Public Class Methods

new(headers, rows, opts = {}) click to toggle source
# File lib/jsontableschema/infer.rb, line 8
# Stores the inputs and options, builds the initial schema descriptor from
# the headers, then runs inference straight away.
#
# headers - column names; one field descriptor is built per header.
# rows    - the data rows to sample.
# opts    - optional settings read here: :explicit, :primary_key, :row_limit.
def initialize(headers, rows, opts = {})
  @headers, @rows = headers, rows

  @explicit = opts[:explicit]
  @primary_key = opts[:primary_key]
  @row_limit = opts[:row_limit]

  # 'primaryKey' is only present in the descriptor when a key was supplied.
  @schema = { 'fields' => fields }.tap do |descriptor|
    descriptor['primaryKey'] = @primary_key if @primary_key
  end

  infer!
end

Public Instance Methods

available_types() click to toggle source
# File lib/jsontableschema/infer.rb, line 126
# Names of the types that can be guessed. guess_type walks this list in
# reverse, so entries nearer the end are tried first.
# A fresh array is returned on every call.
def available_types
  %w[
    any
    string
    boolean
    number
    integer
    null
    date
    time
    datetime
    array
    object
    geopoint
    geojson
  ]
end
fields() click to toggle source
# File lib/jsontableschema/infer.rb, line 22
# Builds one field descriptor hash per header.
#
# Each descriptor has 'name', an empty 'title' and an empty 'description'.
# A 'constraints' hash is attached only when it is non-empty:
#   * 'required' is true only when @explicit is true
#   * 'unique' is true only for the header equal to @primary_key
# Constraints whose value is false are dropped unless @explicit is true.
def fields
  explicit = (@explicit === true)

  @headers.map do |header|
    rules = {
      'required' => explicit,
      'unique' => (header == @primary_key)
    }
    rules = rules.reject { |_name, flag| flag == false } unless explicit

    field = { 'name' => header, 'title' => '', 'description' => '' }
    field['constraints'] = rules if rules.count > 0
    field
  end
end
guess_format(converter, col) click to toggle source
# File lib/jsontableschema/infer.rb, line 88
# Finds a format name for col by trying the converter's cast_* methods.
#
# Every public instance method of the converter's class whose name matches
# /cast_/ is considered; the format name is the method name with the first
# 'cast_' removed. The 'default' format is skipped. The first cast that does
# not raise JsonTableSchema::Exception wins; if none succeeds the result is
# 'default'.
def guess_format(converter, col)
  cast_methods = converter.class.instance_methods.grep(/cast_/)

  cast_methods.each do |cast_method|
    candidate = cast_method.to_s.sub('cast_', '')
    next if candidate == 'default'

    begin
      converter.send(cast_method, col)
      return candidate
    rescue JsonTableSchema::Exception
      # This format does not fit the value; move on to the next candidate.
    end
  end

  'default'
end
guess_type(col, index) click to toggle source
# File lib/jsontableschema/infer.rb, line 66
# Guesses the type and format of a single cell.
#
# col   - the cell value.
# index - the column position, used to look up the field descriptor in
#         @schema['fields'] that is handed to each converter.
#
# A nil or empty-string cell is reported as type 'string', format 'default'.
# Otherwise the available types are tried in reverse order and the first
# converter whose test(col) is true decides the type; its format comes from
# guess_format. If no converter accepts the value the string/default pair is
# returned.
def guess_type(col, index)
  fallback = { 'type' => 'string', 'format' => 'default' }
  return fallback if col.nil? || col == ""

  available_types.reverse_each do |type_name|
    converter_class = Kernel.const_get(get_class_for_type(type_name))
    converter = converter_class.new(@schema['fields'][index])
    next unless converter.test(col) === true

    return { 'type' => type_name, 'format' => guess_format(converter, col) }
  end

  fallback
end
infer!() click to toggle source
# File lib/jsontableschema/infer.rb, line 39
# Samples @rows, guesses a type/format for every cell with guess_type, hands
# the per-column guesses to resolve_types and finally replaces @schema with a
# JsonTableSchema::Schema built from the descriptor.
#
# Every row is normalised to exactly @headers.count cells before guessing:
# surplus cells are dropped and missing cells are filled with ''. The
# caller's row objects are never modified.
#
# When @row_limit is set, at most that many rows are sampled.
def infer!
  headers_length = @headers.count
  type_matches = []

  @rows.each_with_index do |row, row_index|
    # row_index is zero-based, so >= stops after exactly @row_limit rows.
    break if @row_limit && row_index >= @row_limit

    # Guarded with defined? so plain-array input works even when the csv
    # library has not been loaded.
    row = row.fields if defined?(CSV::Row) && row.is_a?(CSV::Row)

    row_length = row.count

    if row_length > headers_length
      # Keep only cells that have a header; an extra cell would have no
      # field descriptor to be matched against.
      row = row.first(headers_length)
    elsif row_length < headers_length
      # Pad on a copy (Array#+) rather than pushing onto the caller's row.
      row = row + Array.new(headers_length - row_length, '')
    end

    row.each_with_index do |col, col_index|
      (type_matches[col_index] ||= []) << guess_type(col, col_index)
    end
  end

  resolve_types(type_matches)
  @schema = JsonTableSchema::Schema.new(@schema)
end
resolve_types(results) click to toggle source
# File lib/jsontableschema/infer.rb, line 104
# Picks the winning guess for every column and merges it into the matching
# field descriptor in @schema['fields'].
#
# results - an array with one entry per column; each entry is the list of
#           guesses (hashes with 'type' and 'format') collected for that
#           column, one per sampled row.
#
# The winner is the guess that occurs most often in the column. On a tie the
# choice is whatever max_by yields (in practice the guess seen first).
# Columns with no field descriptor or no guesses are skipped. The results
# argument is not modified and is returned as-is.
def resolve_types(results)
  results.each_with_index do |guesses, column|
    field = @schema['fields'][column]
    next if field.nil? || guesses.empty?

    # Count every occurrence; de-duplicating first would erase the
    # frequencies the decision is based on.
    tallies = guesses.each_with_object(Hash.new(0)) do |guess, counts|
      counts[guess] += 1
    end

    winner, _occurrences = tallies.max_by { |_guess, occurrences| occurrences }
    field.merge!(winner)
  end
end