class Mirlo::Dataset

Public: Dataset class to store a set of samples with their associated targets.

Constants

DEFAULT_LABELS

Attributes

feature_names[R]
samples[R]
title[R]

Public Class Methods

new(samples: [], targets: nil, feature_names: [], title: "Dataset", add_bias: true, labels: DEFAULT_LABELS) click to toggle source
# File lib/mirlo/dataset.rb, line 17
# Public: Initialize a Dataset.
#
# samples       - Collection of samples (default: []). Stored untouched when
#                 targets is nil; otherwise handed, together with targets, to
#                 build_from_samples_and_targets.
# targets       - Optional collection of targets paired with samples
#                 (default: nil).
# feature_names - Names that #feature uses to resolve a column (default: []).
# title         - Title of the dataset (default: "Dataset").
# add_bias      - Accepted for callers, but this method never reads it.
# labels        - Lookup consulted by #label_for (default: DEFAULT_LABELS).
def initialize(samples: [], targets: nil, feature_names: [], title: "Dataset", add_bias: true, labels: DEFAULT_LABELS)
  # `||=` keeps any value that is already present on the instance.
  @labels        ||= labels
  @title         ||= title
  @feature_names ||= feature_names

  @samples = targets.nil? ? samples : build_from_samples_and_targets(samples, targets)
end

Public Instance Methods

each(*args, &block) click to toggle source
# File lib/mirlo/dataset.rb, line 64
# Public: Iterate over the stored samples.
#
# args  - Arguments forwarded unchanged to the sample collection's #each.
# block - Block forwarded unchanged to the sample collection's #each.
#
# Returns whatever the underlying collection's #each returns.
def each(*args, &block)
  collection = @samples
  collection.each(*args, &block)
end
feature(feature_name_or_index) click to toggle source
# File lib/mirlo/dataset.rb, line 29
# Public: Collect one feature across every sample.
#
# feature_name_or_index - Either an entry of feature_names, which is
#                         translated to its position, or a value used
#                         directly as the index into each sample.
#
# Returns an Array with one element per sample.
def feature(feature_name_or_index)
  # A known name resolves to its position; anything else is used verbatim.
  position = feature_names.index(feature_name_or_index) || feature_name_or_index

  samples.map { |row| row[position] }
end
input_matrix() click to toggle source
# File lib/mirlo/dataset.rb, line 76
# Public: Matrix whose rows are the features of each sample.
#
# The matrix is built on first use and cached in @input_matrix.
#
# Returns a Matrix.
def input_matrix
  return @input_matrix if @input_matrix

  feature_rows = samples.map(&:features)
  # `false` is Matrix.rows' copy flag: the row arrays are not duplicated.
  @input_matrix = Matrix.rows(feature_rows, false)
end
label_for(val) click to toggle source
# File lib/mirlo/dataset.rb, line 48
# Public: Translate a value into its label.
#
# val - Key looked up in the labels given at construction time.
#
# Returns the stored label, or val itself when the lookup yields nil or false.
def label_for(val)
  label = @labels[val]
  label || val
end
num_features() click to toggle source
# File lib/mirlo/dataset.rb, line 68
# Public: Number of features, taken from the first sample.
#
# The value is cached in @num_features after the first call. There must be
# at least one sample, because the first one is asked for its feature_size.
#
# Returns the first sample's feature_size.
def num_features
  return @num_features if @num_features

  first_sample = samples.first
  @num_features = first_sample.feature_size
end
num_outputs() click to toggle source
# File lib/mirlo/dataset.rb, line 72
# Public: Number of outputs, taken from the first sample.
#
# The value is cached in @num_outputs after the first call. There must be
# at least one sample, because the first one is asked for its target_size.
#
# Returns the first sample's target_size.
def num_outputs
  return @num_outputs if @num_outputs

  first_sample = samples.first
  @num_outputs = first_sample.target_size
end
shuffle!() click to toggle source
# File lib/mirlo/dataset.rb, line 84
# Public: Randomly reorder the samples in place.
#
# The cached input and target matrices are discarded because they depend on
# the sample order. The reordered targets are also stored in @targets.
#
# Returns a two-element Array: the shuffled samples and their targets, in
# the same order.
def shuffle!
  @input_matrix = @target_matrix = nil

  # Array#shuffle picks the same permutation as shuffling an index list of
  # equal length, without re-collecting every target once per index.
  @samples = @samples.shuffle
  @targets = @samples.map(&:target)

  [@samples, @targets]
end
size() click to toggle source
# File lib/mirlo/dataset.rb, line 60
# Public: Number of samples held by the dataset.
#
# Returns the size reported by the sample collection.
def size
  collection = @samples
  collection.size
end
subset_with_target(target) click to toggle source
# File lib/mirlo/dataset.rb, line 39
# Public: Build a new Dataset holding only the samples with a given target.
#
# target - Value compared with == against each sample's target. It is also
#          used as the title of the new dataset.
#
# Returns a Dataset that keeps this dataset's feature names and labels.
def subset_with_target(target)
  matching_samples = samples.select { |sample| sample.target == target }

  # Pass the labels along so label_for gives the same answers on the subset.
  Dataset.new(samples: matching_samples, feature_names: feature_names, title: target, labels: @labels)
end
target_matrix() click to toggle source
# File lib/mirlo/dataset.rb, line 80
# Public: Matrix whose rows are the targets of each sample.
#
# The matrix is built on first use and cached in @target_matrix.
#
# Returns a Matrix.
def target_matrix
  return @target_matrix if @target_matrix

  target_rows = samples.map(&:target)
  # `false` is Matrix.rows' copy flag: the row arrays are not duplicated.
  @target_matrix = Matrix.rows(target_rows, false)
end
target_set() click to toggle source
# File lib/mirlo/dataset.rb, line 52
# Public: Distinct targets present in the dataset, in sorted order.
#
# Returns a new Array with duplicates removed and the remainder sorted.
def target_set
  distinct_targets = targets.uniq
  distinct_targets.sort
end
targets() click to toggle source
# File lib/mirlo/dataset.rb, line 56
# Public: Target of every sample, in sample order.
#
# Returns a new Array built on every call.
def targets
  samples.map { |sample| sample.target }
end
targets_for(feature_values) click to toggle source
# File lib/mirlo/dataset.rb, line 44
# Public: Targets of the samples that match the given feature values.
#
# feature_values - Passed as-is to each sample's has_features? predicate.
#
# Returns an Array with the target of every matching sample, in sample order.
def targets_for(feature_values)
  samples.each_with_object([]) do |sample, matched_targets|
    matched_targets << sample.target if sample.has_features?(feature_values)
  end
end

Private Instance Methods

build_from_samples_and_targets(samples, targets) click to toggle source
# File lib/mirlo/dataset.rb, line 97
# Internal: Wrap raw feature collections and their targets into samples.
#
# samples - Collection whose entries become the features of each new sample.
# targets - Collection indexed by position; entry i is paired with sample i.
#
# Returns an Array of SampleWithBias, one per entry of samples.
def build_from_samples_and_targets(samples, targets)
  samples.map.with_index do |features, position|
    SampleWithBias.new(target: targets[position], features: features)
  end
end