class Mirlo::Dataset
Public: Dataset class that stores a set of samples together with their associated targets.
Constants
- DEFAULT_LABELS
Attributes
feature_names[R]
samples[R]
title[R]
Public Class Methods
new(samples: [], targets: nil, feature_names: [], title: "Dataset", add_bias: true, labels: DEFAULT_LABELS)
click to toggle source
# File lib/mirlo/dataset.rb, line 17
# Public: Build a dataset from samples and, optionally, separate targets.
#
# samples       - The samples to store (default: []). Kept as given when
#                 targets is nil; otherwise each entry becomes the features
#                 of a new sample built by build_from_samples_and_targets.
# targets       - Optional collection indexed in step with samples
#                 (default: nil).
# feature_names - Names used by #feature to resolve a column (default: []).
# title         - Title of the dataset (default: "Dataset").
# add_bias      - Accepted for callers but not read by this method.
#                 NOTE(review): samples built from targets always use
#                 SampleWithBias; confirm whether this flag should matter.
# labels        - Lookup consulted by #label_for (default: DEFAULT_LABELS).
def initialize(samples: [], targets: nil, feature_names: [], title: "Dataset", add_bias: true, labels: DEFAULT_LABELS)
  # `||=` leaves any value that is already set untouched.
  @feature_names ||= feature_names
  @title ||= title
  @labels ||= labels

  # Only a nil targets means "samples are already complete"; any other
  # value (including false) goes through the builder.
  @samples = targets.nil? ? samples : build_from_samples_and_targets(samples, targets)
end
Public Instance Methods
each(*args, &block)
click to toggle source
# File lib/mirlo/dataset.rb, line 64
# Public: Iterate over the stored samples.
#
# args  - Arguments forwarded unchanged to the samples' own #each.
# block - Block forwarded unchanged to the samples' own #each.
#
# Returns whatever the underlying collection's #each returns.
def each(*args, &block)
  @samples.each(*args, &block)
end
feature(feature_name_or_index)
click to toggle source
# File lib/mirlo/dataset.rb, line 29
# Public: Collect one feature from every sample.
#
# feature_name_or_index - Either an entry of feature_names (resolved to its
#                         position) or a value used directly as the index.
#
# Returns an Array with sample[index] for each sample, in sample order.
def feature(feature_name_or_index)
  # A known name resolves to its position; anything else is used as-is.
  position = feature_names.index(feature_name_or_index) || feature_name_or_index

  samples.map { |entry| entry[position] }
end
input_matrix()
click to toggle source
# File lib/mirlo/dataset.rb, line 76
# Public: Matrix whose rows are the features of each sample.
#
# The result is memoized in @input_matrix.
#
# Returns a Matrix.
def input_matrix
  return @input_matrix if @input_matrix

  feature_rows = samples.map(&:features)
  # Second argument false: Matrix.rows uses the row arrays without copying.
  @input_matrix = Matrix.rows(feature_rows, false)
end
label_for(val)
click to toggle source
# File lib/mirlo/dataset.rb, line 48
# Public: Look up the label for a value.
#
# val - The value to look up in @labels.
#
# Returns the entry of @labels for val, or val itself when that entry is
# nil or false.
def label_for(val)
  label = @labels[val]
  label ? label : val
end
num_features()
click to toggle source
# File lib/mirlo/dataset.rb, line 68
# Public: Number of features, taken from the first sample.
#
# The value is memoized in @num_features, so it is read from the first
# sample only once.
#
# Returns the first sample's feature_size.
def num_features
  return @num_features if @num_features

  @num_features = samples.first.feature_size
end
num_outputs()
click to toggle source
# File lib/mirlo/dataset.rb, line 72
# Public: Number of outputs, taken from the first sample.
#
# The value is memoized in @num_outputs, so it is read from the first
# sample only once.
#
# Returns the first sample's target_size.
def num_outputs
  return @num_outputs if @num_outputs

  @num_outputs = samples.first.target_size
end
shuffle!()
click to toggle source
# File lib/mirlo/dataset.rb, line 84
# Public: Reorder the samples randomly, in place.
#
# Clears the memoized @input_matrix and @target_matrix so they are rebuilt
# from the new order on next access.
#
# Returns a two-element Array: the shuffled samples and their targets.
def shuffle!
  @input_matrix = @target_matrix = nil

  # Shuffle the samples directly instead of shuffling index positions and
  # looking each one up; Array#shuffle yields the same permutation for the
  # same random state.
  shuffled_samples = samples.shuffle
  # Read each target once from the already-shuffled samples, avoiding a
  # full rebuild of the target list for every element.
  shuffled_targets = shuffled_samples.map(&:target)

  @samples, @targets = shuffled_samples, shuffled_targets
end
size()
click to toggle source
# File lib/mirlo/dataset.rb, line 60
# Public: Number of samples in the dataset.
#
# Returns the size reported by the stored samples collection.
def size
  @samples.size
end
subset_with_target(target)
click to toggle source
# File lib/mirlo/dataset.rb, line 39
# Public: Build a new dataset holding only the samples with a given target.
#
# target - Value compared with == against each sample's target.
#
# Returns a new Dataset containing the matching samples, titled with the
# target, and sharing this dataset's feature names and labels.
def subset_with_target(target)
  matching_samples = samples.select { |sample| sample.target == target }

  # Pass the labels along so label_for on the subset answers the same way
  # as on this dataset instead of falling back to the defaults.
  Dataset.new(
    samples: matching_samples,
    feature_names: feature_names,
    title: target,
    labels: @labels
  )
end
target_matrix()
click to toggle source
# File lib/mirlo/dataset.rb, line 80
# Public: Matrix whose rows are the targets of each sample.
#
# The result is memoized in @target_matrix.
#
# Returns a Matrix.
def target_matrix
  return @target_matrix if @target_matrix

  target_rows = samples.map(&:target)
  # Second argument false: Matrix.rows uses the row arrays without copying.
  @target_matrix = Matrix.rows(target_rows, false)
end
target_set()
click to toggle source
# File lib/mirlo/dataset.rb, line 52
# Public: The distinct targets present in the dataset, in sorted order.
#
# Returns a sorted Array of the unique values from #targets.
def target_set
  distinct_targets = targets.uniq
  distinct_targets.sort
end
targets()
click to toggle source
# File lib/mirlo/dataset.rb, line 56
# Public: The target of every sample.
#
# The list is rebuilt from the samples on each call.
#
# Returns an Array of targets, in sample order.
def targets
  samples.map { |sample| sample.target }
end
targets_for(feature_values)
click to toggle source
# File lib/mirlo/dataset.rb, line 44
# Public: Targets of the samples that match the given feature values.
#
# feature_values - Passed to each sample's has_features? to decide whether
#                  the sample matches.
#
# Returns an Array with the target of every matching sample, in sample
# order (nil or false targets are kept).
def targets_for(feature_values)
  samples.each_with_object([]) do |sample, matched_targets|
    matched_targets << sample.target if sample.has_features?(feature_values)
  end
end
Private Instance Methods
build_from_samples_and_targets(samples, targets)
click to toggle source
# File lib/mirlo/dataset.rb, line 97
# Internal: Pair each raw sample with the target at the same position.
#
# samples - Collection whose entries become the features of each new sample.
# targets - Collection read by position with []; a missing position
#           supplies whatever targets[index] returns for it.
#
# Returns an Array of SampleWithBias, one per entry of samples.
def build_from_samples_and_targets(samples, targets)
  samples.map.with_index do |features, position|
    SampleWithBias.new(target: targets[position], features: features)
  end
end