module TrainTestSplit::Split
Public Class Methods
train_test_split(total_data_set, test_size = 0.25)
click to toggle source
# File lib/train_test_split.rb, line 4
#
# Randomly partitions a labelled data set into a training set and a test set,
# separating each row's features from its label. Every row is treated as an
# array whose last element is the label (Y) and whose preceding elements are
# the features (X).
#
# The input array and its rows are left untouched: the shuffle is done on a
# copy and the feature rows are returned as new arrays.
#
# @param total_data_set [Array<Array>] rows of the form [x1, ..., xn, y]
# @param test_size [Numeric] fraction of rows to put in the test set; values
#   outside 0.0..1.0 are clamped into that range
# @return [Array] four arrays, in order: training X, training Y, test X, test Y
# @raise [StandardError] if the requested fraction yields an empty test set
#   or an empty training set
def self.train_test_split(total_data_set, test_size = 0.25)
  test_size = test_size.clamp(0.0, 1.0)
  test_set_count = (total_data_set.length * test_size).floor

  if test_set_count == 0
    raise StandardError, "Test size resulted in a test set of 0. Increase the test size."
  elsif test_set_count == total_data_set.length
    raise StandardError, "Test size resulted in a training set of 0. Decrease the test size."
  end

  # Shuffle a copy so the caller's ordering is preserved.
  shuffled = total_data_set.shuffle

  # take/drop give exactly test_set_count test rows and the remainder for
  # training (an inclusive 0..count range would hand the test set one extra row).
  test_rows = shuffled.take(test_set_count)
  training_rows = shuffled.drop(test_set_count)

  # Labels are the last column; features are everything before it. Slicing
  # (rather than pop) keeps the caller's rows intact.
  training_set_y = training_rows.map(&:last)
  training_set = training_rows.map { |row| row[0...-1] }

  test_set_y = test_rows.map(&:last)
  test_set = test_rows.map { |row| row[0...-1] }

  [training_set, training_set_y, test_set, test_set_y]
end
train_validation_test_split(total_data_set, validation_size = 0.15, test_size = 0.10)
click to toggle source
# File lib/train_test_split.rb, line 29
#
# Randomly partitions a labelled data set into training, validation and test
# sets, separating each row's features from its label. Every row is treated as
# an array whose last element is the label (Y) and whose preceding elements
# are the features (X).
#
# The input array and its rows are left untouched: the shuffle is done on a
# copy and the feature rows are returned as new arrays.
#
# @param total_data_set [Array<Array>] rows of the form [x1, ..., xn, y]
# @param validation_size [Numeric] fraction of rows for the validation set;
#   values outside 0.0..1.0 are clamped into that range
# @param test_size [Numeric] fraction of rows for the test set; values
#   outside 0.0..1.0 are clamped into that range
# @return [Array] six arrays, in order: training X, training Y,
#   validation X, validation Y, test X, test Y
# @raise [StandardError] if the requested fractions yield an empty test,
#   validation or training set
def self.train_validation_test_split(total_data_set, validation_size = 0.15, test_size = 0.10)
  # Clamp each fraction independently; a single if/elsif chain would only
  # ever correct one of them.
  test_size = test_size.clamp(0.0, 1.0)
  validation_size = validation_size.clamp(0.0, 1.0)

  total_count = total_data_set.length
  test_set_count = (total_count * test_size).floor
  validation_set_count = (total_count * validation_size).floor

  if test_set_count == 0
    raise StandardError, "Test size resulted in a test set of 0. Increase the test size."
  elsif test_set_count == total_count
    raise StandardError, "Test size resulted in a training set of 0. Decrease the test size."
  end

  held_out_count = test_set_count + validation_set_count

  if validation_set_count == 0
    raise StandardError, "validation size resulted in a validation set of 0. Increase the validation data size."
  elsif held_out_count >= total_count
    # Test and validation rows together would consume the whole data set.
    raise StandardError, "validation size resulted in a training set of 0. Decrease the validation data size."
  end

  # Shuffle a copy so the caller's ordering is preserved.
  shuffled = total_data_set.shuffle

  # Contiguous, non-overlapping slices of exactly the computed sizes.
  test_rows = shuffled.take(test_set_count)
  validation_rows = shuffled[test_set_count, validation_set_count]
  training_rows = shuffled.drop(held_out_count)

  # Labels are the last column; features are everything before it. Slicing
  # (rather than pop) keeps the caller's rows intact.
  training_set_y = training_rows.map(&:last)
  training_set = training_rows.map { |row| row[0...-1] }

  test_set_y = test_rows.map(&:last)
  test_set = test_rows.map { |row| row[0...-1] }

  validation_set_y = validation_rows.map(&:last)
  validation_set = validation_rows.map { |row| row[0...-1] }

  [training_set, training_set_y, validation_set, validation_set_y, test_set, test_set_y]
end