class Datasets::LIBSVM
Public Class Methods
new(name, note: nil, default_feature_value: 0)
click to toggle source
Calls superclass method
Datasets::Dataset::new
# File lib/datasets/libsvm.rb, line 34
# Builds a dataset handle for one entry of the LIBSVM dataset list.
#
# @param name [String] dataset name, looked up via fetch_dataset_info
# @param note [String, nil] selects one of the dataset's files by its note;
#   nil selects the first listed file (see choose_file)
# @param default_feature_value [Object] value used for features a record
#   does not mention explicitly
# @raise [ArgumentError] when the dataset name or the note is unavailable
def initialize(name, note: nil, default_feature_value: 0)
  super()

  # The file choice depends on the dataset information, so fetch it first.
  @libsvm_dataset_metadata = fetch_dataset_info(name)
  @file = choose_file(note)
  @default_feature_value = default_feature_value

  dataset_id = normalize_name(name)
  @metadata.id = "libsvm-#{dataset_id}"
  @metadata.name = "LIBSVM dataset: #{name}"
  @metadata.url = "https://www.csie.ntu.edu.tw/~cjlin/libsvmtools/datasets/"
end
Public Instance Methods
each() { |record| ... }
click to toggle source
# File lib/datasets/libsvm.rb, line 46
# Yields one Record per row of the dataset file.
#
# Each row is split on single spaces: the first column is the label and the
# remaining columns are "index:value" pairs with 1-based indices. Features
# that a row does not mention keep @default_feature_value.
#
# @yieldparam record [Record] the parsed label and its feature array
# @return [Enumerator] when called without a block
def each
  return to_enum(__method__) unless block_given?

  open_data do |input|
    n_features = @libsvm_dataset_metadata.n_features
    CSV.new(input, col_sep: " ").each do |row|
      label = parse_label(row.shift)
      features = [@default_feature_value] * n_features
      # Empty columns (e.g. from a trailing separator) are parsed as nil.
      row.compact.each do |pair|
        position, raw_value = pair.split(":", 2)
        features[Integer(position, 10) - 1] = parse_value(raw_value)
      end
      yield(Record.new(label, features))
    end
  end
end
Private Instance Methods
choose_file(note)
click to toggle source
# File lib/datasets/libsvm.rb, line 82
# Picks which file of the dataset to read.
#
# @param note [String, nil] note of the wanted file; nil selects the first
#   listed file
# @return [Object] the file entry whose note equals +note+
# @raise [ArgumentError] when no file carries the requested note; the
#   message lists every note that is available
def choose_file(note)
  files = @libsvm_dataset_metadata.files
  return files.first if note.nil?

  # Search with a pure predicate. Collecting notes inside the find block
  # made the block truthy for any file with a note, so the search stopped
  # at the first non-matching noted file and later files were unreachable.
  matched_file = files.find { |file| file.note == note }
  return matched_file if matched_file

  available_notes = files.map(&:note).compact
  name = @libsvm_dataset_metadata.name
  message = "unavailable note: #{name}: #{note.inspect}: " \
            "available notes: [#{available_notes.map(&:inspect).join(", ")}]"
  raise ArgumentError, message
end
fetch_dataset_info(name)
click to toggle source
# File lib/datasets/libsvm.rb, line 66
# Looks up the entry of LIBSVMDatasetList whose name equals +name+.
#
# @param name [String] dataset name to search for
# @return [Object] the first record of the list with that name
# @raise [ArgumentError] when no record matches; the message lists the
#   names of all records in the list
def fetch_dataset_info(name)
  known_names = []
  LIBSVMDatasetList.new.each do |record|
    return record if record.name == name

    known_names << record.name
  end

  listing = known_names.collect(&:inspect).join(", ")
  raise ArgumentError,
        "unavailable LIBSVM dataset: #{name.inspect}: " \
        "available datasets: [#{listing}]"
end
normalize_name(name)
click to toggle source
# File lib/datasets/libsvm.rb, line 112
# Turns a dataset name into a lowercase identifier fragment.
#
# Parentheses are dropped, and each run of spaces, underscores and
# semicolons becomes a single "-".
#
# @param name [String] dataset name
# @return [String] the normalized name
def normalize_name(name)
  without_parentheses = name.delete("()")
  hyphenated = without_parentheses.gsub(/[ _;]+/, "-")
  hyphenated.downcase
end
open_data(&block)
click to toggle source
# File lib/datasets/libsvm.rb, line 100
# Opens the chosen dataset file and passes the stream to the block.
#
# The file is stored under cache_dir_path using @file.name and is
# downloaded from @file.url only when it does not exist there yet. Files
# with a ".bz2" extension go through extract_bz2; anything else is opened
# directly.
#
# @yield the block given by the caller, forwarded to extract_bz2 or File.open
def open_data(&block)
  data_path = cache_dir_path + @file.name
  download(data_path, @file.url) unless data_path.exist?

  return extract_bz2(data_path, &block) if data_path.extname == ".bz2"

  File.open(data_path, &block)
end
parse_label(label)
click to toggle source
# File lib/datasets/libsvm.rb, line 116
# Parses the label column of a row.
#
# The column is split on "," and every part is converted with parse_value.
#
# @param label [String] raw label column
# @return [Object, Array] the single parsed value when there is exactly one
#   part, otherwise the array of parsed values
def parse_label(label)
  parsed = label.split(",").map { |part| parse_value(part) }
  parsed.size == 1 ? parsed.first : parsed
end
parse_value(value)
click to toggle source
# File lib/datasets/libsvm.rb, line 127
# Converts one numeric token to a number.
#
# Tokens containing a decimal point or an exponent marker ("e"/"E") are
# parsed as Float; everything else is parsed as a base-10 Integer.
#
# @param value [String] numeric token
# @return [Integer, Float] the parsed number
# @raise [ArgumentError] when the token is not a valid number
def parse_value(value)
  # Exponent notation may omit the dot (e.g. "1e-05"); checking only for
  # "." would send such tokens to Integer(), which rejects them.
  if value.match?(/[.eE]/)
    Float(value)
  else
    Integer(value, 10)
  end
end