class Datasets::LIBSVMDatasetList

Constants

File

Public Class Methods

new() click to toggle source
Calls superclass method Datasets::Dataset::new
# File lib/datasets/libsvm-dataset-list.rb, line 26
# Fills in this dataset's metadata: identifier, display name and source URL.
#
# The description is stored as a zero-argument lambda that calls
# extract_description, so it is not computed while the object is built.
def initialize
  super()
  @metadata.id = "libsvm-dataset-list"
  @metadata.name = "LIBSVM dataset list"
  @metadata.url = "https://www.csie.ntu.edu.tw/~cjlin/libsvmtools/datasets/"
  @metadata.description = -> { extract_description }
end

Public Instance Methods

each() { |record| ... } click to toggle source
# File lib/datasets/libsvm-dataset-list.rb, line 36
# Streams the cached index page through an IndexListener and yields one
# record per (href, record) pair the listener reports. Each record is first
# passed to parse_detail together with its href, then yielded to the caller.
#
# Returns an Enumerator over this method when no block is given.
#
# The catch tag is handed to the listener; presumably the listener throws it
# to stop parsing early (IndexListener is defined outside this block).
def each(&block)
  return enum_for(:each) unless block

  open_data do |index_io|
    catch do |stop_tag|
      handler = IndexListener.new(stop_tag) do |href, record|
        parse_detail(href, record)
        yield(record)
      end
      REXML::Parsers::StreamParser.new(index_io, handler).parse
    end
  end
end

Private Instance Methods

extract_description() click to toggle source
# File lib/datasets/libsvm-dataset-list.rb, line 62
# Builds the dataset description from the cached index page.
#
# A DescriptionListener receives an array and is driven by a REXML stream
# parser over the index page; whatever ends up in that array is joined with
# blank lines ("\n\n") and returned.
#
# The catch tag is handed to the listener; presumably the listener throws it
# to stop parsing early (DescriptionListener is defined outside this block).
def extract_description
  open_data do |index_io|
    paragraphs = []
    catch do |stop_tag|
      collector = DescriptionListener.new(stop_tag, paragraphs)
      REXML::Parsers::StreamParser.new(index_io, collector).parse
    end
    paragraphs.join("\n\n")
  end
end
extract_text(element) click to toggle source
# File lib/datasets/libsvm-dataset-list.rb, line 74
# Returns the text found under +element+ as a single normalized string.
#
# All text nodes matched by the XPath ".//text()" are concatenated, every
# run of spaces, tabs and newlines is collapsed into one space, and leading
# and trailing whitespace is stripped.
def extract_text(element)
  fragments = REXML::XPath.match(element, ".//text()")
  collapsed = fragments.join("").gsub(/[ \t\n]+/, " ")
  collapsed.strip
end
open_data() { |input| ... } click to toggle source
# File lib/datasets/libsvm-dataset-list.rb, line 52
# Yields an open File for the index page, cached as "index.html" under
# cache_dir_path.
#
# If the cached file does not exist yet, it is first downloaded from
# @metadata.url. The result of the caller's block is returned.
def open_data
  index_path = cache_dir_path + "index.html"
  download(index_path, @metadata.url) unless index_path.exist?
  ::File.open(index_path) { |input| yield(input) }
end
open_detail(detail) { |input| ... } click to toggle source
# File lib/datasets/libsvm-dataset-list.rb, line 79
# Yields an open File for the detail page +detail+, cached under
# cache_dir_path using +detail+ as the relative path.
#
# If the cached file does not exist yet, it is first downloaded from
# @metadata.url with +detail+ appended. The result of the caller's block is
# returned.
def open_detail(detail)
  cached_page = cache_dir_path + detail
  download(cached_page, @metadata.url + detail) unless cached_page.exist?
  ::File.open(cached_page) { |input| yield(input) }
end
parse_detail(href, record) click to toggle source
# File lib/datasets/libsvm-dataset-list.rb, line 89
# Parses the detail page referenced by +href+ on behalf of +record+.
#
# +href+ is split on "#": the part before it names the detail page to open,
# the part after it is passed to DetailListener as the id. The listener also
# receives @metadata.url and +record+, and is driven by a REXML stream parser
# over the detail page.
#
# The catch tag is handed to the listener; presumably the listener throws it
# to stop parsing early (DetailListener is defined outside this block).
def parse_detail(href, record)
  page, anchor = href.split("#")
  open_detail(page) do |detail_io|
    catch do |stop_tag|
      handler = DetailListener.new(stop_tag, anchor, @metadata.url, record)
      REXML::Parsers::StreamParser.new(detail_io, handler).parse
    end
  end
end