class Datasets::Rdatasets

Public Class Methods

new(package_name, dataset_name)
Calls superclass method Datasets::Dataset::new
# File lib/datasets/rdatasets.rb, line 59
# Sets up a dataset object for one entry of the Rdatasets list.
#
# The entry is looked up via RdatasetsList#filter using both names; the
# first match supplies the CSV URL and the description.
#
# @param package_name package to look up in the list
# @param dataset_name dataset to look up; ".csv" is appended to it to form
#   the cached file name
# @raise [ArgumentError] when no list entry matches the two names
def initialize(package_name, dataset_name)
  catalog = RdatasetsList.new
  entry = catalog.filter(package: package_name, dataset: dataset_name).first
  raise ArgumentError, "Unable to locate dataset #{package_name}/#{dataset_name}" unless entry

  # The superclass initializer runs only after the lookup succeeded; the
  # @metadata object is filled in right after it.
  super()

  metadata = @metadata
  metadata.id = "rdatasets-#{package_name}-#{dataset_name}"
  metadata.name = "Rdatasets: #{package_name}: #{dataset_name}"
  metadata.url = entry.csv
  metadata.licenses = ["GPL-3"]
  metadata.description = entry.title

  # The cached CSV sits under cache_dir_path and is named after the dataset.
  cache_root = cache_dir_path
  @data_path = cache_root + (dataset_name + ".csv")

  @package_name = package_name
  @dataset_name = dataset_name
end

Public Instance Methods

each() { |record| ... }
# File lib/datasets/rdatasets.rb, line 81
# Yields every row of the dataset's CSV file as a Hash with Symbol keys.
#
# The file at @data_path is downloaded from @metadata.url first when it
# does not exist yet. The first CSV row is used as the header row, field
# values go through CSV's :all converters, and the column whose header is
# the empty string is dropped from each record.
#
# @yield [record] one Hash per CSV data row
# @return an Enumerator over the same records when no block is given
def each(&block)
  return to_enum(__method__) unless block_given?

  unless @data_path.exist?
    download(@data_path, @metadata.url)
  end

  CSV.foreach(@data_path, headers: :first_row, converters: :all) do |row|
    fields = row.to_h
    # Discard the column with an empty header.
    fields.delete("")
    yield fields.transform_keys(&:to_sym)
  end
end