class Daru::IO::Importers::CSV

CSV Importer class, which adds the `read_csv` method to `Daru::DataFrame`.

Constants

CONVERTERS

Public Class Methods

new() click to toggle source

Checks for required gem dependencies of CSV Importer

# File lib/daru/io/importers/csv.rb, line 21
# Loads the `csv`, `open-uri` and `zlib` libraries when an importer is
# instantiated, so they are only required once the importer is actually used.
def initialize
  %w[csv open-uri zlib].each { |library| require library }
end

Public Instance Methods

call(headers: nil, skiprows: 0, compression: :infer, clone: nil, index: nil, order: nil, name: nil, **options) click to toggle source

Imports a `Daru::DataFrame` from a CSV Importer instance

@param headers [Boolean] If this option is `true`, only those columns whose header is given will be used to import the `Daru::DataFrame`.

@param skiprows [Integer] Skips the first `:skiprows` rows of the CSV file. Defaults to 0.

@param compression [Symbol] Defaults to `:infer`, which decides how to parse based on the file format (for example `.csv.gz`). To explicitly parse data from a `.csv.gz` file, set `:compression` to `:gzip`.

@param clone [Boolean] Have a look at `:clone` option

[here](http://www.rubydoc.info/gems/daru/0.1.5/Daru%2FDataFrame:initialize)

@param index [Array or Daru::Index or Daru::MultiIndex] Have a look at

`:index` option
[here](http://www.rubydoc.info/gems/daru/0.1.5/Daru%2FDataFrame:initialize)

@param order [Array or Daru::Index or Daru::MultiIndex] Have a look at

`:order` option
[here](http://www.rubydoc.info/gems/daru/0.1.5/Daru%2FDataFrame:initialize)

@param name [String] Have a look at `:name` option

[here](http://www.rubydoc.info/gems/daru/0.1.5/Daru%2FDataFrame:initialize)

@param options [Hash] CSV standard library options such as `:col_sep`

(defaults to `','`), `:converters` (defaults to `:numeric`),
`:header_converters` (defaults to `:symbol`).

@return [Daru::DataFrame]

@example Calling with csv options

df = instance.call(col_sep: ' ', headers: true)

#=> #<Daru::DataFrame(99x3)>
#        image_reso        mls true_trans
#      0    6.55779          0 -0.2362347
#      1    2.14746          0 -0.1539447
#      2    8.31104          0 0.3832846,
#      3    3.47872          0 0.3832846,
#      4    4.16725          0 -0.2362347
#      5    5.79983          0 -0.2362347
#      6     1.9058          0 -0.895577,
#      7     1.9058          0 -0.2362347
#      8    4.11806          0 -0.895577,
#      9    6.26622          0 -0.2362347
#     10    2.57805          0 -0.1539447
#     11    4.76151          0 -0.2362347
#     12    7.11002          0 -0.895577,
#     13    5.40811          0 -0.2362347
#     14    8.19567          0 -0.1539447
#    ...        ...        ...        ...

@example Calling with csv.gz options

df = instance.call(compression: :gzip, col_sep: ' ', headers: true)

#=> #<Daru::DataFrame(99x3)>
#        image_reso        mls true_trans
#      0    6.55779          0 -0.2362347
#      1    2.14746          0 -0.1539447
#      2    8.31104          0 0.3832846,
#      3    3.47872          0 0.3832846,
#      4    4.16725          0 -0.2362347
#      5    5.79983          0 -0.2362347
#      6     1.9058          0 -0.895577,
#      7     1.9058          0 -0.2362347
#      8    4.11806          0 -0.895577,
#      9    6.26622          0 -0.2362347
#     10    2.57805          0 -0.1539447
#     11    4.76151          0 -0.2362347
#     12    7.11002          0 -0.895577,
#     13    5.40811          0 -0.2362347
#     14    8.19567          0 -0.1539447
#    ...        ...        ...        ...
# File lib/daru/io/importers/csv.rb, line 115
# Builds a `Daru::DataFrame` from the data previously loaded into this
# importer.
#
# Stores the given options, decompresses the data when required, parses the
# CSV into a hash of columns and hands that hash to `Daru::DataFrame.new`.
#
# @param headers [Boolean] forwarded to the CSV parser as `:headers`; when
#   truthy, the header-aware parsing path is used
# @param skiprows [Integer] number of leading data rows to drop
# @param compression [Symbol] `:infer` or `:gzip`
# @param clone [Boolean] passed through to `Daru::DataFrame.new`
# @param index [Array, Daru::Index, Daru::MultiIndex] passed through to
#   `Daru::DataFrame.new`
# @param order [Array, Daru::Index, Daru::MultiIndex] passed through to
#   `Daru::DataFrame.new`; replaced by the parsed column names when
#   `headers` is not given
# @param name [String] passed through to `Daru::DataFrame.new`
# @param options [Hash] remaining options, merged into the CSV parser options
# @return [Daru::DataFrame]
def call(headers: nil, skiprows: 0, compression: :infer,
  clone: nil, index: nil, order: nil, name: nil, **options)
  init_opts(headers: headers, skiprows: skiprows, compression: compression,
            clone: clone, index: index, order: order, name: name, **options)
  process_compression

  columns = @headers ? hash_with_headers : hash_without_headers

  # Without the :headers option the first CSV row supplies the column names
  # (with repetitions recoded), so the column order comes from the parsed hash.
  @daru_options[:order] = columns.keys unless @headers

  Daru::DataFrame.new(columns, @daru_options)
end
read(path) click to toggle source

Reads data from a csv / csv.gz file

@!method self.read(path)

@param path [String] Path to the csv / csv.gz file from which the dataframe is to be imported.

@return [Daru::IO::Importers::CSV]

@example Reading from csv file

instance = Daru::IO::Importers::CSV.read("matrix_test.csv")

@example Reading from csv.gz file

instance = Daru::IO::Importers::CSV.read("matrix_test.csv.gz")
# File lib/daru/io/importers/csv.rb, line 41
# Stores the location of the CSV data and opens it for reading.
#
# Strings that start with a URI scheme (such as `http://`) are parsed as URIs
# and opened through the `#open` method that open-uri adds to them; anything
# else is opened as a local file. `Kernel#open` is deliberately not used: it
# executes a shell command when the path starts with `|`, and since Ruby 3.0
# it no longer opens URLs even when open-uri is loaded.
#
# @param path [String] path or URL of the csv / csv.gz data
# @return [self] the importer itself
def read(path)
  @path = path

  # Same scheme detection open-uri historically applied inside Kernel#open.
  remote = path.respond_to?(:to_str) && path.to_str =~ %r{\A[A-Za-z][A-Za-z0-9+\-.]*://}
  target = remote ? URI.parse(path.to_str) : path

  # Objects exposing a public #open (http/https/ftp URIs, Pathname) open
  # themselves; plain strings and unsupported schemes go through File.open.
  @file_data = target.respond_to?(:open) ? target.open : File.open(path)
  self
end

Private Instance Methods

compression?(algorithm, *formats) click to toggle source
# File lib/daru/io/importers/csv.rb, line 135
# Tells whether the loaded data should be treated as compressed with the
# given algorithm.
#
# @param algorithm [Symbol] value compared against the `:compression` option
# @param formats [Array<String>] file suffixes that imply this algorithm
# @return [Boolean] true when the option equals `algorithm`, or when the
#   stored path ends with one of `formats`
def compression?(algorithm, *formats)
  requested = @compression == algorithm
  requested || formats.any? { |suffix| @path.end_with?(suffix) }
end
hash_with_headers() { |c| ... } click to toggle source
# File lib/daru/io/importers/csv.rb, line 139
# Parses the CSV data using its header row and groups the values by column.
#
# @yield [table] the parsed CSV table, before it is split into columns
# @return [Hash] column name => array of that column's values, with the
#   first `@skiprows` rows dropped
def hash_with_headers
  # The options are splatted because CSV.parse takes keyword arguments; a
  # positional Hash raises ArgumentError on Ruby 3.
  table = ::CSV.parse(@file_data, **@options)
  yield table if block_given?

  table
    .by_col
    # Slicing past the end of a column returns nil, so fall back to an empty
    # column when more rows are skipped than the file contains.
    .map { |col_name, values| [col_name, Array(values)[@skiprows..-1] || []] }
    .to_h
end
hash_without_headers() { |c| ... } click to toggle source
# File lib/daru/io/importers/csv.rb, line 150
# Parses the CSV data without the :headers option, using the first row as
# column names (passed through `ArrayHelper.recode_repeated`) and the
# remaining rows as data.
#
# @yield [rows] the parsed CSV result, before the header row is removed
# @return [Hash] column name => array of that column's values, with the
#   first `@skiprows` data rows dropped
def hash_without_headers
  # The options are splatted because CSV.parse takes keyword arguments; a
  # positional Hash raises ArgumentError on Ruby 3.
  parsed = ::CSV.parse(@file_data, **@options)
  yield parsed if block_given?
  rows = parsed.to_a

  # Empty input has no first row; use an empty header list instead of nil.
  headers = ArrayHelper.recode_repeated(rows.shift || [])

  # Slicing past the end returns nil; treat that as "no data rows left".
  columns = (rows[@skiprows..-1] || []).transpose

  headers
    .each_with_index
    .map { |header, position| [header, columns[position] || []] }
    .to_h
end
init_opts(headers: nil, skiprows: 0, compression: :infer, clone: nil, index: nil, order: nil, name: nil, **options) click to toggle source
# File lib/daru/io/importers/csv.rb, line 166
# Stores the import options in instance variables for the other steps.
#
# `@daru_options` collects the options handed to the dataframe, `@options`
# the options handed to the CSV parser (defaults merged with `options`).
# Converter names are resolved first through `::CSV::Converters`, then
# through this class's `CONVERTERS`; anything else is kept as given.
#
# @param headers [Boolean] stored and also passed to the parser as `:headers`
# @param skiprows [Integer] number of leading data rows to drop
# @param compression [Symbol] compression setting
# @param clone [Boolean] dataframe option
# @param index [Object] dataframe option
# @param order [Object] dataframe option
# @param name [String] dataframe option
# @param options [Hash] extra CSV parser options; override the defaults
# @return [Array] the resolved converters
def init_opts(headers: nil, skiprows: 0, compression: :infer,
  clone: nil, index: nil, order: nil, name: nil, **options)
  @headers      = headers
  @skiprows     = skiprows
  @compression  = compression
  @daru_options = {clone: clone, index: index, order: order, name: name}
  @options      = {
    col_sep: ',', converters: [:numeric], header_converters: :symbol,
    headers: @headers, skip_blanks: true
  }.merge(options)

  # Array() accepts a single converter (e.g. `converters: :numeric`) as well
  # as a list; calling flat_map directly on a Symbol raises NoMethodError.
  @options[:converters] = Array(@options[:converters]).flat_map do |converter|
    ::CSV::Converters[converter] || CONVERTERS[converter] || converter
  end
end
process_compression() click to toggle source
# File lib/daru/io/importers/csv.rb, line 184
# Replaces `@file_data` with its gunzipped contents when the data is
# considered gzip-compressed (see `compression?`); otherwise leaves it
# untouched.
#
# @return [String, nil] the decompressed data, or nil when nothing was done
def process_compression
  return unless compression?(:gzip, '.csv.gz')

  # The block form closes the reader, and with it the underlying IO, once
  # the data has been read, so the opened file handle is not leaked.
  @file_data = ::Zlib::GzipReader.wrap(@file_data, &:read)
end