class Daru::IO::Importers::CSV
CSV Importer class, which adds the `read_csv` method to `Daru::DataFrame`
Constants
- CONVERTERS
Public Class Methods
Checks for the required gem dependencies of the CSV Importer
# File lib/daru/io/importers/csv.rb, line 21
# Loads the standard libraries this importer relies on. They are required
# here, at instantiation time, rather than when the file itself is loaded.
def initialize
  %w[csv open-uri zlib].each { |library| require library }
end
Public Instance Methods
Imports a `Daru::DataFrame` from a CSV Importer instance
@param headers [Boolean] If this option is `true`, only those columns
whose header is given will be imported into the `Daru::DataFrame`.
@param skiprows [Integer] Skips the first `:skiprows` number of rows from
the CSV file. Defaults to 0
@param compression [Symbol] Defaults to `:infer`, to parse depending on file format
like `.csv.gz`. For explicitly parsing data from a `.csv.gz` file, set `:compression` as `:gzip`.
@param clone [Boolean] Have a look at `:clone` option
[here](http://www.rubydoc.info/gems/daru/0.1.5/Daru%2FDataFrame:initialize)
@param index [Array or Daru::Index or Daru::MultiIndex] Have a look at
`:index` option [here](http://www.rubydoc.info/gems/daru/0.1.5/Daru%2FDataFrame:initialize)
@param order [Array or Daru::Index or Daru::MultiIndex] Have a look at
`:order` option [here](http://www.rubydoc.info/gems/daru/0.1.5/Daru%2FDataFrame:initialize)
@param name [String] Have a look at `:name` option
[here](http://www.rubydoc.info/gems/daru/0.1.5/Daru%2FDataFrame:initialize)
@param options [Hash] CSV
standard library options such as `:col_sep`
(defaults to `','`), `:converters` (defaults to `:numeric`), `:header_converters` (defaults to `:symbol`).
@return [Daru::DataFrame]
@example Calling with csv options
df = instance.call(col_sep: ' ', headers: true)
#=> #<Daru::DataFrame(99x3)>
#      image_reso        mls true_trans
#    0    6.55779          0 -0.2362347
#    1    2.14746          0 -0.1539447
#    2    8.31104          0 0.3832846,
#    3    3.47872          0 0.3832846,
#    4    4.16725          0 -0.2362347
#    5    5.79983          0 -0.2362347
#    6     1.9058          0 -0.895577,
#    7     1.9058          0 -0.2362347
#    8    4.11806          0 -0.895577,
#    9    6.26622          0 -0.2362347
#   10    2.57805          0 -0.1539447
#   11    4.76151          0 -0.2362347
#   12    7.11002          0 -0.895577,
#   13    5.40811          0 -0.2362347
#   14    8.19567          0 -0.1539447
#  ...        ...        ...        ...
@example Calling with csv.gz options
df = instance.call(compression: :gzip, col_sep: ' ', headers: true)
#=> #<Daru::DataFrame(99x3)>
#      image_reso        mls true_trans
#    0    6.55779          0 -0.2362347
#    1    2.14746          0 -0.1539447
#    2    8.31104          0 0.3832846,
#    3    3.47872          0 0.3832846,
#    4    4.16725          0 -0.2362347
#    5    5.79983          0 -0.2362347
#    6     1.9058          0 -0.895577,
#    7     1.9058          0 -0.2362347
#    8    4.11806          0 -0.895577,
#    9    6.26622          0 -0.2362347
#   10    2.57805          0 -0.1539447
#   11    4.76151          0 -0.2362347
#   12    7.11002          0 -0.895577,
#   13    5.40811          0 -0.2362347
#   14    8.19567          0 -0.1539447
#  ...        ...        ...        ...
# File lib/daru/io/importers/csv.rb, line 115
# Builds a Daru::DataFrame from the data previously loaded into this
# importer instance.
#
# The keyword arguments are stored via init_opts, gzip data is inflated
# via process_compression, and the parsed columns are handed to
# Daru::DataFrame.new together with the Daru-specific options.
#
# @return [Daru::DataFrame]
def call(headers: nil, skiprows: 0, compression: :infer, clone: nil,
  index: nil, order: nil, name: nil, **options)
  init_opts(
    headers: headers, skiprows: skiprows, compression: compression,
    clone: clone, index: index, order: order, name: name, **options
  )
  process_compression

  columns =
    if @headers
      hash_with_headers
    else
      # Without the :headers option the first row supplies the column
      # names (with repetitions recoded), and those names also become
      # the DataFrame's :order.
      parsed = hash_without_headers
      @daru_options[:order] = parsed.keys
      parsed
    end

  Daru::DataFrame.new(columns, @daru_options)
end
Reads data from a csv / csv.gz file
@!method self.read(path)
@param path [String] Path to csv / csv.gz file, where the dataframe is to be imported
from.
@return [Daru::IO::Importers::CSV]
@example Reading from csv file
instance = Daru::IO::Importers::CSV.read("matrix_test.csv")
@example Reading from csv.gz file
instance = Daru::IO::Importers::CSV.read("matrix_test.csv.gz")
# File lib/daru/io/importers/csv.rb, line 41
# Opens the csv / csv.gz source at +path+ and stores the open handle in
# @file_data for the later parsing steps, which read from it.
#
# The source is opened explicitly instead of through Kernel#open:
# * Kernel#open treats a string starting with "|" as a command to spawn,
#   which is unsafe for caller-supplied paths.
# * On Ruby 3, Kernel#open no longer opens URLs even with open-uri loaded.
#
# Strings of the form "scheme://..." go through URI.open (needs open-uri,
# which #initialize requires, and Ruby >= 2.5); everything else is opened
# as a local file.
#
# @param path [String] path or URL of the csv / csv.gz file
# @return [self] the importer instance, so calls can be chained
# @raise [Errno::ENOENT] if a local path does not exist
def read(path)
  @path = path
  @file_data =
    if path.to_s.match?(%r{\A[A-Za-z][A-Za-z0-9+\-.]*://})
      URI.open(path)
    else
      File.open(path)
    end
  self
end
Private Instance Methods
# File lib/daru/io/importers/csv.rb, line 135
# Tells whether the given compression algorithm applies: either it was
# selected explicitly through @compression, or @path ends with one of the
# given file-name suffixes.
#
# @param algorithm [Symbol] compression identifier to compare with @compression
# @param formats [Array<String>] file-name suffixes that imply the algorithm
# @return [Boolean]
def compression?(algorithm, *formats)
  return true if @compression == algorithm

  formats.any? { |suffix| @path.end_with?(suffix) }
end
# File lib/daru/io/importers/csv.rb, line 139
# Parses @file_data as a CSV table with headers and returns the columns
# as a Hash, dropping the first @skiprows values of every column.
#
# @yield [table] the parsed CSV table, when a block is given
# @return [Hash] column header => Array of column values
def hash_with_headers
  # The options are splatted as keywords: on Ruby 3, CSV.parse rejects an
  # options Hash passed as a second positional argument.
  table = ::CSV.parse(@file_data, **@options)
  yield table if block_given?

  table.by_col.map do |col_name, values|
    # Slicing beyond the end of an Array gives nil; keep the column as an
    # empty Array in that case instead of storing nil.
    [col_name, (values || [])[@skiprows..-1] || []]
  end.to_h
end
# File lib/daru/io/importers/csv.rb, line 150
# Parses @file_data as plain CSV rows, takes the first row as the column
# names (passed through ArrayHelper.recode_repeated), skips the next
# @skiprows rows and returns the remaining data column-wise.
#
# @yield [rows] the parsed CSV rows, when a block is given
# @return [Hash] column name => Array of column values
def hash_without_headers
  # The options are splatted as keywords: on Ruby 3, CSV.parse rejects an
  # options Hash passed as a second positional argument.
  parsed = ::CSV.parse(@file_data, **@options)
  yield parsed if block_given?
  rows = parsed.to_a

  # Empty input has no header row; substitute an empty Array so the helper
  # is not handed nil.
  column_names = ArrayHelper.recode_repeated(rows.shift || [])

  # Slicing beyond the end of an Array gives nil; treat that as "no data
  # rows" instead of failing on nil.transpose.
  columns = (rows[@skiprows..-1] || []).transpose

  column_names
    .each_with_index
    .map { |column_name, i| [column_name, columns[i] || []] }
    .to_h
end
# File lib/daru/io/importers/csv.rb, line 166
# Stores the import settings on the instance:
# * @headers, @skiprows and @compression as given,
# * @daru_options with the options meant for the DataFrame constructor,
# * @options with the options meant for the CSV parser, i.e. the defaults
#   below overridden by any extra keyword arguments.
#
# Each entry of @options[:converters] is then resolved: a name known to
# ::CSV::Converters or to CONVERTERS is replaced by the registered
# converter(s); anything else is kept as is.
#
# @param options [Hash] extra CSV options, merged over the defaults
# @return [Array] the resolved converters
def init_opts(headers: nil, skiprows: 0, compression: :infer, clone: nil,
  index: nil, order: nil, name: nil, **options)
  @headers = headers
  @skiprows = skiprows
  @compression = compression
  @daru_options = {clone: clone, index: index, order: order, name: name}
  @options = {
    col_sep: ',',
    converters: [:numeric],
    header_converters: :symbol,
    headers: @headers,
    skip_blanks: true
  }.merge(options)

  # Array() lets callers pass a single converter (e.g. `converters: :numeric`)
  # as well as a list; a bare Symbol has no #flat_map.
  @options[:converters] = Array(@options[:converters]).flat_map do |converter|
    ::CSV::Converters[converter] || CONVERTERS[converter] || converter
  end
end
# File lib/daru/io/importers/csv.rb, line 184
# Replaces @file_data with its inflated contents when the source is
# gzip-compressed, i.e. when compression?(:gzip, '.csv.gz') holds.
# Otherwise @file_data is left untouched.
#
# @return [String, nil] the inflated data, or nil when nothing was done
def process_compression
  return unless compression?(:gzip, '.csv.gz')

  # The block form of GzipReader.wrap closes the reader (and with it the
  # wrapped IO) once the data has been read, so no handle is left open.
  @file_data = ::Zlib::GzipReader.wrap(@file_data, &:read)
end