class Daru::IO::Importers::Excelx

Excelx Importer Class, that handles .xlsx files in the Excel Importer

@see Daru::IO::Importers::Excel For .xls format

Public Class Methods

new() click to toggle source

Checks for required gem dependencies of Excelx Importer

# File lib/daru/io/importers/excelx.rb, line 11
def initialize
  optional_gem 'roo', '~> 2.7.0'
end

Public Instance Methods

call(sheet: 0, skiprows: 0, skipcols: 0, order: true, index: false) click to toggle source

Imports a `Daru::DataFrame` from an Excelx Importer instance

@param sheet [Integer or String] Imports from a specific sheet @param skiprows [Integer] Skips the first `:skiprows` number of rows from the

sheet being parsed.

@param skipcols [Integer] Skips the first `:skipcols` number of columns from the

sheet being parsed.

@param order [Boolean] Defaults to true. When set to true, first row of the

given sheet is used as the order of the Daru::DataFrame and data of
the Dataframe consists of the remaining rows.

@param index [Boolean] Defaults to false. When set to true, first column of the

given sheet is used as the index of the Daru::DataFrame and data of
the Dataframe consists of the remaining columns.

When set to false, a default order (0 to n-1) is chosen for the DataFrame,
and the data of the DataFrame consists of all rows in the sheet.

@return [Daru::DataFrame]

@example Importing from specific sheet

df = local_instance.call(sheet: 'Example Stock Counts')

#=> <Daru::DataFrame(15x7)>
#           Status Stock coun  Item code        New Descriptio Stock coun Offset G/L
#     0          H          1        nil        nil New stock  2014-08-01        nil
#     1        nil          1  IND300654          2 New stock  2014-08-01      51035
#     2        nil          1   IND43201          5 New stock  2014-08-01      51035
#     3        nil          1   OUT30045          3 New stock  2014-08-01      51035
#    ...       ...        ...     ...           ...     ...       ...           ...

@example Importing from a remote URL and default sheet

df = remote_instance.call

#=> <Daru::DataFrame(15x7)>
#           Status Stock coun  Item code        New Descriptio Stock coun Offset G/L
#     0          H          1        nil        nil New stock  2014-08-01        nil
#     1        nil          1  IND300654          2 New stock  2014-08-01      51035
#     2        nil          1   IND43201          5 New stock  2014-08-01      51035
#     3        nil          1   OUT30045          3 New stock  2014-08-01      51035
#    ...       ...        ...     ...           ...     ...       ...           ...

@example Importing without headers

df = local_instance.call(sheet: 'Example Stock Counts', headers: false)

#=> <Daru::DataFrame(16x7)>
#                0           1          2          3          4          5        6
#     0      Status Stock coun  Item code        New Descriptio Stock coun Offset G/L
#     1          H          1        nil        nil New stock  2014-08-01        nil
#     2        nil          1  IND300654          2 New stock  2014-08-01      51035
#     3        nil          1   IND43201          5 New stock  2014-08-01      51035
#     4        nil          1   OUT30045          3 New stock  2014-08-01      51035
#    ...       ...        ...     ...           ...     ...       ...           ...
# File lib/daru/io/importers/excelx.rb, line 87
def call(sheet: 0, skiprows: 0, skipcols: 0, order: true, index: false)
  @order    = order
  @index    = index
  worksheet = @file_data.sheet(sheet)
  @data     = strip_html_tags(skip_data(worksheet.to_a, skiprows, skipcols))
  @index    = process_index
  @order    = process_order || (0..@data.first.length-1)
  @data     = process_data

  Daru::DataFrame.rows(@data, order: @order, index: @index)
end
read(path) click to toggle source

Reads from an excelx (xlsx) file

@!method self.read(path)

@param path [String] Local / Remote path of xlsx file, where the DataFrame is

to be imported from.

@return [Daru::IO::Importers::Excelx]

@example Reading from a local xlsx file

local_instance = Daru::IO::Importers::Excelx.read("Stock-counts-sheet.xlsx")

@example Reading from a remote xlsx file

url = "https://www.exact.com/uk/images/downloads/getting-started-excel-sheets/Stock-counts-sheet.xlsx"
remote_instance = Daru::IO::Importers::Excelx.read(url)
# File lib/daru/io/importers/excelx.rb, line 30
def read(path)
  @file_data = Roo::Excelx.new(path)
  self
end

Private Instance Methods

process_data() click to toggle source
# File lib/daru/io/importers/excelx.rb, line 101
def process_data
  return skip_data(@data, 1, 1) if @order && @index
  return skip_data(@data, 1, 0) if @order
  return skip_data(@data, 0, 1) if @index
  @data
end
process_index() click to toggle source
# File lib/daru/io/importers/excelx.rb, line 108
def process_index
  return nil unless @index
  @index = @data.transpose.first
  @index = skip_data(@index, 1) if @order
  @index
end
process_order() click to toggle source
# File lib/daru/io/importers/excelx.rb, line 115
def process_order
  return nil unless @order
  @order = @data.first
  @order = skip_data(@order, 1) if @index
  @order
end
skip_data(data, rows, cols=nil) click to toggle source
# File lib/daru/io/importers/excelx.rb, line 122
def skip_data(data, rows, cols=nil)
  return data[rows..-1].map { |row| row[cols..-1] } unless cols.nil?
  data[rows..-1]
end
strip_html_tags(data) click to toggle source
# File lib/daru/io/importers/excelx.rb, line 127
def strip_html_tags(data)
  data.map do |row|
    row.map do |ele|
      next ele unless ele.is_a?(String)
      ele.gsub(/<[^>]+>/, '')
    end
  end
end