module SpreadsheetImporter::Import

Public Class Methods

from_csv(file_path, options = {}, &block) click to toggle source
# File lib/spreadsheet_importer/import.rb, line 13
def self.from_csv(file_path, options = {}, &block)
  # Detect the encoding of the file and normalize it to UTF-8
  csv = File.read(file_path)
  encoding = CharlockHolmes::EncodingDetector.detect(csv)[:encoding]
  csv = CharlockHolmes::Converter.convert csv, encoding, 'UTF-8'

  # Get rid of the UTF-16LE BOM since Charlock doesn't do this for us
  csv.slice!(0) if csv[0].ord == 65279

  # Determine whether the column separator is a tab or comma
  col_sep = csv.count("\t") > 0 ? "\t" : ","

  spreadsheet = CSV.parse(csv, :col_sep => col_sep, :headers => true, :header_converters => :downcase).to_a
  from_spreadsheet(spreadsheet, options, &block)
end
from_spreadsheet(spreadsheet, options = {}, &block) click to toggle source

Returns 2D array of the spreadsheet, rows by columns If a block is given, yields each row to the block Exceptions during the iteration will collected along with their row number and re-raised at the end of processing

# File lib/spreadsheet_importer/import.rb, line 33
def self.from_spreadsheet(spreadsheet, options = {}, &block)
  options = {:start_row => 1, :schema => nil}.merge(options)

  # Remove intro rows
  (options[:start_row] - 1).times { spreadsheet.shift }

  headers = spreadsheet.first
  if options[:required_columns]
    assert_required_columns!(headers, options[:required_columns])
  end

  if options[:schema] # If a Conformist schema is provided, use that to prepare rows
    rows = options[:schema].conform(spreadsheet, :skip_first => true)
  else
    rows = spreadsheet
  end

  # Create an enumerator the standardizes the output from conformist or 2D array spreadsheet
  errors = []
  spreadsheet = Spreadsheet.new do |yielder|
    row_number = options[:start_row]
    rows.each_with_index do |row, index|
      begin
        yielder.yield row_to_attributes(row, headers), index, row_number
      rescue => e
        errors << "Row #{row_number}: #{e.message}\n#{e.backtrace.join("\n")}"
      end
      row_number += 1
    end
  end
  spreadsheet.errors = errors
  spreadsheet.each(&block) if block_given?

  return spreadsheet
end
from_xlsx(file_path, options = {}, &block) click to toggle source
# File lib/spreadsheet_importer/import.rb, line 3
def self.from_xlsx(file_path, options = {}, &block)
  options = {:sheet_name => nil}.merge(options)

  spreadsheet = []
  Roo::Excelx.new(file_path, :file_warning => :ignore).each_with_pagename do |name, sheet|
    spreadsheet.concat sheet.to_a unless options[:sheet_name] && name.downcase.strip != options[:sheet_name].downcase.strip
  end
  from_spreadsheet(spreadsheet, options, &block)
end

Private Class Methods

assert_required_columns!(headers, required_columns) click to toggle source
# File lib/spreadsheet_importer/import.rb, line 71
def self.assert_required_columns!(headers, required_columns)
  required_columns.each do |column_name|
    raise MissingRequiredColumn, "Spreadsheet must include a '#{column_name}' column" unless column_present?(headers, column_name)
  end
end
column_present?(headers, column_name) click to toggle source
# File lib/spreadsheet_importer/import.rb, line 77
def self.column_present?(headers, column_name)
  headers.collect{|c| c.downcase.squish}.include?(column_name.downcase.squish)
end
row_to_attributes(row, headers) click to toggle source

Returns an attributes hash for the given row

# File lib/spreadsheet_importer/import.rb, line 82
def self.row_to_attributes(row, headers)
  case row
  when Array
    Hash[ItemSchema.attribute_names(headers).zip(row)]
  else # Handle Conformist::HashStruct rows
    row.attributes
  end
end