module WorkerTools::CsvInput

Public Instance Methods

csv_input_columns() click to toggle source

If an array is provided, the names will be used as the row keys, the row values will be assign according to the columns order.

Ex: %w(tenant segment area) row => {

tenant: _value_at_first_column_,
segment: _value_at_second_column_,
area: _value_at_third_column_

}

If a hash if provided, the keys will turn into the row keys, the values will be used to find the corresponding columns (the order in the csv won't affect the import)

Ex: { tenant: 'Mandant', segment: 'Segment', area: 'Bereich') row => {

tenant: _value_at_column_Mandant,
segment: _value_at_column_Segment,
area: _value_at_column_Bereich

}

The name of the column is filtered using the csv_input_header_normalized method, which takes care of extra spaces and looks for a case insentive match (so 'Bereich' matches ' Bereich', 'bereich', etc.). You can override that method as well.

Besides matching the columns using strings, it is possible to use a regular expression or a proc: {

tenant: 'Mandant',
segment: /Segment/i,
area: ->(name) { name.downcase == 'area' }

}

# File lib/worker_tools/csv_input.rb, line 38
def csv_input_columns
  raise "csv_input_columns has to be defined in #{self}"
end
csv_input_columns_array_check(csv_rows_enum) click to toggle source
# File lib/worker_tools/csv_input.rb, line 61
def csv_input_columns_array_check(csv_rows_enum)
  expected_columns_length = csv_input_columns.length
  actual_columns_length = csv_rows_enum.first.length
  return if expected_columns_length == actual_columns_length

  raise "The number of columns (#{actual_columns_length}) is not the expected (#{expected_columns_length})"
end
csv_input_columns_check(csv_rows_enum) click to toggle source
# File lib/worker_tools/csv_input.rb, line 54
def csv_input_columns_check(csv_rows_enum)
  # override and return true if you do not want this check to be performed
  return csv_input_columns_array_check(csv_rows_enum) if csv_input_columns.is_a?(Array)

  csv_input_columns_hash_check(csv_rows_enum)
end
csv_input_columns_hash_check(csv_rows_enum) click to toggle source
# File lib/worker_tools/csv_input.rb, line 69
def csv_input_columns_hash_check(csv_rows_enum)
  expected_names = csv_input_columns.values
  filtered_actual_names = csv_rows_enum.first.map { |n| csv_input_header_normalized(n) }
  csv_input_columns_hash_check_duplicates(filtered_actual_names)
  csv_input_columns_hash_check_missing(filtered_actual_names, expected_names)
end
csv_input_columns_hash_check_duplicates(names) click to toggle source
# File lib/worker_tools/csv_input.rb, line 76
def csv_input_columns_hash_check_duplicates(names)
  dups = names.group_by(&:itself).select { |_, v| v.count > 1 }.keys
  raise "The file contains duplicated columns: #{dups}" if dups.present?
end
csv_input_columns_hash_check_missing(actual_names, expected_names) click to toggle source
# File lib/worker_tools/csv_input.rb, line 81
def csv_input_columns_hash_check_missing(actual_names, expected_names)
  missing = expected_names.reject do |name|
    matchable = name.is_a?(String) ? csv_input_header_normalized(name) : name
    actual_names.any? { |n| case n when matchable then true end } # rubocop does not like ===
  end
  raise "Some columns are missing: #{missing}" unless missing.empty?
end
csv_input_csv_options() click to toggle source
# File lib/worker_tools/csv_input.rb, line 89
def csv_input_csv_options
  # Ex: { col_sep: ';', encoding: Encoding::ISO_8859_1 }
  { col_sep: ';' }
end
csv_input_file_path() click to toggle source
# File lib/worker_tools/csv_input.rb, line 133
def csv_input_file_path
  model.attachment.path.to_s
end
csv_input_foreach() click to toggle source
# File lib/worker_tools/csv_input.rb, line 145
def csv_input_foreach
  @csv_input_foreach ||= begin
    csv_input_columns_check(csv_rows_enum)

    CsvInputForeach.new(
      rows_enum: csv_rows_enum,
      input_columns: csv_input_columns,
      mapping_order: csv_input_mapping_order(csv_rows_enum.first),
      cleanup_method: method(:cvs_input_value_cleanup),
      headers_present: csv_input_headers_present
    )
  end
end
csv_input_header_normalize?() click to toggle source
# File lib/worker_tools/csv_input.rb, line 98
def csv_input_header_normalize?
  true
end
csv_input_header_normalized(name) click to toggle source
# File lib/worker_tools/csv_input.rb, line 42
def csv_input_header_normalized(name)
  name = name.to_s.strip
  name = name.downcase if csv_input_header_normalize?
  name
end
csv_input_headers_present() click to toggle source
# File lib/worker_tools/csv_input.rb, line 141
def csv_input_headers_present
  true
end
csv_input_include_other_columns() click to toggle source
# File lib/worker_tools/csv_input.rb, line 94
def csv_input_include_other_columns
  false
end
csv_input_mapping_order(header_names) click to toggle source

Compares the first row (header names) with the csv_input_columns hash to find the corresponding positions.

Ex: csv_input_columns: {tenant: 'Mandant', area: 'Bereich'}

headers: ['Bereich', 'Mandant']
=>  { tenant: 1, area: 0}
# File lib/worker_tools/csv_input.rb, line 108
def csv_input_mapping_order(header_names)
  return csv_input_columns.map.with_index { |n, i| [n, i] }.to_h if csv_input_columns.is_a?(Array)

  csv_input_mapping_order_for_hash(header_names)
end
csv_input_mapping_order_for_hash(header_names) click to toggle source
# File lib/worker_tools/csv_input.rb, line 114
def csv_input_mapping_order_for_hash(header_names)
  filtered_column_names = header_names.map { |n| csv_input_header_normalized(n) }
  mapping = csv_input_columns.each_with_object({}) do |(k, v), h|
    matchable = v.is_a?(String) ? csv_input_header_normalized(v) : v
    h[k] = filtered_column_names.index { |n| case n when matchable then true end }
  end
  return mapping unless csv_input_include_other_columns

  csv_input_mapping_order_with_other_columns(mapping, filtered_column_names)
end
csv_input_mapping_order_with_other_columns(mapping, filtered_column_names) click to toggle source
# File lib/worker_tools/csv_input.rb, line 125
def csv_input_mapping_order_with_other_columns(mapping, filtered_column_names)
  positions_taken = mapping.values
  filtered_column_names.each_with_index do |header, index|
    mapping[header.to_sym] = index unless positions_taken.include?(index)
  end
  mapping
end
csv_rows_enum() click to toggle source
# File lib/worker_tools/csv_input.rb, line 137
def csv_rows_enum
  @csv_rows_enum ||= CSV.foreach(csv_input_file_path, **csv_input_csv_options)
end
cvs_input_value_cleanup(value) click to toggle source

Allows for some basic cleanup of the values, such as applying strip to the strings.

# File lib/worker_tools/csv_input.rb, line 50
def cvs_input_value_cleanup(value)
  value.is_a?(String) ? value.strip : value
end