module WorkerTools::CsvInput
Public Instance Methods
If an array is provided, the names will be used as the row keys, and the row values will be assigned according to the column order.
Ex: %w(tenant segment area) row => {
tenant: _value_at_first_column_, segment: _value_at_second_column_, area: _value_at_third_column_
}
If a hash is provided, the keys will turn into the row keys, and the values will be used to find the corresponding columns (the order in the csv won't affect the import).
Ex: { tenant: 'Mandant', segment: 'Segment', area: 'Bereich' } row => {
tenant: _value_at_column_Mandant, segment: _value_at_column_Segment, area: _value_at_column_Bereich
}
The name of the column is filtered using the csv_input_header_normalized
method, which takes care of extra spaces and looks for a case-insensitive match (so 'Bereich' matches ' Bereich', 'bereich', etc.). You can override that method as well.
Besides matching the columns using strings, it is possible to use a regular expression or a proc: {
tenant: 'Mandant', segment: /Segment/i, area: ->(name) { name.downcase == 'area' }
}
# File lib/worker_tools/csv_input.rb, line 38
# Default column definition: always raises a RuntimeError whose message names
# the receiver, so a real definition has to be supplied by overriding this
# method.
def csv_input_columns
  message = "csv_input_columns has to be defined in #{self}"
  raise message
end
# File lib/worker_tools/csv_input.rb, line 61
# Verifies that the first row yielded by csv_rows_enum has as many cells as
# csv_input_columns has entries.
#
# Returns nil when both counts match; raises a RuntimeError carrying both
# numbers otherwise.
def csv_input_columns_array_check(csv_rows_enum)
  expected_columns_length = csv_input_columns.length
  # An enumerator without rows yields nil as its first element; count that as
  # zero columns so the mismatch is reported through the regular error below
  # instead of a NoMethodError on nil.
  first_row = csv_rows_enum.first
  actual_columns_length = first_row.nil? ? 0 : first_row.length
  return if expected_columns_length == actual_columns_length

  raise "The number of columns (#{actual_columns_length}) is not the expected (#{expected_columns_length})"
end
# File lib/worker_tools/csv_input.rb, line 54
# Entry point of the column validation. Picks the array-based check when
# csv_input_columns is an Array and the hash-based check otherwise, and
# returns whatever the selected check returns.
def csv_input_columns_check(csv_rows_enum)
  # override and return true if you do not want this check to be performed
  if csv_input_columns.is_a?(Array)
    csv_input_columns_array_check(csv_rows_enum)
  else
    csv_input_columns_hash_check(csv_rows_enum)
  end
end
# File lib/worker_tools/csv_input.rb, line 69
# Validates the header row against a hash-style csv_input_columns.
#
# The header names of the first row are normalized with
# csv_input_header_normalized, then checked for duplicates and for columns
# that the values of csv_input_columns expect but the file lacks. Returns the
# result of the missing-columns check.
def csv_input_columns_hash_check(csv_rows_enum)
  expected_names = csv_input_columns.values
  # An input without rows yields nil here; treat it as a file without headers
  # so the checks below run normally instead of failing with a NoMethodError.
  header_row = csv_rows_enum.first || []
  filtered_actual_names = header_row.map { |n| csv_input_header_normalized(n) }
  csv_input_columns_hash_check_duplicates(filtered_actual_names)
  csv_input_columns_hash_check_missing(filtered_actual_names, expected_names)
end
# File lib/worker_tools/csv_input.rb, line 76
# Raises a RuntimeError listing every name that occurs more than once in
# names; returns nil when all names are unique.
def csv_input_columns_hash_check_duplicates(names)
  dups = names.group_by(&:itself).select { |_, occurrences| occurrences.size > 1 }.keys
  # Plain-Ruby emptiness test (same style as the missing-columns check), so
  # this method does not depend on ActiveSupport's present?.
  raise "The file contains duplicated columns: #{dups}" unless dups.empty?
end
# File lib/worker_tools/csv_input.rb, line 81
# Collects every entry of expected_names that matches none of actual_names and
# raises a RuntimeError listing them; returns nil when nothing is missing.
#
# String expectations are normalized with csv_input_header_normalized before
# the comparison; any other matcher is used as given.
def csv_input_columns_hash_check_missing(actual_names, expected_names)
  missing = expected_names.select do |expected|
    matchable = expected.is_a?(String) ? csv_input_header_normalized(expected) : expected
    # case/when performs the matchable === n comparison (rubocop does not like ===)
    actual_names.none? { |n| case n when matchable then true end }
  end
  return if missing.empty?

  raise "Some columns are missing: #{missing}"
end
# File lib/worker_tools/csv_input.rb, line 89
# Options hash for the CSV reader. The default only sets ';' as the column
# separator.
def csv_input_csv_options
  # Ex: { col_sep: ';', encoding: Encoding::ISO_8859_1 }
  options = { col_sep: ';' }
  options
end
# File lib/worker_tools/csv_input.rb, line 133
# Returns model.attachment.path converted to a String.
# NOTE(review): model is not defined in this section; presumably it is
# provided by the class including this module -- confirm.
def csv_input_file_path
  attachment_path = model.attachment.path
  attachment_path.to_s
end
# File lib/worker_tools/csv_input.rb, line 145
# Memoized CsvInputForeach instance for the input file.
#
# On the first call the columns are validated with csv_input_columns_check,
# then the object is built from the row enumerator, the column definition, the
# mapping derived from the first row, the value cleanup method and the
# headers-present flag. Later calls return the cached object.
def csv_input_foreach
  return @csv_input_foreach if @csv_input_foreach

  csv_input_columns_check(csv_rows_enum)
  @csv_input_foreach = CsvInputForeach.new(
    rows_enum: csv_rows_enum,
    input_columns: csv_input_columns,
    mapping_order: csv_input_mapping_order(csv_rows_enum.first),
    cleanup_method: method(:cvs_input_value_cleanup),
    headers_present: csv_input_headers_present
  )
end
# File lib/worker_tools/csv_input.rb, line 98
# Switch read by csv_input_header_normalized: when true (the default), header
# names are downcased before being compared.
def csv_input_header_normalize?
  true
end
# File lib/worker_tools/csv_input.rb, line 42
# Converts name to a String and strips surrounding whitespace; the result is
# additionally downcased when csv_input_header_normalize? is truthy.
def csv_input_header_normalized(name)
  stripped = name.to_s.strip
  csv_input_header_normalize? ? stripped.downcase : stripped
end
# File lib/worker_tools/csv_input.rb, line 141
# Value handed to CsvInputForeach as headers_present; true by default.
def csv_input_headers_present
  true
end
# File lib/worker_tools/csv_input.rb, line 94
# When truthy, csv_input_mapping_order_for_hash also adds the columns that are
# not listed in csv_input_columns to the mapping. Off (false) by default.
def csv_input_include_other_columns
  false
end
Compares the first row (header names) with the csv_input_columns
hash to find the corresponding positions.
Ex: csv_input_columns: {tenant: 'Mandant', area: 'Bereich'}
headers: ['Bereich', 'Mandant'] => { tenant: 1, area: 0}
# File lib/worker_tools/csv_input.rb, line 108
# Builds the mapping from row key to column position.
#
# For an Array definition every entry is mapped to its own index; for any
# other definition the positions are looked up in header_names by
# csv_input_mapping_order_for_hash.
def csv_input_mapping_order(header_names)
  if csv_input_columns.is_a?(Array)
    csv_input_columns.each_with_index.to_h
  else
    csv_input_mapping_order_for_hash(header_names)
  end
end
# File lib/worker_tools/csv_input.rb, line 114
# Resolves, for each key of csv_input_columns, the index of the first header
# that its matcher accepts (nil when no header matches).
#
# Headers and String matchers are both passed through
# csv_input_header_normalized before comparing. When
# csv_input_include_other_columns is truthy, the mapping is extended by
# csv_input_mapping_order_with_other_columns.
def csv_input_mapping_order_for_hash(header_names)
  filtered_column_names = header_names.map { |n| csv_input_header_normalized(n) }
  pairs = csv_input_columns.map do |key, matcher|
    matchable = matcher.is_a?(String) ? csv_input_header_normalized(matcher) : matcher
    # case/when performs the matchable === n comparison
    [key, filtered_column_names.index { |n| case n when matchable then true end }]
  end
  mapping = pairs.to_h

  if csv_input_include_other_columns
    csv_input_mapping_order_with_other_columns(mapping, filtered_column_names)
  else
    mapping
  end
end
# File lib/worker_tools/csv_input.rb, line 125
# Adds every column whose position is not already used by mapping, keyed by
# its header turned into a Symbol. The given mapping hash is modified in place
# and returned.
def csv_input_mapping_order_with_other_columns(mapping, filtered_column_names)
  # Snapshot taken before the loop: only positions of the original mapping
  # count as taken, not the ones added while iterating.
  taken = mapping.values
  filtered_column_names.each_with_index do |header, index|
    next if taken.include?(index)

    mapping[header.to_sym] = index
  end
  mapping
end
# File lib/worker_tools/csv_input.rb, line 137
# Memoized result of CSV.foreach for the file at csv_input_file_path, called
# without a block and with csv_input_csv_options as keyword options.
def csv_rows_enum
  return @csv_rows_enum if @csv_rows_enum

  @csv_rows_enum = CSV.foreach(csv_input_file_path, **csv_input_csv_options)
end
Allows for some basic cleanup of the values, such as applying strip to the strings.
# File lib/worker_tools/csv_input.rb, line 50
# Returns a stripped copy of value when it is a String; every other value is
# returned unchanged.
def cvs_input_value_cleanup(value)
  return value unless value.is_a?(String)

  value.strip
end