module CSVTools

Constants

DEFAULT_CSV_READ_OPTS
VERSION

Public Class Methods

csv_filter(path, column_index, field_value, out = STDOUT)
# File lib/csv_tools.rb, line 82
# Prints the header line of the CSV file at +path+, followed by every row
# whose field at +column_index+ contains +field_value+ (case-insensitive
# substring match).
#
# @param path [String] path of the CSV file to read
# @param column_index [Integer] index into each row's fields of the value to test
# @param field_value [String] substring to look for
# @param out [#puts] destination of the output lines (defaults to STDOUT)
def CSVTools.csv_filter(path, column_index, field_value, out = STDOUT)
  # Splat the options: Ruby 3 no longer turns a trailing positional Hash into
  # keyword arguments, so passing the Hash itself raises ArgumentError there.
  csv = CSV.read(path, **DEFAULT_CSV_READ_OPTS)
  needle = field_value.downcase

  # CSV.generate_line quotes fields that contain commas, quotes or newlines,
  # which a plain join(',') would silently corrupt.
  out.puts CSV.generate_line(csv.headers)
  csv.each do |row|
    # to_s: an empty field is nil; treat it as an empty string instead of
    # raising NoMethodError on nil.downcase.
    next unless row.fields[column_index].to_s.downcase.include?(needle)

    out.puts CSV.generate_line(row.fields)
  end
end
csv_join(path1, path2, join_by_column, out = STDOUT)
# File lib/csv_tools.rb, line 31
# Joins two CSV files on a column they share and prints the result to +out+.
#
# The join column is printed first and only once, followed by the remaining
# columns of +path1+ and then those of +path2+. A key that occurs in only one
# file is still printed, with empty fields for the other file's columns. When
# a key occurs in several rows, every pairing of its rows is printed.
#
# @param path1 [String] path of the first CSV file
# @param path2 [String] path of the second CSV file
# @param join_by_column [String] header of the column to join on
# @param out [#puts] destination of the output lines (defaults to STDOUT)
# @raise [RuntimeError] if either file has no column named +join_by_column+
def CSVTools.csv_join(path1, path2, join_by_column, out = STDOUT)
  # Splat the options: Ruby 3 no longer turns a trailing positional Hash into
  # keyword arguments.
  csv1, csv2 = [path1, path2].map { |path| CSV.read(path, **DEFAULT_CSV_READ_OPTS) }

  # Make sure the join-by column exists in both CSVs
  headers1 = csv1.headers.dup
  headers2 = csv2.headers.dup
  index1 = headers1.index(join_by_column)
  index2 = headers2.index(join_by_column)
  raise "#{path1} does not have a column '#{join_by_column}'" if index1.nil?
  raise "#{path2} does not have a column '#{join_by_column}'" if index2.nil?

  # Print the header line (join-by column is first, and appears only once).
  # Building one array and letting CSV.generate_line format it quotes fields
  # where needed and avoids stray empty columns when a file has no column
  # other than the join column.
  headers1.delete_at(index1)
  headers2.delete_at(index2)
  out.puts CSV.generate_line([join_by_column] + headers1 + headers2)

  hash1 = group_by_column(csv1, index1)
  hash2 = group_by_column(csv2, index2)

  # Placeholder rows (all nil) for keys missing from one of the files
  blank1 = [[nil] * headers1.length]
  blank2 = [[nil] * headers2.length]

  # Array union keeps first-seen order and drops duplicates, so no Set is
  # required. A nil key (empty join field) is printed as an empty field.
  (hash1.keys | hash2.keys).each do |key|
    rows1 = hash1[key] || blank1
    rows2 = hash2[key] || blank2

    rows1.each do |r1|
      rows2.each do |r2|
        out.puts CSV.generate_line([key] + r1 + r2)
      end
    end
  end
end
csv_select(path, column_indices, out = STDOUT)
# File lib/csv_tools.rb, line 72
# Prints only the columns at +column_indices+ (in the order given) of the CSV
# file at +path+, header line first.
#
# @param path [String] path of the CSV file to read
# @param column_indices [Array<Integer>] indices of the columns to keep
# @param out [#puts] destination of the output lines (defaults to STDOUT)
# @raise [RuntimeError] from select_values when an index is out of range
def CSVTools.csv_select(path, column_indices, out = STDOUT)
  # Splat the options: Ruby 3 no longer turns a trailing positional Hash into
  # keyword arguments.
  csv = CSV.read(path, **DEFAULT_CSV_READ_OPTS)

  # CSV.generate_line quotes fields that contain commas, quotes or newlines,
  # which a plain join(',') would silently corrupt.
  out.puts CSV.generate_line(select_values(csv.headers, column_indices))
  csv.each do |row|
    out.puts CSV.generate_line(select_values(row.fields, column_indices))
  end
end
group_by_column(csv_table, column_index)

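@param csv_table [CSV::Table] the table whose rows are grouped
@param column_index [Integer] index of the column to group by
@return [Hash{String => Array<Array<String>>}] each distinct value of that column mapped to the rows holding it, with the grouping column removed from every row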

# File lib/csv_tools.rb, line 16
# Groups the rows of +csv_table+ by the value found at +column_index+.
#
# @param csv_table [CSV::Table] rows to group
# @param column_index [Integer] index of the field used as the grouping key
# @return [Hash] each key mapped to an array of field arrays, one per row
#   holding that key, with the key field itself left out
def CSVTools.group_by_column(csv_table, column_index)
  csv_table.each_with_object({}) do |row, groups|
    key = row.fields[column_index]
    before = row.fields[0...column_index]
    after = row.fields[(column_index + 1)...row.fields.length]

    groups[key] ||= []
    groups[key] << (before + after)
  end
end
select_values(values, indices)
# File lib/csv_tools.rb, line 66
# Picks the elements of +values+ located at +indices+, in the order the
# indices are given.
#
# @param values [Array] collection to pick from
# @param indices [Array<Integer>] positions to pick
# @return [Array] the picked elements
# @raise [RuntimeError] if any index is negative or not below values.length
def CSVTools.select_values(values, indices)
  out_of_range = indices.any? do |index|
    index < 0 || index >= values.length
  end
  raise "Invalid indices #{indices} for #{values}" if out_of_range

  indices.map { |index| values[index] }
end