module DataMix

Constants

VERSION

Public Instance Methods

*(other) click to toggle source
# File lib/datamix/refinements/array.rb, line 11
# Multiplication operator. Hands +other+ to math_operation together with a
# block that multiplies each pair of operands it is given.
def *(other)
  math_operation(other) do |lhs, rhs|
    lhs * rhs
  end
end
+(other) click to toggle source
# File lib/datamix/refinements/array.rb, line 7
# Addition operator. Hands +other+ to math_operation together with a block
# that adds each pair of operands it is given.
def +(other)
  math_operation(other) do |lhs, rhs|
    lhs + rhs
  end
end
-(other) click to toggle source
# File lib/datamix/refinements/array.rb, line 3
# Subtraction operator. Hands +other+ to math_operation together with a block
# that subtracts the second operand from the first.
def -(other)
  math_operation(other) do |lhs, rhs|
    lhs - rhs
  end
end
/(other) click to toggle source
# File lib/datamix/refinements/array.rb, line 15
# Division operator. Hands +other+ to math_operation together with a block
# that divides the first operand by the second; the divisor is converted with
# to_f first, so the division is carried out in floating point.
def /(other)
  math_operation(other) do |lhs, rhs|
    lhs / rhs.to_f
  end
end
delete_empty_rows() click to toggle source

Delete all rows that have one or more empty or nil values.

# File lib/datamix/refinements/csv_table.rb, line 7
# Delete every row that has at least one nil or empty-string value.
#
# Both checks look at the row's field values. (Calling include? on the row
# itself would test for a header named '' rather than for an empty value.)
#
# Returns whatever delete_if returns for the receiver.
def delete_empty_rows
  delete_if do |row|
    values = row.fields
    values.include?(nil) || values.include?('')
  end
end
derive() { |index| ... } click to toggle source

Create a new column using a block. The block is called once per row and receives the index of that row; the values it returns are collected into an array suitable for assigning to a new column.

# File lib/datamix/refinements/csv_table.rb, line 16
# Build an array by calling the given block once per row of by_row.
#
# The block receives only the zero-based position of the row; the row itself
# is not passed. The block results are returned in order.
def derive(&_block)
  indexed_rows = by_row.each_with_index
  indexed_rows.map { |_row, position| yield position }
end
extract(pattern, from:) click to toggle source

Extract a regular expression pattern from a column and return a new column.

# File lib/datamix/refinements/csv_table.rb, line 24
# Extract a pattern from one column and return the matches as a new array.
#
# pattern:: anything String#[] accepts, typically a Regexp.
# from::    key of the column to read from each row.
#
# For every row of by_row the cell row[from] is looked up. A nil cell yields
# nil instead of raising, and non-string cells (for example numbers produced
# by a converting CSV reader) are matched against their to_s form. A cell
# that does not match also yields nil, as String#[] does.
def extract(pattern, from:)
  by_row.map do |row|
    cell = row[from]
    cell.nil? ? nil : cell.to_s[pattern]
  end
end
file(filename) click to toggle source
# File lib/datamix/refinements/object.rb, line 3
# Load +filename+ through CSV.table and return the result.
def file(filename)
  CSV.table(filename)
end
iterate() { |index| ... } click to toggle source

Iterate over all rows, providing the index to the block.

# File lib/datamix/refinements/csv_table.rb, line 29
# Walk over the receiver with each_with_index and call the block with the
# zero-based index only (the element itself is not passed on).
#
# Returns whatever each_with_index returns.
def iterate(&_block)
  each_with_index { |_row, position| yield position }
end
join(other, on:) click to toggle source

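Join columns from another data table based on a shared column. The shared column must exist and hold unique values in both tables. Columns of the other table whose names already exist in this table are added with a leading underscore.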

# File lib/datamix/refinements/csv_table.rb, line 36
# Copy the columns of +other+ into this table, matching rows on the +on+
# column.
#
# other:: table to take columns from; must respond to headers, by_col and find.
# on::    header that both tables share and whose values are unique in each.
#
# Raises CSVError when +on+ is missing from either table or when its values
# are not unique in either table (checked in that order).
#
# A column of +other+ whose name already existed in this table before the
# join is stored under "_<name>". Rows without a match in +other+ get nil
# for every joined column.
def join(other, on:)
  unless headers.include?(on)
    raise CSVError, "No such column '#{on}' in source"
  end
  unless other.headers.include?(on)
    raise CSVError, "No such column '#{on}' in other"
  end
  unless by_col[on].uniq?
    raise CSVError, "source[#{on}] is not unique"
  end
  unless other.by_col[on].uniq?
    raise CSVError, "other[#{on}] is not unique"
  end

  # Snapshot taken before any row is touched, so name clashes are judged
  # against the headers this table started with.
  existing = headers.dup

  by_row.each do |row|
    match = other.find { |candidate| candidate[on] == row[on] }

    other.headers.each do |col|
      next if col == on

      target = if existing.include?(col)
                 "_#{col}"
               else
                 col
               end

      row[target] = if match
                      match[col]
                    else
                      nil
                    end
    end
  end
end
keep(*desired_cols) click to toggle source

Keep one or more columns, and remove the rest

# File lib/datamix/refinements/csv_table.rb, line 55
# Keep only the listed columns: every header that is not in +desired_cols+
# is passed to delete.
#
# Returns the array that was iterated (the headers as they were on entry).
def keep(*desired_cols)
  headers.each do |col|
    next if desired_cols.include?(col)

    delete(col)
  end
end
math_operation(other) { |val, other| ... } click to toggle source
# File lib/datamix/refinements/array.rb, line 101
# Shared driver for the arithmetic operators.
#
# other:: either a collection (anything that responds to each and supports
#         [] by index) or a single value.
#
# Yields pairs (own value, other value) and returns the block results:
# * collection: values are paired by index and written into a dup of the
#   receiver;
# * single value: every own value is paired with +other+.
#
# A missing operand on either side produces nil for that position instead of
# calling the block. This makes the operators usable on arrays that contain
# nil padding, and treats a nil in the receiver the same way as a nil in
# +other+.
def math_operation(other)
  unless other.respond_to? :each
    return map { |val| val ? yield(val, other) : nil }
  end

  copy = dup
  each_with_index do |val, index|
    operand = other[index]
    copy[index] = val && operand ? yield(val, operand) : nil
  end
  copy
end
next(rows=1) click to toggle source
# File lib/datamix/refinements/array.rb, line 32
# Forward the negated row count to offset (the mirror image of prev).
def next(rows=1)
  offset(-rows)
end
offset(rows) click to toggle source
# File lib/datamix/refinements/array.rb, line 19
# Return a copy of the array shifted by +rows+ positions, padded with nil.
#
# rows:: positive values push the elements towards the end (nil padding at
#        the front); negative values pull them towards the front (nil
#        padding at the end); 0 returns a plain copy.
#
# The result always has the same length as the receiver. A shift larger than
# the array is clamped to its length, so the result is then all nil.
def offset(rows)
  shift = [rows.abs, size].min
  padding = Array.new shift

  if rows >= 0
    padding.concat first(size - shift)
  else
    drop(shift).concat padding
  end
end
prev(rows=1) click to toggle source
# File lib/datamix/refinements/array.rb, line 28
# Forward the row count unchanged to offset.
def prev(rows=1)
  offset(rows)
end
preview() click to toggle source

Print the first 10 rows.

# File lib/datamix/refinements/csv_table.rb, line 62
# Shortcut for show with a fixed limit of 10.
def preview
  show(10)
end
remove(*desired_cols) click to toggle source

Remove one or more columns

# File lib/datamix/refinements/csv_table.rb, line 67
# Pass each of the given column names to delete, in the order given.
#
# Returns the array of names that was passed in.
def remove(*desired_cols)
  desired_cols.each { |col| delete(col) }
end
rename(from, to:) click to toggle source

Rename a column

# File lib/datamix/refinements/csv_table.rb, line 74
# Rename a column by copying it and then dropping the original.
#
# The values found under +from+ (read through by_col) are assigned to +to+,
# after which +from+ is deleted. Returns the result of that delete call.
def rename(from, to:)
  column = by_col[from]
  by_col[to] = column
  delete(from)
end
resample(chunk_range, seed: nil) click to toggle source
# File lib/datamix/refinements/array.rb, line 36
# Build a reshuffled variant of this numeric series.
#
# The array is cut into consecutive chunks whose lengths are drawn from
# +chunk_range+, the chunks are shuffled, every chunk after the first is
# shifted so that it continues from the end of the chunk before it, and
# finally the whole result is shifted so that its minimum equals the minimum
# of the receiver.
#
# chunk_range:: argument for Random#rand; each draw is the length of the
#               next chunk.
# seed::        optional seed for Random; the same seed and input give the
#               same output.
#
# Returns a new array and leaves the receiver untouched. An empty receiver
# returns [], and a receiver that forms a single one-element chunk is
# returned as a copy instead of raising.
#
# NOTE(review): a one-element chunk that is not the last one has no internal
# step to sample and would still fail below; this presumably means
# chunk_range is expected to start at 2 or more - confirm with callers.
def resample(chunk_range, seed: nil)
  # Nothing to cut into chunks; the code below needs at least one chunk.
  return [] if empty?

  generator = seed ? Random.new(seed) : Random.new

  # Save min value, we will adjust the result to it later
  min_value = min

  # Split the array into chunks of random length
  chunks = []
  remaining = dup
  until remaining.empty?
    seam = generator.rand(chunk_range) - 1
    chunks.push remaining.slice!(0..seam)
  end

  # If the last chunk contains one element only, merge it into the previous
  # chunk. Skipped when there is only one chunk: there is no previous chunk
  # to merge into.
  if chunks.size > 1 && chunks.last.size == 1
    leftover = chunks.pop
    chunks.last.concat leftover
  end

  # Shuffle the chunks
  chunks = chunks.sample chunks.size, random: generator

  # Adjust each chunk so that its beginning connects with the end of the
  # previous (already adjusted) chunk. The gap between the two is one step
  # picked at random from the chunk's own neighbour-to-neighbour changes.
  chunks.each_with_index do |chunk, i|
    next if i == 0
    connector = chunks[i-1].last

    step = chunk.each_cons(2).map { |a, b| b - a }.sample random: generator

    delta = step + connector - chunk.first
    chunk.map! { |val| val + delta }
  end

  # Merge chunks to a flat array
  result = chunks.flatten

  # Move the entire array up or down so that its min value is equal to
  # the original min value recorded at the beginning.
  shift = result.min - min_value
  result.map! { |val| val - shift }
end
round(decimals=0) click to toggle source
# File lib/datamix/refinements/array.rb, line 83
# Round every element to +decimals+ places. Elements that are nil (or false)
# come back as nil instead of being rounded.
def round(decimals=0)
  map do |val|
    val.round(decimals) if val
  end
end
save_as(filename) click to toggle source

Save to a file. The data is written as CSV when the file extension is .csv (case-insensitive) and as TSV otherwise.

# File lib/datamix/refinements/csv_table.rb, line 96
# Write the receiver to +filename+.
#
# The content is to_s when the file extension is ".csv" (compared
# case-insensitively) and to_tsv for any other extension. Returns the result
# of File.write.
def save_as(filename)
  csv_requested = File.extname(filename).downcase == '.csv'
  File.write(filename, csv_requested ? to_s : to_tsv)
end
show(rows=:all) click to toggle source

Print some or all rows

# File lib/datamix/refinements/csv_table.rb, line 103
# Print the string produced by to_ascii for +rows+ (default :all) using puts.
def show(rows=:all)
  puts(to_ascii(rows))
end
to_ascii(rows=:all) click to toggle source

Returns a table string

# File lib/datamix/refinements/csv_table.rb, line 108
# Render the table as an ASCII string that ends with a newline.
#
# rows:: :all (default) takes the rows from by_row; any other value is
#        passed to first to limit the rows.
#
# The fields of the selected rows are handed to TTY::Table together with
# headers and rendered in :ascii mode with padding [0,1].
def to_ascii(rows=:all)
  selected = rows == :all ? by_row : first(rows)
  body = selected.map(&:fields)
  rendered = TTY::Table.new(headers, body).render(:ascii, padding: [0,1])
  "#{rendered}\n"
end
to_tsv() click to toggle source

Convert table to a TSV string

# File lib/datamix/refinements/csv_table.rb, line 117
# Convert the table to a tab-separated string.
#
# The first line holds the headers; each following line holds the fields of
# one element yielded by each. Lines are joined with "\n" and there is no
# trailing newline. Values are joined as-is, without any escaping.
def to_tsv
  body = map { |row| row.fields.join("\t") }
  body.unshift(headers.join("\t")).join("\n")
end
uniq?() click to toggle source
# File lib/datamix/refinements/array.rb, line 87
# True when the receiver contains no duplicate elements, i.e. removing
# duplicates with uniq does not change its length.
def uniq?
  uniq.size == size
end
window(window_size) { |self| ... } click to toggle source
# File lib/datamix/refinements/array.rb, line 91
# Apply the block to every run of +window_size+ consecutive elements.
#
# window_size:: number of elements per window; must be positive.
#
# Yields each window as an array and collects the block results. The result
# is aligned with the receiver: position i holds the value for the window
# that ends at i, and the leading positions that have no complete window
# are nil. The result therefore always has the same length as the receiver,
# including when the window is larger than the array (all nil).
def window(window_size, &_block)
  results = each_cons(window_size).map { |slice| yield slice }

  # Never pad beyond the receiver's own length.
  padding = [window_size - 1, size].min
  Array.new(padding).concat results
end