module DataMix
Constants
- VERSION
Public Instance Methods
*(other)
click to toggle source
# File lib/datamix/refinements/array.rb, line 11
# Multiplication operator: delegates to math_operation, multiplying each
# left-hand value by the matching right-hand value.
def *(other)
  math_operation(other) do |lhs, rhs|
    lhs * rhs
  end
end
+(other)
click to toggle source
# File lib/datamix/refinements/array.rb, line 7
# Addition operator: delegates to math_operation, adding the matching
# right-hand value to each left-hand value.
def +(other)
  math_operation(other) do |lhs, rhs|
    lhs + rhs
  end
end
-(other)
click to toggle source
# File lib/datamix/refinements/array.rb, line 3
# Subtraction operator: delegates to math_operation, subtracting the matching
# right-hand value from each left-hand value.
def -(other)
  math_operation(other) do |lhs, rhs|
    lhs - rhs
  end
end
/(other)
click to toggle source
# File lib/datamix/refinements/array.rb, line 15
# Division operator: delegates to math_operation. The right-hand value is
# converted with to_f first, so the division is always floating point.
def /(other)
  math_operation(other) do |lhs, rhs|
    divisor = rhs.to_f
    lhs / divisor
  end
end
delete_empty_rows()
click to toggle source
Delete all rows that have one or more empty or nil values.
# File lib/datamix/refinements/csv_table.rb, line 7
# Delete, in place, every row that has at least one nil or empty-string value.
#
# The check is made against the row's field values. (CSV::Row#include? looks
# up a *header*, so it cannot be used to detect an empty value.)
#
# Returns whatever delete_if returns.
def delete_empty_rows
  delete_if do |row|
    row.fields.any? { |value| value.nil? || value == '' }
  end
end
derive() { |index| ... }
click to toggle source
Create a new column using a block. The block is called once per row with the row's index, and the block's return values are collected into an array suitable for assigning to a new column.
# File lib/datamix/refinements/csv_table.rb, line 16
# Call the given block once per row, passing the zero-based row index, and
# collect the block's results into an array.
def derive(&_block)
  by_row.map.with_index { |_row, position| yield position }
end
extract(pattern, from:)
click to toggle source
Extract a regular expression pattern from a column and return a new column.
# File lib/datamix/refinements/csv_table.rb, line 24
# Extract a pattern from every value of the +from+ column.
#
# pattern - passed to String#[] (for example a Regexp).
# from    - header of the column to read.
#
# Each cell is converted with to_s before matching, so nil cells and cells
# already converted to numbers do not raise. Returns one entry per row: the
# matched text, or nil when the pattern does not match (including nil cells).
def extract(pattern, from:)
  by_row.map { |row| row[from].to_s[pattern] }
end
file(filename)
click to toggle source
# File lib/datamix/refinements/object.rb, line 3
# Load the file at +filename+ with CSV.table and return the result.
def file(filename)
  CSV.table(filename)
end
iterate() { |index| ... }
click to toggle source
Iterate over all rows, providing the index to the block.
# File lib/datamix/refinements/csv_table.rb, line 29
# Walk over every row, handing only the zero-based index to the given block.
def iterate(&_block)
  each_with_index { |_row, position| yield position }
end
join(other, on:)
click to toggle source
Join columns from another data table based on a mutual column.
# File lib/datamix/refinements/csv_table.rb, line 36
# Copy the columns of +other+ into this table, matching rows on the +on+ column.
#
# Raises CSVError when +on+ is missing from either table or when its values
# are not unique in either table.
#
# For every row, the first row of +other+ with an equal +on+ value supplies
# the new values; rows without a match get nil. A column of +other+ whose
# name already existed in this table before the join is stored under the
# name prefixed with an underscore.
def join(other, on:)
  raise CSVError, "No such column '#{on}' in source" unless headers.include? on
  raise CSVError, "No such column '#{on}' in other" unless other.headers.include? on
  raise CSVError, "source[#{on}] is not unique" unless by_col[on].uniq?
  raise CSVError, "other[#{on}] is not unique" unless other.by_col[on].uniq?

  # Snapshot taken before any row is touched, so collisions are judged
  # against the pre-join header list.
  own_headers = headers.dup

  by_row.each do |row|
    match = other.find { |candidate| candidate[on] == row[on] }

    other.headers.each do |col|
      next if col == on

      target = own_headers.include?(col) ? "_#{col}" : col
      row[target] = match ? match[col] : nil
    end
  end
end
keep(*desired_cols)
click to toggle source
Keep one or more columns, and remove the rest
# File lib/datamix/refinements/csv_table.rb, line 55
# Delete every column whose header is not listed in +desired_cols+.
def keep(*desired_cols)
  headers.each do |col|
    next if desired_cols.include?(col)

    delete(col)
  end
end
math_operation(other) { |val, other| ... }
click to toggle source
# File lib/datamix/refinements/array.rb, line 101
# Apply a binary operation to every element and return the results as a new
# array; the receiver is not modified.
#
# other - either a collection (anything responding to :each, indexed with
#         []) that is paired with the receiver position by position, or a
#         single value applied to every element.
#
# The block receives (element, operand) and returns the combined value.
#
# A missing value on either side of a pair produces nil instead of calling
# the block, so results stay aligned with the receiver. When +other+ is a
# single value, only nil elements of the receiver are skipped.
def math_operation(other)
  if other.respond_to? :each
    each_with_index.map do |val, index|
      operand = other[index]
      val && operand ? yield(val, operand) : nil
    end
  else
    map { |val| val ? yield(val, other) : nil }
  end
end
next(rows=1)
click to toggle source
# File lib/datamix/refinements/array.rb, line 32
# Shortcut for offset with the negated row count (defaults to one row).
def next(rows = 1)
  offset(-rows)
end
offset(rows)
click to toggle source
# File lib/datamix/refinements/array.rb, line 19
# Return a copy of the array shifted by +rows+ positions, padded with nil so
# that the result always has the same length as the receiver.
#
# rows - positive values push elements toward the end (nil padding at the
#        start); negative values pull elements toward the start (nil padding
#        at the end); zero returns a plain copy.
#
# Shifts larger than the array are clamped to its size, giving an all-nil
# array of the original length.
def offset(rows)
  shift = [rows.abs, size].min
  padding = Array.new(shift)

  if rows >= 0
    padding + first(size - shift)
  else
    drop(shift) + padding
  end
end
prev(rows=1)
click to toggle source
# File lib/datamix/refinements/array.rb, line 28
# Shortcut for offset with the given row count (defaults to one row).
def prev(rows = 1)
  offset(rows)
end
preview()
click to toggle source
Print the first 10 rows.
# File lib/datamix/refinements/csv_table.rb, line 62
# Shortcut for show limited to 10 rows.
def preview
  show(10)
end
remove(*desired_cols)
click to toggle source
Remove one or more columns
# File lib/datamix/refinements/csv_table.rb, line 67
# Delete each of the listed columns, one at a time.
def remove(*desired_cols)
  desired_cols.each { |col| delete(col) }
end
rename(from, to:)
click to toggle source
Rename a column
# File lib/datamix/refinements/csv_table.rb, line 74
# Rename the column +from+ to +to+.
#
# The values are copied into a column named +to+ and the +from+ column is
# then deleted. Renaming a column to its own name is a no-op; without the
# guard the copy-then-delete sequence would remove the column entirely.
#
# Returns the values of the renamed column.
def rename(from, to:)
  return by_col[from] if from == to

  by_col[to] = by_col[from]
  delete from
end
resample(chunk_range, seed: nil)
click to toggle source
# File lib/datamix/refinements/array.rb, line 36
# Build a randomized variation of a numeric series: cut it into chunks,
# shuffle the chunks, re-connect them, and shift the result so that its
# minimum equals the minimum of the original.
#
# chunk_range - range passed to Random#rand to pick each chunk's length.
# seed        - optional seed; the same seed produces the same result.
#
# Returns a new array of the same length; the receiver is not modified.
# Arrays with fewer than two elements are returned as a plain copy.
def resample(chunk_range, seed: nil)
  # Nothing to split or re-connect.
  return dup if size < 2

  generator = seed ? Random.new(seed) : Random.new

  # Save min value, we will adjust the result to it later
  min_value = min

  # Split the array to chunks
  chunks = []
  remaining = dup
  until remaining.empty?
    seam = generator.rand(chunk_range) - 1
    chunks.push remaining.slice!(0..seam)
  end

  # If the last chunk contains one element only, merge it with the
  # previous chunk
  if chunks.last.size == 1
    orphan = chunks.pop
    chunks.last.concat orphan
  end

  # Shuffle the chunks
  chunks = chunks.sample chunks.size, random: generator

  # Adjust each chunk so that its beginning connects with the end of the
  # previous (already adjusted) chunk. The step between the two is one of
  # the chunk's own consecutive changes, picked at random.
  chunks.each_cons(2) do |previous, chunk|
    connector = previous.last

    # A single-element chunk has no internal change to sample from, so it
    # continues flat from the connector.
    step = chunk.each_cons(2).map { |a, b| b - a }.sample(random: generator) || 0
    delta = step + connector - chunk.first
    chunk.map! { |val| val + delta }
  end

  # Merge chunks to a flat array
  result = chunks.flatten

  # Move the entire array up or down so that its min value is equal to
  # the original min value recorded at the beginning.
  shift = result.min - min_value
  result.map! { |val| val - shift }
end
round(decimals=0)
click to toggle source
# File lib/datamix/refinements/array.rb, line 83
# Round every value to +decimals+ places; nil/false entries come back as nil.
def round(decimals = 0)
  map do |val|
    next nil unless val

    val.round(decimals)
  end
end
save_as(filename)
click to toggle source
Save to a CSV or TSV file
# File lib/datamix/refinements/csv_table.rb, line 96
# Write the table to +filename+. A ".csv" extension (case-insensitive) writes
# the to_s output; any other extension writes the to_tsv output.
def save_as(filename)
  content =
    if File.extname(filename).downcase == '.csv'
      to_s
    else
      to_tsv
    end

  File.write(filename, content)
end
show(rows=:all)
click to toggle source
Print some or all rows
# File lib/datamix/refinements/csv_table.rb, line 103
# Print the to_ascii rendering of the table; +rows+ is passed through to it.
def show(rows = :all)
  rendered = to_ascii(rows)
  puts rendered
end
to_ascii(rows=:all)
click to toggle source
Returns a table string
# File lib/datamix/refinements/csv_table.rb, line 108
# Render the table with TTY::Table's :ascii renderer and return it as a
# string ending in a newline.
#
# rows - :all for every row, otherwise the count handed to first.
def to_ascii(rows = :all)
  selected = rows == :all ? by_row : first(rows)
  body = selected.map(&:fields)
  rendered = TTY::Table.new(headers, body).render(:ascii, padding: [0, 1])
  "#{rendered}\n"
end
to_tsv()
click to toggle source
Convert table to a TSV string
# File lib/datamix/refinements/csv_table.rb, line 117
# Build a TSV string: the header line first, then one line per row, with
# cells joined by tabs and lines joined by "\n" (no trailing newline).
def to_tsv
  lines = [headers, *map(&:fields)]
  lines.map { |cells| cells.join("\t") }.join("\n")
end
uniq?()
click to toggle source
# File lib/datamix/refinements/array.rb, line 87
# True when removing duplicates does not change the number of elements.
def uniq?
  uniq.length == length
end
window(window_size) { |self| ... }
click to toggle source
# File lib/datamix/refinements/array.rb, line 91
# Slide a window of +window_size+ consecutive elements over the array and
# collect the block's result for each window.
#
# The block receives each window as an array. The result is left-padded with
# nil so that it has the same length as the receiver and each value sits at
# the position of its window's last element. When the window is larger than
# the array no window fits and the result is all nil.
#
# Raises ArgumentError when +window_size+ is not positive.
def window(window_size, &_block)
  # Clamp the padding so an oversized window cannot make the result longer
  # than the receiver.
  padding = Array.new([window_size - 1, size].min)
  padding.concat each_cons(window_size).map { |slice| yield slice }
end