class Slicing::Base
Public Instance Methods
add(path, output, *headers)
click to toggle source
# File lib/slicing.rb, line 40
# Copies the CSV at +path+ into +output+, writing +headers+ as one row before
# the copied rows.
#
# path    - path of the CSV file to read.
# output  - path of the CSV file to append to (opened in "a+" mode).
# headers - the values written as the leading row.
#
# The output file is opened once for the whole copy instead of once per row,
# and the header row is written even when the input has no rows.
def add(path, output, *headers)
  CSV.open(output, "a+") do |csv|
    csv << headers
    CSV.foreach(path) { |row| csv << row }
  end
end
clean(path, output, name, value)
click to toggle source
# File lib/slicing.rb, line 35
# Placeholder command: accepts its four arguments and does nothing with them.
# Always returns nil.
def clean(path, output, name, value)
  # puts "add header"
  nil
end
count(csv_file)
click to toggle source
# File lib/slicing.rb, line 214
# Prints the number of rows and columns of +csv_file+, followed by its first
# row (inspected, then comma-joined via print_header).
#
# csv_file - path of the CSV file to read; every line, including the first,
#            is counted as a row.
#
# The column count is taken from the first row. An empty file reports
# "0 rows 0 columns" instead of failing on a missing first row.
def count(csv_file)
  data = CSV.read(csv_file, headers: false, encoding: "ISO8859-1:utf-8")
  first_row = data.first
  columns = first_row ? first_row.count : 0

  puts "#{data.count} rows #{columns} columns"
  puts "---"
  puts first_row.to_s
  puts "---"
  print_header(first_row)
end
first(csv_file)
click to toggle source
# File lib/slicing.rb, line 180
# Prints the leading rows of +csv_file+, one field per line.
#
# csv_file - path of the CSV file to read.
#
# The number of rows printed is options[:line]; when that value is nil the
# comparison never matches and every row is printed.
#
# The loop is left with +break+ rather than +exit+, so the method returns to
# its caller instead of terminating the whole process.
def first(csv_file) # , value=100
  stop = options[:line]
  printed = 0
  CSV.foreach(csv_file, headers: false, encoding: "ISO8859-1:utf-8") do |row|
    break if printed == stop

    printed += 1
    begin
      puts row
    rescue StandardError
      # Printing stays best-effort, as before: a row that cannot be written
      # is skipped but still counts towards the limit.
    end
  end
end
freq(path, column_name, output_path)
click to toggle source
# File lib/slicing.rb, line 99
# Counts how often each value occurs in one column and appends the
# value/count pairs to a CSV file.
#
# path        - base name of the input; "./<path>.csv" is read and
#               "./<path>-counted.csv" is appended to.
# column_name - header of the column whose values are counted.
# output_path - NOTE(review): currently unused; the output location is
#               derived from +path+. Confirm whether it should be honoured.
#
# The file is read with headers enabled, so the column already excludes the
# header cell. Every value is therefore counted; skipping the first element
# would drop a real data row.
def freq(path, column_name, output_path)
  file_to_count = "./#{path}.csv"
  output = "./#{path}-counted.csv"

  table = CSV.read(file_to_count, headers: true, encoding: "ISO8859-1:utf-8")
  counts = score(table[column_name])

  CSV.open(output, "a+") do |csv|
    counts.each { |value, occurrences| csv << [value, occurrences] }
  end
end
head(csv_file)
click to toggle source
# File lib/slicing.rb, line 194
# Prints the first row of +csv_file+: its fields one per line, the column
# count, and the row again via print_header.
#
# csv_file - path of the CSV file to read.
#
# Only the first row is read. The loop is left with +break+ rather than
# +exit+, so the method returns to its caller instead of terminating the
# whole process. Nothing is printed for an empty file.
def head(csv_file)
  CSV.foreach(csv_file, headers: false, encoding: "ISO8859-1:utf-8") do |row|
    puts row
    puts "----"
    puts "#{row.count} columns"
    puts "----"
    print_header(row)
    break
  end
end
list(path, name)
click to toggle source
# File lib/slicing.rb, line 66
# Prints the distinct values found in one column, then how many there are.
#
# path - path of the CSV file to read (first line is treated as headers).
# name - header of the column to list.
def list(path, name)
  table = CSV.read(path, headers: true, encoding: "ISO8859-1:utf-8")
  distinct = table[name].uniq

  puts distinct
  puts "--"
  puts "#{distinct.count} items"
end
mask(path, column_name, output_path)
click to toggle source
# File lib/slicing.rb, line 118
# Copies the CSV at +path+ to +output_path+, replacing every value in
# +column_name+ with the result of masking(value).
#
# path        - path of the CSV file to read (first line is the header row).
# column_name - header of the column to mask.
# output_path - path of the CSV file to append to (opened in "a+" mode).
#
# The header row is copied unchanged. Rows that do not contain +column_name+
# are copied unchanged as well, so no extra column is created.
#
# The read options are passed as keywords; a positional options hash is
# rejected by CSV.read on Ruby 3.
def mask(path, column_name, output_path)
  original = CSV.read(path, headers: true, return_headers: true, encoding: "ISO8859-1:utf-8")

  CSV.open(output_path, "a+") do |csv|
    original.each do |row|
      if !row.header_row? && row.has_key?(column_name)
        row[column_name] = masking(row[column_name])
      end
      csv << row
    end
  end
end
reduce(path, output, start)
click to toggle source
# File lib/slicing.rb, line 75
# Appends the first +start+ rows of the CSV at +path+ to +output+.
#
# path   - path of the CSV file to read.
# output - path of the CSV file to append to (opened in "a+" mode).
# start  - number of leading rows to keep; converted with +to_i+.
#
# Rows are pushed onto the writer (not the other way round), the output is
# opened once, and reading stops as soon as the limit is reached.
def reduce(path, output, start)
  limit = start.to_i
  CSV.open(output, "a+") do |writer|
    kept = 0
    CSV.foreach(path) do |row|
      break if kept >= limit

      writer << row
      kept += 1
    end
  end
end
retain(path, output, *names)
click to toggle source
# File lib/slicing.rb, line 128
# Appends to +output+ a copy of the CSV at +path+ containing only the columns
# listed in +names+, in the order given.
#
# path   - path of the CSV file to read; its first row supplies column names.
# output - path of the CSV file to append to (opened in "a+" mode).
# names  - column names to keep.
#
# If a name is not found in the first row, a message and the first row are
# printed and the process exits.
def retain(path, output, *names)
  # An empty file has no first row; fall back to "" as the original did.
  header = CSV.open(path, &:shift) || ""

  positions = names.map do |column|
    position = header.index(column)
    if position.nil?
      puts "#{column} is not a column name."
      puts "--"
      puts header
      exit
    end
    position
  end

  CSV.open(output, "a+") do |csv|
    CSV.foreach(path) { |row| csv << row.values_at(*positions) }
  end
end
rm(path, column_name, output)
click to toggle source
# File lib/slicing.rb, line 163
# Appends to +output+ a copy of the CSV at +path+ with +column_name+ removed.
#
# path        - path of the CSV file to read.
# column_name - header of the column to delete.
# output      - path of the CSV file to append to (opened in "a+" mode).
#
# Read settings come from +options+: :headers (also used for
# :return_headers), :utf (the encoding) and, only when it is set, :rowsep.
#
# The settings are passed to CSV.read as keywords; a positional options hash
# is rejected by CSV.read on Ruby 3.
def rm(path, column_name, output)
  # headers, rowsep, utf = process_options(options[:headers], options[:rowsep], options[:utf])
  read_options = {
    headers: options[:headers],
    return_headers: options[:headers],
    encoding: options[:utf]
  }
  read_options[:row_sep] = options[:rowsep] unless options[:rowsep].nil?

  original = CSV.read(path, **read_options)
  original.delete(column_name)

  CSV.open(output, "a+") do |csv|
    original.each { |row| csv << row }
  end
end
sample(path, output_path, size)
click to toggle source
# File lib/slicing.rb, line 88
# Appends a random selection of data rows from the CSV at +path+ to
# +output_path+.
#
# path        - path of the CSV file to read (first line is treated as
#               headers and is never part of the selection).
# output_path - path of the CSV file to append to (opened in "a+" mode).
# size        - number of rows to pick; converted with +to_i+ so a string
#               such as "10" is accepted.
#
# The header row itself is not written to the output.
def sample(path, output_path, size)
  table = CSV.read(path, headers: true, encoding: "ISO8859-1:utf-8")
  # CSV::Table has no #sample, so collect its rows into an Array first.
  chosen = table.entries.sample(size.to_i)

  CSV.open(output_path, "a+") do |csv|
    chosen.each { |row| csv << row }
  end
end
show(path, output, start)
click to toggle source
# File lib/slicing.rb, line 54
# Prints one row of the CSV at +path+, one field per line.
#
# path   - path of the CSV file to read.
# output - not used by this method.
# start  - 1-based number of the row to print; converted with +to_i+.
#
# Reading stops once the requested row has been printed. Nothing is printed
# when the file has fewer rows than +start+.
def show(path, output, start)
  wanted = start.to_i
  line_number = 0
  CSV.foreach(path) do |row|
    line_number += 1
    next unless line_number == wanted

    puts row
    break
  end
end
subset(csv_file, output)
click to toggle source
# File lib/slicing.rb, line 225
# Appends the leading rows of +csv_file+ to +output+.
#
# csv_file - path of the CSV file to read.
# output   - path of the CSV file to append to (opened in "a+" mode).
#
# The number of rows copied is options[:line]; when that value is nil the
# comparison never matches and every row is copied.
#
# The output is opened once rather than once per row, and the loop is left
# with +break+ rather than +exit+ so the method returns to its caller. A row
# that cannot be written is reported on stderr instead of being skipped
# silently; a failure to open +output+ now raises.
def subset(csv_file, output)
  stop = options[:line]
  copied = 0

  CSV.open(output, "a+") do |writer|
    CSV.foreach(csv_file, headers: false, encoding: "ISO8859-1:utf-8") do |row|
      break if copied == stop

      copied += 1
      begin
        writer << row
      rescue StandardError => e
        warn "subset: could not write row #{copied}: #{e.message}"
      end
    end
  end
end
unique(path, column_name)
click to toggle source
# File lib/slicing.rb, line 206
# Prints how many distinct values one column of a CSV file contains.
#
# path        - path of the CSV file to read (first line is treated as
#               headers).
# column_name - header of the column to inspect.
#
# The header row is not returned as data, so the header cell is not counted
# as a value. Nothing is printed when +column_name+ is not one of the file's
# headers.
def unique(path, column_name)
  data = CSV.read(path, headers: true, encoding: "ISO8859-1:utf-8")
  return unless data.headers.include?(column_name)

  puts data[column_name].uniq.count
end
Private Instance Methods
masking(value)
click to toggle source
# File lib/slicing.rb, line 276
# Returns the MD5 hex digest of +value+, or nil when +value+ is nil.
def masking(value)
  return nil if value.nil?

  Digest::MD5.hexdigest(value)
end
print_header(array)
click to toggle source
desc :subsetagain, "" def subsetagain csv_file, output, value=10
path = csv_file output_directory = output #"/Users/ytbryan/Desktop/output/subset-2015.csv" #output directory stop = value counter = 0 CSV.foreach(path, :headers => false, :row_sep => "\r\n", encoding: "ISO8859-1:utf-8") do |row| exit if counter == stop begin counter = counter + 1 CSV.open(output_directory, "a+") do |csv| csv << row end rescue end end
end
# File lib/slicing.rb, line 263
# Prints the elements of +array+ joined by commas on one line.
# Prints nothing when +array+ is nil.
def print_header(array)
  return if array.nil?

  puts array.join(",")
end
print_progress(current, total)
click to toggle source
# File lib/slicing.rb, line 286
# Writes a one-line progress indicator to standard output, starting with a
# carriage return and ending without a newline.
#
# current - number of items processed so far.
# total   - number of items overall.
#
# The percentage is computed in floating point and rounded to a whole
# number. A +total+ of zero is reported as 0% rather than dividing by zero.
# The leading number is +current+.
def print_progress(current, total)
  percent = total.zero? ? 0 : (current * 100.0 / total).round
  $stdout.write "\r #{current} - #{percent}% completed."
end
process_options(headers, rowsep, utf)
click to toggle source
# File lib/slicing.rb, line 267
# Fills in defaults for the CSV reading options.
#
# headers - headers setting; defaults to true when nil.
# rowsep  - row separator; defaults to "\r\n" when nil.
# utf     - encoding string; defaults to "ISO8859-1:utf-8" when nil.
#
# Returns a three-element array [headers, rowsep, utf]. Supplied values are
# passed through; with all three arguments nil the result is the fixed
# default triple.
def process_options(headers, rowsep, utf)
  [
    headers.nil? ? true : headers,
    rowsep || "\r\n",
    utf || "ISO8859-1:utf-8"
  ]
end
score( array )
click to toggle source
# File lib/slicing.rb, line 280
# Counts how many times each element occurs in +array+.
#
# Returns a Hash mapping element => count, in order of first appearance.
# The hash has a default value of 0 for elements that were never seen.
def score(array)
  array.each_with_object(Hash.new(0)) do |element, tally|
    tally[element] += 1
  end
end