class CADataFrame
ARRANGER
Class methods
BASIC Comparison
BASIC Manipulations
GROUPING
Copyright © 2014, Sameer Deshmukh All rights reserved.
Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:
-
Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
-
Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS “AS IS” AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
PIVOT TABLE
Attributes
Public Class Methods
# File lib/carray-dataframe/dataframe.rb, line 954
# Concatenates the given data frames row-wise into a new CADataFrame.
#
# Column names come from the first argument; the column of that name is
# fetched from every argument. CATimeIndex columns are joined with
# CATimeIndex.concat, all others with CArray.bind along axis 0 using the
# data type of the first frame's column. The row index is concatenated only
# when every frame has one; otherwise the result has no row index.
def self.concat (*args)
  names = args.first.column_names
  new_columns = {}
  names.each do |name|
    parts = args.map { |frame| frame.column(name) }
    new_columns[name] =
      if parts.first.is_a?(CATimeIndex)
        CATimeIndex.concat(*parts)
      else
        CArray.bind(parts.first.data_type, parts, 0)
      end
  end
  indices = args.map(&:row_index)
  new_row_index =
    if !indices.all?
      nil
    elsif indices.first.is_a?(CATimeIndex)
      CATimeIndex.concat(*indices)
    else
      CArray.join(*indices).flatten
    end
  CADataFrame.new(new_columns, index: new_row_index)
end
# File lib/carray-dataframe/io.rb, line 78
# Deprecated alias of CADataFrame.parse_csv; prints a deprecation warning
# and forwards every option (including +quote_char+, which was previously
# accepted but silently dropped) to parse_csv.
def self.from_csv (file, sep: ",", rs: $/, quote_char: '"', index: nil, &block)
  warn "CADataFrame.from_csv will be obsolete, use CADataFrame.parse_csv"
  self.parse_csv(file, sep: sep, rs: rs, quote_char: quote_char, index: index, &block)
end
# File lib/carray-dataframe/dataframe.rb, line 1321
# Restores a data frame from a file containing Marshal data.
#
# The file is opened with File.open in binary mode: Marshal data is binary,
# and Kernel#open would treat a name starting with "|" as a command.
# Raises RuntimeError ("invalid data") unless the loaded object is a
# CADataFrame.
#
# NOTE(review): Marshal.load can instantiate arbitrary objects; only use
# this on files from a trusted source.
def self.load (filename)
  out = File.open(filename, "rb") {|io| Marshal.load(io) }
  raise "invalid data" unless out.is_a?(CADataFrame)
  return out
end
# File lib/carray-dataframe/io.rb, line 60
# Deprecated alias of CADataFrame.read_csv; prints a deprecation warning
# and forwards every option (including +index+, which was previously
# accepted but silently dropped) to read_csv.
def self.load_csv (file, sep: ",", rs: $/, encoding: nil, quote_char: '"', index: nil, &block)
  warn "CADataFrame.load_csv will be obsolete, use CADataFrame.read_csv"
  self.read_csv(file, sep: sep, rs: rs, quote_char: quote_char, encoding: encoding, index: index, &block)
end
# File lib/carray-dataframe/io.rb, line 34
# Loads a table through CArray.load_sqlite3 (all arguments are forwarded),
# converts it with to_dataframe, and calls `mask name, nil` for every column
# inside an arrange block. Returns nil when the conversion yields a falsy
# value.
def self.load_sqlite3 (*args)
  df = CArray.load_sqlite3(*args).to_dataframe
  return nil unless df
  df.arrange {
    column_names.each { |name| mask(name, nil) }
  }
end
# File lib/carray-dataframe/dataframe.rb, line 943
# Merges the columns of all given frames into one new CADataFrame.
# When a column name occurs in several frames the last one wins. The row
# index of the first frame is used for the result.
def self.merge (*args)
  first_frame = args.first
  merged = {}
  args.each do |table|
    table.column_names.each { |name| merged[name] = table.col(name) }
  end
  CADataFrame.new(merged, index: first_frame.row_index)
end
Constructor
# File lib/carray-dataframe/dataframe.rb, line 13
# Builds a data frame from a Hash of columns, a CArray table, an Array of
# Hashes (records) or an Array handled by array_to_columns.
#
# data    :: Hash, CArray or Array (anything else raises "unknown data")
# index   :: row index; passed to set_index(index, inplace: true)
# columns :: column names (not allowed together with Hash data)
# order   :: column names giving the column order; every column must be named
# clone   :: not implemented; a truthy value raises NotImplementedError
# block   :: when given, it is passed to arrange after construction
#
# Instance state set here:
#   @column_names = Array   holds column names and its order
#   @column_data  = Hash    holds data entities
#   @row_number   = Integer holds number of rows
#   @row_index    = CArray  stores row index (any object)
#   @__methods__  = Hash    (starts empty; filled by #method)
def initialize (data, index: nil, columns: nil, order: nil, clone: false, &block)
  # Stores data entity
  case data
  when Hash
    raise "columns option is not needed for hash data" if columns
    @column_data  = columns_to_columns(data)
    @column_names = @column_data.keys
  when CArray
    if columns
      @column_names = columns.map(&:to_s)
    elsif data.respond_to?(:column_names)
      if data.column_names.is_a?(Array)
        @column_names = data.column_names.map(&:to_s)
      else
        @column_names = data.dim1.times.map{|i| "c#{i}" }
      end
    elsif order
      @column_names = order.map(&:to_s)
    else
      raise "can't determin column names use columns or order option"
    end
    if @column_names.size != data.dim1
      raise "mismatch between 'column_names' and table columns"
    end
    @column_data = table_to_columns(data)
  when Array
    case data.first
    when Hash
      @column_data = {}
      # Union of all record keys, in first-seen order.
      all_keys = {}
      data.each do |hash|
        all_keys.update(hash)
      end
      @column_names = []
      all_keys.each_key do |k|
        list = data.map do |hash|
          value = hash[k]
          # Only a missing/nil cell becomes UNDEF; `false` is a real value
          # (the former `hash[k] || UNDEF` also replaced `false`).
          value.nil? ? UNDEF : value
        end
        name = k.to_s
        @column_names << name
        @column_data[name] = list.to_ca
      end
    else
      if columns
        @column_names = columns.map(&:to_s)
      elsif order
        @column_names = order.map(&:to_s)
      else
        raise "columns or order option should be given"
      end
      @column_data = array_to_columns(data)
    end
  else
    raise "unknown data"
  end
  if order
    if @column_names.size != order.size
      raise 'invalid order option'
    end
    new_column_data = {}
    order.each do |key|
      if @column_data.has_key?(key.to_s)
        new_column_data[key.to_s] = @column_data[key.to_s]
      else
        # Double quotes are required for the interpolation; the previous
        # single-quoted literal ended before the column name.
        raise "invalid column name '#{key.to_s}' in order option"
      end
    end
    @column_data  = new_column_data
    @column_names = new_column_data.keys
  end
  # Sets @row_number and check column length
  if @column_data.empty?
    # No columns: the row count can only come from the index (0 without one).
    @row_number = index ? index.size : 0
  else
    @row_number = @column_data.first[1].size
    if @column_names.any?{ |key| @column_data[key].size != @row_number }
      raise "column sizes mismatch"
    end
  end
  # Processing option 'index'
  set_index(index, inplace: true)
  @__methods__ = {}
  if clone
    raise NotImplementedError, "clone option is not implemented"
  end
  if block_given?
    arrange(&block)
  end
end
# File lib/carray-dataframe/io.rb, line 65
# Parses CSV through CArray.parse_csv (sep, rs, quote_char and the block are
# forwarded), converts the result with to_dataframe(index: index), and calls
# `mask name, nil` for every column inside an arrange block.
# Returns nil when the conversion yields a falsy value.
#
# NOTE(review): if CArray.parse_csv itself can return nil, the chained
# to_dataframe call raises before the nil check — confirm.
def self.parse_csv (file, sep: ",", rs: $/, quote_char: '"', index: nil, &block)
  table = CArray.parse_csv(file, sep: sep, rs: rs, quote_char: quote_char, &block)
  df = table.to_dataframe(index: index)
  return nil unless df
  df.arrange {
    column_names.each { |name| mask(name, nil) }
  }
end
# File lib/carray-dataframe/io.rb, line 47
# Reads CSV through CArray.read_csv (sep, rs, quote_char, encoding and the
# block are forwarded), converts the result with to_dataframe(index: index),
# and calls `mask name, nil` for every column inside an arrange block.
# Returns nil when the conversion yields a falsy value.
#
# NOTE(review): if CArray.read_csv itself can return nil, the chained
# to_dataframe call raises before the nil check — confirm.
def self.read_csv (file, sep: ",", rs: $/, quote_char: '"', encoding: nil, index: nil, &block)
  table = CArray.read_csv(file, sep: sep, rs: rs, quote_char: quote_char, encoding: encoding, &block)
  df = table.to_dataframe(index: index)
  return nil unless df
  df.arrange {
    column_names.each { |name| mask(name, nil) }
  }
end
Public Instance Methods
# File lib/carray-dataframe/dataframe.rb, line 988
# Unary minus; delegates to cmp with the :-@ operator.
def -@
  cmp(:-@)
end
# File lib/carray-dataframe/dataframe.rb, line 992
# Less-than comparison; delegates to cmp(:<, other).
def < (other)
  cmp(:<, other)
end
# File lib/carray-dataframe/dataframe.rb, line 996
# Less-than-or-equal comparison; delegates to cmp(:<=, other).
def <= (other)
  cmp(:<=, other)
end
# File lib/carray-dataframe/dataframe.rb, line 1000
# Greater-than comparison; delegates to cmp(:>, other).
def > (other)
  cmp(:>, other)
end
# File lib/carray-dataframe/dataframe.rb, line 1004
# Greater-than-or-equal comparison; delegates to cmp(:>=, other).
def >= (other)
  cmp(:>=, other)
end
# File lib/carray-dataframe/reference.rb, line 73
# Element reference.
#
# * two arguments             -> loc[arg, opt]
# * Range                     -> iloc[arg] when it begins with an Integer and
#                                the frame has no row index, otherwise loc[arg]
# * CArray (rank 1)           -> loc[arg]; other ranks raise
# * String / Symbol           -> the column of that name
# * anything else             -> a new CADataFrame holding the columns chosen
#                                by select_columns(arg), same row index
def [] (arg, opt = :__dummy__)
  return loc[arg, opt] if opt != :__dummy__
  case arg
  when Range
    return iloc[arg] if arg.begin.is_a?(Integer) && !@row_index
    loc[arg]
  when CArray
    raise "index should be 1-dim array" unless arg.rank == 1
    loc[arg]
  when String, Symbol
    column(arg.to_s)
  else
    picked = {}
    select_columns(arg).each { |key| picked[key] = @column_data[key] }
    CADataFrame.new(picked, index: @row_index)
  end
end
# File lib/carray-dataframe/reference.rb, line 102
# Element assignment; mirrors the dispatch of #[].
#
# * two index arguments -> loc[arg, opt] = value
# * Range               -> iloc when it begins with an Integer and the frame
#                          has no row index, otherwise loc (exactly one of the
#                          two; previously the iloc case also fell through to
#                          a second assignment via loc)
# * CArray              -> loc[arg] = value
# * String / Symbol     -> overwrite the existing column, or append a new
#                          column through arrange { append ... }
# * anything else       -> assign to every column chosen by
#                          select_columns(arg); a CADataFrame value is
#                          assigned column by column, by position
def []= (arg, opt = :__dummy__, value)
  if opt != :__dummy__
    loc[arg, opt] = value
  else
    case arg
    when Range
      if arg.begin.is_a?(Integer) && !@row_index
        iloc[arg] = value
      else
        loc[arg] = value
      end
    when CArray
      loc[arg] = value
    when String, Symbol
      if column(arg.to_s)
        column(arg.to_s)[] = value
      else
        arrange {
          append arg, value
        }
      end
    else
      column_selector = select_columns(arg)
      case value
      when CADataFrame
        values = column_selector.each_index.map { |i| value.column(i).to_ca }
        column_selector.each_with_index do |key, i|
          column(key)[] = values[i]
        end
      else
        column_selector.each do |key|
          column(key)[] = value
        end
      end
    end
  end
end
# File lib/carray-dataframe/dataframe.rb, line 123
# Returns the alias table (@__methods__) consulted by method_missing.
def __methods__
  @__methods__
end
# File lib/carray-dataframe/dataframe.rb, line 728
# Returns a new CADataFrame whose column names are the current names with
# +suf+ appended (String#+). Column data and the row index are passed on
# as they are.
def add_suffix (suf)
  renamed = {}
  each_column_name { |name| renamed[name.to_s + suf] = column(name) }
  CADataFrame.new(renamed, index: @row_index)
end
# File lib/carray-dataframe/dataframe.rb, line 460
# Appends a column named +name+ as the last column and returns it.
#
# The data is +new_column+ when given, otherwise the result of the block
# (instance_exec'ed with self as its argument), otherwise an :object template
# of the first existing column. Non-CArray data is converted with to_ca.
# Raises unless the column is 1-dimensional with @row_number elements.
#
# NOTE(review): an already existing name is pushed onto @column_names a
# second time — confirm callers never pass one.
def append_column (name, new_column = nil, &block)
  name = name.to_s
  new_column ||=
    if block
      instance_exec(self, &block)
    else
      @column_data.first[1].template(:object)
    end
  new_column = new_column.to_ca unless new_column.is_a?(CArray)
  unless new_column.rank == 1 && new_column.size == @row_number
    raise "invalid shape of appended column"
  end
  @column_names.push(name)
  @column_data[name] = new_column
  new_column
end
Arrange
# File lib/carray-dataframe/dataframe.rb, line 433
# Runs the block through a new Arranger built for this frame and returns
# whatever Arranger#arrange returns.
def arrange (&block)
  arranger = Arranger.new(self)
  arranger.arrange(&block)
end
# File lib/carray-dataframe/dataframe.rb, line 833
# Renders the frame as a plain-text table headed by a "DataFrame: ..." line.
#
# rowmax      :: Integer limit; when the frame has more rows, only the head
#                and tail rows are shown with a "..." row in between. Any
#                non-Integer (default :full) shows all rows.
# time_format :: strftime template for CATimeIndex columns / row index
#                (default "%F %T%:z")
# index       :: when true, a leading column shows the row index (or the row
#                sequence number if there is no row index)
#
# Two layouts exist: a fast one when every name and cell is ASCII-only, and
# one that pads by display width (__strwidth__) otherwise.
#
# Changes from the previous version:
# * cells/header are joined with two spaces so they line up with the rules,
#   which join columns with "--";
# * the per-cell padding table is only computed when there are data rows
#   (previously it used an unset `datawidth` and raised for an empty frame
#   with non-ASCII column names).
def ascii_table (rowmax = :full, time_format: nil, index: true)
  columns = @column_data.clone
  stamp = time_format || "%F %T%:z"
  @column_names.each do |name|
    if columns[name].is_a?(CATimeIndex)
      columns[name] = columns[name].time.time_strftime(stamp)
    end
  end
  if index
    namelist = [" "] + @column_names
    if @row_index
      if @row_index.is_a?(CATimeIndex)
        row_index = @row_index.time.time_strftime(stamp)
      else
        row_index = @row_index
      end
      tbl = CADFArray.new(namelist, columns.update(" " => row_index))
    else
      tbl = CADFArray.new(namelist, columns.update(" " => CArray.int(@row_number).seq))
    end
  else
    namelist = @column_names
    tbl = CADFArray.new(namelist, columns)
  end
  if rowmax.is_a?(Integer) and @row_number > rowmax
    # head rows, an ellipsis row, then tail rows
    list = tbl[0..(rowmax/2),nil].to_a
    list.push namelist.map { "..." }
    list.push(*tbl[-rowmax/2+1..-1,nil].to_a)
    tbl = list.to_ca
  end
  datastr = tbl.convert {|c| __obj_to_string__(c) }.unmask("")
  # Per-column flags: true where a cell / name contains non-ASCII characters.
  datamb  = datastr.convert(:boolean, &:"ascii_only?").not.sum(0).ne(0)
  namemb  = namelist.to_ca.convert(:boolean) {|c| c.to_s.ascii_only? }.eq(0)
  mb      = datamb.or(namemb)
  namelen = namelist.map(&:length).to_ca
  datalen = datastr.convert(&:length)
  if mb.max == 0
    if datalen.size == 0
      lengths = namelen.to_a
    else
      lengths = datalen.max(0).pmax(namelen).to_a
    end
    hrule  = "-" + lengths.map {|width| "-"*width}.join("--") + "-"
    header = " " +
             [namelist, lengths].transpose.map{|name, width|
               "#{name.to_s.ljust(width)}"
             }.join("  ") + " "
    ary = [hrule, header, hrule]
    if datalen.size > 0
      datastr[:i,nil].each_with_index do |blk, i|
        list = blk.flatten.to_a
        ary << " " + [list, lengths].transpose.map{|value, width|
          "#{value.ljust(width)}"
        }.join("  ") + " "
      end
    end
    ary << hrule
    return "DataFrame: rows#=#{@row_number}: \n" + ary.join("\n")
  else
    namewidth = namelist.to_ca.convert{|c| __strwidth__(c.to_s) }
    if datalen.size == 0
      maxwidth = namewidth
      padding  = nil
    else
      datawidth = datastr.convert{|c| __strwidth__(c.to_s) }
      maxwidth  = datawidth.max(0).pmax(namewidth)
      # ljust counts characters, so widen by (chars - display width) per cell
      padding   = maxwidth[:*,nil] - datawidth + datalen
    end
    hrule  = "-" + maxwidth.map {|width| "-"*width}.join("--") + "-"
    header = " " +
             [namelist, maxwidth.to_a].transpose.map{|name, width|
               "#{name.to_s.ljust(width-__strwidth__(name.to_s)+name.to_s.length)}"
             }.join("  ") + " "
    ary = [hrule, header, hrule]
    if datalen.size > 0
      datastr[:i,nil].each_with_addr do |blk, i|
        list = blk.flatten.to_a
        ary << " " + list.map.with_index {|value, j|
          "#{value.ljust(padding[i,j])}"
        }.join("  ") + " "
      end
    end
    ary << hrule
    return "DataFrame: row#=#{@row_number}: \n" + ary.join("\n")
  end
end
# File lib/carray-dataframe/dataframe.rb, line 756
# Wraps the column data in a CADFArray. Without arguments all columns are
# used in @column_names order; otherwise only the named columns (names are
# converted with to_s).
def ca (*names)
  selected = names.empty? ? @column_names : names.map(&:to_s)
  CADFArray.new(selected, @column_data)
end
# File lib/carray-dataframe/dataframe.rb, line 574
# Builds a one-row CADataFrame (index [label]) with one value per column.
#
# With a block the value is block.(name, column); without one it is
# column.send(label.intern). Any StandardError raised while computing a
# column's value is swallowed and the cell becomes UNDEF.
def calculate (label, &block)
  result = {}
  each_column_name do |name|
    begin
      value = block ? yield(name, column(name)) : column(name).send(label.intern)
      result[name] = [value]
    rescue
      result[name] = [UNDEF]
    end
  end
  CADataFrame.new(result, index: [label])
end
# File lib/carray-dataframe/dataframe.rb, line 1070
# Classifies the values of column +name+ and returns a new CADataFrame with
# the columns "<name>_M", "<name>_L", "<name>_R" and "<name>_CLASS".
#
# Without +scale+ every distinct value is its own class: _CLASS is the
# position of the value in column.uniq and _M/_L/_R all hold those unique
# values. (The class numbers are now computed from the selected column; the
# previous code called convert on the Hash of all columns.)
#
# With +scale+ the values are binned by scale.bin; +opt+ may override
# :include_upper (false), :include_lowest (true) and :offset (0). _M, _L and
# _R are the mid, left and right values of each row's bin.
def classify (name, scale = nil, opt = {})
  values = @column_data[name.to_s]
  if not scale
    mids = values.uniq
    mapper = {}
    mids.each_with_index do |v, i|
      mapper[v] = i
    end
    cls = values.convert(:int32) {|v| mapper[v] }
    hash = {
      "#{name}_M" => mids,
      "#{name}_L" => mids,
      "#{name}_R" => mids,
      "#{name}_CLASS" => cls
    }
  else
    option = {
      :include_upper  => false,
      :include_lowest => true,
      :offset         => 0,
    }.update(opt)
    cls = scale.bin(values,
                    option[:include_upper],
                    option[:include_lowest],
                    option[:offset])
    mids  = ((scale + scale.shifted(-1))/2)[0..-2].to_ca
    left  = scale[0..-2]
    right = scale.shift(-1)[0..-2]
    hash = {
      "#{name}_M" => mids.project(cls).to_ca,
      "#{name}_L" => left.project(cls).to_ca,
      "#{name}_R" => right.project(cls).to_ca,
      "#{name}_CLASS" => cls
    }
  end
  return CADataFrame.new(hash)
end
Column, Row Access
# File lib/carray-dataframe/dataframe.rb, line 247
# Returns one column, selected by position (Integer) or by name
# (String / Symbol). Returns nil when no such column exists and raises for
# any other kind of specifier.
def column (spec)
  key =
    case spec
    when Integer        then @column_names[spec]
    when String, Symbol then spec.to_s
    else raise "invalid column specifier"
    end
  @column_data[key]
end
# File lib/carray-dataframe/dataframe.rb, line 239
# Returns the data_type_name of every column, in @column_names order.
def column_types
  @column_names.map do |name|
    @column_data[name].data_type_name
  end
end
# File lib/carray-dataframe/dataframe.rb, line 119
# Returns the internal Hash of column data (not a copy).
def columns
  return @column_data
end
# File lib/carray-dataframe/dataframe.rb, line 781
# Builds a Hash keyed by the values of column +key_name+.
#
# With a String +value_names+ each key maps to that column's value in the
# same row; with an Array each key maps to an Array of the named columns'
# values. Later rows overwrite earlier ones that share a key.
# Raises ArgumentError for unknown column names or any other argument type.
def columns_to_hash (key_name, value_names)
  unless @column_names.include?(key_name)
    raise ArgumentError, "include invalid key column name #{key_name}"
  end
  result = {}
  case value_names
  when String
    unless @column_names.include?(value_names)
      raise ArgumentError, "invalid key column name #{value_names}"
    end
    keys   = @column_data[key_name]
    values = @column_data[value_names]
    @row_number.times { |i| result[keys[i]] = values[i] }
  when Array
    unknown = value_names.any? { |s| !@column_names.include?(s) }
    if unknown
      raise ArgumentError, "include invalid column name in #{value_names.join(' ')}"
    end
    keys    = @column_data[key_name]
    sources = @column_data.values_at(*value_names)
    @row_number.times { |i| result[keys[i]] = sources.map { |c| c[i] } }
  else
    raise ArgumentError, "invalud argument"
  end
  result
end
# File lib/carray-dataframe/dataframe.rb, line 1109
# Cross-tabulates two columns: returns a CADataFrame whose rows are the
# sorted unique values of +name1+, whose columns are the sorted unique values
# of +name2+, and whose cells count the rows having that value pair.
def cross (name1, name2)
  col1 = column(name1)
  col2 = column(name2)
  var1 = col1.uniq.sort
  var2 = col2.uniq.sort
  # Pre-seed every combination with 0 so absent pairs are still reported.
  counts = {}
  var1.each do |v1|
    var2.each { |v2| counts[[v1, v2]] = 0 }
  end
  CArray.join([col1, col2]).to_a.each { |pair| counts[pair] += 1 }
  out = CArray.object(var1.size, var2.size) { 0 }
  var1.each_with_index do |v1, i|
    var2.each_with_index do |v2, j|
      out[i,j] = counts[[v1,v2]]
    end
  end
  CADataFrame.new(out, index: var1, columns: var2)
end
# File lib/carray-dataframe/dataframe.rb, line 657
# Returns a new CADataFrame containing only the rows in which no column is
# masked. The result is built without an index argument.
def delete_masked_rows
  keep = @column_data.first[1].template(:boolean) { true }
  @column_names.each do |name|
    keep = keep & @column_data[name].is_not_masked
  end
  survivors = {}
  @column_names.each do |name|
    survivors[name] = @column_data[name].to_ca[keep]
  end
  CADataFrame.new(survivors)
end
# File lib/carray-dataframe/dataframe.rb, line 669
# Evaluates the block with instance_eval and indexes every column with its
# result, returning the selection as a new CADataFrame (built without an
# index argument).
#
# NOTE(review): despite the name, the rows the block selects are the ones
# kept, not removed — confirm the intended contract.
def delete_rows (&block)
  selector = instance_eval(&block)
  picked = {}
  @column_names.each do |name|
    picked[name] = @column_data[name].to_ca[selector]
  end
  CADataFrame.new(picked)
end
# File lib/carray-dataframe/dataframe.rb, line 1243
# Collects column.describe for every column, builds a CADataFrame from the
# list (indexed by the column names) and returns its transpose.
def describe
  reports = @column_data.map { |_name, column| column.describe }
  CADataFrame.new(reports, index: @column_names).transpose
end
# File lib/carray-dataframe/dataframe.rb, line 646
# Replaces the column Hash with a clone whose values are the to_ca of each
# column, and replaces the row index (if any) with its clone.
# Mutates the receiver and returns self.
def detouch
  @column_data = @column_data.clone
  each_column_name { |name| @column_data[name] = @column_data[name].to_ca }
  @row_index = @row_index.clone if @row_index
  self
end
# File lib/carray-dataframe/dataframe.rb, line 448
# Lower-cases every column name in place and returns self.
#
# NOTE(review): names that only differ in case collapse into one data entry
# (the later column wins) while both stay listed in @column_names.
def downcase
  lowered_names = []
  lowered_data = {}
  each_column_name do |name|
    lowered = name.downcase
    lowered_names << lowered
    lowered_data[lowered] = @column_data[name]
  end
  @column_names = lowered_names
  @column_data = lowered_data
  self
end
# File lib/carray-dataframe/dataframe.rb, line 524
# Removes the given columns in place and returns self.
#
# Each argument is a String, a Symbol, or a Regexp matched against the
# column names; anything else raises. With no arguments nothing happens.
# The receiver's state is replaced (see #replace) by a new CADataFrame built
# from the remaining columns and the current row index.
def drop_column (*columns)
  return self if columns.empty?
  names = []
  columns.each do |c|
    case c
    when String then names << c
    when Symbol then names << c.to_s
    when Regexp then names.concat(@column_names.grep(c))
    else raise "invalid column specification"
    end
  end
  kept = {}
  each_column_name do |name|
    kept[name] = column(name) unless names.include?(name)
  end
  replace CADataFrame.new(kept, index: @row_index)
end
Iterators
# File lib/carray-dataframe/dataframe.rb, line 326
# Iterates over the column Hash, yielding each (name, column) pair.
# Returns whatever Hash#each returns.
def each_column (&block)
  @column_data.each(&block)
end
# File lib/carray-dataframe/dataframe.rb, line 330
# Iterates over the column names in order. Returns whatever Array#each
# returns.
def each_column_name (&block)
  @column_names.each(&block)
end
# File lib/carray-dataframe/dataframe.rb, line 342
# Yields every row.
#
# with    :: the class of the yielded row: Array (default), Hash or CArray;
#            anything else raises
# columns :: Array of column names, or a Regexp matched against the column
#            names; any other value selects all columns
#
# In Hash mode one Hash object is reused for every row, so callers that keep
# rows must copy them.
def each_row (with: Array, columns: nil, &block)
  column_names =
    case columns
    when Array  then columns
    when Regexp then @column_names.grep(columns)
    else @column_names
    end
  if with == Array
    @row_number.times do |i|
      yield column_names.map { |n| @column_data[n][i] }
    end
  elsif with == Hash
    row = {}
    @row_number.times do |i|
      column_names.each { |c| row[c] = @column_data[c][i] }
      yield row
    end
  elsif with == CArray
    joined = CArray.join(@column_data.values_at(*column_names))
    joined[:i,nil].each do |blk|
      yield blk.to_ca.compact
    end
  else
    raise "invalid data type for loop variable"
  end
end
# File lib/carray-dataframe/dataframe.rb, line 334
# Yields each row index value, or the row numbers 0...@row_number when the
# frame has no row index.
def each_row_index (&block)
  return @row_number.times(&block) unless @row_index
  @row_index.each(&block)
end
# File lib/carray-dataframe/dataframe.rb, line 373
# Yields every row together with its row index value (or its row number when
# the frame has no row index).
#
# with :: Array (default) yields the row as an Array of cell values; Hash
#         yields a name => value Hash; anything else raises.
#
# In Hash mode one Hash object is reused for every row, so callers that keep
# rows must copy them.
#
# Fix: Hash mode without a row index used `times do |idx, i|`, leaving `i`
# nil and then indexing the missing @row_index; it now yields the row number.
def each_row_with_row_index (with: Array, &block)
  if with == Array
    if @row_index
      @row_index.each_with_index do |idx, i|
        yield @column_data.map{|n,c| c[i] }, idx
      end
    else
      @row_number.times do |i|
        yield @column_data.map{|n,c| c[i] }, i
      end
    end
  elsif with == Hash
    row = {}
    if @row_index
      @row_index.each_with_index do |idx, i|
        @column_names.each do |c|
          row[c] = @column_data[c][i]
        end
        yield row, idx
      end
    else
      @row_number.times do |i|
        @column_names.each do |c|
          row[c] = @column_data[c][i]
        end
        yield row, i
      end
    end
  else
    raise "invalid data type for loop variable"
  end
end
# File lib/carray-dataframe/dataframe.rb, line 565
# Evaluates the block in the context of this frame (instance_exec) and
# returns its value. A block declaring exactly one parameter also receives
# the frame as that argument.
def execute (&block)
  if block.arity == 1
    instance_exec(self, &block)
  else
    instance_exec(&block)
  end
end
# File lib/carray-dataframe/dataframe.rb, line 420
# Calls fill(value) on every named column that has_column? reports as
# present; other names are skipped silently. The last argument is the fill
# value. Returns self.
def fill (*names, value)
  names.each do |name|
    next unless has_column?(name)
    column(name).fill(value)
  end
  self
end
# File lib/carray-dataframe/dataframe.rb, line 1277
# Replaces the named columns by the dummy columns returned from each
# column's own get_dummies (a value => dummy mapping) and returns the result
# as a new CADataFrame with the same row index. Untouched columns come
# first, followed by the dummy columns.
#
# names      :: columns to expand; Strings or Symbols (Symbols previously
#               never matched because column names are Strings)
# prefix     :: nil    -> "<column name><sep><value>"
#               String -> "<prefix><sep><value>"
#               Array  -> prefix[k], k counting the expanded columns in
#                         frame order
#               Hash   -> prefix[column name]
#               (any other type adds no dummy columns)
# prefix_sep :: separator between prefix and value (default "_")
def get_dummies (*names, prefix: nil, prefix_sep: "_")
  targets = names.map(&:to_s)
  keep_columns = {}
  new_columns = {}
  k = 0
  @column_names.each do |name|
    unless targets.include?(name)
      keep_columns[name] = @column_data[name]
      next
    end
    hash = @column_data[name].get_dummies
    case prefix
    when nil
      hash.each do |v, dummy|
        new_columns["#{name}#{prefix_sep}#{v}"] = dummy
      end
    when String
      hash.each do |v, dummy|
        new_columns["#{prefix}#{prefix_sep}#{v}"] = dummy
      end
    when Array
      hash.each do |v, dummy|
        new_columns["#{prefix[k]}#{prefix_sep}#{v}"] = dummy
      end
    when Hash
      hash.each do |v, dummy|
        new_columns["#{prefix[name]}#{prefix_sep}#{v}"] = dummy
      end
    end
    k += 1
  end
  CADataFrame.new(keep_columns.update(new_columns), index: @row_index)
end
# File lib/carray-dataframe/group.rb, line 9
# Groups the frame by one column (CADataFrameGroup) or, for any other number
# of names, through CADataFrameGroupMulti.
def group_by (*names)
  return CADataFrameGroupMulti.new(self, *names) unless names.size == 1
  CADataFrameGroup.new(self, names[0])
end
# File lib/carray-dataframe/dataframe.rb, line 230
# Tells whether the frame has a column called +name+ (String or Symbol).
# The special name "index" reports whether the frame has a row index.
#
# Column names are stored as Strings, so the name is normalised with to_s
# before the lookup (Symbols previously never matched).
def has_column? (name)
  key = name.to_s
  return has_index? if key == "index"
  @column_names.include?(key)
end
# File lib/carray-dataframe/dataframe.rb, line 217
# Returns true when a row index is set, false otherwise.
def has_index?
  return true if @row_index
  false
end
# File lib/carray-dataframe/dataframe.rb, line 290
# Returns the first +n+ rows (all rows when the frame has fewer) via the
# row accessor.
#
# NOTE(review): for n == 0 (or an empty frame) the range becomes 0..-1,
# which presumably selects every row — confirm against the row accessor.
def head (n=10)
  last = [@row_number, n].min - 1
  row[0..last]
end
# File lib/carray-dataframe/dataframe.rb, line 1050
# Counts the values of column +name+.
#
# Without +scale+ the frame is grouped by the column and each group's valid
# values are counted. With +scale+ a CAHistogram.int is built (with
# +options+ when given), filled from the column, and returned as a frame
# with the columns <name> (midpoints), <name>_L, <name>_R (bin edges) and
# :count.
def histogram (name, scale = nil, options = nil)
  if scale.nil?
    return group_by(name).table{ { :count => col(name).count_valid } }
  end
  hist = options ? CAHistogram.int(scale, options) : CAHistogram.int(scale)
  hist.increment(@column_data[name.to_s])
  hash = {
    name.to_s    => hist.midpoints[0],
    "#{name}_L"  => scale[0..-2],
    "#{name}_R"  => scale.shift(-1)[0..-2],
    :count       => hist[0..-2].to_ca,
  }
  CADataFrame.new(hash)
end
# File lib/carray-dataframe/dataframe.rb, line 276
# Returns the memoised CADataFrame::ILocAccessor for this frame, creating it
# on first use. The block parameter is accepted but not used.
def iloc (&block)
  @iloc ||= CADataFrame::ILocAccessor.new(self)
end
TO BE FIXED
# File lib/carray-dataframe/dataframe.rb, line 282
# Returns the row index as a CArray (to_ca of @row_index), or the sequence
# CArray.int(@row_number).seq when no row index is set.
def index
  return CArray.int(@row_number).seq unless @row_index
  @row_index.to_ca
end
# File lib/carray-dataframe/dataframe.rb, line 480
# Inserts a column named +name+ at position +pos+ of the column order and
# returns it.
#
# The data is +new_column+ when given, otherwise the result of the block
# (instance_exec'ed with self as its argument), otherwise an :object template
# of the first existing column. Non-CArray data is converted with to_ca.
# Raises unless the column is 1-dimensional with @row_number elements.
def insert_column (pos, name, new_column = nil, &block)
  name = name.to_s
  new_column ||=
    if block
      instance_exec(self, &block)
    else
      @column_data.first[1].template(:object)
    end
  new_column = new_column.to_ca unless new_column.is_a?(CArray)
  unless new_column.rank == 1 && new_column.size == @row_number
    raise "invalid shape of appended column"
  end
  @column_names.insert(pos, name)
  @column_data[name] = new_column
  new_column
end
# File lib/carray-dataframe/dataframe.rb, line 923 def inspect return ascii_table(8) end
# File lib/carray-dataframe/dataframe.rb, line 1012
# Delegates to cmp(:is_finite).
def is_finite
  cmp(:is_finite)
end
# File lib/carray-dataframe/dataframe.rb, line 1008
# Delegates to cmp(:is_masked).
def is_masked
  cmp(:is_masked)
end
# File lib/carray-dataframe/join.rb, line 26
# Joins this frame with +other_df+; the work is done by
# CADataFrame::Merge.join, which receives +opts+ unchanged.
def join(other_df,opts={})
  return CADataFrame::Merge.join(self, other_df, opts)
end
# File lib/carray-dataframe/dataframe.rb, line 271
# Returns the memoised CADataFrame::LocAccessor for this frame, creating it
# on first use.
def loc
  @loc ||= CADataFrame::LocAccessor.new(self)
end
# File lib/carray-dataframe/dataframe.rb, line 1031
# Re-aligns the frame to +reference+: idx = reference.matchup(key column) is
# used to project every other column (and the row index, unmasked to nil),
# while the key column itself is replaced by +reference+.
# Returns a new CADataFrame.
def matchup (keyname, reference)
  idx = reference.matchup(column(keyname))
  aligned = {}
  each_column_name do |name|
    aligned[name] = (name == keyname.to_s) ? reference : column(name).project(idx)
  end
  new_row_index = @row_index ? @row_index.project(idx).unmask(nil) : nil
  CADataFrame.new(aligned, index: new_row_index)
end
# File lib/carray-dataframe/dataframe.rb, line 1150
# Returns a one-row CADataFrame (index ["mean"]) holding col.mean of every
# column.
def mean
  averages = {}
  each_column { |name, col| averages[name] = [col.mean] }
  CADataFrame.new(averages, index: ["mean"])
end
# File lib/carray-dataframe/dataframe.rb, line 561
# Instance form of CADataFrame.merge with this frame as the first argument.
def merge (*args)
  CADataFrame.merge(self, *args)
end
# File lib/carray-dataframe/dataframe.rb, line 300
# Registers aliases: every key/value of +hash+ is converted with to_s and
# merged into @__methods__, which method_missing uses to map an alias name
# to a column name. Returns the updated table.
#
# NOTE(review): this shadows Object#method for CADataFrame instances.
def method (hash)
  aliases = hash.each_with_object({}) do |(key, value), memo|
    memo[key.to_s] = value.to_s
  end
  @__methods__.update(aliases)
end
# File lib/carray-dataframe/dataframe.rb, line 308
# Column access by method name. For a call without arguments the name is
# tried, in order, as a column name, as a column name with "_" replaced by
# "." (for R-style names), and as an alias registered in @__methods__.
# Everything else raises RuntimeError "no method '<name>' for CADataFrame".
#
# NOTE(review): there is no matching respond_to_missing?, and the error is
# a RuntimeError rather than NoMethodError.
def method_missing (name, *args)
  if args.size == 0
    name = name.to_s
    dotted = name.gsub(/_/,'.')   ### For R
    return @column_data[name] if has_column?(name)
    return @column_data[dotted] if has_column?(dotted)
    return @column_data[@__methods__[name]] if @__methods__.include?(name)
  end
  raise "no method '#{name}' for CADataFrame"
end
# File lib/carray-dataframe/dataframe.rb, line 748
# Returns a new CADataFrame in which every column is column.object; the row
# index is passed on unchanged.
def objectify
  converted = {}
  each_column_name { |name| converted[name] = column(name).object }
  CADataFrame.new(converted, index: @row_index)
end
# File lib/carray-dataframe/dataframe.rb, line 690
# Returns a new frame with the rows reordered by CA.sort_addr over the sort
# keys. The keys are the named columns or, when no names are given, the
# CArray or Array of keys returned by the block (instance_exec'ed).
#
# NOTE(review): with neither names nor a block (or a block returning another
# type) CA.sort_addr is called without arguments.
def order_by (*names, &block)
  list = nil
  if !names.empty?
    list = @column_data.values_at(*names.map { |s| s.to_s })
  elsif block
    ret = instance_exec(&block)
    list =
      case ret
      when CArray then [ret]
      when Array  then ret
      end
  end
  reorder { CA.sort_addr(*list) }
end
# File lib/carray-dataframe/pivot.rb, line 8
# Creates a CADataFramePivot for this frame and the two given names.
def pivot (name1, name2)
  CADataFramePivot.new(self, name1, name2)
end
# File lib/carray-dataframe/dataframe.rb, line 502
# Inserts a column named +name+ as the first column and returns it.
#
# The data is +new_column+ when given, otherwise the result of the block
# (instance_exec'ed with self as its argument), otherwise an :object template
# of the first existing column. Non-CArray data is converted with to_ca.
# Raises unless the column is 1-dimensional with @row_number elements.
def prepend_column (name, new_column = nil, &block)
  name = name.to_s
  new_column ||=
    if block
      instance_exec(self, &block)
    else
      @column_data.first[1].template(:object)
    end
  new_column = new_column.to_ca unless new_column.is_a?(CArray)
  unless new_column.rank == 1 && new_column.size == @row_number
    raise "invalid shape of appended column"
  end
  @column_names.unshift(name)
  @column_data[name] = new_column
  new_column
end
# File lib/carray-dataframe/dataframe.rb, line 437
# Renames column +name1+ to +name2+ in place (both converted with to_s),
# keeping its position in the column order. Returns the renamed column.
# Raises when +name1+ is not a column.
def rename (name1, name2)
  old_name = name1.to_s
  idx = @column_names.index(old_name)
  raise "unknown column name #{name1}" unless idx
  new_name = name2.to_s
  @column_names[idx] = new_name
  @column_data[new_name] = @column_data.delete(old_name)
end
Transformation
# File lib/carray-dataframe/dataframe.rb, line 681
# Evaluates the block (instance_exec) to obtain a row selector and returns a
# new CADataFrame whose columns and row index (if any) are indexed with it.
def reorder (&block)
  selector = instance_exec(&block)
  rearranged = {}
  each_column_name { |name| rearranged[name] = column(name)[selector] }
  new_row_index = @row_index ? @row_index[selector] : nil
  CADataFrame.new(rearranged, index: new_row_index)
end
# File lib/carray-dataframe/dataframe.rb, line 221
# Takes over the state of +other+ (column names, column data, row index, row
# number and alias table) without copying any of it. Returns self.
def replace (other)
  @column_names = other.column_names
  @column_data  = other.column_data
  @row_index    = other.row_index
  @row_number   = other.row_number
  @__methods__  = other.__methods__
  self
end
# File lib/carray-dataframe/dataframe.rb, line 590
# Yields (name, column) for every column and builds a new CADataFrame from
# the block's results. A column whose block call raises a StandardError is
# left out of the result. The result is built without an index argument.
def resample (&block)
  resampled = {}
  each_column_name do |name|
    begin
      resampled[name] = yield(name, column(name))
    rescue
      # best effort: columns that cannot be resampled are dropped
    end
  end
  CADataFrame.new(resampled)
end
# File lib/carray-dataframe/dataframe.rb, line 707
# Returns a new CADataFrame with every column, and the row index if present,
# reversed.
def reverse
  flipped = {}
  each_column_name { |name| flipped[name] = column(name).reverse }
  new_row_index = @row_index ? @row_index.reverse : nil
  CADataFrame.new(flipped, index: new_row_index)
end
# File lib/carray-dataframe/dataframe.rb, line 1315
# Serialises the frame with Marshal.dump into +filename+.
#
# The file is opened with File.open in binary mode: Marshal data is binary
# (text mode can corrupt it on platforms that translate newlines), and
# Kernel#open would treat a name starting with "|" as a command.
def save (filename)
  File.open(filename, "wb") {|io| Marshal.dump(self, io) }
end
# File lib/carray-dataframe/dataframe.rb, line 601
# Returns a new CADataFrame restricted to the given columns and, when a
# block is given, to the rows selected by the block's result
# (instance_exec'ed).
#
# Columns are given as Strings, Symbols or Regexps matched against the
# column names; other types raise. Without column arguments all columns are
# kept. Unknown column names raise "unknown column '<name>'".
def select (*columns, &block)
  names =
    if columns.empty?
      @column_names
    else
      chosen = []
      columns.each do |c|
        case c
        when String then chosen << c
        when Symbol then chosen << c.to_s
        when Regexp then chosen.concat(@column_names.grep(c))
        else raise "invalid column specification"
        end
      end
      chosen
    end
  row = block ? instance_exec(&block) : nil
  new_columns = {}
  names.map(&:to_s).each do |name|
    raise "unknown column '#{name}'" unless column(name)
    new_columns[name] = column(name)[row]
  end
  CADataFrame.new(new_columns, index: @row_index ? @row_index[row] : nil)
end
# File lib/carray-dataframe/reference.rb, line 4
# Resolves a column selector to an Array of column names.
#
# nil             -> all column names
# Integer         -> the name at that position (raises when out of range)
# String / Symbol -> that name (raises when unknown)
# Array           -> the listed names, each validated; a one-element Array
#                    holding a Hash is re-dispatched with that Hash
# Range           -> a slice of the column order; bounds may be Integers,
#                    names (String / Symbol) or nil (open)
#
# Changes from the previous version:
# * the Integer guard tests the looked-up name (it tested the Integer
#   itself, which is always truthy, so it never fired);
# * names are located with Array#index (Array#search is not part of Ruby's
#   Array);
# * a Symbol is accepted as the end of a Range, like it is for the start;
# * an exclusive Range ending at position 0 now selects nothing instead of
#   wrapping around to the last column.
#
# NOTE(review): an exclusive range with an open end still stops before the
# last column (idx2 = -2), as before — confirm this is intended.
def select_columns (selector = nil)
  case selector
  when nil   ### all
    return @column_names
  when Integer
    name = @column_names[selector]
    raise "invalid column index" unless name
    return [name]
  when String, Symbol   ### "AAA"
    unless @column_names.include?(selector.to_s)
      raise "invalid column specified #{selector}"
    end
    return [selector.to_s]
  when Array            ### ["AAA", "BBB"]
    if selector.size == 1 && selector.first.is_a?(Hash)
      return select_columns(selector.first)
    end
    selector.each do |name|
      unless @column_names.include?(name.to_s)
        raise "invalid column specified #{name}"
      end
    end
    return selector.map(&:to_s)
  when Range            ### "AAA".."BBB", 0..1
    # Translates one range bound into a position in @column_names.
    locate = lambda do |bound|
      case bound
      when Integer
        bound
      when String, Symbol
        pos = @column_names.index(bound.to_s)
        raise "can't find column #{bound}" unless pos
        pos
      else
        raise "invalid column specified #{bound}"
      end
    end
    idx1 = selector.begin.nil? ? 0 : locate.call(selector.begin)
    if selector.end.nil?
      idx2 = selector.exclude_end? ? -2 : -1
      return @column_names[idx1..idx2]
    end
    idx2 = locate.call(selector.end)
    if selector.exclude_end?
      return @column_names[idx1...idx2]
    else
      return @column_names[idx1..idx2]
    end
  else
    raise "invalid column selector #{selector}"
  end
end
# File lib/carray-dataframe/dataframe.rb, line 195
# Sets the row index.
#
# index   :: nil leaves the index untouched; a String / Symbol names an
#            existing column (raises when unknown); anything else is
#            converted with to_ca and used as the index
# drop    :: when the index comes from a column, also remove that column
#            (default true)
# inplace :: true modifies and returns the receiver; false (default) applies
#            the change to the frame returned by to_df and returns that
def set_index (index, drop: true, inplace: false)
  return to_df.set_index(index, drop: drop, inplace: true) unless inplace
  case index
  when nil
    # keep the current row index
  when String, Symbol
    index = index.to_s
    raise "can't find column named '#{index}'" unless @column_names.include?(index)
    if drop
      @row_index = @column_data.delete(index)
      @column_names.delete(index)
    else
      @row_index = @column_data[index]
    end
  else
    @row_index = index.to_ca
  end
  self
end
# File lib/carray-dataframe/dataframe.rb, line 1142
# Returns a one-row CADataFrame (index ["sum"]) holding col.sum of every
# column.
def sum
  totals = {}
  each_column { |name, col| totals[name] = [col.sum] }
  CADataFrame.new(totals, index: ["sum"])
end
# File lib/carray-dataframe/dataframe.rb, line 1251
# Collects column.summary for each named column, builds a CADataFrame from
# the list (indexed by the names as given) and returns its transpose.
#
# Names may be Strings or Symbols; they are converted with to_s for the
# lookup because column data is keyed by String (a Symbol previously looked
# up nil and failed with NoMethodError).
def summary (*names)
  data = names.map do |name|
    @column_data[name.to_s].summary
  end
  CADataFrame.new(data, index: names).transpose
end
# File lib/carray-dataframe/dataframe.rb, line 295
# Returns the last +n+ rows (all rows when the frame has fewer) via the row
# accessor.
#
# NOTE(review): for n == 0 the range becomes 0..-1, which presumably selects
# every row — confirm against the row accessor.
def tail (n=10)
  count = [@row_number, n].min
  row[-count..-1]
end
# File lib/carray-dataframe/converter.rb, line 31
# Converts the frame to an Array via CADFArray / to_ca. When the frame has a
# row index and +with_index+ is true, the index is included as a leading
# column named "".
def to_a (with_index: true)
  return ca.to_ca.to_a unless @row_index && with_index
  namelist = [""] + @column_names
  tbl = CADFArray.new(namelist, @column_data.clone.update("" => index))
  tbl.to_a
end
# File lib/carray-dataframe/dataframe.rb, line 931
# Implicit Array conversion: a one-element Array holding to_s.
def to_ary
  [to_s]
end
# File lib/carray-dataframe/dataframe.rb, line 764
# Shortcut for ca(*names).to_ca.
def to_ca (*names)
  ca(*names).to_ca
end
# File lib/carray-dataframe/converter.rb, line 41
# Writes the frame as CSV through CADFArray#to_csv, forwarding io, rs, sep,
# fill and the block.
#
# When the frame has a row index and +with_index+ is true, the index becomes
# a leading column named "index". CATimeIndex columns are converted with
# time.time_format(time_format) when a format is given, otherwise each time
# is converted with to_s.
#
# NOTE(review): other renderers call time_strftime for the same purpose —
# confirm time_format is the intended method here.
def to_csv (io = "", rs: $/, sep: ",", fill: "", with_index: true, time_format: nil, &block)
  columns = @column_data.clone
  if @row_index and with_index
    namelist = ["index"] + @column_names
    columns.update("index" => index)
  else
    namelist = @column_names
  end
  columns.each do |k, v|
    next unless v.is_a?(CATimeIndex)
    columns[k] =
      if time_format
        v.time.time_format(time_format)
      else
        v.time.convert(:object) { |t| t.to_s }
      end
  end
  tbl = CADFArray.new(namelist, columns)
  tbl.to_csv(io, rs: rs, sep: sep, fill: fill, &block)
end
# File lib/carray-dataframe/converter.rb, line 62
# Converts the frame to a Daru::DataFrame (the "daru" library is required on
# first use). Every column is passed as column.object.unmask(nil).to_a, the
# column order is kept, and the row index is passed along when present.
def to_daru
  require "daru"
  columns = {}
  each_column_name do |name|
    columns[name] = column(name).object.unmask(nil).to_a
  end
  unless @row_index
    return Daru::DataFrame.new(columns, order: @column_names)
  end
  Daru::DataFrame.new(columns, index: @row_index.to_a, order: @column_names)
end
Conversions
# File lib/carray-dataframe/dataframe.rb, line 740
# Returns a new CADataFrame with the same columns and row index, then calls
# detouch on it.
def to_df
  copied = {}
  each_column_name { |name| copied[name] = column(name) }
  CADataFrame.new(copied, index: @row_index).detouch
end
# File lib/carray-dataframe/dataframe.rb, line 768
# Converts the frame to a Hash of column name => Array (column.to_a). When a
# row index is set it is stored first under "index".
#
# NOTE(review): the row index is stored as it is, not as to_a like the
# columns — confirm this is intended.
def to_hash
  hash = {}
  hash["index"] = @row_index if @row_index
  @column_data.each { |k, v| hash[k] = v.to_a }
  hash
end
# File lib/carray-dataframe/to_html.rb, line 3
# Renders the frame as an HTML <table> preceded by a "DataFrame: ..." line.
#
# threshold   :: Integer limit (default 8); when the frame has more rows,
#                only the head and tail rows are shown with a "..." row in
#                between. A non-Integer shows all rows.
# time_format :: strftime template for CATimeIndex columns / row index
#                (default "%F %T%:z")
# index       :: when true, a leading column shows the row index (or the row
#                sequence number if there is no row index)
#
# Two layouts exist: one when every name and cell is ASCII-only, and one
# that pads by display width (__strwidth__) otherwise.
#
# Change from the previous version: the per-cell padding table is only
# computed when there are data rows (it used an unset `datawidth` and raised
# for an empty frame with non-ASCII column names).
#
# NOTE(review): cell and header text is inserted without HTML escaping.
# NOTE(review): the default format goes through time_format(...) while an
# explicit one goes through time_strftime(...) — confirm both exist and
# agree.
def to_html (threshold = 8, time_format: nil, index: true)
  columns = @column_data.clone
  @column_names.each do |name|
    if columns[name].is_a?(CATimeIndex)
      if time_format
        columns[name] = columns[name].time.time_strftime(time_format)
      else
        columns[name] = columns[name].time.time_format("%F %T%:z")
      end
    end
  end
  if index
    namelist = [" "] + @column_names
    if @row_index
      if @row_index.is_a?(CATimeIndex)
        if time_format
          row_index = @row_index.time.time_strftime(time_format)
        else
          row_index = @row_index.time.time_format("%F %T%:z")
        end
      else
        row_index = @row_index
      end
      tbl = CADFArray.new(namelist, columns.update(" " => row_index))
    else
      tbl = CADFArray.new(namelist, columns.update(" " => CArray.int(@row_number).seq))
    end
  else
    namelist = @column_names
    tbl = CADFArray.new(namelist, columns)
  end
  if threshold.is_a?(Integer) and @row_number > threshold
    # head rows, an ellipsis row, then tail rows
    list = tbl[0..(threshold/2),nil].to_a
    list.push namelist.map { "..." }
    list.push(*tbl[-threshold/2+1..-1,nil].to_a)
    tbl = list.to_ca
  end
  datastr = tbl.convert {|c| __obj_to_string__(c) }.unmask("")
  # Per-column flags: true where a cell / name contains non-ASCII characters.
  datamb  = datastr.convert(:boolean, &:"ascii_only?").not.sum(0).ne(0)
  namemb  = namelist.to_ca.convert(:boolean) {|c| c.to_s.ascii_only? }.eq(0)
  mb      = datamb.or(namemb)
  namelen = namelist.map(&:length).to_ca
  datalen = datastr.convert(&:length)
  if mb.max == 0
    if datalen.size == 0
      lengths = namelen.to_a
    else
      lengths = datalen.max(0).pmax(namelen).to_a
    end
    table_in = "<table>"
    header = "<thead><tr>" +
             [namelist, lengths].transpose.map{|name, width|
               "<th>#{name.to_s.ljust(width)}</th>"
             }.join() + "</tr></thead>"
    body_in = "<tbody>"
    ary = [table_in, header, body_in]
    if datalen.size > 0
      datastr[:i,nil].each_with_index do |blk, i|
        list = blk.flatten.to_a
        ary << "<tr>" + [list, lengths].transpose.map {|value, width|
          "<td>#{value.ljust(width)}</td>"
        }.join() + "</tr>"
      end
    end
    ary << "</tbody>"
    ary << "</table>"
    return "DataFrame: rows#=#{@row_number}: \n" + ary.join("\n")
  else
    namewidth = namelist.to_ca.convert{|c| __strwidth__(c.to_s) }
    if datalen.size == 0
      maxwidth = namewidth
      padding  = nil
    else
      datawidth = datastr.convert{|c| __strwidth__(c.to_s) }
      maxwidth  = datawidth.max(0).pmax(namewidth)
      # ljust counts characters, so widen by (chars - display width) per cell
      padding   = maxwidth[:*,nil] - datawidth + datalen
    end
    table_in = "<table>"
    header = "<thead><tr>" +
             [namelist, maxwidth.to_a].transpose.map{|name, width|
               "<th>#{name.to_s.ljust(width-__strwidth__(name.to_s)+name.to_s.length)}</th>"
             }.join() + "</tr></thead>"
    body_in = "<tbody>"
    ary = [table_in, header, body_in]
    if datalen.size > 0
      datastr[:i,nil].each_with_addr do |blk, i|
        list = blk.flatten.to_a
        ary << "<tr>" + list.map.with_index {|value, j|
          "<td>#{value.ljust(padding[i,j])}</td>"
        }.join() + "</tr>"
      end
    end
    ary << "</tbody>"
    ary << "</table>"
    return "DataFrame: row#=#{@row_number}: \n" + ary.join("\n")
  end
end
# File lib/carray-dataframe/dataframe.rb, line 927
# String form of the frame: whatever `ascii_table` returns.
def to_s
  ascii_table
end
# File lib/carray-dataframe/io.rb, line 88
# Loads the frame into an in-memory SQLite3 database table.
#
# If any column name contains a dot, a space or a hyphen, a temporary
# CADataFrame is built whose column names have those characters replaced by
# underscores, and that frame is exported instead.
#
# @param tablename [String] passed as `table:` to `to_sqlite3`
# @return whatever `to_sqlite3` returns
def to_sql(tablename)
  unsafe = /[\. \-]/
  needs_rename = @column_names.any? { |column_name| column_name =~ unsafe }
  return to_sqlite3(database: ":memory:", table: tablename) unless needs_rename

  renamed = {}
  each_column_name do |name|
    renamed[name.gsub(unsafe, '_')] = column(name)
  end
  CADataFrame.new(renamed).to_sqlite3(database: ":memory:", table: tablename)
end
# File lib/carray-dataframe/io.rb, line 84
# Forwards all keyword arguments to `to_sqlite3` on the object returned by
# `to_ca`, and returns its result.
def to_sqlite3(**options)
  to_ca.to_sqlite3(**options)
end
# File lib/carray-dataframe/converter.rb, line 75
# Writes the frame to an .xlsx file through the caxlsx (Axlsx) library.
#
# The data is taken from `to_df.objectify.unmask("=NA()")`. The first sheet
# row holds the column names; with `with_row_index` a leading "" header cell
# is added and each data row is prefixed by its row index.
#
# @param filename [String] path handed to `Axlsx::Package#serialize`
# @param sheet_name [String] worksheet name
# @param with_row_index [Boolean] whether to emit the row index column
# @yield [sheet] the worksheet, after all rows were added and before saving
# @return whatever `serialize` returns
def to_xlsx(filename, sheet_name: 'Sheet1', with_row_index: false, &block)
  require "caxlsx"
  package = Axlsx::Package.new
  package.use_shared_strings = true
  sheet = package.workbook.add_worksheet(name: sheet_name)
  frame = self.to_df.objectify.unmask("=NA()")
  if with_row_index
    sheet.add_row([""] + column_names)
    frame.each_row_with_row_index(with: Array) { |cells, label| sheet.add_row([label] + cells) }
  else
    sheet.add_row(column_names)
    frame.each_row(with: Array) { |cells| sheet.add_row(cells) }
  end
  yield sheet if block_given?
  package.serialize(filename)
end
# File lib/carray-dataframe/dataframe.rb, line 715
# Returns a new CADataFrame built from `ca.transpose`, indexed by the current
# column names.
#
# The new column names are, in order of preference: the given `columns`
# (each converted with to_s), the stringified row index, or a generated
# sequence starting at "a" and advanced with :succ.
#
# @param columns [Enumerable, nil] explicit names for the new columns
# @return [CADataFrame]
def transpose(columns: nil)
  labels =
    if columns
      columns.map(&:to_s)
    elsif @row_index
      @row_index.convert(:object) { |label| label.to_s }
    else
      CArray.object(@row_number).seq("a", :succ)
    end
  CADataFrame.new(ca.transpose, index: @column_names.to_ca, columns: labels)
end
# File lib/carray-dataframe/dataframe.rb, line 642
# Calls `unmask!(value)` on the frame returned by `to_df` and returns the
# result, leaving the call on the receiver itself to `unmask!`.
#
# @param value fill value forwarded to `unmask!` (default nil)
def unmask(value = nil)
  copy = to_df
  copy.unmask!(value)
end
Maintenance
# File lib/carray-dataframe/dataframe.rb, line 635
# Calls `unmask(value)` on every column of this frame and returns self.
#
# @param value fill value forwarded to each column's `unmask` (default nil)
# @return [self]
def unmask!(value = nil)
  each_column_name { |name| column(name).unmask(value) }
  self
end
# File lib/carray-dataframe/dataframe.rb, line 553
# Returns a new CADataFrame that has the same column names as this one, each
# bound to a fresh `CArray.object(0)`.
#
# @return [CADataFrame]
def vacant_copy
  empty_columns = {}
  each_column_name { |key| empty_columns[key] = CArray.object(0) }
  CADataFrame.new(empty_columns)
end
# File lib/carray-dataframe/dataframe.rb, line 406
# Overwrites column values in place according to a mask frame.
#
# For every column name of `mask` that this frame also has:
# * with one extra argument, elements selected by the negated boolean mask
#   are assigned args[0];
# * with two extra arguments, elements selected by the negated mask are
#   assigned args[0] and then elements selected by the mask are assigned
#   args[1].
# Any other number of extra arguments leaves the columns untouched.
#
# @param mask an object answering `column_names` and `column(key)`
# @return the receiver of the `each` call, i.e. `mask.column_names`
def where(mask, *args)
  mask.column_names.each do |key|
    next unless has_column?(key)

    if args.size == 1 || args.size == 2
      column(key)[mask.column(key).boolean.not] = args[0]
    end
    column(key)[mask.column(key).boolean] = args[1] if args.size == 2
  end
end
Private Instance Methods
# File lib/carray-dataframe/dataframe.rb, line 813
# Formats a single cell value for display: Floats with "%.6g", nil as the
# literal text "nil", anything else through to_s.
#
# @param obj [Object]
# @return [String]
def __obj_to_string__(obj)
  return format("%.6g", obj) if obj.is_a?(Float)
  return "nil" if obj.nil?

  obj.to_s
end
# File lib/carray-dataframe/dataframe.rb, line 824
# Estimates the display width of a string: ASCII-only strings count one
# column per character; otherwise every character whose encoding takes more
# than one byte counts as two columns and the rest as one.
#
# @param string [String]
# @return [Integer]
def __strwidth__(string)
  return string.length if string.ascii_only?

  string.each_char.sum { |char| char.bytesize > 1 ? 2 : 1 }
end
# File lib/carray-dataframe/dataframe.rb, line 170
# Builds the name => column hash from an array-shaped data source, using
# @column_names for the keys.
#
# * When the first element is a CArray, `array` is treated as a list of
#   columns: its size must equal the number of column names, and columns
#   whose rank is not 1 are flattened. Keys are the stringified names.
# * When the first element is an Array, `array` is treated as a list of rows:
#   it is transposed and each resulting column is converted with `to_ca`.
#   Keys are the names exactly as stored in @column_names.
#
# @param array [Array<CArray>, Array<Array>]
# @return [Hash]
# @raise [RuntimeError] on a column-count mismatch or unsupported content
def array_to_columns(array)
  new_columns = {}
  case array.first
  when CArray
    # Compare against the argument itself; the earlier code referenced `data`,
    # which is neither a parameter nor a local of this method.
    if @column_names.size != array.size
      raise "mismatch between 'columns' and table columns"
    end
    @column_names.each_with_index do |key, k|
      column = array[k]
      column = column.flatten unless column.rank == 1
      new_columns[key.to_s] = column
    end
  when Array
    table = array.transpose
    @column_names.each_with_index do |key, k|
      new_columns[key] = table[k].to_ca
    end
  else
    raise "invalid array content for CADataFrame"
  end
  new_columns
end
# File lib/carray-dataframe/dataframe.rb, line 1018
# Sends `method` with `argv` to the object returned by `ca` and wraps the
# result in a new CADataFrame that reuses this frame's column names.
#
# @param method [Symbol, String] name of the method to send
# @param argv arguments forwarded to that method
# @return [CADataFrame]
def cmp(method, *argv)
  compared = ca.send(method, *argv)
  CADataFrame.new(compared, columns: @column_names)
end
# File lib/carray-dataframe/dataframe.rb, line 137
# Normalizes a name => data mapping into a name => column hash.
#
# * CArray values are used as they are (flattened when their rank is not 1).
# * Array values are converted with `to_ca`; if the result is not rank 1, an
#   object array with one slot per top-level element is built instead.
# * Any other value is converted with `to_ca` and flattened when needed.
#
# Side effect: an entry whose key is "" is not returned as a column but
# stored in @row_index. All other keys are stringified.
#
# @param columns [#each_with_index] pairs of (name, data)
# @return [Hash]
# @raise [RuntimeError] when a value of another type cannot be converted
def columns_to_columns(columns)
  new_columns = {}
  columns.each_with_index do |(key, col), k|
    case col
    when CArray
      column = col
      column = column.flatten unless column.rank == 1
    when Array
      column = col.to_ca
      unless column.rank == 1
        # Nested arrays: keep each top-level element as one object cell.
        list = col.clone
        column = CArray.object(list.size).convert { list.shift }
      end
    else
      begin
        column = col.to_ca
        column = column.flatten unless column.rank == 1
      rescue StandardError
        raise "#{k}-th column can't be converted to CArray"
      end
    end
    if key == ""
      @row_index = column
    else
      new_columns[key.to_s] = column
    end
  end
  new_columns
end
# File lib/carray-dataframe/dataframe.rb, line 259
# Stores `col` in @column_data under the column designated by `spec`.
#
# @param spec [Integer, String, Symbol] an index into @column_names, or a
#   column name (converted with to_s)
# @param col the column data to store
# @return the stored `col`
# @raise [RuntimeError] when `spec` is of another type, or is an Integer that
#   does not resolve to a column name
def set_column(spec, col)
  case spec
  when Integer
    name = @column_names[spec]
    # An out-of-range index yields nil; storing under a nil key would silently
    # corrupt @column_data, so reject it.
    raise "invalid column specifier" if name.nil?

    @column_data[name] = col
  when String, Symbol
    @column_data[spec.to_s] = col
  else
    raise "invalid column specifier"
  end
end
# File lib/carray-dataframe/dataframe.rb, line 127
# Splits a two-dimensional table into a name => column hash: the i-th entry
# of @column_names (stringified) is mapped to `table[nil, i].to_ca`.
#
# @param table an object indexable as `table[nil, i]`
# @return [Hash]
def table_to_columns(table)
  @column_names.each_with_index.each_with_object({}) do |(name, position), result|
    result[name.to_s] = table[nil, position].to_ca
  end
end