module SolveBio::Tabulate
Constants
- DataRow
- FORMAT_DEFAULTS
- GRID_LINE
- INVISIBILE_CODES
- INVTYPES
- Line
- PIPE_DATAROW
- SIMPLE_DATAROW
- SIMPLE_LINE
- TABLE_FORMATS
- TYPES
- TableFormat
Public Class Methods
# File lib/solvebio/tabulate.rb, line 7
# Width of the terminal in columns, read from the COLUMNS environment
# variable.
#
# Returns the integer value of COLUMNS when it is positive, otherwise 80
# (COLUMNS unset, not numeric, zero or negative).
def self.tty_cols
  # nil.to_i and "abc".to_i are both 0, so neither a `|| 80` fallback nor
  # a rescue is needed: every unusable value ends up as 0 and is replaced
  # by the default below.
  cols = ENV['COLUMNS'].to_i
  cols > 0 ? cols : 80
end
Public Instance Methods
Return a segment of a horizontal line with optional colons which indicate column's alignment (as in `pipe` output format).
# File lib/solvebio/tabulate.rb, line 327
# Build one column's segment of a horizontal rule, marking the column
# alignment with colons (as in the `pipe` output format).
#
# linefmt  - object whose #hline gives the fill string; only its first
#            character is used.
# align    - 'right' or 'decimal' (colon on the right), 'center' (colon
#            on both sides), 'left' (colon on the left); anything else
#            gives a plain run of fill characters.
# colwidth - total width of the segment.
#
# Returns the segment String.
def _line_segment_with_colons(linefmt, align, colwidth)
  fill = linefmt.hline[0]
  # String#* raises ArgumentError on a negative count, so a column too
  # narrow to hold the colons gets zero fill characters instead of
  # blowing up.
  run = lambda { |count| fill * [count, 0].max }

  case align
  when 'right', 'decimal'
    run.call(colwidth - 1) + ':'
  when 'center'
    ':' + run.call(colwidth - 2) + ':'
  when 'left'
    ':' + run.call(colwidth - 1)
  else
    run.call(colwidth)
  end
end
The least generic type, one of NilClass, Fixnum, Float, or String.
_type(nil) => NilClass
_type("foo") => String
_type("1") => Fixnum
_type("\x1b[31m42\x1b[0m") => Fixnum
# File lib/solvebio/tabulate.rb, line 126
# Classify one cell value as NilClass, Fixnum, Float or String, taking
# the first of those (in that order) that fits.
#
# obj           - the value to classify.
# has_invisible - when true, a String value has #strip_invisible applied
#                 before it is examined.
#
# Returns one of the classes NilClass, Fixnum, Float or String.
def _type(obj, has_invisible=true)
  obj = obj.strip_invisible if has_invisible and obj.kind_of?(String)

  return NilClass if obj.nil?
  return Fixnum if obj.kind_of?(Fixnum) or obj.int?
  return Float if obj.kind_of?(Float) or obj.number?
  String
end
[string] -> [padded_string]
align_column( ["12.345", "-1234.5", "1.23", "1234.5", "1e+234", "1.0e234"], "decimal") => [' 12.345 ', '-1234.5 ', ' 1.23 ', ' 1234.5 ', ' 1e+234 ', ' 1.0e234']
# File lib/solvebio/tabulate.rb, line 148
# Pad every cell of one column to a common width.
#
# strings       - the column's cells; each is converted with #to_s.
# alignment     - 'right', 'center', 'decimal'; anything else is treated
#                 as left alignment.
# minwidth      - lower bound for the resulting cell width.
# has_invisible - when true, widths are measured with #visible_width
#                 instead of #size; also forwarded to the padding method.
#
# Returns an Array of padded Strings.
def align_column(strings, alignment, minwidth=0, has_invisible=true)
  if alignment == 'decimal'
    # Line up the decimal points: right-fill each cell so that all cells
    # have as many characters after the point as the longest one.
    fractions = strings.map { |cell| cell.to_s.afterpoint }
    widest = fractions.max
    strings = strings.zip(fractions).map do |cell, fraction|
      cell.to_s + " " * (widest - fraction)
    end
  else
    strings = strings.map { |cell| cell.to_s.strip }
  end

  pad_methods = { 'right' => :padleft, 'decimal' => :padleft, 'center' => :padboth }
  padfn = pad_methods.fetch(alignment, :padright)
  width_fn = has_invisible ? :visible_width : :size

  maxwidth = [strings.map { |cell| cell.send(width_fn) }.max, minwidth].max
  strings.map { |cell| cell.send(padfn, maxwidth, has_invisible) }
end
# File lib/solvebio/tabulate.rb, line 231
# Pad a header cell to the given width.
#
# header    - the header String.
# alignment - "left" pads on the right, "center" pads on both sides;
#             any other value pads on the left.
# width     - target width passed to the padding method.
#
# Returns the padded header.
def align_header(header, alignment, width)
  case alignment
  when "left"
    header.padright(width)
  when "center"
    header.padboth(width)
  else
    header.padleft(width)
  end
end
Return a string which represents a horizontal line.
# File lib/solvebio/tabulate.rb, line 319
# Render a horizontal rule.
#
# colwidths - Array of column widths, not counting padding.
# padding   - padding applied on each side of a cell; the rule segment
#             for a column spans width + 2 * padding fill strings.
# first     - text that starts the line.
# fill      - string repeated to draw each segment.
# sep       - text placed between segments.
# last      - text that ends the line.
#
# Returns whatever build_row returns for the generated segments.
def build_line(colwidths, padding, first, fill, sep, last)
  segments = colwidths.map do |width|
    fill * (width + 2 * padding)
  end
  # The segments already include the padding, hence padding 0 here.
  build_row(segments, 0, first, sep, last)
end
Return a string which represents a row of data cells.
# File lib/solvebio/tabulate.rb, line 297
# Render one row of cells as a single line of text.
#
# cells   - Array of cell Strings.
# padding - number of spaces added on each side of every cell.
# first   - text that starts the row.
# sep     - text placed between cells.
# last    - text that ends the row.
#
# Returns the row with trailing whitespace removed. A row longer than
# Tabulate.tty_cols is cut short and closed with `last`; an ' ... '
# marker is inserted before `last` unless the final cell ends with a
# space or a dash.
def build_row(cells, padding, first, sep, last)
  margin = ' ' * padding
  body = cells.map { |cell| margin + cell + margin }.join(sep)
  line = (first + body + last).rstrip

  # Never let a row wrap: anything wider than the terminal is truncated.
  limit = Tabulate.tty_cols
  return line unless line.size > limit

  final_cell = cells[-1]
  marker = (final_cell.end_with?(' ') or final_cell.end_with?('-')) ? '' : ' ... '
  kept = line[0..limit - marker.size - 2]
  "#{kept}#{marker}#{last}"
end
The least generic type all column values are convertible to.
column_type(["1", "2"]) => Fixnum column_type(["1", "2.3"]) => Float column_type(["1", "2.3", "four"]) => String column_type(["four", '\u043f\u044f\u0442\u044c']) => String column_type([nil, "brux"]) => String column_type([1, 2, nil]) => Fixnum
# File lib/solvebio/tabulate.rb, line 193
# Determine the type of a whole column.
#
# strings       - the column's cell values.
# has_invisible - forwarded to _type for every cell.
#
# Returns the result of combining Fixnum with each cell's _type through
# more_generic; an empty column therefore yields Fixnum.
def column_type(strings, has_invisible=true)
  strings.inject(Fixnum) do |widest, cell|
    more_generic(_type(cell, has_invisible), widest)
  end
end
Format a value according to its type.
Unicode is supported:
hrow = ["\u0431\u0443\u043a\u0432\u0430", "\u0446\u0438\u0444\u0440\u0430"]
tbl = [["\u0430\u0437", 2], ["\u0431\u0443\u043a\u0438", 4]]
good_result = "\\u0431\\u0443\\u043a\\u0432\\u0430 \\u0446\\u0438\\u0444\\u0440\\u0430\\n------- -------\\n\\u0430\\u0437 2\\n\\u0431\\u0443\\u043a\\u0438 4"
tabulate(tbl, hrow) == good_result => true
# File lib/solvebio/tabulate.rb, line 216
# Format a single cell value according to the type of its column.
#
# val        - the cell value.
# valtype    - the column type as a class (the value column_type
#              returned), not an instance.
# floatfmt   - printf-style conversion without the leading '%', for
#              example "g" or ".2f"; applied to cells of Float columns.
# missingval - String returned for nil cells.
#
# Returns a String.
def format(val, valtype, floatfmt, missingval="")
  return missingval if val.nil?

  # valtype is a class, so it has to be compared with ==;
  # `valtype.kind_of?(Float)` is false even for Float itself, which
  # meant floatfmt was never applied.
  return val.to_s unless valtype == Float

  begin
    number = Float(val)
  rescue ArgumentError, TypeError
    # Not parseable as a number as-is (for instance text wrapped in
    # terminal colour codes): leave the cell text untouched.
    return val.to_s
  end
  "%#{floatfmt}" % number
end
Produce a plain-text representation of the table.
# File lib/solvebio/tabulate.rb, line 343
# Produce a plain-text representation of the table.
#
# fmt       - table format. Read through #lineabove, #linebelowheader,
#             #linebetweenrows, #linebelow (line descriptions responding
#             to #start, #hline, #sep, #last, or nil), #headerrow and
#             #datarow (row descriptions responding to #start, #sep,
#             #last, or nil), #padding, #usecolons, #with_header_hide
#             and #without_header_hide (lists of line names, as
#             Strings, that must not be drawn).
# headers   - Array of header cells, already aligned; may be empty.
# rows      - Array of rows, each an Array of already aligned cells.
# colwidths - Array of column widths, not counting padding.
# colaligns - Array of column alignments; only used when fmt.usecolons
#             is set.
#
# Returns the rendered lines joined with "\n".
def format_table(fmt, headers, rows, colwidths, colaligns)
  headers ||= []
  rows ||= []

  # An empty Array is truthy in Ruby, so the presence of headers has to
  # be tested with empty?.
  has_headers = !headers.empty?
  hidden = (has_headers ? fmt.with_header_hide : fmt.without_header_hide) || []
  pad = fmt.padding || 0
  datarow = fmt.datarow || SIMPLE_DATAROW
  headerrow = fmt.headerrow || datarow

  # A line is drawn when the format defines it and it is NOT hidden.
  shown = lambda { |name, line| line && !hidden.member?(name) }
  # Line descriptions are read through their accessors: destructuring
  # assignment does not unpack a Struct.
  rule = lambda do |line|
    build_line(colwidths, pad, line.start, line.hline, line.sep, line.last)
  end
  data = lambda do |row|
    build_row(row, pad, datarow.start, datarow.sep, datarow.last)
  end

  lines = []
  lines << rule.call(fmt.lineabove) if shown.call('lineabove', fmt.lineabove)

  if has_headers
    lines << build_row(headers, pad, headerrow.start, headerrow.sep, headerrow.last)
  end

  below_header = fmt.linebelowheader
  if shown.call('linebelowheader', below_header)
    if fmt.usecolons
      # One segment per column; the segments already span the padding.
      segs = colwidths.zip(colaligns).map do |width, align|
        _line_segment_with_colons(below_header, align, width + 2 * pad)
      end
      lines << build_row(segs, 0, below_header.start, below_header.sep, below_header.last)
    else
      lines << rule.call(below_header)
    end
  end

  if !rows.empty? && shown.call('linebetweenrows', fmt.linebetweenrows)
    # every row but the last is followed by a separating line
    rows[0..-2].each do |row|
      lines << data.call(row)
      lines << rule.call(fmt.linebetweenrows)
    end
    lines << data.call(rows[-1])
  else
    rows.each { |row| lines << data.call(row) }
  end

  # The closing line is drawn once, after all rows.
  lines << rule.call(fmt.linebelow) if shown.call('linebelow', fmt.linebelow)

  lines.join("\n")
end
# File lib/solvebio/tabulate.rb, line 179
# Pick the more generic of two column types.
#
# Each type is looked up in TYPES; a type that has no entry there counts
# as 4. The larger of the two numbers is mapped back through INVTYPES.
def more_generic(type1, type2)
  ranks = [type1, type2].map { |type| TYPES[type] || 4 }
  INVTYPES[ranks.max]
end
Transform a supported data type to an Array of Arrays, and an Array of headers.
Supported tabular data types: * Array-of-Arrays or another Enumerable of Enumerables * Hash of Enumerables The first row can be used as headers if headers="firstrow", column indices can be used as headers if headers="keys".
# File lib/solvebio/tabulate.rb, line 254
# Transform a supported data type to an Array of rows and an Array of
# headers.
#
# tabular_data - either an object responding to #keys and #values (a
#                Hash whose values are the columns), or an Enumerable
#                of rows.
# headers      - an Array of headers, or one of the Strings "keys"
#                (Hash keys, or column indices for row data, become the
#                headers) and "firstrow" (the first row is removed from
#                the data and used as headers).
#
# The caller's data is never modified.
#
# Returns [rows, headers]. When there are both headers and rows and the
# headers are fewer than the cells of the first row, empty headers are
# added in front.
# Raises ArgumentError when tabular_data is neither Hash-like nor
# Enumerable.
def normalize_tabular_data(tabular_data, headers)
  if tabular_data.respond_to?(:keys) and tabular_data.respond_to?(:values)
    # Hash of columns: transpose into rows, filling short columns with
    # nil (what Python's izip_longest does).
    columns = tabular_data.values.map { |column| Array(column) }
    height = columns.map { |column| column.size }.max || 0
    rows = (0...height).map { |i| columns.map { |column| column[i] } }
    headers = tabular_data.keys.map { |key| key.to_s } if headers == "keys"
  elsif tabular_data.kind_of?(Enumerable)
    rows = tabular_data.to_a
    if headers == "keys" and not rows.empty?
      # keys are the column indices of the first row
      headers = (0...rows[0].size).map { |i| i.to_s }
    end
  else
    raise ArgumentError, "tabular data doesn't appear to be a Hash or Array"
  end

  # take headers from the first row if necessary
  if headers == "firstrow" and not rows.empty?
    headers = rows[0].map { |cell| cell.to_s }
    # slice instead of shift: rows may be the caller's own Array
    rows = rows[1..-1]
  end

  # A selector that could not be resolved (no rows to take the headers
  # from) means there are no headers.
  headers = [] if headers == "keys" or headers == "firstrow"

  # pad with empty headers for initial columns if necessary
  unless headers.empty? or rows.empty?
    missing = rows[0].size - headers.size
    headers = [''] * missing + headers if missing > 0
  end

  return rows, headers
end
Simulate Python's multi-parameter zip function. Ruby's zip function, like Perl's, expects each arg to have dimension 2.
# File lib/solvebio/tabulate.rb, line 96
# Simulate Python's multi-parameter zip: turn an Array of columns into
# an Array of rows.
#
# args - Array of columns (each converted with #to_a).
#
# Returns an Array of rows; row j holds element j of every column. As
# with Python's zip, the result is as long as the shortest column, and
# an empty `args` gives an empty Array.
def python_zip(args)
  columns = args.map { |column| column.to_a }
  length = columns.map { |column| column.size }.min || 0
  (0...length).map do |j|
    columns.map { |column| column[j] }
  end
end
# File lib/solvebio/tabulate.rb, line 104
# Construct a TableFormat whose data cells are joined by `separator`.
#
# The format has no horizontal lines, no padding, no colons, nothing
# hidden and no dedicated header row description.
def simple_separated_format(separator)
  # FIXME? python code hard-codes separator = "\n" below.
  row = DataRow.new('', separator, '')
  TableFormat.new(
    lineabove: nil,
    linebelowheader: nil,
    linebetweenrows: nil,
    linebelow: nil,
    headerrow: nil,
    datarow: row,
    padding: 0,
    usecolons: false,
    with_header_hide: [],
    without_header_hide: []
  )
end
Construct a simple TableFormat with columns separated by a separator.
tsv = simple_separated_format("\t") tabulate([["foo", 1], ["spam", 23]], [], [], true, tsv) => "foo 1\nspam 23"
# File lib/solvebio/tabulate.rb, line 410
# Render tabular data as plain text.
#
# tabular_data - the data, in any shape normalize_tabular_data accepts.
# headers      - Array of column headers (converted with #to_s), or a
#                selector String understood by normalize_tabular_data.
# aligns       - Array with one alignment per column; when empty,
#                Fixnum and Float columns get 'decimal' and all other
#                columns 'left'.
# sort         - when true, the data is first sorted by the first
#                element of each entry.
# tablefmt     - a TableFormat; anything else is replaced by
#                TABLE_FORMATS[:orgmode].
# floatfmt     - format passed to #format for every cell.
# missingval   - text passed to #format for nil cells.
#
# Returns the table as a String, as produced by format_table.
def tabulate(tabular_data, headers=[], aligns=[], sort=true,
             tablefmt=TABLE_FORMATS[:orgmode], floatfmt="g", missingval='')
  tabular_data = tabular_data.sort_by { |entry| entry[0] } if sort
  list_of_lists, headers = normalize_tabular_data(tabular_data, headers)

  # A selector String that could not be resolved (no rows) means no
  # headers. Everything below measures and pads headers as text, so
  # symbols and numbers are converted once, here.
  headers = [] if headers.kind_of?(String)
  headers = headers.map { |header| header.to_s }

  # optimization: look for ANSI control codes once,
  # enable smart width functions only if a control code is found
  plain_rows = [headers.join("\t")]
  plain_rows += list_of_lists.map { |row| row.map { |cell| cell.to_s }.join("\t") }
  has_invisible = INVISIBILE_CODES.match(plain_rows.join("\n"))
  width_fn = has_invisible ? :visible_width : :size

  # Rows -> columns. Array#zip without arguments already handles a
  # single row of any width, so no special case is needed; an empty
  # table simply has no columns.
  cols = if list_of_lists.empty?
           []
         else
           list_of_lists[0].zip(*list_of_lists[1..-1])
         end

  # format rows and columns, convert numeric values to strings
  coltypes = cols.map { |column| column_type(column) }
  cols = cols.zip(coltypes).map do |column, coltype|
    column.map { |value| format(value, coltype, floatfmt, missingval) }
  end

  # align columns
  if aligns.empty?
    # dynamic alignment by col type
    aligns = coltypes.map do |coltype|
      [Fixnum, Float].member?(coltype) ? 'decimal' : 'left'
    end
  end
  minwidths = if headers.empty?
                [0] * cols.size
              else
                headers.map { |header| header.send(width_fn) + 2 }
              end
  cols = cols.zip(aligns, minwidths).map do |column, align, minw|
    align_column(column, align, minw || 0, has_invisible)
  end

  if headers.empty?
    minwidths = cols.map { |column| column[0].send(width_fn) }
  else
    # align headers and add headers
    minwidths = minwidths.zip(cols).map do |minw, column|
      [minw, column ? column[0].send(width_fn) : 0].max
    end
    headers = headers.zip(aligns, minwidths).map do |header, align, minw|
      align_header(header, align, minw)
    end
  end

  # Columns -> rows again; without columns there are no rows to build.
  rows = cols.empty? ? [] : python_zip(cols)

  tablefmt = TABLE_FORMATS[:orgmode] unless tablefmt.kind_of?(TableFormat)

  # make sure values don't have newlines or tabs in them
  rows.each do |row|
    row.each_with_index do |cell, i|
      row[i] = cell.delete("\n\t\r")
    end
  end

  format_table(tablefmt, headers, rows, minwidths, aligns)
end