class FlatKit::Command::Stats
Public Class Methods
description()
click to toggle source
# File lib/flat_kit/command/stats.rb, line 9
#
# One-sentence, human-readable summary of what this command does.
#
# @return [String] the summary text
def self.description
  "Collect and report stats on the inputfile"
end
name()
click to toggle source
# File lib/flat_kit/command/stats.rb, line 5
#
# The name of this command.
#
# Note that, being a class-level method called `name`, this takes the place
# of the default class name string for this class.
#
# @return [String] always "stats"
def self.name
  "stats"
end
parser()
click to toggle source
# File lib/flat_kit/command/stats.rb, line 13
#
# Build the Optimist command line parser for the stats command.
#
# The parser carries the help banner (command description, overview and
# usage examples) followed by the option definitions: --output,
# --input-format, --output-format, --select, --everything and --cardinality.
#
# @return [::Optimist::Parser] a freshly built parser
def self.parser
  ::Optimist::Parser.new do
    # Lead with the description of *this* command; the Sort command's
    # description was shown here before.
    banner Stats.description
    banner ""

    banner <<~BANNER
      Given an input file collect basic statistics.

      The statistics can vary based upon the datatype of the field. Numeric fields
      will report the basic count, min, max, mean, standard deviation and sum.
      Non-numeric fields that are comparable, like dates, will report count, min and max.
      Other non-numeric fields will only report the count.

      Adding --cardinality will report the count, and frequency of distinct values in
      the result. This will allow for reporting the median value.

      The fields upon which stats are collected may be selected with the --select
      parameter. By default statistics are collected on all fields.

      The flatfile type(s) will be automatically determined by the file name.

      The output can be dumped as a CSV, JSON or a formatted ascii table.

    BANNER

    banner <<~USAGE

      Usage:
        fk stats --everything file.json
        fk stats --select surname,given_name file.csv
        fk stats --select surname,given_name --output-format json file.csv > stats.json
        fk stats --select field1,field2 --output-format json input.csv
        fk stats --select field1 file.json.gz -o stats.csv
        gunzip -c file.json.gz | fk stats --input-format json --output-format text

    USAGE

    banner <<~OPTIONS

      Options:

    OPTIONS

    opt :output, "Send the output to the given path instead of standard out.", default: "<stdout>"
    opt :input_format, "Input format, csv or json", default: "auto", short: :none
    opt :output_format, "Output format, csv or json", default: "auto", short: :none
    opt :select, "The comma separated list of field(s) to report stats on", required: false, type: :string
    opt :everything, "Show all statistics that are possible", default: false
    opt :cardinality, "Show the cardinality of the fields, this requires additional memory", default: false
  end
end
Public Instance Methods
call()
click to toggle source
# File lib/flat_kit/command/stats.rb, line 89
#
# Run the command by delegating to the object held in @stats, which is
# assigned by #parse.
#
# @return whatever @stats.call returns
def call
  @stats.call
end
parse()
click to toggle source
# File lib/flat_kit/command/stats.rb, line 66
#
# Parse argv with the class level parser and build the stats object that
# #call later runs, storing it in @stats.
#
# Everything runs inside Optimist's standard exception handling, and any
# ::FlatKit::Error raised along the way is re-raised as an
# ::Optimist::CommandlineError carrying the same message.
#
# Raises ::Optimist::CommandlineError (inside the handler) when more than one
# input path is left over after option parsing.
def parse
  option_parser = self.class.parser

  ::Optimist.with_standard_exception_handling(option_parser) do
    begin
      options = option_parser.parse(argv)

      # --select narrows the fields; without it the AllFields marker is used.
      selected_fields =
        if options[:select]
          CSV.parse_line(options[:select])
        else
          ::FlatKit::Stats::AllFields
        end

      # Core stats are always collected; cardinality stats are opt-in via
      # --cardinality or --everything.
      wanted_stats = [FieldStats::CORE_STATS]
      wanted_stats << FieldStats::CARDINALITY_STATS if options[:cardinality] || options[:everything]

      inputs = option_parser.leftovers
      if inputs.size > 1
        raise ::Optimist::CommandlineError, "1 and only 1 input file is allowed"
      end

      @stats = ::FlatKit::Stats.new(
        input: inputs.first || "-", # default to stdin
        input_fallback: options[:input_format],
        output: options[:output],
        output_fallback: options[:output_format],
        fields_to_stat: selected_fields,
        stats_to_collect: wanted_stats
      )
    rescue ::FlatKit::Error => e
      raise ::Optimist::CommandlineError, e.message
    end
  end
end