module Charty::Statistics

Public Class Methods

bootstrap(vector, n_boot: 2000, func: :mean, units: nil, random: nil) click to toggle source
# File lib/charty/statistics.rb, line 38
def self.bootstrap(vector, n_boot: 2000, func: :mean, units: nil, random: nil)
  n = vector.size
  random = Charty::Plotters::RandomSupport.check_random(random)
  func = Charty::Plotters::EstimationSupport.check_estimator(func)

  if units
    return structured_bootstrap(vector, n_boot, units, func, random)
  end

  if defined?(Pandas::Series) || defined?(Numpy::NDArray)
    boot_dist = bootstrap_optimized_for_pycall(vector, n_boot, random, func)
    return boot_dist if boot_dist
  end

  boot_dist = Array.new(n_boot) do |i|
    resampler = Array.new(n) { random.rand(n) }

    w ||= vector.values_at(*resampler)

    case func
    when :mean
      mean(w)
    end
  end

  boot_dist
end
bootstrap_ci(*vectors, width, n_boot: 2000, func: :mean, units: nil, random: nil) click to toggle source
# File lib/charty/statistics.rb, line 95
def self.bootstrap_ci(*vectors, width, n_boot: 2000, func: :mean, units: nil, random: nil)
  boot = bootstrap(*vectors, n_boot: n_boot, func: func, units: units, random: random)
  q = [50 - width / 2, 50 + width / 2]
  if boot.respond_to?(:percentile)
    boot.percentile(q)
  else
    percentile(boot, q)
  end
end
histogram(ary, *args, **kwargs) click to toggle source
# File lib/charty/statistics.rb, line 14
def self.histogram(ary, *args, **kwargs)
  ary.histogram(*args, **kwargs)
end
mean(enum) click to toggle source
# File lib/charty/statistics.rb, line 6
def self.mean(enum)
  enum.mean
end
percentile(a, q) click to toggle source

TODO: optimize with introselect algorithm

# File lib/charty/statistics.rb, line 106
def self.percentile(a, q)
  return mean(a) if a.size == 0

  a = a.sort
  n = a.size
  q.map do |x|
    x = (n-1) * (x / 100.0)
    i = x.floor
    if i == x
      a[i]
    else
      t = x - i
      (1-t)*a[i] + t*a[i+1]
    end
  end
end
stdev(enum, population: false) click to toggle source
# File lib/charty/statistics.rb, line 10
def self.stdev(enum, population: false)
  enum.stdev(population: population)
end

Private Class Methods

bootstrap_optimized_for_pycall(vector, n_boot, random, func) click to toggle source
# File lib/charty/statistics.rb, line 66
                     def self.bootstrap_optimized_for_pycall(vector, n_boot, random, func)
  case
  when vector.is_a?(Charty::Vector)
    bootstrap_optimized_for_pycall(vector.data, n_boot, random, func)

  when defined?(Pandas::Series) && vector.is_a?(Pandas::Series) || vector.is_a?(Numpy::NDArray)
    # numpy is also available when pandas is available
    n = vector.size
    resampler = Numpy.empty(n, dtype: Numpy.intp)
    Array.new(n_boot) do |i|
      # TODO: Use Numo and MemoryView to reduce execution time
      # resampler = Numo::Int64.new(n).rand(n)
      # w = Numpy.take(vector, resampler)
      n.times {|i| resampler[i] = random.rand(n) }
      w = vector.take(resampler)

      case func
      when :mean
        w.mean
      end
    end
  end
end
structured_bootstrap(vector, n_boot, units, func, random) click to toggle source
# File lib/charty/statistics.rb, line 90
                     def self.structured_bootstrap(vector, n_boot, units, func, random)
  raise NotImplementedError,
    "structured bootstrapping has not been supported yet"
end