class Spark::StatCounter

Attributes

count[R]
m2[R]
max[R]
max_value[R]
mean[R]
min[R]
min_value[R]
mu[R]
n[R]

Public Class Methods

new(iterator) click to toggle source
# File lib/spark/stat_counter.rb, line 10
# Build a counter and immediately feed it +iterator+.
#
# The extrema start at the identity elements for max/min so that the first
# merged value always replaces them; the running mean (@mu) and the running
# sum of squared deviations (@m2) start as floats.
#
# @param iterator whatever #merge accepts
def initialize(iterator)
  # Extrema
  @min = Float::INFINITY
  @max = -Float::INFINITY

  # Running accumulators
  @m2 = 0.0
  @mu = 0.0
  @n = 0

  merge(iterator)
end

Public Instance Methods

merge(other) click to toggle source
# File lib/spark/stat_counter.rb, line 20
# Fold +other+ into this counter.
#
# Dispatch rules, checked in this order:
# * a Spark::StatCounter is combined through merge_stat_counter
# * anything that responds to +each+ goes through merge_array
# * everything else is handed to merge_value as a single value
#
# @return [self] so that calls can be chained
def merge(other)
  handler =
    if other.is_a?(Spark::StatCounter)
      :merge_stat_counter
    elsif other.respond_to?(:each)
      :merge_array
    else
      :merge_value
    end

  send(handler, other)
  self
end
sampleStdev()
Alias for: sample_stdev
sampleVariance()
Alias for: sample_variance
sample_stdev() click to toggle source

Return the sample standard deviation of the values, which corrects for bias in estimating the variance by dividing by N-1 instead of N.

# File lib/spark/stat_counter.rb, line 62
# Sample standard deviation: the square root of #sample_variance.
def sample_stdev
  unbiased_variance = sample_variance
  Math.sqrt(unbiased_variance)
end
Also aliased as: sampleStdev
sample_variance() click to toggle source

Return the sample variance, which corrects for bias in estimating the variance by dividing by N-1 instead of N.

# File lib/spark/stat_counter.rb, line 47
# Sample variance: @m2 divided by (@n - 1).
#
# @return [Float] NaN when @n is 0 or 1
def sample_variance
  return Float::NAN if @n <= 1

  @m2 / (@n - 1)
end
Also aliased as: sampleVariance
stdev() click to toggle source

Return the population standard deviation of the values (the square root of variance, which divides by N). Use sample_stdev for the N-1 version.

# File lib/spark/stat_counter.rb, line 56
# Standard deviation: the square root of #variance.
def stdev
  spread = variance
  Math.sqrt(spread)
end
sum() click to toggle source
# File lib/spark/stat_counter.rb, line 32
# Total of all merged values, reconstructed as count times running mean.
def sum
  samples = @n
  average = @mu
  samples * average
end
to_s() click to toggle source
# File lib/spark/stat_counter.rb, line 66
# One-line, human-readable summary of the counter.
#
# Interpolates the count, mean, max and min readers together with #stdev
# (the divide-by-N standard deviation, not #sample_stdev).
#
# @return [String]
def to_s
  "(count: #{count}, mean: #{mean}, stdev: #{stdev}, max: #{max}, min: #{min})"
end
variance() click to toggle source

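Return the population variance of the values (divides by N). Returns NaN when no values have been merged. Use sample_variance for the N-1 version.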

# File lib/spark/stat_counter.rb, line 37
# Variance computed as @m2 divided by @n.
#
# @return [Float] NaN when @n is 0
def variance
  return Float::NAN if @n == 0

  @m2 / @n
end

Private Instance Methods

merge_array(array) click to toggle source
# File lib/spark/stat_counter.rb, line 109
# Merge every element yielded by +array+, one value at a time, in order.
def merge_array(array)
  array.each { |element| merge_value(element) }
end
merge_stat_counter(other) click to toggle source
# File lib/spark/stat_counter.rb, line 79
# Absorb the statistics accumulated by another counter into this one.
#
# The two partial results are combined without revisiting the underlying
# values: the means are blended by weight, and the squared-deviation sums are
# added together with a correction term for the gap between the two means.
#
# @param other [Spark::StatCounter] counter whose n, mu, m2, max and min are
#   read; it is not modified
def merge_stat_counter(other)
  # Merging a counter into itself: read from a snapshot so the values taken
  # from `other` cannot shift while this object is being updated. A shallow
  # copy is enough because the state is five numeric instance variables.
  other = dup if other.equal?(self)

  if @n == 0
    # Nothing accumulated here yet: adopt the other counter's state verbatim.
    @n = other.n
    @mu = other.mu
    @m2 = other.m2
    @max = other.max
    @min = other.min
  elsif other.n != 0
    delta = other.mu - @mu
    total = @n + other.n

    # When one side holds more than ten times as many values as the other,
    # shift the larger side's mean by the weighted difference; otherwise take
    # the plain weighted average of both means.
    @mu =
      if other.n * 10 < @n
        @mu + (delta * other.n) / total
      elsif @n * 10 < other.n
        other.mu - (delta * @n) / total
      else
        (@mu * @n + other.mu * other.n) / total
      end

    @max = [@max, other.max].max
    @min = [@min, other.min].min

    # The correction term needs the pre-merge @n, so @m2 is updated first.
    @m2 += other.m2 + (delta * delta * @n * other.n) / total
    @n = total
  end
end
merge_value(value) click to toggle source
# File lib/spark/stat_counter.rb, line 115
# Fold a single numeric value into the running statistics.
#
# The statement order matters: the offset is measured against the mean
# *before* it is updated, while the @m2 increment multiplies that offset by
# the distance to the mean *after* the update.
def merge_value(value)
  offset = value - @mu # distance from the old mean
  @n = @n + 1
  @mu = @mu + offset / @n
  @m2 = @m2 + offset * (value - @mu) # uses the freshly updated mean
  @max = [@max, value].max
  @min = [@min, value].min
end