class Spark::Command::Histogram


Histogram

Public Instance Methods

lazy_run(iterator, *) click to toggle source
# File lib/spark/command/statistic.rb, line 75
# Delegates to #run with the given iterator.
#
# Any additional positional arguments are accepted and ignored; they are
# not forwarded to #run.
#
# Returns whatever #run returns.
def lazy_run(iterator, *)
  run(iterator)
end
run(iterator, *) click to toggle source
# File lib/spark/command/statistic.rb, line 58
# Counts how many items of +iterator+ fall into each bucket.
#
# Items are skipped when they are nil, are non-finite Floats (NaN or
# +/-Infinity), lie outside the closed range min..max, or when the bucket
# function returns nil for them.
#
# iterator - any object responding to #each.
# *        - extra positional arguments are accepted and ignored.
#
# Returns a one-element Array wrapping the Array of per-bucket counts
# (counter_size entries, each starting at 0).
def run(iterator, *)
  counters = Array.new(counter_size, 0)

  iterator.each do |item|
    next if item.nil?
    next if item.is_a?(Float) && !item.finite?
    next if item > max || item < min

    index = bucket_function.call(item)
    counters[index] += 1 unless index.nil?
  end

  [counters]
end

Private Instance Methods

_bucket_function() click to toggle source
# File lib/spark/command/statistic.rb, line 103
# Picks the bucket function to use: the result of fast_bucket_function
# when @even is truthy, otherwise the result of basic_bucket_function.
def _bucket_function
  @even ? fast_bucket_function : basic_bucket_function
end
basic_bucket_function() click to toggle source

Basic bucket function. Same as right bisect.

# File lib/spark/command/statistic.rb, line 129
# Builds a Proc that maps an item to its bucket index.
#
# The index is bisect_right(@buckets, item) - 1, capped at the last
# counter index (counter_size - 1).
#
# Returns a Proc taking one item and returning the bucket index.
def basic_bucket_function
  proc do |item|
    position = bisect_right(@buckets, item) - 1

    # There is one counter fewer than there are bucket edges, so anything
    # past the final counter is folded into it.
    last_index = counter_size - 1
    position > last_index ? last_index : position
  end
end
bucket_function() click to toggle source

Decide which bucket function to pass. We decide here rather than having a general function so that the decision need only be made once.

# File lib/spark/command/statistic.rb, line 99
# Returns the bucket function, memoized in @bucket_function.
#
# _bucket_function is called only while @bucket_function is still nil or
# false; afterwards the stored value is returned directly.
def bucket_function
  return @bucket_function if @bucket_function

  @bucket_function = _bucket_function
end
counter_size() click to toggle source
# File lib/spark/command/statistic.rb, line 89
# Number of counters: one fewer than the number of entries in @buckets.
def counter_size
  edge_count = @buckets.size
  edge_count - 1
end
fast_bucket_function() click to toggle source

Determine the bucket number in constant time. Requires that buckets are evenly spaced.

# File lib/spark/command/statistic.rb, line 113
# Builds a Proc that maps an item to its bucket index arithmetically,
# as (item - min) / increment.
#
# The Proc returns nil for a Float NaN, and for any item whose computed
# position is negative or greater than counter_size. Otherwise it returns
# the position truncated to an Integer and capped at counter_size - 1.
#
# Returns a Proc taking one item and returning an Integer index or nil.
def fast_bucket_function
  proc do |item|
    next nil if item.is_a?(Float) && item.nan?

    position = (item - min) / increment
    next nil if position > counter_size || position < 0

    # A position equal to counter_size (item on the top edge) is folded
    # into the last counter.
    [position.to_i, counter_size - 1].min
  end
end
increment() click to toggle source
# File lib/spark/command/statistic.rb, line 93
# Difference between the second and the first entry of @buckets.
def increment
  second_edge = @buckets[1]
  first_edge = @buckets[0]
  second_edge - first_edge
end
max() click to toggle source
# File lib/spark/command/statistic.rb, line 85
# Upper bound used when filtering items: the last entry of @buckets.
def max
  @buckets.last
end
min() click to toggle source
# File lib/spark/command/statistic.rb, line 81
# Lower bound used when filtering items: the first entry of @buckets.
def min
  @buckets.first
end