class SamplingHash::Hash

Public Class Methods

new(size, seed = size, sampler = nil, xxhash = XXhash::XXhashInternal::StreamingHash64.new(seed)) click to toggle source
# File lib/sampling-hash/hash.rb, line 3
# Sets up the streaming state and loads the first planned sample.
#
# @param size    forwarded to Sampler.new when no sampler is supplied; also
#                the default seed.
# @param seed    seed handed to the default streaming hasher.
# @param sampler object whose #samples yields [offset, size] pairs; a
#                Sampler built from +size+ is used when nil/false.
# @param xxhash  hasher receiving each completed sample via #update and
#                queried via #digest.
def initialize(size, seed = size, sampler = nil, xxhash = XXhash::XXhashInternal::StreamingHash64.new(seed))
  @xxhash  = xxhash
  @sampler = sampler || Sampler.new(size)

  # Number of stream units consumed so far.
  @position = 0

  # Bookkeeping for the sample being collected.
  @next                  = 0   # Index of the next entry to take from the sampler.
  @current_sample        = nil # Buffer holding the sample data gathered so far.
  @current_sample_offset = 0   # Where the sample begins within the stream.
  @current_sample_size   = 0   # How long the sample is.

  # Load the first sample (if the sampler has any).
  next_sample
end

Public Instance Methods

digest() click to toggle source
# File lib/sampling-hash/hash.rb, line 30
# Returns the digest reported by the underlying hasher.
#
# Only samples that were completed have been passed to the hasher, so a
# partially collected sample is not reflected in the result.
#
# @return whatever @xxhash.digest returns.
def digest
  hasher = @xxhash
  hasher.digest
end
update(chunk) click to toggle source
# File lib/sampling-hash/hash.rb, line 20
# Feeds the next piece of the stream into the sampler/hasher.
#
# The chunk is processed as raw bytes: a binary-encoded view (String#b) is
# used so that lengths and slices are byte-based even when the caller
# passes a multibyte (e.g. UTF-8) string. The caller's string is not
# modified.
#
# @param chunk [String] the next consecutive piece of the stream.
# @return [nil]
def update(chunk)
  bytes = chunk.b
  total = bytes.bytesize
  pos = 0
  while pos < total
    # advance may consume fewer bytes than offered (e.g. up to a sample
    # boundary), so keep going until the whole chunk has been handled.
    used = advance(bytes, pos, total - pos)
    @position += used
    pos += used
  end
end

Private Instance Methods

advance(chunk, pos, len) click to toggle source
# File lib/sampling-hash/hash.rb, line 36
# Consumes bytes from +chunk+ starting at +pos+ and reports how many were
# used. At most +len+ bytes are used.
#
# Three situations are handled:
# * inside a sample: bytes are appended to the sample (finishing it when
#   enough bytes are available);
# * before the next sample: bytes are discarded up to the sample start;
# * no samples left: everything offered is discarded.
#
# A pending sample that is empty or starts at or before the current
# position can never be entered. It is skipped and 0 is returned; the
# caller's loop (see #update) then retries with the following sample, so
# progress is still made.
#
# @param chunk [String] data currently being processed.
# @param pos   [Integer] index into +chunk+ of the first unprocessed byte.
# @param len   [Integer] number of unprocessed bytes available from +pos+.
# @return [Integer] number of bytes consumed (0..len).
def advance(chunk, pos, len)
  if in_sample?
    # Use some bytes.
    needed = missing_sample_bytes
    if needed > len
      update_sample chunk[pos, len]
      len
    else
      finish_sample chunk[pos, needed]
      needed
    end
  elsif samples_left?
    gap = missing_gap_bytes
    if gap <= 0
      # Not inside the sample and nothing to skip before it: the sample is
      # empty or already behind us. Drop it instead of returning a zero or
      # negative byte count for the same sample over and over.
      next_sample
      0
    else
      # Discard bytes until the next sample starts (or the chunk ends).
      [gap, len].min
    end
  else
    # Discard the rest.
    len
  end
end
finish_sample(data) click to toggle source
# File lib/sampling-hash/hash.rb, line 81
# Completes the current sample: appends the final bytes, hands the whole
# sample to the hasher and moves on to the next planned sample.
#
# The bytes are appended in place (String#<<) rather than with +=, which
# would build a new string holding a copy of the whole sample.
#
# @param data [String] the last bytes belonging to the current sample.
# @return the value returned by next_sample.
def finish_sample(data)
  @current_sample << data
  @xxhash.update(@current_sample)
  next_sample
end
in_sample?() click to toggle source
# File lib/sampling-hash/hash.rb, line 61
# Tells whether the current stream position lies inside the current sample,
# i.e. within [offset, offset + size).
#
# @return [Boolean] false when no sample is pending.
def in_sample?
  return false unless samples_left?

  first = @current_sample_offset
  past_end = first + @current_sample_size
  first <= @position && @position < past_end
end
missing_gap_bytes() click to toggle source
# File lib/sampling-hash/hash.rb, line 73
# Distance from the current stream position to the start of the current
# sample. The result is zero or negative when the position has already
# reached or passed that start.
#
# @return [Integer]
def missing_gap_bytes
  sample_start = @current_sample_offset
  sample_start - @position
end
missing_sample_bytes() click to toggle source
# File lib/sampling-hash/hash.rb, line 69
# How much of the current sample is still to be collected: the planned
# sample size minus the length (String#size) of the buffer so far.
#
# @return [Integer]
def missing_sample_bytes
  collected = @current_sample.size
  @current_sample_size - collected
end
next_sample() click to toggle source
# File lib/sampling-hash/hash.rb, line 87
# Moves on to the next entry of the sampler's plan.
#
# When an entry is available, a fresh empty buffer is installed and the
# entry's [offset, size] pair becomes the current sample. When the plan is
# exhausted the buffer is set to nil, which is what samples_left? checks.
def next_sample
  unless @next < @sampler.samples.size
    # Plan exhausted: no buffer means no pending sample.
    return @current_sample = nil
  end

  @current_sample = String.new
  offset, length = @sampler.samples[@next]
  @current_sample_offset = offset
  @current_sample_size = length
  @next += 1
end
samples_left?() click to toggle source
# File lib/sampling-hash/hash.rb, line 65
# Tells whether a sample is currently pending, i.e. whether a sample
# buffer is installed (next_sample clears it once the plan is exhausted).
#
# @return [Boolean]
def samples_left?
  @current_sample ? true : false
end
update_sample(data) click to toggle source
# File lib/sampling-hash/hash.rb, line 77
# Appends +data+ to the sample currently being collected.
#
# The buffer is extended in place (String#<<). Using += here would copy
# the whole buffer on every call, which becomes quadratic when a sample is
# delivered in many small chunks.
#
# @param data [String] bytes belonging to the current sample.
# @return [String] the sample buffer.
def update_sample(data)
  @current_sample << data
end