class Spark::Serializer::AutoBatched

AutoBatched serializer

Batch size is computed automatically. Similar to Python's AutoBatchedSerializer.

Constants

MAX_RATIO

Public Class Methods

new(serializer, best_size=65536) click to toggle source
# File lib/spark/serializer/auto_batched.rb, line 12
# Build an auto-batching serializer around +serializer+.
#
# serializer - the underlying serializer used to dump each batch
# best_size  - target serialized size (bytes) of one batch; coerced
#              with #to_i, must be at least 2
def initialize(serializer, best_size=65536)
  @serializer = serializer
  @best_size  = best_size.to_i

  # Reject degenerate batch sizes; the batch counter in #dump_to_io
  # starts at 2 and is halved down to at most 1, so < 2 makes no sense.
  if @best_size < 2
    error('Batch size must be greater than 1')
  end
end

Public Instance Methods

batched?() click to toggle source
# File lib/spark/serializer/auto_batched.rb, line 19
# This serializer always groups items into batches before dumping,
# so it reports itself as batched unconditionally.
def batched?
  true
end
dump_to_io(data, io) click to toggle source
# File lib/spark/serializer/auto_batched.rb, line 30
# Serialize +data+ to +io+ in variable-sized batches.
#
# The batch size starts at 2 and adapts: it doubles while the
# serialized chunk stays under @best_size bytes, and halves (never
# below 1) when a chunk exceeds @best_size * MAX_RATIO bytes.
#
# data - an enumerable of items to serialize
# io   - a writable stream responding to #write_string and #flush
def dump_to_io(data, io)
  check_each(data)

  # #slice is only available on Array, so materialize the input.
  data = data.to_a

  position    = 0
  batch_size  = 2
  upper_bound = @best_size * MAX_RATIO

  while (slice = data.slice(position, batch_size)) && !slice.empty?
    payload = @serializer.dump(slice)
    io.write_string(payload)

    position += batch_size

    # Adapt the batch size toward the @best_size target.
    bytes = payload.bytesize
    if bytes < @best_size
      batch_size *= 2
    elsif bytes > upper_bound && batch_size > 1
      batch_size /= 2
    end
  end

  io.flush
end
name() click to toggle source
# File lib/spark/serializer/auto_batched.rb, line 26
# Human-readable identifier of this serializer, including the
# configured target batch size, e.g. "AutoBatched(65536)".
def name
  'AutoBatched(' + @best_size.to_s + ')'
end
unbatch!() click to toggle source
# File lib/spark/serializer/auto_batched.rb, line 23
# Intentional no-op: this serializer is inherently batched, so there
# is nothing to unwrap. Returns nil, like the original empty body.
def unbatch!
  nil
end