class Spark::Serializer::AutoBatched
AutoBatched serializer. The batch size is computed automatically. Similar to Python's AutoBatchedSerializer.
Constants
- MAX_RATIO
Public Class Methods
new(serializer, best_size=65536)
# File lib/spark/serializer/auto_batched.rb, line 12
def initialize(serializer, best_size=65536)
  @serializer = serializer
  @best_size = best_size.to_i

  error('Batch size must be greater than 1') if @best_size < 2
end
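A minimal construction sketch: the serializer wraps another serializer that does the actual item encoding. The class name Spark::Serializer::Marshal used for the base serializer is an assumption here; substitute whichever base serializer your setup provides.

  # Spark::Serializer::Marshal is assumed to be a plain Marshal-based serializer.
  base    = Spark::Serializer::Marshal.new
  batched = Spark::Serializer::AutoBatched.new(base)           # target ~64 KiB per batch
  custom  = Spark::Serializer::AutoBatched.new(base, 131_072)  # larger target batch size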
Public Instance Methods
batched?()
# File lib/spark/serializer/auto_batched.rb, line 19
def batched?
  true
end
dump_to_io(data, io)
# File lib/spark/serializer/auto_batched.rb, line 30
def dump_to_io(data, io)
  check_each(data)

  # Only Array has .slice
  data = data.to_a

  index = 0
  batch = 2
  max   = @best_size * MAX_RATIO

  loop do
    chunk = data.slice(index, batch)
    if chunk.nil? || chunk.empty?
      break
    end

    serialized = @serializer.dump(chunk)
    io.write_string(serialized)

    index += batch

    size = serialized.bytesize
    if size < @best_size
      batch *= 2
    elsif size > max && batch > 1
      batch /= 2
    end
  end

  io.flush
end
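The loop starts with a batch of two items, doubles the batch while serialized chunks stay under best_size, and halves it once a chunk exceeds best_size * MAX_RATIO. The standalone sketch below reproduces that strategy with plain Marshal and StringIO so the adaptation can be observed outside the gem; the 4-byte length-prefixed framing and the MAX_RATIO value of 10 are assumptions for illustration only.

  require 'stringio'

  def adaptive_dump(data, io, best_size: 65_536, max_ratio: 10)
    data  = data.to_a
    index = 0
    batch = 2
    max   = best_size * max_ratio

    loop do
      chunk = data.slice(index, batch)
      break if chunk.nil? || chunk.empty?

      serialized = Marshal.dump(chunk)
      io.write([serialized.bytesize].pack('l>'))  # big-endian length prefix (assumed framing)
      io.write(serialized)

      index += batch

      if serialized.bytesize < best_size
        batch *= 2                                # small chunk: try a bigger batch next time
      elsif serialized.bytesize > max && batch > 1
        batch /= 2                                # oversized chunk: back off
      end
    end
  end

  io = StringIO.new
  adaptive_dump((1..100_000).to_a, io)
  puts io.size   # total bytes written across all batches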
name()
# File lib/spark/serializer/auto_batched.rb, line 26
def name
  "AutoBatched(#{@best_size})"
end
unbatch!()
# File lib/spark/serializer/auto_batched.rb, line 23
def unbatch!
end