class CBOR::Packer

Public Class Methods

from_item(item) click to toggle source
# File lib/cbor-packed.rb, line 12
# Builds a Packer for +item+ by analysing which data items repeat inside it
# and which strings share a common prefix.
#
# The global <tt>$compression_hack</tt> is added to the minimum shared-prefix
# length required before a prefix entry is created. When the global has never
# been assigned (nil) it is treated as 0 instead of raising a TypeError.
#
# @param item [#cbor_visit] data item to analyse; cbor_visit must yield each
#   contained data item and use the block's result to decide whether to go on
# @return [CBOR::Packer] the result of new(match_array, prefixes, [], translate)
def self.from_item(item)
  count = Hash.new(0)
  item.cbor_visit do |o|
    # true only on the first sighting; once the count gets > 1 the block
    # returns false so the visitor can stop visiting
    (count[o] += 1) == 1
  end
  # count is now a Hash with all data items as keys and the number of times
  # they occur as values

  # choose those items that occur more than once and make a first rough
  # estimate of the saving: [item, occurrences, encoded length, saving]
  good_count = count.select { |_k, v| v > 1 }.map do |k, v|
    l = k.to_cbor.length
    [k, v, l, (v - 1) * (l - 1)]
  end

  # keep those that potentially save something (> 0), best saving first
  better_count = good_count.select { |a| a[3] > 0 }.sort_by { |a| -a[3] }

  # now: take the best out???; re-visit that reducing by n; re-sort and filter???
  # sort by descending number of references -- higher reference counts go first
  match_array = better_count.sort_by { |a| -a[1] }.map { |a| a[0] }

  # XXX the below needs to be done with arrays and (hard!) maps as well
  # do this on the reverse to find common suffixes
  # select all strings (ignoring reference counts) and sort them, so that
  # strings sharing a prefix end up next to each other
  strings = count.select { |k, _v| String === k }.map(&:first).sort

  translate = {}
  prefixes = []
  unless strings.empty?
    # string_common: list of [string, number of leading characters it shares
    # with the next string]; the last string has no successor, hence 0
    string_common = strings[1..-1].zip(strings).map { |y, x|
      l = x.chars.zip(y.chars).take_while { |a, b| a == b }.length # should be bytes
      [x, l]
    } << [strings[-1], 0]

    # an unset global must not break the comparison below
    compression_hack = $compression_hack || 0

    prefix_stack = [[0, false]] # sentinel: length 0, no tag
    pos = 0                     # mirror prefix_stack[-1][0]
    tag_no = REF_TAG
    string_common.each do |s, l|
      if l > pos + 2 + compression_hack
        if (t = prefix_stack[-1][1]) # if we still have a prefix left
          # express the new prefix relative to the enclosing one
          prefixes << CBOR::Tagged.new(t, s[pos...l])
        else
          prefixes << s[0...l]
        end
        prefix_stack << [l, tag_no]
        pos = l
        # tag numbers go REF_TAG, 225..255, 28704...
        # NOTE(review): presumably the tag ranges reserved for prefix
        # references -- confirm against the packed-CBOR specification
        tag_no += 1
        tag_no = 225 if tag_no == REF_TAG + 1
        tag_no = 28704 if tag_no == 256
      end
      if (t = prefix_stack[-1][1]) # if we still have a viable prefix left
        translate[s] = CBOR::Tagged.new(t, s[pos..-1])
      end
      # pop the prefix stack until the prefix on top is shared with the next string
      while l < pos
        prefix_stack.pop
        pos = prefix_stack[-1][0]
      end
    end
  end

  # XXX test replacing match_array here
  # shared items that are strings with a prefix translation are stored in
  # their prefix-compressed form
  match_array = match_array.map { |v| translate[v] || v }
  new(match_array, prefixes, [], translate)
end
new(match_array, prefix_array, suffix_array, translate) click to toggle source
# File lib/cbor-packed.rb, line 93
# Sets up the lookup table that maps data items to their packed references.
#
# @param match_array [Array] shared items, in reference order
# @param prefix_array [Array] prefix entries, stored as given
# @param suffix_array [Array] suffix entries, stored as given
# @param translate [Hash] existing item => replacement entries; this very
#   hash is kept as the lookup table and is extended in place
def initialize(match_array, prefix_array, suffix_array, translate)
  @match_array = match_array
  # @prefix = {} -- do that later
  @prefix_array = prefix_array
  @suffix_array = suffix_array
  @hit = translate
  # XXX: make sure we don't overwrite the existing prefix compression values!
  # (this should really be done downwards, ...) 16 x 1, 160 x 2, (512-48) x 3
  match_array.each_with_index do |shared, index|
    @hit[shared] =
      if index < 16
        # the first sixteen shared items are referenced by simple values 0..15
        CBOR::Simple.new(index)
      else
        # the rest use REF_TAG with the sequence 0, -1, 1, -2, 2, ...
        rank = index - 16
        CBOR::Tagged.new(REF_TAG, (rank >> 1) ^ -(rank & 1))
      end
  end
  # add one round of transitive matching: when a replacement is itself a
  # key of the table, point the original key at that entry's value
  @hit.each_pair do |key, replacement|
    chained = @hit[replacement]
    @hit[key] = chained if chained
  end
end

Public Instance Methods

has(o) click to toggle source
# File lib/cbor-packed.rb, line 122
# Looks up +o+ in the replacement table built by the constructor.
#
# @param o [Object] data item to look up
# @return [Object, nil] the entry stored for +o+ in @hit, or the hash's
#   result for a missing key (nil for a plain Hash)
def has(o)
  @hit[o]
end
pack(pa) click to toggle source
# File lib/cbor-packed.rb, line 125
# Wraps an already packed item together with this packer's tables.
#
# @param pa [Object] the packed data item to wrap
# @return [CBOR::Tagged] PACKED_TAG applied to
#   [match_array, prefix_array, suffix_array, pa]
def pack(pa)
  # Don't forget to pack the match_array!
  tables_and_item = [@match_array, @prefix_array, @suffix_array, pa]
  CBOR::Tagged.new(PACKED_TAG, tables_and_item)
end