class IdPack::UuidPacker

Public Instance Methods

alphanum_compress(arr, alphanum_string, order) click to toggle source

compress UUIDs array

# File lib/id_pack/uuid_packer.rb, line 56
def alphanum_compress(arr, alphanum_string, order)
  # length of UUID in bits
  uuid_bit_length = 128

  # compress without delta
  nresult        = ''
  alphanum_array = alphanum_to_array alphanum_string, false
  pow            = alphanum_array[0][2]
  lowhi          = alphanum_array[0][0]

  # first bit equal 0 => compress without delta
  achr = 0
  rest = 1

  # loop by UUIDs
  arr.each do |item|

    # remove '-' characters from UUID
    curr = item.delete('-').to_i(16)

    # get base binary code (BBC)
    achr += (curr << rest)

    # look for number of bits in BBC
    rest += uuid_bit_length

    # create symbols to compressed string
    until rest < pow

      power_c  = pow - 1
      code     = (
        (achr & (2**power_c - 1)) + 2**power_c
      ).to_s(2).reverse.to_i(2) >> 1

      power_c += 1 if code >= lowhi

      # decrease number of bits in BBC
      rest -= power_c

      # get reverse bits from the end of BBC to create new symbol
      code = (
        (achr & (2**power_c - 1)) + 2**power_c
      ).to_s(2).reverse.to_i(2) >> 1

      # add new symbol
      nresult += alphanum_array.assoc(code)[1]

      # remove used bits from BBC
      achr >>= power_c
    end
  end

  # check if we have tail of BBC
  if rest.positive?
    code   = ((achr & (2**rest - 1)) + 2**rest).to_s(2).reverse.to_i(2) >> 1
    code <<= pow - rest - 1
    code <<= 1 if code >= lowhi

    # add tail symbol
    nresult += alphanum_array.assoc(code)[1]
  end

  # compress with delta
  arr = arr.sort

  # first character is delimiter => compress with delta : delimiter (last
  # character in alphabet) always has code of all ones
  dresult        = alphanum_array[-1][1]
  alphanum_array = alphanum_to_array alphanum_string, true
  pow            = alphanum_array[0][2]

  if pow > 1
    lowhi = alphanum_array[0][0]
    prev  = 0

    # loop by UUIDs
    arr.each do |item|

      # remove '-' characters from UUID
      curr = item.delete('-').to_i(16)

      # calculate delta
      curr   -= prev
      prev    = item.delete('-').to_i(16)
      binlog  = bin_pow curr
      binlog  = uuid_bit_length if binlog >= uuid_bit_length - pow

      # get BBC for only current UUID
      achr = curr

      # look for number of bits in BBC (also for only current UUID)
      rest = binlog

      # create symbols to compressed string
      until rest < pow
        power_c  = pow - 1
        code     = (
          (achr & (2**power_c - 1)) +
          2**power_c
        ).to_s(2).reverse.to_i(2) >> 1

        power_c += 1 if code >= lowhi

        # decrease number of bits in BBC
        rest -= power_c

        # get reverse bits from the end of BBC to create new symbol
        code = (
          (achr & (2**power_c - 1)) +
          2**power_c
        ).to_s(2).reverse.to_i(2) >> 1

        # add new symbol
        dresult += alphanum_array.assoc(code)[1]

        # remove used bits from BBC
        achr >>= power_c
      end

      # check if we have tail of BBC for current UUID
      if rest.positive?
        code   = (
          (achr & (2**rest - 1)) + 2**rest
        ).to_s(2).reverse.to_i(2) >> 1

        code <<= pow - rest - 1
        code <<= 1 if code >= lowhi

        # add tail symbol for current UUID
        dresult += alphanum_array.assoc(code)[1]
      end

      # add delimiter if we use less symbols than for whole UUID
      dresult += alphanum_array[0][1] if binlog < uuid_bit_length
    end
  else
    order = true
  end

  result = nresult

  # get better result or non delta if we need to keep order
  result = dresult if dresult.length < nresult.length && !order
  result
end
alphanum_decompress(str, alphanum_string) click to toggle source

decompress UUIDs array

# File lib/id_pack/uuid_packer.rb, line 203
def alphanum_decompress(str, alphanum_string)
  # length of UUID in bits
  uuid_bit_length = 128

  result = []
  alphanum_array = alphanum_to_array alphanum_string, false

  # check if delta used when compress
  if (
      alphanum_array.rassoc(str[0])[0] &
      (2**(alphanum_array.rassoc(str[0])[2] - 1))
    ) != 0

    # delta used
    alphanum_array = alphanum_to_array alphanum_string, true
    pow            = alphanum_array[0][2]
    lowhi          = alphanum_array[0][0]
    prev           = 0
    item           = 1
    achr           = 0
    rest           = 0

    # loop by symbols of compressed string starting from second (the first is
    # header) to next after last (for BBC length processing after last)
    while item <= str.length

      # we catch delimiter or we get BBC with length equal whole UUID
      if str[item] == alphanum_array[0][1] || rest >= uuid_bit_length

        # if BBC length than we need to look to current symbol one more time
        # if it is delimiter
        item -= 1 if rest >= uuid_bit_length

        # calculate UUID from delta
        achr += prev
        prev = achr

        # transform UUID to hexadecimal
        curr = prev.to_s(16)

        # add first characters if UUID start with 0
        curr = '0' * (uuid_bit_length / 4 - curr.length) + curr

        # add '-' characters from UUID
        curr = [
          curr[0..7],
          curr[8..11],
          curr[12..15],
          curr[16..19],
          curr[20..31],
        ].join('-')

        # add new UUID to array
        result.push curr
        achr = 0
        rest = 0

      # if we become last symbol we need no to symbol processing
      elsif item < str.length

        # reverse symbol code to BBC bits
        code = (
          alphanum_array.rassoc(str[item])[0] + 2**alphanum_array.rassoc(str[item])[2]
        ).to_s(2).reverse.to_i(2) >> 1

        # add bits to BBC
        achr += code << rest

        # look for number of bits in BBC
        rest += pow
        rest -= 1 if code < lowhi

      end

      item += 1

    end

  else
    # delta not used
    achr  = 0
    rest  = 0
    pow   = alphanum_array[0][2]
    lowhi = alphanum_array[0][0]

    # for first bit processing
    frst = true
    item = 0

    # loop by symbols of compressed string
    while item < str.length

      # reverse symbol code to BBC bits
      code =
        (
          alphanum_array.rassoc(str[item])[0] +
          2**alphanum_array.rassoc(str[item])[2]
        )
          .to_s(2).reverse.to_i(2) >> 1

      # add bits to BBC
      achr += code << rest

      # look for number of bits in BBC
      rest += pow
      rest -= 1 if alphanum_array.rassoc(str[item])[0] < lowhi

      # first bit processing
      if frst
        frst = false
        achr >>= 1
        rest -= 1
      end

      # we get BBC with length equal whole UUID
      if rest >= uuid_bit_length

        # calculate number of bits in BBC
        rest -= uuid_bit_length

        # transform UUID to hexadecimal
        curr = (achr & (2**uuid_bit_length - 1)).to_s(16)

        # add first characters if UUID start with 0
        curr = '0' * (uuid_bit_length / 4 - curr.length) + curr

        # add '-' characters from UUID
        curr = [
          curr[0..7],
          curr[8..11],
          curr[12..15],
          curr[16..19],
          curr[20..31],
        ].join('-')

        # add new UUID to array
        result.push curr

        # remove used bits from BBC
        achr >>= uuid_bit_length

      end

      item += 1
    end
  end
  result
end
alphanum_to_array(alphanum_string, del) click to toggle source

transform string of valid characters to useful array (del = true if we need delimiter)

# File lib/id_pack/uuid_packer.rb, line 14
def alphanum_to_array(alphanum_string, del)

  alphanum_array = []
  el = alphanum_string.length

  # max number of bits coding by one character (some characters will be one
  # bit less)
  pow = bin_pow (el - 1)

  # how many characters will be one bit less
  lowhi = 2**pow - el

  # if delimited we can't use last characters
  if del
    el    -= 1
    pow    = bin_pow(el - 1)
    lowhi  = 2**pow - el
    lowhi  = -1 if lowhi.zero?
    # first element include main data about alphabet and delimiter character
    alphanum_array.push [lowhi, alphanum_string[el], pow]
  else
    # first element include main data about alphabet
    lowhi = -1 if lowhi.zero?
    alphanum_array.push [lowhi, '', pow]
  end

  lowhi     = 0 if lowhi == -1
  char_item = 0

  # loop by characters and get code for each one
  until char_item == el
    if char_item < lowhi
      alphanum_array.push [char_item, alphanum_string[char_item], pow - 1]
    else
      alphanum_array.push [lowhi + char_item, alphanum_string[char_item], pow]
    end
    char_item += 1
  end
  alphanum_array
end
bin_pow(num) click to toggle source

calculate bits in number

# File lib/id_pack/uuid_packer.rb, line 6
def bin_pow(num)
  pow = 0
  pow += 1 until (num >> pow).zero?
  pow
end