class ByteBoozer2::Cruncher

This class implements ByteBoozer's 2.0 crunching algorithm.

Constants

DECRUNCHER
DECRUNCHER_LENGTH
LEN_LONG_0
LEN_LONG_1
LEN_LONG_2
LEN_LONG_3
LEN_SHORT_0
LEN_SHORT_1
LEN_SHORT_2
LEN_SHORT_3
MAX_OFFSET
MAX_OFFSET_SHORT
MEM_SIZE
NUM_BITS_LONG_0
NUM_BITS_LONG_1
NUM_BITS_LONG_2
NUM_BITS_LONG_3
NUM_BITS_SHORT_0
NUM_BITS_SHORT_1
NUM_BITS_SHORT_2
NUM_BITS_SHORT_3

Attributes

address[R]
result[R]

Public Class Methods

crunch(*args) click to toggle source
# File lib/byteboozer2/cruncher.rb, line 18
def self.crunch(*args)
  new(*args).crunch
end
new(data, options = {}) click to toggle source
# File lib/byteboozer2/cruncher.rb, line 22
def initialize(data, options = {})
  @data       = data
  @executable = options[:executable] || false
  @relocated  = options[:relocated]  || false
  @address    = options[:address]    || 0x0000

  raise ArgumentError unless valid?
end

Public Instance Methods

crunch() click to toggle source
# File lib/byteboozer2/cruncher.rb, line 95
def crunch
  @result if crunch!
end
crunch!() click to toggle source
# File lib/byteboozer2/cruncher.rb, line 31
def crunch!
  @ibuf_size = @data.length - 2

  # Load ibuf and clear context
  @ibuf     = @data[2..]
  @context  = Array.new(@ibuf_size) { new_node }
  @link     = Array.new(@ibuf_size) { 0 }
  @rle_info = Array.new(@ibuf_size) { OpenStruct.new(value: 0, value_after: 0, length: 0) }

  setup_help_structures
  find_matches
  @obuf = Array.new(MEM_SIZE) { 0 }
  margin = write_output

  pack_len = @put
  file_len = @put
  decr_len = 0
  if @executable
    decr_len = DECRUNCHER_LENGTH
    file_len += decr_len + 2
  else
    file_len += 4
  end

  @result = Array.new(file_len) { 0 }

  if @executable
    start_address  = 0x10000 - pack_len
    transf_address = file_len + 0x6ff

    decr_code[0x1f] = transf_address & 0xff # Transfer from...
    decr_code[0x20] = transf_address >> 8
    decr_code[0xbc] = start_address & 0xff # Depack from...
    decr_code[0xbd] = start_address >> 8
    decr_code[0x85] = @data[0] # Depack to...
    decr_code[0x86] = @data[1]
    decr_code[0xca] = @address & 0xff # Jump to...
    decr_code[0xcb] = @address >> 8

    @result[0] = 0x01
    @result[1] = 0x08

    @result[2, decr_len] = decr_code

    @result[2 + decr_len, @put] = @obuf[0, @put]
  else # Not executable...
    # Experimantal decision of start address
    # start_address = 0xfffa - pack_len - 2
    start_address = (@data[1] << 8) | @data[0]
    start_address += (@ibuf_size - pack_len - 2 + margin)

    start_address = @address - pack_len - 2 if @relocated

    @result[0] = start_address & 0xff # Load address
    @result[1] = start_address >> 8
    @result[2] = @data[0] # Depack to address
    @result[3] = @data[1]

    @result[4, @put] = @obuf[0, @put]
  end

  true
end

Private Instance Methods

calculate_cost_of_literal(old_cost, lit_len) click to toggle source
# File lib/byteboozer2/cruncher.rb, line 165
def calculate_cost_of_literal(old_cost, lit_len)
  new_cost = old_cost + 8

  # FIXME, what if lit_len > 255?
  #
  # FIXME, cost model for literals does not work
  # Quick wins on short matches are prioritized before a longer
  # literal run, which in the end results in a worse result
  # Most obvious on files hard to crunch
  case lit_len
  when 1 then new_cost += 1
  when 128 then new_cost += 1
  when 2 then new_cost += 2
  when 4 then new_cost += 2
  when 8 then new_cost += 2
  when 16 then new_cost += 2
  when 32 then new_cost += 2
  when 64 then new_cost += 2
  end

  new_cost
end
calculate_cost_of_match(len, offset) click to toggle source
# File lib/byteboozer2/cruncher.rb, line 188
def calculate_cost_of_match(len, offset)
  cost = 1 # Copy-bit
  cost += cost_of_length(len - 1)
  cost += 2 # num offset bits
  cost += cost_of_offset(offset - 1, len - 1)
  cost
end
cond_long_0(o) click to toggle source
# File lib/byteboozer2/cruncher.rb, line 229
def cond_long_0(o)
  o >= 0 && o < LEN_LONG_0
end
cond_long_1(o) click to toggle source
# File lib/byteboozer2/cruncher.rb, line 233
def cond_long_1(o)
  o >= LEN_LONG_0 && o < LEN_LONG_1
end
cond_long_2(o) click to toggle source
# File lib/byteboozer2/cruncher.rb, line 237
def cond_long_2(o)
  o >= LEN_LONG_1 && o < LEN_LONG_2
end
cond_long_3(o) click to toggle source
# File lib/byteboozer2/cruncher.rb, line 241
def cond_long_3(o)
  o >= LEN_LONG_2 && o < LEN_LONG_3
end
cond_short_0(o) click to toggle source
# File lib/byteboozer2/cruncher.rb, line 213
def cond_short_0(o)
  o >= 0 && o < LEN_SHORT_0
end
cond_short_1(o) click to toggle source
# File lib/byteboozer2/cruncher.rb, line 217
def cond_short_1(o)
  o >= LEN_SHORT_0 && o < LEN_SHORT_1
end
cond_short_2(o) click to toggle source
# File lib/byteboozer2/cruncher.rb, line 221
def cond_short_2(o)
  o >= LEN_SHORT_1 && o < LEN_SHORT_2
end
cond_short_3(o) click to toggle source
# File lib/byteboozer2/cruncher.rb, line 225
def cond_short_3(o)
  o >= LEN_SHORT_2 && o < LEN_SHORT_3
end
cost_of_length(len) click to toggle source
# File lib/byteboozer2/cruncher.rb, line 142
def cost_of_length(len)
  if len == 1
    1
  elsif len >= 2 && len <= 3
    3
  elsif len >= 4 && len <= 7
    5
  elsif len >= 8 && len <= 15
    7
  elsif len >= 16 && len <= 31
    9
  elsif len >= 32 && len <= 63
    11
  elsif len >= 64 && len <= 127
    13
  elsif len >= 128 && len <= 255
    14
  else
    ByteBoozer2.logger.warn "cost_of_length got wrong value: #{len}"
    10_000
  end
end
cost_of_offset(offset, len) click to toggle source
# File lib/byteboozer2/cruncher.rb, line 196
def cost_of_offset(offset, len)
  if len == 1
    return NUM_BITS_SHORT_0 if cond_short_0(offset)
    return NUM_BITS_SHORT_1 if cond_short_1(offset)
    return NUM_BITS_SHORT_2 if cond_short_2(offset)
    return NUM_BITS_SHORT_3 if cond_short_3(offset)
  else
    return NUM_BITS_LONG_0 if cond_long_0(offset)
    return NUM_BITS_LONG_1 if cond_long_1(offset)
    return NUM_BITS_LONG_2 if cond_long_2(offset)
    return NUM_BITS_LONG_3 if cond_long_3(offset)
  end

  ByteBoozer2.logger.warn "cost_of_offset got wrong offset: #{offset}"
  10_000
end
decr_code() click to toggle source
# File lib/byteboozer2/cruncher.rb, line 245
def decr_code
  @decr_code ||= DECRUNCHER.dup
end
find_matches() click to toggle source
# File lib/byteboozer2/cruncher.rb, line 249
def find_matches
  matches = Array.new(256) { OpenStruct.new(length: 0, offset: 0) }

  last_node = new_node

  get = @ibuf_size - 1
  cur = @ibuf[get]

  while get >= 0
    # Clear matches for current position
    matches.each do |match|
      match.length = 0
      match.offset = 0
    end

    cur = (cur << 8) & 0xffff # Table 65536 lookup
    cur |= @ibuf[get - 1] if get.positive?
    scn = @first[cur]
    scn = @link[scn]

    longest_match = 0

    if @rle_info[get].length.zero? # No RLE-match here...
      # Scan until start of file, or max offset
      while get - scn <= MAX_OFFSET && scn.positive? && longest_match < 255
        # OK, we have a match of length 2 or longer, but max 255 or file start
        len = 2
        len += 1 while len < 255 && scn >= len && @ibuf[scn - len] == @ibuf[get - len]

        # Calc offset
        offset = get - scn

        # Store match only if it's the longest so far
        if len > longest_match
          longest_match = len

          # Store the match only if first (= best) of this length
          while len >= 2 && matches[len].length.zero?
            # If len == 2, check against short offset!
            if len > 2 || (len == 2 && offset <= MAX_OFFSET_SHORT)
              matches[len].length = len
              matches[len].offset = get - scn
            end

            len -= 1
          end
        end

        scn = @link[scn] # Table 65535 lookup
      end

      @first[cur] = @link[@first[cur]] # Waste first entry
    else # if RLE-match...
      rle_len = @rle_info[get].length
      rle_val_after = @rle_info[get].value_after

      # First match with self-RLE, which is always one byte shorter than the RLE itself
      len = rle_len - 1
      if len > 1
        len = 255 if len > 255
        longest_match = len

        # Store the match
        while len >= 2
          matches[len].length = len
          matches[len].offset = 1

          len -= 1
        end
      end

      # Search for more RLE-matches, scan until start of file, or max offset...
      while get - scn <= MAX_OFFSET && scn.positive? && longest_match < 255

        # Check for longer matches with same value and after...
        # FIXME: That is not what it does, is it?!
        if @rle_info[scn].length > longest_match && rle_len > longest_match
          offset = get - scn
          len = @rle_info[scn].length

          len = rle_len if len > rle_len

          if len > 2 || (len == 2 && offset <= MAX_OFFSET_SHORT)
            matches[len].length = len
            matches[len].offset = offset

            longest_match = len
          end
        end

        # Check for matches beyond the RLE...
        if @rle_info[scn].length >= rle_len && @rle_info[scn].value_after == rle_val_after

          # Here is a match that goes beyond the RLE...
          # Find out correct offset to use value_after, then search further to see if more bytes equal
          len = rle_len
          offset = get - scn + @rle_info[scn].length - rle_len

          if offset <= MAX_OFFSET
            len += 1 while len < 255 && get >= offset + len && @ibuf[get - offset - len] == @ibuf[get - len]

            if len > longest_match
              longest_match = len

              # Store the match only if first (= best) of this length
              while len >= 2 && matches[len].length.zero?
                # If len == 2, check against short offset!
                if len > 2 || (len == 2 && offset <= MAX_OFFSET_SHORT)
                  matches[len].length = len
                  matches[len].offset = offset
                end

                len -= 1
              end
            end
          end
        end

        scn = @link[scn] # Table 65535 lookup
      end

      if @rle_info[get].length > 2
        # Expand RLE to next position
        @rle_info[get - 1].length = @rle_info[get].length - 1
        @rle_info[get - 1].value = @rle_info[get].value
        @rle_info[get - 1].value_after = @rle_info[get].value_after
      else
        # End of RLE, advance link
        @first[cur] = @link[@first[cur]] # Waste first entry
      end
    end

    # Now that we have all matches from this position, visit all nodes reached by the matches
    255.downto(1).to_a.each do |i|
      # Find all matches we stored
      len = matches[i].length
      offset = matches[i].offset

      next if len.zero?

      target_i = get - len + 1
      target = @context[target_i]

      # Calculate cost for this jump
      current_cost = last_node.cost
      current_cost += calculate_cost_of_match(len, offset)

      # If this match is first or cheapest way to get here, then update node
      next if target.cost != 0 && target.cost <= current_cost

      target.cost = current_cost
      target.next = get + 1
      target.lit_len = 0
      target.offset = offset
    end

    # Calc the cost for this node if using one more literal
    lit_len = last_node.lit_len + 1
    lit_cost = calculate_cost_of_literal(last_node.cost, lit_len)

    # If literal run is first or cheapest way to get here, then update node
    this = @context[get]
    if this.cost.zero? || this.cost >= lit_cost
      this.cost = lit_cost
      this.next = get + 1
      this.lit_len = lit_len
    end

    last_node.cost = this.cost
    last_node.next = this.next
    last_node.lit_len = this.lit_len

    # Loop to the next position
    get -= 1
  end
end
new_node() click to toggle source
# File lib/byteboozer2/cruncher.rb, line 426
def new_node
  OpenStruct.new(cost: 0, next: 0, lit_len: 0, offset: 0)
end
setup_help_structures() click to toggle source
# File lib/byteboozer2/cruncher.rb, line 430
def setup_help_structures
  # Setup RLE-info
  get = @ibuf_size - 1
  while get.positive?

    cur = @ibuf[get]
    if cur == @ibuf[get - 1]

      len = 2
      len += 1 while get >= len && cur == @ibuf[get - len]

      @rle_info[get].length = len
      @rle_info[get].value_after = get >= len ? @ibuf[get - len] : cur # Avoid accessing @ibuf[-1]

      get -= len
    else
      get -= 1
    end
  end

  # Setup linked list
  @first = Array.new(MEM_SIZE) { 0 }
  @last = Array.new(MEM_SIZE) { 0 }

  get = @ibuf_size - 1
  cur = @ibuf[get]

  while get.positive?
    cur = ((cur << 8) | @ibuf[get - 1]) & 0xffff

    if @first[cur].zero?
      @first[cur] = @last[cur] = get
    else
      @link[@last[cur]] = get
      @last[cur] = get
    end

    get -= @rle_info[get].length.zero? ? 1 : @rle_info[get].length - 1 # if RLE-match...
  end
end
wbit(bit) click to toggle source
# File lib/byteboozer2/cruncher.rb, line 471
def wbit(bit)
  if @cur_cnt.zero?
    @obuf[@cur_index] = @cur_byte
    @cur_index = @put
    @cur_cnt = 8
    @cur_byte = 0
    @put += 1
  end

  @cur_byte <<= 1
  @cur_byte |= bit & 1
  @cur_cnt -= 1
end
wbyte(b) click to toggle source
# File lib/byteboozer2/cruncher.rb, line 485
def wbyte(b)
  @obuf[@put] = b
  @put += 1
end
wbytes(get, len) click to toggle source
# File lib/byteboozer2/cruncher.rb, line 490
def wbytes(get, len)
  (0..len - 1).each do
    wbyte(@ibuf[get])
    get += 1
  end
end
wflush() click to toggle source
# File lib/byteboozer2/cruncher.rb, line 497
def wflush
  while @cur_cnt != 0
    @cur_byte <<= 1
    @cur_cnt -= 1
  end
  @obuf[@cur_index] = @cur_byte
end
wlength(len) click to toggle source
# File lib/byteboozer2/cruncher.rb, line 505
def wlength(len)
  # return if len.zero? # Should never happen

  bit = 0x80
  bit >>= 1 while (len & bit).zero?

  while bit > 1
    wbit(1)
    bit >>= 1
    wbit((len & bit).zero? ? 0 : 1)
  end

  wbit(0) if len < 0x80
end
woffset(offset, len) click to toggle source
# File lib/byteboozer2/cruncher.rb, line 520
def woffset(offset, len)
  i = 0
  n = 0

  if len == 1
    if cond_short_0(offset)
      i = 0
      n = NUM_BITS_SHORT_0
    end
    if cond_short_1(offset)
      i = 1
      n = NUM_BITS_SHORT_1
    end
    if cond_short_2(offset)
      i = 2
      n = NUM_BITS_SHORT_2
    end
    if cond_short_3(offset)
      i = 3
      n = NUM_BITS_SHORT_3
    end
  else
    if cond_long_0(offset)
      i = 0
      n = NUM_BITS_LONG_0
    end
    if cond_long_1(offset)
      i = 1
      n = NUM_BITS_LONG_1
    end
    if cond_long_2(offset)
      i = 2
      n = NUM_BITS_LONG_2
    end
    if cond_long_3(offset)
      i = 3
      n = NUM_BITS_LONG_3
    end
  end

  # First write number of bits
  wbit((i & 2).zero? ? 0 : 1)
  wbit((i & 1).zero? ? 0 : 1)

  b = 1 << n
  if n >= 8 # Offset is 2 bytes

    # Then write the bits less than 8
    while b > 0x100
      b >>= 1
      wbit((b & offset).zero? ? 0 : 1)
    end

    # Finally write a whole byte, if necessary
    wbyte(offset & 255 ^ 255) # Inverted (!)
    # offset >>= 8

  else # Offset is 1 byte

    # Then write the bits less than 8
    while b > 1
      b >>= 1
      wbit((b & offset).zero? ? 1 : 0) # Inverted (!)
    end
  end
end
write_output() click to toggle source
# File lib/byteboozer2/cruncher.rb, line 587
def write_output
  @put = 0

  @cur_byte = 0
  @cur_cnt = 8
  @cur_index = @put
  @put += 1

  max_diff = 0

  need_copy_bit = true

  i = 0
  while i < @ibuf_size
    link = @context[i].next
    # cost = @context[i].cost
    lit_len = @context[i].lit_len
    offset = @context[i].offset

    if lit_len.zero?
      # Put match
      len = link - i

      ByteBoozer2.logger.debug format('$%<i>04x: Mat(%<len>i, %<offset>i)', i: i, len: len, offset: offset)

      wbit(1) if need_copy_bit
      wlength(len - 1)
      woffset(offset - 1, len - 1)

      i = link

      need_copy_bit = true
    else
      # Put literal
      need_copy_bit = false

      while lit_len.positive?
        len = lit_len < 255 ? lit_len : 255

        ByteBoozer2.logger.debug format('$%<i>04x: Lit(%<len>i)', i: i, len: len)

        wbit(0)
        wlength(len)
        wbytes(i, len)

        need_copy_bit = true if lit_len == 255

        lit_len -= len
        i += len
      end
    end

    max_diff = i - @put if i - @put > max_diff
  end

  wbit(1)
  wlength(0xff)
  wflush

  max_diff - i + @put
end