class MorMor::FSA::CFSA2

Port of CFSA2.java

See the constant descriptions and other documentation in the original Java source: github.com/morfologik/morfologik-stemming/blob/master/morfologik-fsa/src/main/java/morfologik/fsa/CFSA2.java

Constants

BIT_FINAL_ARC
BIT_LAST_ARC
BIT_TARGET_NEXT
LABEL_INDEX_BITS
LABEL_INDEX_MASK
NUMBERS

Attributes

arcs[R]
mapping[R]

Public Class Methods

new(io) click to toggle source
# File lib/mormor/fsa/cfsa2.rb, line 17
def initialize(io)
  # Java's short = "network (big-endian)"
  flag_bits = io.read(2).unpack('n').first # rubocop:disable Style/UnpackFirst -- doesn't work under 2.3
  @numbers = flag_bits.allbits?(NUMBERS)

  mapping_size = io.getbyte & 0xff
  @mapping = io.read(mapping_size).unpack('c*')

  @arcs = io.read.unpack('c*')
end

Public Instance Methods

arc_label(arc) click to toggle source

Examining arcs

# File lib/mormor/fsa/cfsa2.rb, line 42
# Returns the label byte of the arc starting at offset +arc+.
#
# The bits of the arc's first byte selected by LABEL_INDEX_MASK form an
# index into +mapping+. An index of zero means the label is not mapped and
# is instead stored in the byte right after the first one.
def arc_label(arc)
  mapped = arcs[arc] & LABEL_INDEX_MASK
  return arcs[arc + 1] unless mapped > 0

  mapping[mapped]
end
end_node(arc) click to toggle source
# File lib/mormor/fsa/cfsa2.rb, line 37
# Returns the offset of the node that +arc+ leads to.
# Public wrapper around the private #destination_node_offset.
def end_node(arc)
  destination_node_offset(arc)
end
final_arc?(arc) click to toggle source
# File lib/mormor/fsa/cfsa2.rb, line 55
# True when every bit of BIT_FINAL_ARC is set in the first byte of the arc
# at offset +arc+.
def final_arc?(arc)
  (arcs[arc] & BIT_FINAL_ARC) == BIT_FINAL_ARC
end
first_arc(node) click to toggle source

Navigating through arcs

# File lib/mormor/fsa/cfsa2.rb, line 33
# Returns the offset of the first arc of the node at offset +node+.
#
# Without the numbers flag the first arc starts at the node offset itself.
# With it, a variable-length integer stored at the node's start is skipped
# first (presumably the node's number data -- see the Java original).
def first_arc(node)
  return node unless numbers?

  skip_v_int(node)
end
last_arc?(arc) click to toggle source
# File lib/mormor/fsa/cfsa2.rb, line 51
# True when every bit of BIT_LAST_ARC is set in the first byte of the arc
# at offset +arc+.
def last_arc?(arc)
  (arcs[arc] & BIT_LAST_ARC) == BIT_LAST_ARC
end
root_node() click to toggle source
# File lib/mormor/fsa/cfsa2.rb, line 28
# Returns the offset of the root node: the destination of the first arc of
# the node stored at offset 0.
def root_node
  entry_arc = first_arc(0)
  destination_node_offset(entry_arc)
end
terminal_arc?(arc) click to toggle source
# File lib/mormor/fsa/cfsa2.rb, line 47
# True when the destination offset of +arc+ is zero.
def terminal_arc?(arc)
  destination_node_offset(arc) == 0
end

Private Instance Methods

destination_node_offset(arc) click to toggle source
# File lib/mormor/fsa/cfsa2.rb, line 86
# Computes the offset of the node that the arc at offset +arc+ points to.
#
# Two encodings are handled:
# * target stored explicitly -- a v-coded integer is read from the arc;
# * "target next" flag set -- nothing is stored, the target is the byte
#   right after the last arc of the state this arc belongs to.
def destination_node_offset(arc)
  unless next_set?(arc)
    # The v-coded address starts right after the flag byte when the label
    # is indexed, or one byte later when the label is stored explicitly.
    label_width = (arcs[arc] & LABEL_INDEX_MASK).zero? ? 2 : 1
    return read_v_int(arcs, arc + label_width)
  end

  # Walk forward to the last arc of this state...
  last = arc
  last = next_arc(last) until last_arc?(last)

  # ...the destination is whatever follows it.
  skip_arc(last)
end
next_set?(arc) click to toggle source
# File lib/mormor/fsa/cfsa2.rb, line 100
# True when every bit of BIT_TARGET_NEXT is set in the first byte of the
# arc at offset +arc+.
def next_set?(arc)
  (arcs[arc] & BIT_TARGET_NEXT) == BIT_TARGET_NEXT
end
numbers?() click to toggle source
# File lib/mormor/fsa/cfsa2.rb, line 63
# Returns the value of @numbers, which #initialize sets to whether the
# NUMBERS bits are present in the flags word read from the input.
def numbers?
  @numbers
end
read_v_int(array, offset) click to toggle source
# File lib/mormor/fsa/cfsa2.rb, line 72
# Decodes a variable-length integer from +array+ starting at +offset+.
#
# Each byte contributes its low 7 bits, least-significant group first.
# A negative (signed) byte means another byte follows; the first
# non-negative byte ends the number.
#
# @param array [Array<Integer>] signed bytes
# @param offset [Integer] index of the first byte of the encoded number
# @return [Integer] the decoded value
def read_v_int(array, offset)
  result = 0
  shift = 0

  loop do
    byte = array[offset]
    result |= (byte & 0x7F) << shift
    return result unless byte < 0

    offset += 1
    shift += 7
  end
end
skip_arc(offset) click to toggle source
# File lib/mormor/fsa/cfsa2.rb, line 104
# Returns the offset of the first byte after the arc that starts at +offset+.
#
# An arc is made of its flag byte, an optional explicit label byte (present
# when the label index bits are all zero), and an optional v-coded target
# address (present when BIT_TARGET_NEXT is not set).
def skip_arc(offset)
  flags = arcs[offset]

  # Flag byte, plus one more byte when the label is stored explicitly.
  cursor = offset + 1 + ((flags & LABEL_INDEX_MASK).zero? ? 1 : 0)

  # An explicit target address follows unless the target is "next".
  (flags & BIT_TARGET_NEXT).zero? ? skip_v_int(cursor) : cursor
end
skip_v_int(offset) click to toggle source
# File lib/mormor/fsa/cfsa2.rb, line 67
# Returns the offset just past the variable-length integer that starts at
# +offset+ in +arcs+: negative bytes are continuation bytes, and the first
# non-negative byte is the number's last byte.
def skip_v_int(offset)
  cursor = offset
  cursor += 1 until arcs[cursor] >= 0
  cursor + 1
end