module Annex29::WordSegmentation
Attributes
_segmenter_eof_trans[RW]
_segmenter_from_state_actions[RW]
_segmenter_index_offsets[RW]
_segmenter_indicies[RW]
_segmenter_key_offsets[RW]
_segmenter_range_lengths[RW]
_segmenter_single_lengths[RW]
_segmenter_to_state_actions[RW]
_segmenter_trans_actions[RW]
_segmenter_trans_keys[RW]
_segmenter_trans_targs[RW]
segmenter_en_main[RW]
segmenter_error[RW]
segmenter_first_final[RW]
segmenter_start[RW]
Public Class Methods
call(input)
click to toggle source
# File lib/annex_29/word_segmentation.rb, line 2809
#
# Drives the table-based `segmenter` state machine across +input+ and
# gathers every token the machine emits.
#
# The driver below follows the shape of scanner code generated from
# lib/annex_29/word_segmentation.rl (looks like Ragel output -- confirm
# before hand-editing; the .rl file is presumably the source of truth).
#
# input:: an object responding to +each_char+; every character is turned
#         into its integer code via +ord+ before scanning.
#
# Returns an Array of Strings. Each string is rebuilt from the slice of
# code points between the current token start and token end using
# <tt>pack("U*")</tt>.
def call(input)
  data  = input.each_char.map(&:ord)
  eof   = data.length
  words = []

  pos   = 0                 # read cursor into data
  pe    = data.length       # end of the buffer being scanned
  cs    = segmenter_start   # current state
  ts    = nil               # token start (set by from-state action 4)
  te    = nil               # token end (exclusive)
  trans = nil               # transition chosen for the current step

  # Appends the current token (code points ts...te) to the result.
  emit = -> { words << data[ts...te].pack("U*") }

  # :resume    -> look up a transition for data[pos], then take it
  # :eof_trans -> take the transition already stored in `trans`
  # :test_eof  -> skip straight to the end-of-input check
  stage = pos == pe ? :test_eof : :resume

  while true
    if stage == :resume
      ts = pos if _segmenter_from_state_actions[cs] == 4

      keys    = _segmenter_key_offsets[cs]
      trans   = _segmenter_index_offsets[cs]
      matched = false

      # Binary search over this state's single-value keys.
      span = _segmenter_single_lengths[cs]
      if span > 0
        lo = keys
        hi = keys + span - 1
        while lo <= hi
          mid = lo + ((hi - lo) >> 1)
          if data[pos].ord < _segmenter_trans_keys[mid]
            hi = mid - 1
          elsif data[pos].ord > _segmenter_trans_keys[mid]
            lo = mid + 1
          else
            trans += mid - keys
            matched = true
            break
          end
        end
        unless matched
          # No single key matched: step past them to the range keys.
          keys  += span
          trans += span
        end
      end

      # Binary search over this state's [low, high] key pairs; entries are
      # stored two per range, hence the stride of 2 and the `& ~1` that
      # keeps `mid` on a pair boundary.
      unless matched
        span = _segmenter_range_lengths[cs]
        if span > 0
          lo = keys
          hi = keys + (span << 1) - 2
          while lo <= hi
            mid = lo + (((hi - lo) >> 1) & ~1)
            if data[pos].ord < _segmenter_trans_keys[mid]
              hi = mid - 2
            elsif data[pos].ord > _segmenter_trans_keys[mid + 1]
              lo = mid + 2
            else
              trans += (mid - keys) >> 1
              matched = true
              break
            end
          end
          # Nothing matched: land on the slot that follows the ranges.
          trans += span unless matched
        end
      end

      trans = _segmenter_indicies[trans]
    end

    unless stage == :test_eof
      cs = _segmenter_trans_targs[trans]

      case _segmenter_trans_actions[trans]
      when 2
        te = pos + 1
      when 5
        te = pos + 1
        emit.call
      when 6
        # Token ends before the current element; back up so that the
        # increment below revisits it.
        te = pos
        pos -= 1
        emit.call
      when 1
        # Rewind to the last recorded token end, then emit.
        pos = te - 1
        emit.call
      end

      ts = nil if _segmenter_to_state_actions[cs] == 3

      pos += 1
      if pos != pe
        stage = :resume
        next
      end
    end

    # End of input: a state may still have a pending EOF transition.
    if pos == eof && _segmenter_eof_trans[cs] > 0
      trans = _segmenter_eof_trans[cs] - 1
      stage = :eof_trans
      next
    end

    break
  end

  words
end