class Glaemscribe::API::TranscriptionProcessor
Attributes
mode[R]
rule_groups[R]
Public Class Methods
new(mode)
click to toggle source
# File lib/api/transcription_processor.rb, line 30
# Builds a processor attached to +mode+ with an empty set of rule groups.
#
# @param mode the owning mode object; stored untouched and exposed
#   through the +mode+ reader
def initialize(mode)
  @mode = mode
  # Rule groups start empty; they are looked up through the +rule_groups+ reader.
  @rule_groups = {}
end
Public Instance Methods
add_subrule(sub_rule)
click to toggle source
# File lib/api/transcription_processor.rb, line 35
# Registers one sub-rule in the transcription tree.
#
# The sub-rule's source combination is joined into a single string that
# serves as the tree path; its destination combination is stored under
# that path.
#
# @param sub_rule an object responding to +src_combination+ and
#   +dst_combination+
# @return whatever +add_subpath+ returns
def add_subrule(sub_rule)
  source_key = sub_rule.src_combination.join("")
  replacement = sub_rule.dst_combination
  @transcription_tree.add_subpath(source_key, replacement)
end
apply(l)
click to toggle source
# File lib/api/transcription_processor.rb, line 76
# Transcribes the string +l+ into a flat list of tokens.
#
# The input is scanned character by character and cut into words:
# * " " and "\t" end the current word and emit "*SPACE";
# * "\n" ends the current word and emits "*LF";
# * "\r" is dropped;
# * any other character is looked up in @in_charset, and a word also ends
#   when that lookup differs from the one of the word being built.
#
# Every word (including an empty one, e.g. between two consecutive
# separators) is handed to +transcribe_word+ and its tokens are appended
# in order.
#
# @param l [String] the text to transcribe
# @return [Array] the tokens produced by +transcribe_word+, interleaved
#   with the "*SPACE" / "*LF" markers
def apply(l)
  tokens = []
  current_group = nil
  # Fresh, unfrozen buffer so characters can be appended in place.
  pending = "".dup

  l.each_char do |c|
    case c
    when " ", "\t"
      # concat / << grow the result in place instead of copying it each time.
      tokens.concat(transcribe_word(pending))
      tokens << "*SPACE"
      pending = "".dup
    when "\r"
      # Carriage returns are ignored.
    when "\n"
      tokens.concat(transcribe_word(pending))
      tokens << "*LF"
      pending = "".dup
    else
      c_group = @in_charset[c]
      if c_group == current_group
        pending << c
      else
        # The group changed: flush what was accumulated and start a new
        # word with this character.
        tokens.concat(transcribe_word(pending))
        current_group = c_group
        pending = c.dup
      end
    end
  end

  # Flush whatever is left after the last character.
  tokens.concat(transcribe_word(pending))
  tokens
end
finalize(trans_options)
click to toggle source
# File lib/api/transcription_processor.rb, line 40
# Prepares the processor: resets the transcription tree, finalizes every
# rule group, builds the input charset and loads all sub-rules into the
# tree.
#
# @param trans_options forwarded unchanged to each rule group's +finalize+
def finalize(trans_options)
  @errors = []
  @transcription_tree = TranscriptionTreeNode.new(nil, nil)

  # The word-boundary and word-breaker markers are seeded into the tree
  # with a single empty-string token as their destination.
  @transcription_tree.add_subpath(WORD_BOUNDARY_TREE, [""])
  @transcription_tree.add_subpath(WORD_BREAKER, [""])

  rule_groups.each_value { |group| group.finalize(trans_options) }

  # Input charset: maps each input character to the value its rule group
  # declares for it. A character claimed a second time keeps its first
  # entry and an error is recorded on the mode.
  @in_charset = {}
  rule_groups.each do |group_name, group|
    group.in_charset.each do |char, char_group|
      owner = @in_charset[char]
      unless owner
        @in_charset[char] = char_group
        next
      end

      mode.errors << Glaeml::Error.new(-1,"Group #{group_name} uses input character #{char} which is also used by group #{owner.name}. Input charsets should not intersect between groups.")
    end
  end

  # Transcription tree: one path per sub-rule of every rule of every group.
  rule_groups.each_value do |group|
    group.rules.each do |rule|
      rule.sub_rules.each { |sub_rule| add_subrule(sub_rule) }
    end
  end
end
transcribe_word(word)
click to toggle source
# File lib/api/transcription_processor.rb, line 111
# Transcribes a single word into a list of tokens.
#
# The word is wrapped in WORD_BOUNDARY_TREE on both sides, then repeatedly
# handed to the transcription tree. Each call yields a list of tokens and
# the number of leading characters it consumed; those characters are
# dropped and the loop continues until nothing is left.
#
# @param word [String] the word to transcribe (may be empty)
# @return [Array] the tokens, in order
# @raise [RuntimeError] if the tree reports that it consumed no characters,
#   which would otherwise make this loop spin forever
def transcribe_word(word)
  tokens = []
  remaining = WORD_BOUNDARY_TREE + word + WORD_BOUNDARY_TREE

  until remaining.empty?
    produced, consumed = @transcription_tree.transcribe(remaining)

    # Without progress the same input would be submitted again endlessly.
    if consumed.nil? || consumed.zero?
      raise "Transcription tree consumed no input at #{remaining.inspect}"
    end

    # Append in place rather than rebuilding the token list on every step.
    tokens.concat(produced)
    remaining = remaining[consumed..-1]
  end

  tokens
end