class Glaemscribe::API::TranscriptionProcessor

Attributes

mode[R]
rule_groups[R]

Public Class Methods

new(mode)
# File lib/api/transcription_processor.rb, line 30
# Builds a processor attached to +mode+ with an empty rule-group table.
#
# @param mode the owning mode object; stored as-is in @mode
def initialize(mode)
  @mode = mode
  # Starts empty; finalize and the transcription methods read it later.
  @rule_groups = {}
end

Public Instance Methods

add_subrule(sub_rule)
# File lib/api/transcription_processor.rb, line 35
# Registers one sub rule in the transcription tree.
#
# The elements of sub_rule.src_combination are joined into a single string,
# which is used as the tree path; sub_rule.dst_combination is stored at that
# path.
#
# @param sub_rule object responding to +src_combination+ and +dst_combination+
# @return whatever @transcription_tree.add_subpath returns
def add_subrule(sub_rule)
  @transcription_tree.add_subpath(
    sub_rule.src_combination.join(""),
    sub_rule.dst_combination
  )
end
apply(l)
# File lib/api/transcription_processor.rb, line 76
# Cuts the string +l+ into words and returns the concatenated list of tokens
# obtained by passing each word to transcribe_word.
#
# Word boundaries:
# * " " and "\t" end the current word and emit a "*SPACE" token;
# * "\n" ends the current word and emits a "*LF" token;
# * "\r" is dropped;
# * any other character is looked up in @in_charset. Consecutive characters
#   mapping to the same group accumulate into one word; a change of group
#   ends the current word and starts a new one with that character.
#
# transcribe_word is called at every word end, including when the
# accumulated word is empty.
#
# @param l [String] the text to process
# @return [Array] tokens returned by transcribe_word, interleaved with the
#   "*SPACE" and "*LF" marker strings
def apply(l)
  ret              = []
  current_group    = nil
  accumulated_word = ""

  # each_char avoids materialising an intermediate array of characters, and
  # concat/<< grow +ret+ in place instead of reallocating it on every word.
  l.each_char do |c|
    case c
    when " ", "\t"
      ret.concat(transcribe_word(accumulated_word))
      ret << "*SPACE"
      accumulated_word = ""
    when "\r"
      # Ignore
    when "\n"
      ret.concat(transcribe_word(accumulated_word))
      ret << "*LF"
      accumulated_word = ""
    else
      c_group = @in_charset[c]
      if c_group == current_group
        accumulated_word += c
      else
        # Group change: flush what was accumulated so far. current_group is
        # only ever updated here; whitespace does not reset it.
        ret.concat(transcribe_word(accumulated_word))
        current_group    = c_group
        accumulated_word = c
      end
    end
  end

  # Flush whatever is left after the last character.
  ret.concat(transcribe_word(accumulated_word))
  ret
end
finalize(trans_options)
# File lib/api/transcription_processor.rb, line 40
# Prepares the processor for transcription. In order:
#
# 1. resets @errors and creates a fresh transcription tree seeded with the
#    WORD_BOUNDARY_TREE and WORD_BREAKER paths (each mapped to [""]);
# 2. finalizes every rule group with +trans_options+;
# 3. merges the groups' input charsets into @in_charset, keeping the first
#    group seen for a character and reporting every later collision on
#    mode.errors (not on @errors);
# 4. adds every sub rule of every rule of every group to the tree.
#
# @param trans_options options forwarded unchanged to each rule group's finalize
# @return the rule_groups collection (result of the last iteration)
def finalize(trans_options)
  @errors = []

  @transcription_tree = TranscriptionTreeNode.new(nil, nil)

  # Word boundary and word breaker markers both transcribe to an empty token.
  [WORD_BOUNDARY_TREE, WORD_BREAKER].each do |marker|
    @transcription_tree.add_subpath(marker, [""])
  end

  # All groups are finalized before any of their charsets or rules are read.
  rule_groups.each do |_name, group|
    group.finalize(trans_options)
  end

  # Input charset: character => owning group, first registration wins.
  @in_charset = {}
  rule_groups.each do |group_name, group|
    group.in_charset.each do |char, char_group|
      owner = @in_charset[char]
      unless owner
        @in_charset[char] = char_group
        next
      end

      mode.errors << Glaeml::Error.new(-1, "Group #{group_name} uses input character #{char} which is also used by group #{owner.name}. Input charsets should not intersect between groups.")
    end
  end

  # Transcription tree: one path per sub rule.
  rule_groups.each do |_name, group|
    group.rules.each do |rule|
      rule.sub_rules.each { |sub_rule| add_subrule(sub_rule) }
    end
  end
end
transcribe_word(word)
# File lib/api/transcription_processor.rb, line 111
# Transcribes a single word into a list of tokens.
#
# The word is wrapped in WORD_BOUNDARY_TREE on both sides, then consumed from
# the left: each call to @transcription_tree.transcribe returns the tokens
# for the head of the remaining string together with the number of
# characters it consumed.
#
# NOTE(review): the loop only terminates if transcribe always reports a
# consumed length of at least 1 - confirm against the tree implementation.
#
# @param word [String] the word to transcribe (may be empty)
# @return [Array] the tokens, in order
def transcribe_word(word)
  tokens    = []
  remaining = WORD_BOUNDARY_TREE + word + WORD_BOUNDARY_TREE

  until remaining.length == 0
    emitted, consumed = @transcription_tree.transcribe(remaining)
    remaining = remaining[consumed..-1]
    tokens.concat(emitted)
  end

  # Return token list
  tokens
end