class Migemo
Attributes
dict_cache[RW]
insertion[RW]
optimization[RW]
regex_dict[RW]
type[RW]
user_dict[RW]
with_paren[RW]
Public Class Methods
new(pattern, dict=nil)
click to toggle source
# File lib/migemo.rb, line 21
#
# Sets up a matcher for +pattern+ with default rendering options.
#
# pattern:: the query; stored unchanged in @pattern.
# dict::    selects the static dictionary:
#           * nil    - a MigemoStaticDict built from data/migemo-dict,
#                      located relative to this source file;
#           * String - handed to MigemoStaticDict.new;
#           * other  - used directly as the dictionary object.
#
# Defaults set here: type "ruby", empty insertion, optimization 3,
# no dictionary cache, no user dictionary, no regex dictionary, and
# with_paren false.
def initialize(pattern, dict = nil)
  @static_dict =
    case dict
    when nil
      MigemoStaticDict.new(File.dirname(File.expand_path(__FILE__)) + '/../data/migemo-dict')
    when String
      MigemoStaticDict.new(dict)
    else
      dict
    end

  @type = "ruby"
  @pattern = pattern
  @insertion = ""
  @optimization = 3
  @dict_cache = nil
  @user_dict = nil
  @regex_dict = nil
  @with_paren = false
end
Public Instance Methods
lookup()
click to toggle source
# File lib/migemo.rb, line 47
#
# Produces the regex tree for @pattern.
#
# An empty pattern yields a fresh, empty RegexAlternation. Otherwise the
# dictionary cache is consulted first when one is configured, falling
# back to lookup0 on a miss. When a user dictionary is configured, its
# entries are appended to the result.
def lookup
  return RegexAlternation.new if @pattern == ""

  # A falsy cache (or a cache miss) falls through to the full lookup.
  tree = (@dict_cache && lookup_cache) || lookup0

  lookup_user_dict.each { |entry| tree.push(entry) } if @user_dict

  tree
end
regex()
click to toggle source
# File lib/migemo.rb, line 66
#
# Renders the looked-up regex tree to a string for the configured
# @type and @insertion, honouring @with_paren. When a regex dictionary
# is configured, its entries for the pattern are joined onto the
# rendered string by the renderer.
def regex
  renderer = RegexRendererFactory.new(lookup, @type, @insertion)
  renderer.with_paren = @with_paren

  rendered = renderer.render
  return rendered unless @regex_dict

  renderer.join_regexes(rendered, lookup_regex_dict)
end
regex_tree()
click to toggle source
# File lib/migemo.rb, line 62
#
# Returns the unrendered regex tree for the pattern; this simply
# delegates to #lookup.
def regex_tree
  lookup
end
Private Instance Methods
expand_kanas()
click to toggle source
`do'   => (ど)
`d'    => (っ だ ぢ づ で ど)
`sh'   => (しゃ し しゅ しぇ しょ)
`don'  => (どん どな どに どぬ どね どの どっ)  # special case 1
`nodd' => (のっ)                               # special case 2
`doc'  => (どっ どち)                          # special case 3
`dox'  => (どっ どゃ どゅ どょ)                # special case 4
`essy' => (えっしゃ えっしゅ えっしょ)         # special case 5
`ny'   => (にゃ にゅ にょ)                     # special case 6
# File lib/migemo.rb, line 85
#
# Expands @pattern into the kana strings it could be a prefix of.
#
# The pattern is downcased, converted with String#to_kana, and split
# into everything-but-the-last-character plus the last character. What
# happens next depends on that last character:
#
# * a consonant      - the unfinished syllable is expanded (see the
#                      numbered special cases below);
# * "ん"             - the kana itself, plus every "n" + vowel reading
#                      and a trailing "っ";
# * anything else    - the kana string alone.
#
# Returns a sorted Array of candidates, or [] for an empty pattern.
def expand_kanas
  kana = @pattern.downcase.to_kana
  /^(.*)(.)$/ =~ kana
  prefix = $1
  tail = $2
  return [] if tail.nil?

  candidates = []

  if tail.consonant?
    if /^(.*)(.)$/ =~ prefix && $2.consonant?
      base = $1
      penult = $2

      if tail == penult # special case 2
        candidates.push(base + "っ")
      elsif /^(.*)(.)$/ =~ base && penult == $2 && tail.consonant? # special case 5
        # Capture the match now so the block does not depend on $1.
        stem = $1
        candidates.concat(
          (penult + tail).expand_consonant.map { |romaji| stem + "っ" + romaji.to_kana }
        )
      else
        candidates.concat(
          (penult + tail).expand_consonant.map { |romaji| base + romaji.to_kana }
        )
      end
    elsif /^(.*?)(n?)ny$/ =~ @pattern && $2 == "" # special case 6
      base = $1
      candidates.concat(
        "ny".expand_consonant.map { |romaji| base + romaji.to_kana }
      )
    else
      endings = tail.expand_consonant
      endings.push("xtsu")
      if tail == "c" # special case 3
        endings.push("chi")
      elsif tail == "x" # special case 4
        endings.push("xya", "xyu", "xyo", "xwa")
      end
      candidates.concat(endings.map { |romaji| prefix + romaji.to_kana })
    end
  elsif tail == "ん" # special case 1
    candidates.push(kana)
    candidates.concat(
      ("n".expand_consonant + ["っ"]).map { |romaji| prefix + romaji.to_kana }
    )
  else
    candidates.push(kana)
  end

  candidates.sort
end
expand_words(dict, pattern)
click to toggle source
`めし' => (飯 飯合 雌蘂 雌蕊 飯櫃 目下 飯粒 召使 飯屋)
# File lib/migemo.rb, line 136
#
# Collects every word that +dict+ associates with +pattern+.
#
# dict::    any object whose #lookup(pattern) yields items responding
#           to #values.
# pattern:: the key to look up; must not be nil.
#
# Returns a flat Array of the values of all yielded items, in the order
# they were yielded (empty when nothing is yielded).
# Raises RuntimeError when +pattern+ is nil.
def expand_words(dict, pattern)
  raise "pattern must not be nil" if pattern.nil?

  words = []
  # concat appends in place rather than rebuilding the array per item.
  dict.lookup(pattern) { |item| words.concat(item.values) }
  words
end
lookup0()
click to toggle source
# File lib/migemo.rb, line 149
#
# Performs the full, uncached lookup for @pattern against the static
# dictionary and returns the compiled regex tree.
#
# Pushed into the compiler, in order: the pattern, its full-width form,
# then for each kana expansion the kana, its katakana form and the
# dictionary words for it, and finally the dictionary words for the raw
# pattern. Duplicates are removed, and the tree is optimized when
# @optimization is set.
def lookup0
  compiler = RegexCompiler.new

  compiler.push(@pattern)
  compiler.push(@pattern.to_fullwidth)

  expand_kanas.each do |kana|
    compiler.push(kana)
    compiler.push(kana.to_katakana)
    expand_words(@static_dict, kana).each { |word| compiler.push(word) }
  end

  expand_words(@static_dict, @pattern).each { |word| compiler.push(word) }

  compiler.uniq
  compiler.optimize(@optimization) if @optimization
  compiler.regex
end
lookup_cache()
click to toggle source
# File lib/migemo.rb, line 145
#
# Asks the dictionary cache for @pattern and returns whatever it
# answers. The caller (#lookup) treats a falsy answer as a cache miss.
def lookup_cache
  cache = @dict_cache
  cache.lookup(@pattern)
end
lookup_regex_dict()
click to toggle source
# File lib/migemo.rb, line 175
#
# Collects the regex strings that the regex dictionary associates with
# @pattern.
#
# Returns a flat Array of the values of every item the dictionary
# yields, in yield order. Returns [] when no regex dictionary is
# configured or when nothing is yielded.
def lookup_regex_dict
  return [] unless @regex_dict

  regexes = []
  # concat appends in place rather than rebuilding the array per item.
  @regex_dict.lookup(@pattern) { |item| regexes.concat(item.values) }
  regexes
end
lookup_user_dict()
click to toggle source
# File lib/migemo.rb, line 164
#
# Looks @pattern up in the user dictionary and returns the compiled
# regex tree.
#
# Words are gathered first for each kana expansion of the pattern and
# then for the raw pattern itself. Duplicates are removed, and the tree
# is optimized when @optimization is set.
def lookup_user_dict
  compiler = RegexCompiler.new

  # Kana expansions first, raw pattern last.
  (expand_kanas + [@pattern]).each do |key|
    expand_words(@user_dict, key).each { |word| compiler.push(word) }
  end

  compiler.uniq
  compiler.optimize(@optimization) if @optimization
  compiler.regex
end