class MigemoRegex::RegexCompiler
Attributes
regex[R]
Public Class Methods
new()
click to toggle source
# File lib/migemo-regex.rb, line 44
# Creates a compiler whose +regex+ starts out as a new, empty
# RegexAlternation.
def initialize
  @regex = RegexAlternation.new
end
Public Instance Methods
optimize(level)
click to toggle source
# File lib/migemo-regex.rb, line 59
# Runs the optimization passes selected by +level+ over @regex.
#
# The passes are cumulative: optimize1 runs when level >= 1, optimize2 when
# level >= 2 and optimize3 when level >= 3. Each pass receives the result of
# the previous one, which is stored back into @regex.
#
# Returns the regex produced by optimize3 when level >= 3, otherwise nil.
def optimize(level)
  if level >= 1
    @regex = optimize1(@regex)
  end
  if level >= 2
    @regex = optimize2(@regex)
  end
  if level >= 3
    @regex = optimize3(@regex)
  end
end
push(item)
click to toggle source
# File lib/migemo-regex.rb, line 49
# Appends +item+ to @regex.
#
# nil, false and the empty string are ignored. Returns whatever
# @regex.push returns, or nil when the item was ignored.
def push(item)
  return unless item && item != ""

  @regex.push(item)
end
uniq()
click to toggle source
# File lib/migemo-regex.rb, line 55
# Removes duplicate entries from @regex in place and returns @regex.
#
# Calling the non-destructive +uniq+ here would build a deduplicated copy and
# throw it away, leaving @regex untouched. +uniq!+ returns nil when nothing
# was removed, so @regex is returned explicitly to keep the return value a
# deduplicated collection in every case.
#
# NOTE(review): assumes @regex is Array-like and responds to uniq! - confirm
# against RegexAlternation.
def uniq
  @regex.uniq!
  @regex
end
Private Instance Methods
optimize1(regex)
click to toggle source
(運|運動|運転|日本|日本語) => (運|日本)
# File lib/migemo-regex.rb, line 68
# Drops every word that merely extends another word in the list, e.g.
#   (運|運動|運転|日本|日本語) => (運|日本)
#
# +regex+ is a collection of strings. The words are sorted first (by their
# EUC-JP encoded form when Encoding is available, by plain String order
# otherwise) so that each word is immediately followed by its extensions.
#
# Returns the kept words in sorted order, as produced by +select+.
def optimize1(regex)
  sorted =
    if defined?(Encoding)
      regex.sort_by { |s| s.encode("EUC-JP") }
    else
      regex.sort
    end

  prefix = nil
  sorted.select do |word|
    # A literal prefix check; a "^"-anchored Regexp would also match at the
    # start of any later line inside +word+ and wrongly drop the word.
    if prefix && word.start_with?(prefix)
      false # excluded: an extension of the last kept word
    else
      prefix = word
      true # included: becomes the prefix for the following words
    end
  end
end
optimize2(regex)
click to toggle source
(あああ|ああい|ああう) => (あ(あ(あ|い|う)))
# File lib/migemo-regex.rb, line 83
# Factors a shared leading element out of neighbouring alternatives, e.g.
#   (あああ|ああい|ああう) => (あ(あ(あ|い|う)))
#
# The entries are sorted (by EUC-JP encoded form when Encoding is available)
# so that entries with an equal +first+ are adjacent. Each run of such
# entries becomes a RegexConcatnation of that shared +first+ followed by the
# recursively optimized alternation of their +rest+ values; an entry with no
# such neighbour is kept unchanged.
#
# NOTE(review): +first+ / +rest+ on the entries are defined elsewhere -
# presumably the leading character and the remainder; confirm.
#
# Returns a new RegexAlternation.
def optimize2(regex)
  pending =
    if defined?(Encoding)
      regex.sort_by { |s| s.encode("EUC-JP") }
    else
      regex.sort
    end

  result = RegexAlternation.new
  until pending.empty?
    head = pending.shift
    initial = head.first

    # Collect the tails of the following entries that share +initial+.
    tails = RegexAlternation.new
    while (candidate = pending.first) && initial == candidate.first
      tails.push(candidate.rest)
      pending.shift
    end

    if tails.empty?
      result.push(head)
      next
    end

    group = RegexConcatnation.new
    group.push(initial)
    tails.unshift(head.rest)
    group.push(optimize2(tails))
    result.push(group)
  end
  result
end
optimize3(regex)
click to toggle source
(あ|い|う|え|お) => [あいうえお]
# File lib/migemo-regex.rb, line 113
# Folds the single-character alternatives of an alternation into one
# character class, e.g.
#   (あ|い|う|え|お) => [あいうえお]
#
# When +regex+ is exactly a RegexAlternation, its one-character String
# entries are removed from it (in place) and gathered in a RegexCharClass.
# If exactly one such entry was found it is put back at the front as a plain
# string; if several were found the character class is put at the front.
# Nested RegexAlternation / RegexConcatnation entries are then optimized
# recursively.
#
# Returns the result of +map+ over the (mutated) +regex+.
def optimize3(regex)
  charclass = RegexCharClass.new
  if regex.instance_of?(RegexAlternation)
    regex.delete_if do |x|
      # \A...\z anchors the whole string; ^...$ are line anchors and would
      # also accept multi-line strings such as "a\n" or "x\ny".
      if x.instance_of?(String) && x =~ /\A.\z/
        charclass.push(x)
        true
      end
    end
  end

  if charclass.length == 1
    regex.unshift charclass.first
  elsif charclass.length > 1
    regex.unshift charclass
  end

  regex.map do |x|
    if x.instance_of?(RegexAlternation) || x.instance_of?(RegexConcatnation)
      optimize3(x)
    else
      x
    end
  end
end