class MigemoRegex::RegexCompiler

Attributes

regex[R]

Public Class Methods

new() click to toggle source
# File lib/migemo-regex.rb, line 44
# Sets up a new compiler. The working expression (@regex) starts out as a
# freshly constructed RegexAlternation; items are added later via #push.
def initialize
  @regex = RegexAlternation.new
end

Public Instance Methods

optimize(level) click to toggle source
# File lib/migemo-regex.rb, line 59
# Runs the optimization passes on @regex, up to and including +level+.
#
# The passes are cumulative: level 1 applies optimize1, level 2 additionally
# applies optimize2, and level 3 additionally applies optimize3. Each pass is
# handed the current @regex and its result is stored back into @regex.
#
# Returns the result of optimize3 when level >= 3, and nil otherwise.
def optimize(level)
  return unless level >= 1

  @regex = optimize1(@regex)
  return unless level >= 2

  @regex = optimize2(@regex)
  return unless level >= 3

  @regex = optimize3(@regex)
end
push(item) click to toggle source
# File lib/migemo-regex.rb, line 49
# Appends +item+ to the expression under construction.
#
# nil, false and the empty string are silently skipped. Returns whatever
# @regex.push returns when the item is added, and nil when it is skipped.
def push(item)
  return unless item && item != ""

  @regex.push(item)
end
uniq() click to toggle source
# File lib/migemo-regex.rb, line 55
# Removes duplicate entries from the expression under construction.
#
# The de-duplicated collection is assigned back to @regex, in the same way
# #optimize stores the result of each pass: a non-destructive @regex.uniq
# on its own would leave the compiler's state untouched.
#
# Returns the de-duplicated collection.
def uniq
  @regex = @regex.uniq
end

Private Instance Methods

optimize1(regex) click to toggle source
["安", "運", "運動", "運転", "日本", "日本語"]

=> ["安", "運", "日本"]

(安|運|運動|運転|日本|日本語) => (安|運|日本)

# File lib/migemo-regex.rb, line 68
# Drops every word that has another word of the list as a prefix, e.g.
# ["a", "ab", "abc", "b", "ba"] => ["a", "b"]. A prefix already matches
# everything its extensions would match, so the extensions are redundant in
# an alternation.
#
# regex - collection of String words.
#
# The words are sorted first (by their EUC-JP encoding when Encoding is
# defined, with the collection's own #sort otherwise) so that a word comes
# before the words it is a prefix of. Returns the result of #select on the
# sorted collection, i.e. the surviving words in sorted order.
def optimize1(regex)
  sorted = defined?(Encoding) ? regex.sort_by { |s| s.encode("EUC-JP") } : regex.sort
  prefix = nil
  sorted.select do |word|
    # A literal start-of-string comparison: a "^"-anchored Regexp would also
    # match right after a newline inside +word+ and wrongly exclude it.
    if prefix && word.start_with?(prefix)
      false # excluded: already covered by +prefix+
    else
      prefix = word
      true # included: becomes the prefix later words are tested against
    end
  end
end
optimize2(regex) click to toggle source

(あああ|ああい|ああう)

=> (あ(あ(あ|い|う)))

# File lib/migemo-regex.rb, line 83
# Factors out shared leading units: entries that begin with the same
# +first+ are merged into a concatenation of that shared unit followed by an
# alternation of their remainders, which is then factored recursively.
#
# regex - collection of entries responding to #first and #rest.
#         NOTE(review): on String these are not core methods; presumably
#         project extensions giving the leading unit and the remainder --
#         confirm against their definition.
#
# Returns a new RegexAlternation whose elements are either untouched entries
# or RegexConcatnation objects of the form [shared unit, optimized tails].
def optimize2(regex)
  pending = if defined?(Encoding)
              regex.sort_by { |s| s.encode("EUC-JP") }.clone
            else
              regex.sort.clone
            end
  result = RegexAlternation.new

  until pending.empty?
    head = pending.shift
    initial = head.first
    friends = RegexAlternation.new

    # Sorting put entries with the same leading unit next to each other, so
    # consume the run that directly follows +head+.
    loop do
      candidate = pending.first
      break unless candidate && initial == candidate.first

      friends.push(candidate.rest)
      pending.shift
    end

    if friends.empty?
      # Nothing shares the leading unit; keep the entry unchanged.
      result.push(head)
      next
    end

    # head's own tail goes first, ahead of the tails collected above.
    friends.unshift(head.rest)
    group = RegexConcatnation.new
    group.push(initial)
    group.push(optimize2(friends))
    result.push(group)
  end

  result
end
optimize3(regex) click to toggle source

(あ|い|う|え|お)

=> [あいうえお]

# File lib/migemo-regex.rb, line 113
# Gathers the single-character String alternatives of an alternation into
# one character class, then applies the same treatment to nested
# alternations and concatenations.
#
# regex - a RegexAlternation or RegexConcatnation; single characters are
#         only collected when it is exactly a RegexAlternation.
#
# +regex+ itself is modified via delete_if / unshift. When exactly one
# single character was found it is put back at the front as a plain string;
# when several were found, the RegexCharClass holding them is put at the
# front instead.
#
# Returns the result of regex.map with nested expressions replaced by their
# optimized form.
def optimize3(regex)
  singles = RegexCharClass.new

  if regex.instance_of?(RegexAlternation)
    regex.delete_if do |entry|
      # Only strings consisting of exactly one character are collected.
      next unless entry.instance_of?(String) && entry =~ /^.$/

      singles.push(entry)
      true
    end
  end

  count = singles.length
  if count > 1
    regex.unshift(singles)
  elsif count == 1
    # A one-member class is pointless; restore the bare character.
    regex.unshift(singles.first)
  end

  regex.map do |entry|
    nested = entry.instance_of?(RegexAlternation) || entry.instance_of?(RegexConcatnation)
    nested ? optimize3(entry) : entry
  end
end