class Regexp::OptimizeTrie

trie for optimization

Attributes

opt_maybe[RW]
opt_suffix[RW]
parent[RW]

Public Instance Methods

[]=(k, v) click to toggle source
Calls superclass method
# File lib/regexp_optimized_union.rb, line 7
def []= k, v
  super(k, v)
  v.parent = self
end
build_char_group(chars) click to toggle source
# File lib/regexp_optimized_union.rb, line 66
def build_char_group chars
  return chars.first if chars.size == 1

  mb_chars = []

  chars = chars.map(&:ord)
  chars.sort!
  first_char = chars.shift
  groups = [(first_char..first_char)]
  chars.each do |c|
    if c == groups.last.end + 1
      groups[-1] = groups.last.begin..c
    else
      groups << (c..c)
    end
  end

  groups = groups.flat_map do |range|
    # only apply range to >= 4 contiguous chars
    if range.end >= range.begin + 3
      [range.begin, '-'.ord, range.end]
    elsif range.end > range.begin
      range.to_a
    else
      [range.begin]
    end
  end

  "[#{groups.pack 'U*'}#{mb_chars.join}]"
end
extract_common_suffix() click to toggle source

prereq: opt_suffix returns: regexp src

# File lib/regexp_optimized_union.rb, line 31
def extract_common_suffix
  branches = map do |key, value|
    [key, *value.to_chars]
  end
  branches.each &:reverse!
  max_common_size = branches.map(&:size).min
  common_size = nil
  max_common_size.downto 1 do |i|
    found = true
    branches.map {|b| b.take i }.each_cons(2) do |b1, b2|
      if b1 != b2
        found = false
        break
      end
    end
    if found
      common_size = i
      break
    end
  end

  if common_size
    common = branches[0].take(common_size).reverse.join
    if branches.all?{|b| b.size == common_size + 1 }
      diff = build_char_group(branches.map &:last)
      "#{diff}#{common}"
    else
      diff = branches.map do |b|
        b.drop(common_size).reverse.join
      end.join '|'
      "(?:#{diff})#{common}"
    end
  end
end
single_branch?() click to toggle source
# File lib/regexp_optimized_union.rb, line 12
def single_branch?
  empty? or (size == 1 and !opt_maybe and values[0].single_branch?)
end
single_char?() click to toggle source
# File lib/regexp_optimized_union.rb, line 16
def single_char?
  size == 1 and values[0].empty?
end
to_chars() click to toggle source

prereq: single_branch?

# File lib/regexp_optimized_union.rb, line 21
def to_chars
  if empty?
    []
  else
    [keys[0], *values[0].to_chars]
  end
end
to_re_src() click to toggle source
# File lib/regexp_optimized_union.rb, line 97
def to_re_src
  return '' if empty?

  res = extract_common_suffix if opt_suffix
  char_group = false
  if !res
    can_be_branched = true
    branches = map do |key, value|
      "#{key}#{value.to_re_src}"
    end
    if branches.all?{|b| b.bytesize == 1}
      char_group = true
      res = build_char_group branches
    else
      res = branches.join '|'
    end
  end

  if opt_maybe
    if char_group or single_char?
      "#{res}?"
    else
      "(?:#{res})?"
    end
  else
    if can_be_branched and size > 1 and parent and !char_group
      "(?:#{res})"
    else
      res
    end
  end
end