class Regexp::OptimizeTrie
trie for optimization
Attributes
opt_maybe[RW]
opt_suffix[RW]
parent[RW]
Public Instance Methods
[]=(k, v)
click to toggle source
Calls superclass method
# File lib/regexp_optimized_union.rb, line 7 def []= k, v super(k, v) v.parent = self end
build_char_group(chars)
click to toggle source
# File lib/regexp_optimized_union.rb, line 66 def build_char_group chars return chars.first if chars.size == 1 mb_chars = [] chars = chars.map(&:ord) chars.sort! first_char = chars.shift groups = [(first_char..first_char)] chars.each do |c| if c == groups.last.end + 1 groups[-1] = groups.last.begin..c else groups << (c..c) end end groups = groups.flat_map do |range| # only apply range to >= 4 contiguous chars if range.end >= range.begin + 3 [range.begin, '-'.ord, range.end] elsif range.end > range.begin range.to_a else [range.begin] end end "[#{groups.pack 'U*'}#{mb_chars.join}]" end
extract_common_suffix()
click to toggle source
prereq: opt_suffix
returns: regexp src
# File lib/regexp_optimized_union.rb, line 31 def extract_common_suffix branches = map do |key, value| [key, *value.to_chars] end branches.each &:reverse! max_common_size = branches.map(&:size).min common_size = nil max_common_size.downto 1 do |i| found = true branches.map {|b| b.take i }.each_cons(2) do |b1, b2| if b1 != b2 found = false break end end if found common_size = i break end end if common_size common = branches[0].take(common_size).reverse.join if branches.all?{|b| b.size == common_size + 1 } diff = build_char_group(branches.map &:last) "#{diff}#{common}" else diff = branches.map do |b| b.drop(common_size).reverse.join end.join '|' "(?:#{diff})#{common}" end end end
single_branch?()
click to toggle source
# File lib/regexp_optimized_union.rb, line 12 def single_branch? empty? or (size == 1 and !opt_maybe and values[0].single_branch?) end
single_char?()
click to toggle source
# File lib/regexp_optimized_union.rb, line 16 def single_char? size == 1 and values[0].empty? end
to_chars()
click to toggle source
prereq: single_branch?
# File lib/regexp_optimized_union.rb, line 21 def to_chars if empty? [] else [keys[0], *values[0].to_chars] end end
to_re_src()
click to toggle source
# File lib/regexp_optimized_union.rb, line 97 def to_re_src return '' if empty? res = extract_common_suffix if opt_suffix char_group = false if !res can_be_branched = true branches = map do |key, value| "#{key}#{value.to_re_src}" end if branches.all?{|b| b.bytesize == 1} char_group = true res = build_char_group branches else res = branches.join '|' end end if opt_maybe if char_group or single_char? "#{res}?" else "(?:#{res})?" end else if can_be_branched and size > 1 and parent and !char_group "(?:#{res})" else res end end end