module RegularExpression::Bytecode

The bytecode module defines instructions, and has a compiled object for storing a stream of them, and a builder object for creating the compiled object.

Public Class Methods

compile(nfa) click to toggle source

Never recurse a graph in a compiler! We don't know how deep it is and don't want to limit how large a program we can accept due to arbitrary stack space. Always use a worklist.

# File lib/regular_expression/bytecode.rb, line 11
def self.compile(nfa)
  builder = Builder.new
  label = ->(state, index = 0) { :"state_#{state.object_id}_#{index}" }

  visited = Set.new
  worklist = [[nfa, [Insns::Jump.new(:fail)]]]

  # For each state in the NFA.
  until worklist.empty?
    state, fallback = worklist.pop
    next if visited.include?(state)

    # Label the start of the state.
    builder.mark_label(label[state])
    visited.add(state)

    if state.is_a?(NFA::FinishState)
      builder.push(Insns::Match.new)
      next
    end

    # Other states have transitions out of them. Go through each
    # transition.
    state.transitions.each_with_index do |transition, index|
      builder.mark_label(label[state, index])

      if state.transitions.length > 1 && index != state.transitions.length - 1
        builder.push(Insns::PushIndex.new)
      end

      case transition
      when NFA::Transition::BeginAnchor
        builder.push(Insns::GuardBegin.new(label[transition.state]))
      when NFA::Transition::EndAnchor
        builder.push(Insns::GuardEnd.new(label[transition.state]))
      when NFA::Transition::Any
        builder.push(Insns::JumpAny.new(label[transition.state]))
      when NFA::Transition::Value
        builder.push(Insns::JumpValue.new(transition.value, label[transition.state]))
      when NFA::Transition::Invert
        builder.push(Insns::JumpValuesInvert.new(transition.values, label[transition.state]))
      when NFA::Transition::Range
        if transition.invert
          builder.push(Insns::JumpRangeInvert.new(transition.left, transition.right, label[transition.state]))
        else
          builder.push(Insns::JumpRange.new(transition.left, transition.right, label[transition.state]))
        end
      when NFA::Transition::Epsilon
        builder.push(Insns::Jump.new(label[transition.state]))
      else
        raise
      end

      next_fallback =
        if state.transitions.length > 1 && index != state.transitions.length - 1
          [Insns::PopIndex.new, Insns::Jump.new(label[state, index + 1])]
        else
          fallback
        end

      worklist.push([transition.state, next_fallback])
    end

    # If we don't have one of the transitions that always executes, then we
    # need to add the fallback to the output for this state.
    if state.transitions.none? { |t| t.is_a?(NFA::Transition::BeginAnchor) || t.is_a?(NFA::Transition::Epsilon) }
      builder.push(*fallback)
    end
  end

  # We always have a failure case - it's just the failure instruction.
  builder.mark_label(:fail)
  builder.push(Insns::Fail.new)
  builder.build
end