class Antlr4::Runtime::LexerATNSimulator

Constants

MAX_DFA_EDGE
MIN_DFA_EDGE

Attributes

debug[R]
empty[R]
char_position_in_line[R]
line[R]

Public Class Methods

new(recog, atn, decision_to_dfa, shared_context_cache) click to toggle source
Calls superclass method Antlr4::Runtime::ATNSimulator::new
# File lib/antlr4/runtime/lexer_atn_simulator.rb, line 31
# Builds a lexer ATN simulator.
#
# recog                - recognizer kept in @recog (other methods in this
#                        class check it for nil before use)
# atn                  - forwarded to the superclass constructor
# decision_to_dfa      - collection of DFAs indexed by lexer mode
# shared_context_cache - forwarded to the superclass constructor
def initialize(recog, atn, decision_to_dfa, shared_context_cache)
  super(atn, shared_context_cache)

  # Collaborators supplied by the caller.
  @recog = recog
  @decision_to_dfa = decision_to_dfa

  # Debug switches. @@debug is a class variable, so constructing an
  # instance resets it for the whole class.
  @@debug = false
  @dfa_debug = false

  # Scanning position: no token started yet, first line, first column.
  @start_index = -1
  @line = 1
  @char_position_in_line = 0
  @mode = Lexer::DEFAULT_MODE

  # Bookkeeping for the most recent accept state and for match statistics.
  @prev_accept = SimState.new
  @match_calls = 0
end

Public Instance Methods

accept(input, lexer_action_executor, start_index, index, line, char_pos) click to toggle source
# File lib/antlr4/runtime/lexer_atn_simulator.rb, line 254
# Commits an accepted token: repositions the input just past the token,
# restores the line/column recorded for that position, and runs the lexer
# actions attached to the accept state (if any).
#
# input                 - character stream; must respond to #seek
# lexer_action_executor - object responding to #execute, or nil
# start_index           - stream index where the token started
# index                 - stream index to seek to (just after the token)
# line, char_pos        - position bookkeeping to restore
#
# Returns whatever the executor's #execute returns, or nil when there is no
# executor or no recognizer.
def accept(input, lexer_action_executor, start_index, index, line, char_pos)
  # print, not printf: the text is already formatted, so a '%' in the
  # executor's string form must not be parsed as a second format directive.
  print format("ACTION %s\n", lexer_action_executor) if @@debug

  # seek to after last char in token
  input.seek(index)
  @line = line
  @char_position_in_line = char_pos

  return if lexer_action_executor.nil? || @recog.nil?

  lexer_action_executor.execute(@recog, input, start_index)
end
add_dfa_edge_dfastate_atnconfigset(from, t, q) click to toggle source
# File lib/antlr4/runtime/lexer_atn_simulator.rb, line 438
# Turns the config set +q+ into a DFA state and, unless the set was flagged
# as depending on a semantic context, links it from +from+ on symbol +t+.
#
# from - source DFA state
# t    - input symbol
# q    - config set; its has_semantic_context flag is always cleared here
#
# Returns the DFA state obtained from add_dfa_state.
def add_dfa_edge_dfastate_atnconfigset(from, t, q)
  # Read the flag first: it is cleared before the set becomes a DFA state.
  predicate_dependent = q.has_semantic_context
  q.has_semantic_context = false

  target = add_dfa_state(q)
  add_dfa_edge_dfastate_dfastate(from, t, target) unless predicate_dependent
  target
end
add_dfa_edge_dfastate_dfastate(p, t, q) click to toggle source
# File lib/antlr4/runtime/lexer_atn_simulator.rb, line 450
# Records the DFA edge p --t--> q.
#
# p - source DFA state (must expose an #edges accessor)
# t - input symbol; symbols outside MIN_DFA_EDGE..MAX_DFA_EDGE are ignored
# q - target DFA state
#
# Returns q when the edge was stored, nil when t is out of range.
def add_dfa_edge_dfastate_dfastate(p, t, q)
  # Only track edges within the DFA bounds
  return if t < MIN_DFA_EDGE || t > MAX_DFA_EDGE

  # Interpolation rather than << on a literal, so the debug path also works
  # when string literals are frozen.
  puts("EDGE #{p} -> #{q} upon #{token_name(t)}") if @@debug

  #  make room for tokens 1..n and -1 masquerading as index 0
  p.edges = [] if p.edges.nil?
  p.edges[t - MIN_DFA_EDGE] = q # connect
end
add_dfa_state(configs) click to toggle source
# File lib/antlr4/runtime/lexer_atn_simulator.rb, line 468
# Looks up, or registers, the DFA state for +configs+ in the DFA of the
# current mode.
#
# When the set contains a config sitting in a rule stop state, the candidate
# is marked accepting and inherits that config's lexer action executor and
# the token type of its rule.
#
# configs - ATN config set; marked readonly when a new state is registered
#
# Returns the previously registered equal state if there is one, otherwise
# the newly numbered and registered state.
def add_dfa_state(configs)
  candidate = DFAState.new(configs)

  stop_config = configs.find_first_rule_stop_state
  unless stop_config.nil?
    candidate.is_accept_state = true
    candidate.lexer_action_executor = stop_config.lexer_action_executor
    candidate.prediction = atn.rule_to_token_type[stop_config.state.rule_index]
  end

  mode_dfa = @decision_to_dfa[@mode]

  known = mode_dfa.states[candidate]
  return known unless known.nil?

  # First sighting: number it, freeze its configs, and register it.
  candidate.state_number = mode_dfa.states.size
  configs.readonly = true
  candidate.configs = configs
  mode_dfa.states[candidate] = candidate
  candidate
end
capture_sim_state(settings, input, dfa_state) click to toggle source
# File lib/antlr4/runtime/lexer_atn_simulator.rb, line 431
# Snapshots the current scanning position into +settings+.
#
# settings  - record with index=, line=, char_pos= and dfa_state= writers
# input     - stream whose current #index is recorded
# dfa_state - DFA state to remember alongside the position
#
# Returns dfa_state (the value of the final assignment).
def capture_sim_state(settings, input, dfa_state)
  stream_position = input.index

  settings.index = stream_position
  settings.line = @line
  settings.char_pos = @char_position_in_line
  settings.dfa_state = dfa_state
end
clear_dfa() click to toggle source
# File lib/antlr4/runtime/lexer_atn_simulator.rb, line 80
# Replaces every entry of @decision_to_dfa with a fresh DFA built from the
# ATN decision state that has the same index.
#
# Returns nil.
def clear_dfa
  @decision_to_dfa.each_index do |decision|
    @decision_to_dfa[decision] = DFA.new(atn.decision_state(decision), decision)
  end
  nil
end
closure(input, config, configs, current_alt_reached_accept_state, speculative, treat_eof_as_epsilon) click to toggle source
# File lib/antlr4/runtime/lexer_atn_simulator.rb, line 289
# Adds +config+ and everything reachable from it through epsilon-like
# transitions (as decided by epsilon_target) to +configs+.
#
# input                            - character stream, passed through to
#                                    epsilon_target
# config                           - starting configuration
# configs                          - config set being filled
# current_alt_reached_accept_state - whether the alternative has already
#                                    reached an accept state
# speculative                      - passed through to epsilon_target
# treat_eof_as_epsilon             - passed through to epsilon_target
#
# Returns true as soon as a rule stop state is reached with a nil or empty
# context; otherwise the (possibly updated) current_alt_reached_accept_state.
def closure(input, config, configs, current_alt_reached_accept_state, speculative, treat_eof_as_epsilon)
  if config.state.is_a? RuleStopState
    if @@debug
      # print, not printf: the text is already formatted, so a '%' in the
      # interpolated values must not be parsed as a second format directive.
      if !@recog.nil?
        print format("closure at %s rule stop %s\n", @recog.rule_names[config.state.rule_index], config)
      else
        print format("closure at rule stop %s\n", config)
      end
    end

    if config.context.nil? || config.context.empty_path?
      if config.context.nil? || config.context.empty?
        # Nothing left to return to: this config is final for the alternative.
        configs.add(config)
        return true
      else
        # The context has an empty path among others: record the empty-context
        # variant and keep processing the remaining return states below.
        configs.add(LexerATNConfig.create_from_config2(config, config.state, EmptyPredictionContext::EMPTY))
        current_alt_reached_accept_state = true
      end
    end

    if !config.context.nil? && !config.context.empty?
      i = 0
      while i < config.context.size
        if config.context.get_return_state(i) != PredictionContext::EMPTY_RETURN_STATE
          new_context = config.context.get_parent(i) # "pop" return state
          return_state = atn.states[config.context.get_return_state(i)]
          c = LexerATNConfig.new
          c.lexer_atn_config5(config, return_state, new_context)
          current_alt_reached_accept_state = closure(input, c, configs, current_alt_reached_accept_state, speculative, treat_eof_as_epsilon)
        end
        i += 1
      end
    end

    return current_alt_reached_accept_state
  end

  # optimization
  unless config.state.only_has_epsilon_transitions
    if !current_alt_reached_accept_state || !config.passed_through_non_greedy_decision
      configs.add(config)
    end
  end

  # Recurse into every transition that epsilon_target lets us follow.
  p = config.state
  i = 0
  while i < p.number_of_transitions
    t = p.transition(i)
    c = epsilon_target(input, config, t, configs, speculative, treat_eof_as_epsilon)
    unless c.nil?
      current_alt_reached_accept_state = closure(input, c, configs, current_alt_reached_accept_state, speculative, treat_eof_as_epsilon)
    end
    i += 1
  end

  current_alt_reached_accept_state
end
compute_start_state(input, p) click to toggle source
# File lib/antlr4/runtime/lexer_atn_simulator.rb, line 275
# Builds the initial config set for the start state +p+: one closure per
# outgoing transition, with alternatives numbered from 1 in transition order.
#
# input - character stream, handed to closure
# p     - ATN state whose transitions seed the set
#
# Returns the populated ATNConfigSet.
def compute_start_state(input, p)
  start_configs = ATNConfigSet.new
  root_context = @@empty

  p.number_of_transitions.times do |transition_index|
    seed = LexerATNConfig.new
    seed.lexer_atn_config1(p.transition(transition_index).target, transition_index + 1, root_context)
    closure(input, seed, start_configs, false, false, false)
  end

  start_configs
end
compute_target_state(input, s, t) click to toggle source
# File lib/antlr4/runtime/lexer_atn_simulator.rb, line 177
# Computes the DFA state reached from +s+ on symbol +t+ when no cached edge
# exists.
#
# input - character stream
# s     - current DFA state
# t     - input symbol
#
# Returns the target DFA state, or @@error when nothing is reachable on t.
def compute_target_state(input, s, t)
  # Fill reach starting from s's configs, following t transitions.
  reach = OrderedATNConfigSet.new
  reachable_config_set(input, s.configs, reach, t)

  # Something is reachable: add an edge from s to the DFA state found or
  # created for reach.
  return add_dfa_edge_dfastate_atnconfigset(s, t, reach) unless reach.empty?

  # We got nowhere on t from s. Keep that knowledge as an explicit error edge
  # so the DFA does not fail over later, unless a semantic context was
  # involved in the outcome.
  add_dfa_edge_dfastate_dfastate(s, t, @@error) unless reach.has_semantic_context

  # Stop when we can't match any more characters.
  @@error
end
consume(input) click to toggle source
# File lib/antlr4/runtime/lexer_atn_simulator.rb, line 500
# Advances the input by one symbol while keeping @line and
# @char_position_in_line in step.
#
# input - stream responding to #la and #consume
#
# Returns whatever input.consume returns.
def consume(input)
  newline = 10 # code point of "\n"

  if input.la(1) == newline
    # A newline starts the next line at column 0.
    @line += 1
    @char_position_in_line = 0
  else
    @char_position_in_line += 1
  end

  input.consume
end
copy_state(simulator) click to toggle source
# File lib/antlr4/runtime/lexer_atn_simulator.rb, line 46
# Copies the scanning position (column, line, mode, token start index) from
# another simulator into this one.
#
# simulator - source of the state. It must expose #char_position_in_line and
#             #line. For mode and start_index a public reader is used when
#             the object has one; otherwise the value is read straight from
#             the @mode / @start_index instance variable, because this class
#             only declares readers for line and char_position_in_line.
#
# Returns the copied start index.
def copy_state(simulator)
  @char_position_in_line = simulator.char_position_in_line
  @line = simulator.line
  @mode = if simulator.respond_to?(:mode)
            simulator.mode
          else
            simulator.instance_variable_get(:@mode)
          end
  @start_index = if simulator.respond_to?(:start_index)
                   simulator.start_index
                 else
                   simulator.instance_variable_get(:@start_index)
                 end
end
dfa(mode) click to toggle source
# File lib/antlr4/runtime/lexer_atn_simulator.rb, line 492
# Looks up the DFA registered for a lexer mode.
#
# mode - index into @decision_to_dfa
#
# Returns the entry stored at that index.
def dfa(mode)
  dfa_by_mode = @decision_to_dfa
  dfa_by_mode[mode]
end
epsilon_target(input, config, t, configs, speculative, treat_eof_as_epsilon) click to toggle source

Side effect: may set configs.has_semantic_context to true (when a predicate transition is encountered).

# File lib/antlr4/runtime/lexer_atn_simulator.rb, line 349
# Computes the configuration reached by following transition +t+ from
# +config+ without consuming input, if the transition can be followed that
# way.
#
# Side effect: sets configs.has_semantic_context to true whenever a predicate
# transition is encountered.
#
# input                - character stream (used for predicate evaluation)
# config               - configuration the transition leaves from
# t                    - transition to examine
# configs              - config set being built (only its flag is touched)
# speculative          - forwarded to evaluate_predicate
# treat_eof_as_epsilon - when true, ATOM/RANGE/SET transitions that match EOF
#                        are followed as well
#
# Returns the new configuration, or nil when the transition cannot be
# followed. Raises UnsupportedOperationException for precedence predicates.
def epsilon_target(input, config, t, configs, speculative, treat_eof_as_epsilon)
  c = nil
  serialization_type = t.serialization_type

  if serialization_type == Transition::RULE
    # Entering a rule: push the follow state so closure can return to it.
    new_context = SingletonPredictionContext.new(config.context, t.follow_state.state_number)
    c = LexerATNConfig.new
    c.lexer_atn_config5(config, t.target, new_context)

  elsif serialization_type == Transition::PRECEDENCE

    raise UnsupportedOperationException, 'Precedence predicates are not supported in lexers.'

  elsif serialization_type == Transition::PREDICATE
    configs.has_semantic_context = true
    if evaluate_predicate(input, t.rule_index, t.pred_index, speculative)
      c = LexerATNConfig.create_from_config(config, t.target)
    end

  elsif serialization_type == Transition::ACTION

    if config.context.nil? || config.context.empty_path?
      # execute actions anywhere in the start rule for a token.
      #
      # TODO: if the entry rule is invoked recursively, some
      # actions may be executed during the recursive call. The
      # problem can appear when the context has an empty path but
      # is not itself empty. In this case, the config needs to be
      # split into two contexts - one with just the empty path
      # and another with everything but the empty path.
      # Unfortunately, the current algorithm does not allow
      # epsilon_target to return two configurations, so
      # additional modifications are needed before we can support
      # the split operation.
      #
      # NOTE(review): this reads @atn directly while the rest of the class
      # goes through the atn reader - confirm both refer to the same object.
      lexer_action_executor = LexerActionExecutor.append(config.lexer_action_executor, @atn._a[t.action_index])
      c = LexerATNConfig.new
      c.lexer_atn_config4(config, t.target, lexer_action_executor)
    else # ignore actions in referenced rules
      c = LexerATNConfig.new
      c.lexer_atn_config3(config, t.target)
    end

  elsif serialization_type == Transition::EPSILON
    c = LexerATNConfig.new
    c.lexer_atn_config3(config, t.target)
  elsif serialization_type == Transition::ATOM || serialization_type == Transition::RANGE || serialization_type == Transition::SET
    # Constants must be looked up with ::, as everywhere else in this class;
    # CharStream.EOF / Lexer.MIN_CHAR_VALUE would be method calls.
    if treat_eof_as_epsilon && t.matches(CharStream::EOF, Lexer::MIN_CHAR_VALUE, Lexer::MAX_CHAR_VALUE)
      c = LexerATNConfig.create_from_config(config, t.target)
    end
  end

  c
end
evaluate_predicate(input, rule_index, pred_index, speculative) click to toggle source
# File lib/antlr4/runtime/lexer_atn_simulator.rb, line 411
# Evaluates a semantic predicate through the recognizer.
#
# input       - character stream
# rule_index  - rule index passed to the recognizer's sempred
# pred_index  - predicate index passed to the recognizer's sempred
# speculative - when true, one symbol is consumed before evaluation and the
#               stream position, line and column are restored afterwards
#
# Returns the recognizer's answer, or true when no recognizer was provided.
def evaluate_predicate(input, rule_index, pred_index, speculative)
  # assume true if no recognizer was provided
  return true if @recog.nil?
  return @recog.sempred(nil, rule_index, pred_index) unless speculative

  # Remember where we are so the look-ahead below leaves no trace.
  saved_line = @line
  saved_column = @char_position_in_line
  rewind_to = input.index
  marker = input.mark

  begin
    consume(input)
    @recog.sempred(nil, rule_index, pred_index)
  ensure
    @char_position_in_line = saved_column
    @line = saved_line
    input.seek(rewind_to)
    input.release(marker)
  end
end
exec_atn(input, ds0) click to toggle source
# File lib/antlr4/runtime/lexer_atn_simulator.rb, line 111
# Runs the simulation from DFA state +ds0+, following one input symbol at a
# time until the error state is reached or an accept state is reached at EOF,
# then hands the outcome to fail_or_accept.
#
# input - character stream
# ds0   - DFA state to start from
#
# Returns the result of fail_or_accept.
def exec_atn(input, ds0)
  # print, not printf: the text is already formatted, so a '%' in the
  # config dump must not be parsed as a second format directive.
  print format("start state closure=%s\n", ds0.configs) if @@debug

  if ds0.is_accept_state
    # allow zero-length tokens
    capture_sim_state(@prev_accept, input, ds0)
  end

  t = input.la(1)

  s = ds0 # s is current/from DFA state

  loop do # while more work
    print format("execATN loop starting closure: %s\n", s.configs) if @@debug

    # As we move src->trg, src->trg, we keep track of the previous trg to
    # avoid looking up the DFA state again, which is expensive.
    # If the previous target was already part of the DFA, we might
    # be able to avoid doing a reach operation upon t. If s != nil,
    # it means that semantic predicates didn't prevent us from
    # creating a DFA state. Once we know s != nil, we check to see if
    # the DFA state has an edge already for t. If so, we can just reuse
    # its configuration set; there's no point in re-computing it.
    # This is kind of like doing DFA simulation within the ATN
    # simulation because DFA simulation is really just a way to avoid
    # computing reach/closure sets. Technically, once we know that
    # we have a previously added DFA state, we could jump over to
    # the DFA simulator. But that would mean popping back and forth
    # a lot and making things more complicated algorithmically.
    # This optimization makes a lot of sense for loops within the DFA.
    # A character will take us back to an existing DFA state
    # that already has lots of edges out of it, e.g. .* in comments.
    target = existing_target_state(s, t)
    target = compute_target_state(input, s, t) if target.nil?

    break if target == @@error

    # If this is a consumable input element, make sure to consume before
    # capturing the accept state so the input index, line, and char
    # position accurately reflect the state of the interpreter at the
    # end of the token.
    consume(input) if t != IntStream::EOF

    if target.is_accept_state
      capture_sim_state(@prev_accept, input, target)
      break if t == IntStream::EOF
    end

    t = input.la(1)
    s = target # flip: current DFA target becomes new src/from state
  end

  fail_or_accept(@prev_accept, input, s.configs, t)
end
existing_target_state(s, t) click to toggle source
# File lib/antlr4/runtime/lexer_atn_simulator.rb, line 166
# Looks up the cached DFA edge leaving +s+ on symbol +t+.
#
# s - DFA state whose edge table is consulted
# t - input symbol
#
# Returns the cached target state, or nil when s has no edge table, t lies
# outside MIN_DFA_EDGE..MAX_DFA_EDGE, or no edge is stored for t.
def existing_target_state(s, t)
  out_of_range = t < MIN_DFA_EDGE || t > MAX_DFA_EDGE
  return nil if s.edges.nil? || out_of_range

  cached = s.edges[t - MIN_DFA_EDGE]
  unless !@@debug || cached.nil?
    puts 'reuse state ' + s.state_number.to_s + ' edge to ' + cached.state_number.to_s
  end

  cached
end
fail_or_accept(prev_accept, input, _reach, t) click to toggle source
# File lib/antlr4/runtime/lexer_atn_simulator.rb, line 199
# Resolves a finished match attempt.
#
# prev_accept - record of the last accept state seen (dfa_state, index,
#               line, char_pos)
# input       - character stream
# _reach      - unused
# t           - symbol at which the simulation stopped
#
# Returns the prediction of the recorded accept state, or Token::EOF when
# nothing was accepted and EOF was hit right at the token start.
# Raises LexerNoViableAltException when nothing was accepted otherwise.
def fail_or_accept(prev_accept, input, _reach, t)
  if prev_accept.dfa_state.nil?
    # if no accept and EOF is first char, return EOF
    at_token_start = t == IntStream::EOF && input.index == @start_index
    return Token::EOF if at_token_start

    raise LexerNoViableAltException, @recog
  end

  executor = prev_accept.dfa_state.lexer_action_executor
  accept(input, executor, @start_index, prev_accept.index, prev_accept.line, prev_accept.char_pos)
  prev_accept.dfa_state.prediction
end
match(input, mode) click to toggle source
# File lib/antlr4/runtime/lexer_atn_simulator.rb, line 53
# Matches one token from +input+ in the given lexer mode.
#
# input - character stream; it is marked for the duration of the match and
#         the mark is always released
# mode  - lexer mode, used as index into @decision_to_dfa
#
# Returns the result of match_atn when the mode's DFA has no start state
# yet, otherwise the result of exec_atn from that start state.
def match(input, mode)
  @match_calls += 1
  @mode = mode
  marker = input.mark

  begin
    @start_index = input.index
    @prev_accept.reset
    mode_dfa = @decision_to_dfa[mode]
    mode_dfa.s0.nil? ? match_atn(input) : exec_atn(input, mode_dfa.s0)
  ensure
    input.release(marker)
  end
end
match_atn(input) click to toggle source
# File lib/antlr4/runtime/lexer_atn_simulator.rb, line 88
# Matches a token starting from the ATN start state of the current mode,
# creating the DFA start state on the way.
#
# The computed start state is stored as the mode DFA's s0 unless the start
# closure was flagged as depending on a semantic context.
#
# input - character stream
#
# Returns the result of exec_atn from the computed start state.
def match_atn(input)
  start_state = atn.mode_to_start_state[@mode]

  # print, not printf: the text is already formatted, so a '%' in the
  # interpolated values must not be parsed as a second format directive.
  print format("matchATN mode %d start: %s\n", @mode, start_state) if @@debug

  # Captured before exec_atn runs, for the debug dump below.
  old_mode = @mode

  s0_closure = compute_start_state(input, start_state)
  suppress_edge = s0_closure.has_semantic_context
  s0_closure.has_semantic_context = false

  next_state = add_dfa_state(s0_closure)
  @decision_to_dfa[@mode].s0 = next_state unless suppress_edge

  predict = exec_atn(input, next_state)

  if @@debug
    print format("DFA after matchATN: %s\n", @decision_to_dfa[old_mode].to_lexer_string)
  end

  predict
end
reachable_config_set(input, closure, reach, t) click to toggle source
# File lib/antlr4/runtime/lexer_atn_simulator.rb, line 211
# Fills +reach+ with every configuration reachable from the configurations
# in +closure+ by matching symbol +t+ and then taking the closure.
#
# input   - character stream
# closure - config set to start from (its #configs array is iterated). The
#           parameter shares its name with the #closure method; the call
#           below still dispatches to the method because it passes arguments.
# reach   - config set receiving the results
# t       - input symbol
#
# Returns nil.
def reachable_config_set(input, closure, reach, t)
  # this is used to skip processing for configs which have a lower priority
  # than a config that already reached an accept state for the same rule
  skip_alt = ATN::INVALID_ALT_NUMBER
  i = 0
  while i < closure.configs.length
    c = closure.configs[i]
    current_alt_reached_accept_state = (c.alt == skip_alt)
    if current_alt_reached_accept_state && c.passed_through_non_greedy_decision
      i += 1
      next
    end

    if @@debug
      # print, not printf: the text is already formatted, so a '%' in the
      # config dump must not be parsed as a second format directive.
      print format("testing %s at %s\n", token_name(t), c.to_s2(@recog, true))
    end

    n = c.state.number_of_transitions
    ti = 0
    while ti < n # for each transition
      trans = c.state.transition(ti)
      target = reachable_target(trans, t)
      unless target.nil?
        lexer_action_executor = c.lexer_action_executor
        unless lexer_action_executor.nil?
          # Offset is relative to the token start, which lives in
          # @start_index (there is no start_index reader on this class).
          lexer_action_executor = lexer_action_executor.fix_offset_before_match(input.index - @start_index)
        end

        treat_eof_as_epsilon = (t == CharStream::EOF)
        cfg = LexerATNConfig.new
        cfg.lexer_atn_config4(c, target, lexer_action_executor)
        if closure(input, cfg, reach, current_alt_reached_accept_state, true, treat_eof_as_epsilon)
          # any remaining configs for this alt have a lower priority than
          # the one that just reached an accept state.
          skip_alt = c.alt
          break
        end
      end
      ti += 1
    end
    i += 1
  end
end
reachable_target(trans, t) click to toggle source
# File lib/antlr4/runtime/lexer_atn_simulator.rb, line 267
# Tells where transition +trans+ leads on symbol +t+.
#
# trans - transition responding to #matches and #target
# t     - input symbol, checked against the lexer's character range
#
# Returns the transition's target when it matches t, nil otherwise.
def reachable_target(trans, t)
  trans.matches(t, Lexer::MIN_CHAR_VALUE, Lexer::MAX_CHAR_VALUE) ? trans.target : nil
end
reset() click to toggle source
# File lib/antlr4/runtime/lexer_atn_simulator.rb, line 72
# Puts the simulator back into its initial scanning state: clears the last
# accept record, forgets the token start, and returns to line 1, column 0 in
# the default lexer mode.
def reset
  @prev_accept.reset
  @start_index = -1
  @line = 1
  @char_position_in_line = 0
  # DEFAULT_MODE is a constant, so it needs ::, as in the constructor;
  # Lexer.DEFAULT_MODE would be a method call.
  @mode = Lexer::DEFAULT_MODE
end
text(input) click to toggle source
# File lib/antlr4/runtime/lexer_atn_simulator.rb, line 496
# Fetches the text matched so far for the current token.
#
# input - stream responding to #index and #text
#
# Returns input's text for the interval from @start_index up to the symbol
# just before the current index.
def text(input)
  # index is the first lookahead char; don't include it.
  last_matched = input.index - 1
  input.text(Interval.of(@start_index, last_matched))
end
token_name(t) click to toggle source
# File lib/antlr4/runtime/lexer_atn_simulator.rb, line 511
# Renders an input symbol for debug output.
#
# t - symbol value
#
# Returns 'EOF' for -1, otherwise the value's string form in single quotes.
def token_name(t)
  # if ( atn.g!=nil ) return atn.g.getTokenDisplayName(t)
  t == -1 ? 'EOF' : "'#{t}'"
end