class BibTeX::Lexer

The BibTeX::Lexer handles the lexical analysis of BibTeX bibliographies.

Constants

MODE

Attributes

defaults[R]
patterns[R]
mode[RW]
options[R]
scanner[R]
stack[R]

Public Class Methods

new(options = {}) click to toggle source

Creates a new instance. Possible options and their respective default values are:

  • :include => [:errors] A list that may contain :meta_content and :errors; depending on whether or not these are present, the respective tokens are included in the parse tree.

  • :strict => true In strict mode objects can start anywhere, so the ‘@’ symbol may not appear outside of literals or @comment objects. For a more lenient lexer, set this to false; objects are then expected to start after a new line (leading white space is permitted).

  • :strip => true When enabled, newlines will be stripped from quoted string values.

# File lib/bibtex/lexer.rb, line 90
# Builds a lexer whose options are Lexer.defaults overlaid with the given
# +options+ (keys supplied by the caller win), then resets the internal
# state.
def initialize(options = {})
  merged = Lexer.defaults.merge(options)
  @options = merged
  reset
end

Public Instance Methods

active?(object) click to toggle source

Returns true if the lexer is currently parsing the given object type.

# File lib/bibtex/lexer.rb, line 133
# Tells whether +object+ is the object type recorded as currently active.
# Compares with ==, so +nil+ matches when no object is active.
def active?(object)
  current = @active_object
  current == object
end
allow_missing_keys?() click to toggle source
# File lib/bibtex/lexer.rb, line 142
# Returns true if the :allow_missing_keys option is set to a truthy value,
# false otherwise (always a strict boolean).
def allow_missing_keys?
  @options[:allow_missing_keys] ? true : false
end
analyse(string = nil) click to toggle source

Start the lexical analysis.

# File lib/bibtex/lexer.rb, line 174
# Starts the lexical analysis.
#
# Uses +string+ as the source when given; otherwise re-reads the string of
# the scanner that is already in place. Raises ArgumentError when neither
# is available. Assigning the source goes through #data=, after which the
# parse method selected by MODE for the current mode is invoked repeatedly
# until the scanner reaches the end of the input. Finally the end marker
# token [false, '$end'] is pushed; the result of that push is returned.
def analyse(string = nil)
  unless string || @scanner
    raise(ArgumentError, 'Lexer: failed to start analysis: no source given!')
  end

  self.data = string || @scanner.string

  until @scanner.eos?
    send("parse_#{MODE[@mode]}")
  end

  push([false, '$end'])
end
bibtex_mode?() click to toggle source

Returns true if the lexer is currently parsing a BibTeX object.

# File lib/bibtex/lexer.rb, line 124
# Returns true if the current mode is mapped to :bibtex by the MODE table.
def bibtex_mode?
  mapped = MODE[@mode]
  mapped == :bibtex
end
data=(data) click to toggle source

Sets the source for the lexical analysis and resets the internal state.

# File lib/bibtex/lexer.rb, line 109
# Sets the source for the lexical analysis: wraps +data+ in a fresh
# StringScanner and then resets the internal state.
def data=(data)
  source = StringScanner.new(data)
  @scanner = source
  reset
end
next_token() click to toggle source

Returns the next token from the parse stack.

# File lib/bibtex/lexer.rb, line 119
# Removes and returns the oldest token on the parse stack (the stack is
# consumed from the front). Returns nil once the stack is empty.
def next_token
  token = @stack.shift
  token
end
push(value) click to toggle source

Pushes a value onto the parse stack. Returns the Lexer.

# File lib/bibtex/lexer.rb, line 151
# Pushes a token (a [type, value] pair) onto the parse stack and returns
# the lexer itself.
#
# Handling depends on the token type:
# * :CONTENT / :STRING_LITERAL - newlines plus following whitespace are
#   replaced by a single space (in place) when #strip_line_breaks? holds;
#   the text is then appended to the previous token if that token has the
#   same type, otherwise the token is pushed.
# * :ERROR - pushed only if errors are included; the current object is
#   left in either case.
# * :META_CONTENT - pushed only if meta content is included.
# * anything else - pushed as is.
def push(value)
  type, text = value

  case type
  when :CONTENT, :STRING_LITERAL
    text.gsub!(/\n\s*/, ' ') if strip_line_breaks?

    # Consecutive chunks of the same kind are merged into one token.
    mergeable = !@stack.empty? && type == @stack.last[0]
    if mergeable
      @stack.last[1] << text
    else
      @stack << value
    end
  when :ERROR
    @stack << value if @include_errors
    leave_object
  when :META_CONTENT
    @stack << value if @include_meta_content
  else
    @stack << value
  end

  self
end
reset() click to toggle source
# File lib/bibtex/lexer.rb, line 95
# Resets the internal lexer state: empties the parse stack, zeroes the
# brace level, switches back to :meta mode and clears the active object.
# Also refreshes the cached :include flags from the options.
#
# The :include option is coerced with Array(), so +nil+ (include nothing)
# or a single symbol such as +:errors+ are accepted in addition to a list
# instead of raising NoMethodError.
#
# Returns the lexer.
def reset
  @stack = []
  @brace_level = 0
  @mode = :meta
  @active_object = nil

  # cache options for speed
  included = Array(@options[:include])
  @include_meta_content = included.include?(:meta_content)
  @include_errors = included.include?(:errors)

  self
end
strict?() click to toggle source

Returns true if the lexer is currently in strict mode.

# File lib/bibtex/lexer.rb, line 138
# Returns true if the :strict option is set to a truthy value, false
# otherwise (always a strict boolean).
def strict?
  @options[:strict] ? true : false
end
strip_line_breaks?() click to toggle source
# File lib/bibtex/lexer.rb, line 146
# Returns true if line breaks should be stripped from pushed text: the
# :strip option must be truthy and the active object must not be a
# comment.
def strip_line_breaks?
  return false unless options[:strip]

  !active?(:comment)
end
symbols() click to toggle source
# File lib/bibtex/lexer.rb, line 114
# Returns the first element (the token type) of every token currently on
# the parse stack, in stack order.
def symbols
  @stack.map { |token| token.first }
end

Private Instance Methods

backtrace(error) click to toggle source
# File lib/bibtex/lexer.rb, line 351
# Collects the tokens of the object being lexed into a single :ERROR
# token.
#
# Tokens are popped off the parse stack (keeping their original order)
# until the stack is empty or the most recently popped token is an :AT or
# :META_CONTENT token. +error+ is appended to the collected tokens and the
# result is pushed as [:ERROR, tokens].
def backtrace(error)
  trail = []

  until @stack.empty?
    break if !trail.empty? && %i[AT META_CONTENT].include?(trail.first[0])

    trail.unshift(@stack.pop)
  end

  trail << error
  push([:ERROR, trail])
end
enter_object() click to toggle source

Called when the lexer encounters a new BibTeX object.

# File lib/bibtex/lexer.rb, line 288
# Called when the lexer encounters a new BibTeX object.
#
# Resets the brace level, pushes the :AT token and then tries the object
# patterns in this order: string, preamble, comment, entry. The first one
# that matches determines both the mode and the active object, and its
# matched text is pushed as a :STRING, :PREAMBLE, :COMMENT or :NAME token.
# For entries, an immediately following opening brace and (if present) the
# entry key are consumed as well. If no pattern matches, an
# unexpected-object error is reported.
def enter_object
  @brace_level = 0
  push [:AT, '@']

  if @scanner.scan(Lexer.patterns[:string])
    @mode = @active_object = :string
    push [:STRING, @scanner.matched]
  elsif @scanner.scan(Lexer.patterns[:preamble])
    @mode = @active_object = :preamble
    push [:PREAMBLE, @scanner.matched]
  elsif @scanner.scan(Lexer.patterns[:comment])
    @mode = @active_object = :comment
    push [:COMMENT, @scanner.matched]
  elsif @scanner.scan(Lexer.patterns[:entry])
    @mode = @active_object = :entry
    push [:NAME, @scanner.matched]

    # Try to read the opening brace and the key right away.
    if @scanner.scan(Lexer.patterns[:lbrace])
      # The brace level was reset above and the active object is :entry, so
      # the level is exactly 1 here and no switch to :content mode can
      # apply.
      @brace_level += 1
      push([:LBRACE, '{'])

      # The key pattern's match ends with one extra character, which is
      # chopped off before surrounding whitespace is stripped.
      push [:KEY, @scanner.matched.chop.strip] if @scanner.scan(Lexer.patterns[allow_missing_keys? ? :optional_key : :key])
    end

  else
    error_unexpected_object
  end
end
error_unbalanced_braces() click to toggle source
# File lib/bibtex/lexer.rb, line 326
# Reports unbalanced braces: logs a warning with the scanner position,
# brace level and mode, then records an :E_UNBALANCED error carrying the
# scanner's last match via #backtrace.
def error_unbalanced_braces
  message = "Lexer: unbalanced braces at #{@scanner.pos}; brace level #{@brace_level}; mode #{@mode.inspect}."
  BibTeX.log.warn(message)

  backtrace([:E_UNBALANCED, @scanner.matched])
end
error_unexpected_object() click to toggle source
# File lib/bibtex/lexer.rb, line 346
# Reports an unexpected object: logs a warning with the scanner position,
# brace level and mode, then records an :E_UNEXPECTED_OBJECT error for
# the '@' via #backtrace.
def error_unexpected_object
  message = "Lexer: unexpected object at #{@scanner.pos}; brace level #{@brace_level}; mode #{@mode.inspect}."
  BibTeX.log.warn(message)

  backtrace([:E_UNEXPECTED_OBJECT, '@'])
end
error_unexpected_token() click to toggle source
# File lib/bibtex/lexer.rb, line 341
# Reports an unexpected token: logs a warning naming the scanner's last
# match together with position, brace level and mode, then records an
# :E_UNEXPECTED_TOKEN error carrying that match via #backtrace.
def error_unexpected_token
  message = "Lexer: unexpected token `#{@scanner.matched}' at #{@scanner.pos}; brace level #{@brace_level}; mode #{@mode.inspect}."
  BibTeX.log.warn(message)

  backtrace([:E_UNEXPECTED_TOKEN, @scanner.matched])
end
error_unterminated_content() click to toggle source
# File lib/bibtex/lexer.rb, line 336
# Reports unterminated content: logs a warning with the scanner position,
# brace level and mode, then records an :E_UNTERMINATED_CONTENT error
# carrying the scanner's last match via #backtrace.
def error_unterminated_content
  message = "Lexer: unterminated content at #{@scanner.pos}; brace level #{@brace_level}; mode #{@mode.inspect}."
  BibTeX.log.warn(message)

  backtrace([:E_UNTERMINATED_CONTENT, @scanner.matched])
end
error_unterminated_string() click to toggle source
# File lib/bibtex/lexer.rb, line 331
# Reports an unterminated string: logs a warning with the scanner
# position, brace level and mode, then records an :E_UNTERMINATED_STRING
# error carrying the scanner's last match via #backtrace.
def error_unterminated_string
  message = "Lexer: unterminated string at #{@scanner.pos}; brace level #{@brace_level}; mode #{@mode.inspect}."
  BibTeX.log.warn(message)

  backtrace([:E_UNTERMINATED_STRING, @scanner.matched])
end
leave_object() click to toggle source

Called when parser leaves a BibTeX object.

# File lib/bibtex/lexer.rb, line 320
# Called when the lexer leaves a BibTeX object: clears the active object,
# switches back to :meta mode and zeroes the brace level.
def leave_object
  @active_object = nil
  @mode = :meta
  @brace_level = 0
end
parse_bibtex() click to toggle source
# File lib/bibtex/lexer.rb, line 187
# Lexes a single token inside a BibTeX object.
#
# The lexer patterns are tried in a fixed order at the current scanner
# position; the first one that matches decides what happens:
# * lbrace  - raises the brace level, pushes :LBRACE and switches to
#             :content mode for nested braces or for the first brace of a
#             comment object
# * rbrace  - lowers the brace level, pushes :RBRACE; leaves the object at
#             level zero and reports unbalanced braces below zero
# * eq, comma, number, name, sharp - push the corresponding token
# * quote   - switches to :literal mode (no token is pushed)
# * object  - enters a new object
# * space   - skipped
# * period  - reported as an unexpected token
def parse_bibtex
  patterns = Lexer.patterns

  if @scanner.scan(patterns[:lbrace])
    @brace_level += 1
    push([:LBRACE, '{'])
    @mode = :content if @brace_level > 1 || (@brace_level == 1 && active?(:comment))
  elsif @scanner.scan(patterns[:rbrace])
    @brace_level -= 1
    push([:RBRACE, '}'])
    if @brace_level == 0
      leave_object
    elsif @brace_level < 0
      error_unbalanced_braces
    end
  elsif @scanner.scan(patterns[:eq])
    push([:EQ, '='])
  elsif @scanner.scan(patterns[:comma])
    push([:COMMA, ','])
  elsif @scanner.scan(patterns[:number])
    push([:NUMBER, @scanner.matched])
  elsif @scanner.scan(patterns[:name])
    push([:NAME, @scanner.matched.rstrip])
  elsif @scanner.scan(patterns[:quote])
    @mode = :literal
  elsif @scanner.scan(patterns[:sharp])
    push([:SHARP, '#'])
  elsif @scanner.scan(patterns[:object])
    enter_object
  elsif @scanner.scan(patterns[:space])
    nil # whitespace between tokens is dropped
  elsif @scanner.scan(patterns[:period])
    error_unexpected_token
  end
end
parse_content() click to toggle source
# File lib/bibtex/lexer.rb, line 230
# Lexes brace-delimited content up to and including the next brace.
#
# * On an opening brace the brace level rises and the text (brace
#   included) is pushed as :CONTENT.
# * On a closing brace the brace level drops. If that closes the object
#   (level zero) or the value (level one outside of a comment), the text
#   without the brace is pushed, followed by :RBRACE, and the lexer leaves
#   the object or returns to :bibtex mode respectively. A negative level
#   is reported as unbalanced braces; any other closing brace is part of
#   the content.
# * Without any further brace the rest of the input is pushed and
#   unterminated content is reported.
def parse_content
  chunk = @scanner.scan_until(Lexer.patterns[:braces])
  brace = @scanner.matched

  if brace == '{'
    @brace_level += 1
    push([:CONTENT, chunk])
  elsif brace == '}'
    @brace_level -= 1

    if @brace_level == 0 || (@brace_level == 1 && !active?(:comment))
      push([:CONTENT, chunk.chop])
      push([:RBRACE, '}'])
      if @brace_level == 0
        leave_object
      else
        @mode = :bibtex
      end
    elsif @brace_level < 0
      push([:CONTENT, chunk.chop])
      error_unbalanced_braces
    else
      push([:CONTENT, chunk])
    end
  else
    push([:CONTENT, @scanner.rest])
    @scanner.terminate
    error_unterminated_content
  end
end
parse_literal() click to toggle source
# File lib/bibtex/lexer.rb, line 259
# Lexes a quoted string literal up to and including the next brace or
# double quote.
#
# * On an opening brace the brace level rises and the text (brace
#   included) is pushed as :STRING_LITERAL.
# * On a closing brace the brace level drops; if it falls below one, the
#   text without the brace is pushed and unbalanced braces are reported,
#   otherwise the brace is part of the literal.
# * A double quote at brace level one ends the literal: the text without
#   the quote is pushed and the lexer returns to :bibtex mode. At any
#   other level the quote is part of the literal.
# * Without any further delimiter the rest of the input is pushed and an
#   unterminated string is reported.
def parse_literal
  chunk = @scanner.scan_until(Lexer.patterns[:unquote])
  delimiter = @scanner.matched

  if delimiter == '{'
    @brace_level += 1
    push([:STRING_LITERAL, chunk])
  elsif delimiter == '}'
    @brace_level -= 1
    if @brace_level >= 1
      push([:STRING_LITERAL, chunk])
    else
      push([:STRING_LITERAL, chunk.chop])
      error_unbalanced_braces
    end
  elsif delimiter == '"'
    if @brace_level != 1
      push([:STRING_LITERAL, chunk])
    else
      push([:STRING_LITERAL, chunk.chop])
      @mode = :bibtex
    end
  else
    push([:STRING_LITERAL, @scanner.rest])
    @scanner.terminate
    error_unterminated_string
  end
end
parse_meta() click to toggle source
# File lib/bibtex/lexer.rb, line 219
# Lexes the text between BibTeX objects.
#
# Scans up to the start of the next object, using the strict or the
# lenient pattern depending on #strict?. On a match, the scanned text
# minus its last character is pushed as :META_CONTENT and the object is
# entered. Otherwise the rest of the input is pushed as :META_CONTENT and
# the scanner is moved to the end.
def parse_meta
  pattern_key = strict? ? :strict_next : :next
  leading = @scanner.scan_until(Lexer.patterns[pattern_key])

  unless @scanner.matched
    push([:META_CONTENT, @scanner.rest])
    return @scanner.terminate
  end

  push([:META_CONTENT, leading.chop])
  enter_object
end