class BibTeX::Lexer
The BibTeX::Lexer handles the lexical analysis of BibTeX bibliographies.
Constants
- MODE
Attributes
Public Class Methods
Source
# File lib/bibtex/lexer.rb, line 90
# Creates a new lexer.
#
# options - Hash of settings; it is merged over Lexer.defaults, so any key
#           given here overrides the corresponding default.
#
# After storing the merged options the lexer state is initialised via #reset.
def initialize(options = {})
  @options = Lexer.defaults.merge(options)
  reset
end
Creates a new instance. Possible options and their respective default values are:
- :include => [:errors] A list that may contain :meta_content and :errors; depending on whether or not these are present, the respective tokens are included in the parse tree.
- :strict => true In strict mode objects can start anywhere; therefore the ‘@’ symbol may only appear inside literals or @comment objects. For a more lenient lexer set this option to false; objects are then expected to start after a new line (leading white space is permitted).
- :strip => true When enabled, newlines will be stripped from quoted string values.
Public Instance Methods
Source
# File lib/bibtex/lexer.rb, line 133
# Tells whether +object+ is the object type the lexer is currently inside.
#
# object - the object type to compare against the active one (the lexer
#          itself uses :string, :preamble, :comment and :entry).
#
# Returns the result of comparing the active object with +object+.
def active?(object)
  current = @active_object
  current == object
end
Returns true if the lexer is currently parsing the given object type.
Source
# File lib/bibtex/lexer.rb, line 142
# Returns true if the :allow_missing_keys option holds a truthy value,
# false otherwise. The result is always a strict boolean.
def allow_missing_keys?
  @options[:allow_missing_keys] ? true : false
end
Source
# File lib/bibtex/lexer.rb, line 174
# Runs the lexical analysis.
#
# string - the source text to analyse; when omitted, the text of the
#          scanner that is already set is analysed again.
#
# Raises ArgumentError if neither +string+ nor an existing scanner is
# available.
#
# Returns the result of pushing the final [false, '$end'] token.
def analyse(string = nil)
  unless string || @scanner
    raise(ArgumentError, 'Lexer: failed to start analysis: no source given!')
  end

  # Assigning the data creates a fresh scanner and resets the lexer state.
  self.data = string || @scanner.string

  # Dispatch to the parse method selected by the current mode until the
  # whole input has been consumed.
  until @scanner.eos?
    send("parse_#{MODE[@mode]}")
  end

  push([false, '$end'])
end
Start the lexical analysis.
Source
# File lib/bibtex/lexer.rb, line 124
# Returns true if the current mode maps to :bibtex in the MODE table.
def bibtex_mode?
  mapped_mode = MODE[@mode]
  mapped_mode == :bibtex
end
Returns true if the lexer is currently parsing a BibTeX object.
Source
# File lib/bibtex/lexer.rb, line 109
# Sets the text to be analysed.
#
# data - the source String; it is wrapped in a new StringScanner.
#
# The internal state is reset afterwards, so tokens from a previous run are
# discarded.
def data=(data)
  @scanner = StringScanner.new(data)
  reset
end
Sets the source for the lexical analysis and resets the internal state.
Source
# File lib/bibtex/lexer.rb, line 119
# Removes the oldest token from the front of the parse stack and returns it.
# Returns nil when the stack is empty.
def next_token
  token = @stack.shift
  token
end
Returns the next token from the parse stack.
Source
# File lib/bibtex/lexer.rb, line 151
# Adds a token to the parse stack.
#
# value - a two-element token of the form [type, payload].
#
# Handling depends on the token type:
# * :CONTENT / :STRING_LITERAL - when line-break stripping is active, each
#   newline plus following whitespace in the payload is replaced by a single
#   space (in place). If the token on top of the stack has the same type, the
#   payload is appended to it instead of adding a new token.
# * :ERROR - stored only if errors are included; the current object is left
#   in either case.
# * :META_CONTENT - stored only if meta content is included.
# * anything else - always stored.
#
# Returns the lexer itself.
def push(value)
  type = value[0]

  case type
  when :CONTENT, :STRING_LITERAL
    value[1].gsub!(/\n\s*/, ' ') if strip_line_breaks?

    previous = @stack.last
    if previous && type == previous[0]
      # Merge adjacent fragments of the same kind into one token.
      previous[1] << value[1]
    else
      @stack << value
    end
  when :ERROR
    @stack << value if @include_errors
    leave_object
  when :META_CONTENT
    @stack << value if @include_meta_content
  else
    @stack << value
  end

  self
end
Pushes a value onto the parse stack. Returns the Lexer.
Source
# File lib/bibtex/lexer.rb, line 95
# Puts the lexer back into its initial state: empty token stack, brace level
# zero, :meta mode and no active object.
#
# Returns the lexer itself.
def reset
  @stack = []
  @brace_level = 0
  @mode = :meta
  @active_object = nil

  # Cache the two :include lookups so that #push does not have to search the
  # option list for every token.
  included = @options[:include]
  @include_meta_content = included.include?(:meta_content)
  @include_errors = included.include?(:errors)

  self
end
Source
# File lib/bibtex/lexer.rb, line 138
# Returns true if the :strict option holds a truthy value, false otherwise.
# The result is always a strict boolean.
def strict?
  @options[:strict] ? true : false
end
Returns true if the lexer is currently in strict mode.
Source
# File lib/bibtex/lexer.rb, line 146
# Returns true if line breaks should be stripped from pushed values, i.e. the
# :strip option holds a truthy value and the lexer is not currently inside a
# comment object. The result is always a strict boolean.
#
# The option is read from @options directly, like the other option
# predicates (#strict?, #allow_missing_keys?), so this method does not depend
# on a separate reader method being defined.
def strip_line_breaks?
  !!@options[:strip] && !active?(:comment)
end
Private Instance Methods
Source
# File lib/bibtex/lexer.rb, line 351
# Collects the tokens of the object in which an error occurred and replaces
# them on the stack with a single :ERROR token.
#
# error - the error token (e.g. [:E_UNBALANCED, text]) to append to the
#         collected tokens.
#
# Tokens are popped off the stack, newest first, until the stack is empty or
# the most recently popped token is an :AT or :META_CONTENT token; that token
# is part of the collected trace.
#
# Returns the result of pushing [:ERROR, trace].
def backtrace(error)
  trace = []

  loop do
    break if @stack.empty?
    break if !trace.empty? && %i[AT META_CONTENT].include?(trace[0][0])

    trace.unshift(@stack.pop)
  end

  trace << error
  push([:ERROR, trace])
end
Source
# File lib/bibtex/lexer.rb, line 288
# Called right after an '@' has been consumed: pushes the :AT token and
# determines which kind of object follows.
#
# The :string, :preamble and :comment patterns are tried in that order; the
# first one that matches becomes both the mode and the active object, and a
# token named after it (:STRING, :PREAMBLE, :COMMENT) is pushed. Otherwise
# the :entry pattern is tried; on success a :NAME token is pushed and, if an
# opening brace follows, the brace and (when present) the entry key are
# consumed as well. If nothing matches, an unexpected-object error is
# reported.
def enter_object
  @brace_level = 0
  push([:AT, '@'])

  keyword = %i[string preamble comment].find do |type|
    @scanner.scan(Lexer.patterns[type])
  end

  if keyword
    @mode = @active_object = keyword
    return push([keyword.upcase, @scanner.matched])
  end

  return error_unexpected_object unless @scanner.scan(Lexer.patterns[:entry])

  @mode = @active_object = :entry
  push([:NAME, @scanner.matched])

  # TODO: DRY - try to parse key
  return unless @scanner.scan(Lexer.patterns[:lbrace])

  @brace_level += 1
  push([:LBRACE, '{'])
  @mode = :content if @brace_level > 1 || (@brace_level == 1 && active?(:comment))

  key_pattern = allow_missing_keys? ? :optional_key : :key
  # The matched key text ends with one extra character, which is chopped off
  # before surrounding whitespace is removed.
  push([:KEY, @scanner.matched.chop.strip]) if @scanner.scan(Lexer.patterns[key_pattern])
end
Called when the lexer encounters a new BibTeX object.
Source
# File lib/bibtex/lexer.rb, line 326
# Logs a warning about unbalanced braces (including scanner position, brace
# level and mode) and records an :E_UNBALANCED error carrying the last
# matched text.
def error_unbalanced_braces
  message = "Lexer: unbalanced braces at #{@scanner.pos}; brace level #{@brace_level}; mode #{@mode.inspect}."
  BibTeX.log.warn(message)
  backtrace([:E_UNBALANCED, @scanner.matched])
end
Source
# File lib/bibtex/lexer.rb, line 346
# Logs a warning about an unexpected object (including scanner position,
# brace level and mode) and records an :E_UNEXPECTED_OBJECT error carrying
# the '@' that introduced it.
def error_unexpected_object
  message = "Lexer: unexpected object at #{@scanner.pos}; brace level #{@brace_level}; mode #{@mode.inspect}."
  BibTeX.log.warn(message)
  backtrace([:E_UNEXPECTED_OBJECT, '@'])
end
Source
# File lib/bibtex/lexer.rb, line 341
# Logs a warning naming the unexpected token (plus scanner position, brace
# level and mode) and records an :E_UNEXPECTED_TOKEN error carrying the last
# matched text.
def error_unexpected_token
  message = "Lexer: unexpected token `#{@scanner.matched}' at #{@scanner.pos}; brace level #{@brace_level}; mode #{@mode.inspect}."
  BibTeX.log.warn(message)
  backtrace([:E_UNEXPECTED_TOKEN, @scanner.matched])
end
Source
# File lib/bibtex/lexer.rb, line 336
# Logs a warning about unterminated content (including scanner position,
# brace level and mode) and records an :E_UNTERMINATED_CONTENT error carrying
# the scanner's last match.
def error_unterminated_content
  message = "Lexer: unterminated content at #{@scanner.pos}; brace level #{@brace_level}; mode #{@mode.inspect}."
  BibTeX.log.warn(message)
  backtrace([:E_UNTERMINATED_CONTENT, @scanner.matched])
end
Source
# File lib/bibtex/lexer.rb, line 331
# Logs a warning about an unterminated string (including scanner position,
# brace level and mode) and records an :E_UNTERMINATED_STRING error carrying
# the scanner's last match.
def error_unterminated_string
  message = "Lexer: unterminated string at #{@scanner.pos}; brace level #{@brace_level}; mode #{@mode.inspect}."
  BibTeX.log.warn(message)
  backtrace([:E_UNTERMINATED_STRING, @scanner.matched])
end
Source
# File lib/bibtex/lexer.rb, line 320
# Returns the lexer to the state it has between objects: :meta mode, no
# active object and a brace level of zero.
def leave_object
  @mode, @active_object = :meta, nil
  @brace_level = 0
end
Called when the lexer leaves a BibTeX object.
Source
# File lib/bibtex/lexer.rb, line 187
# Consumes a single token while the lexer is inside a BibTeX object.
#
# The patterns are tried in a fixed order and the first one that matches at
# the current scanner position determines the token that is pushed (or the
# state change that is made).
def parse_bibtex
  patterns = Lexer.patterns

  if @scanner.scan(patterns[:lbrace])
    @brace_level += 1
    push([:LBRACE, '{'])
    # Nested braces, or the first brace of a comment object, open free-form
    # content.
    @mode = :content if @brace_level > 1 || (@brace_level == 1 && active?(:comment))
  elsif @scanner.scan(patterns[:rbrace])
    @brace_level -= 1
    push([:RBRACE, '}'])
    if @brace_level == 0
      # The object's closing brace has been reached.
      leave_object
    elsif @brace_level < 0
      error_unbalanced_braces
    end
  elsif @scanner.scan(patterns[:eq])
    push([:EQ, '='])
  elsif @scanner.scan(patterns[:comma])
    push([:COMMA, ','])
  elsif @scanner.scan(patterns[:number])
    push([:NUMBER, @scanner.matched])
  elsif @scanner.scan(patterns[:name])
    push([:NAME, @scanner.matched.rstrip])
  elsif @scanner.scan(patterns[:quote])
    # An opening quote switches to literal mode; no token is pushed here.
    @mode = :literal
  elsif @scanner.scan(patterns[:sharp])
    push([:SHARP, '#'])
  elsif @scanner.scan(patterns[:object])
    enter_object
  elsif @scanner.scan(patterns[:space])
    # Text matched by the :space pattern is consumed without producing a token.
    nil
  elsif @scanner.scan(patterns[:period])
    # Whatever the :period pattern matches at this point is reported as an
    # unexpected token.
    error_unexpected_token
  end
end
Source
# File lib/bibtex/lexer.rb, line 230
# Consumes brace-delimited content up to and including the next brace.
#
# An opening brace increases the brace level and stays part of the content.
# A closing brace decreases the level; depending on the resulting level it
# either stays part of the content (still nested), ends the value (back to
# :bibtex mode), ends the whole object, or is reported as unbalanced. If no
# further brace exists, the remaining input is pushed as content and an
# unterminated-content error is reported.
def parse_content
  chunk = @scanner.scan_until(Lexer.patterns[:braces])

  case @scanner.matched
  when '{'
    @brace_level += 1
    push([:CONTENT, chunk])
  when '}'
    @brace_level -= 1

    if @brace_level > 1 || (@brace_level == 1 && active?(:comment))
      # Still inside nested content: the brace belongs to the text.
      push([:CONTENT, chunk])
    else
      # The brace is structural, so it is removed from the content.
      push([:CONTENT, chunk.chop])

      if @brace_level == 0
        push([:RBRACE, '}'])
        leave_object
      elsif @brace_level == 1
        push([:RBRACE, '}'])
        @mode = :bibtex
      else
        error_unbalanced_braces
      end
    end
  else
    push([:CONTENT, @scanner.rest])
    @scanner.terminate
    error_unterminated_content
  end
end
Source
# File lib/bibtex/lexer.rb, line 259
# Consumes a quoted string literal up to and including the next brace or
# double quote.
#
# Braces inside the literal adjust the brace level and stay part of the
# text; a closing brace that drops the level below one is reported as
# unbalanced. A double quote ends the literal only at brace level one;
# at any other level it is ordinary text. If no delimiter is found, the
# remaining input is pushed and an unterminated-string error is reported.
def parse_literal
  fragment = @scanner.scan_until(Lexer.patterns[:unquote])

  case @scanner.matched
  when '{'
    @brace_level += 1
    push([:STRING_LITERAL, fragment])
  when '}'
    @brace_level -= 1
    if @brace_level >= 1
      push([:STRING_LITERAL, fragment])
    else
      push([:STRING_LITERAL, fragment.chop])
      error_unbalanced_braces
    end
  when '"'
    if @brace_level != 1
      # A quote nested inside braces does not terminate the literal.
      push([:STRING_LITERAL, fragment])
    else
      push([:STRING_LITERAL, fragment.chop])
      @mode = :bibtex
    end
  else
    push([:STRING_LITERAL, @scanner.rest])
    @scanner.terminate
    error_unterminated_string
  end
end
Source
# File lib/bibtex/lexer.rb, line 219
# Consumes text outside of any object up to the start of the next object.
#
# The :strict_next pattern is used in strict mode and the :next pattern
# otherwise. When the pattern matches, the text before its final character
# is pushed as :META_CONTENT and the object is entered. When it does not
# match, the remaining input is pushed as :META_CONTENT and the scanner is
# moved to the end of the input.
def parse_meta
  boundary = strict? ? :strict_next : :next
  preceding = @scanner.scan_until(Lexer.patterns[boundary])

  unless @scanner.matched
    push([:META_CONTENT, @scanner.rest])
    return @scanner.terminate
  end

  push([:META_CONTENT, preceding.chop])
  enter_object
end