class Rouge::Lexer

@abstract A lexer transforms text into a stream of `[token, chunk]` pairs.

Attributes

options[R]

-*- instance methods -*- #

Public Class Methods

aliases(*args) click to toggle source

Used to specify alternate names this lexer class may be found by.

@example

class Erb < Lexer
  tag 'erb'
  aliases 'eruby', 'rhtml'
end

Lexer.find('eruby') # => Erb
# File lib/rouge/lexer.rb, line 250
# Records alternate names for this lexer class and makes each of them
# resolvable through the lexer registry.
#
# @param args [Array<#to_s>] alternate names; each is converted to a String
# @return [Array<String>] all aliases recorded on this class so far
def aliases(*args)
  names = args.map(&:to_s)

  # Every alias is registered against this class so lookups by name find it.
  names.each { |name| Lexer.register(name, self) }

  @aliases ||= []
  @aliases.concat(names)
end
all() click to toggle source

@return a list of all lexers.

# File lib/rouge/lexer.rb, line 130
# Returns the distinct values stored in the registry, memoized in @all.
#
# @return [Array] the unique registered lexers
def all
  return @all if @all

  @all = registry.values.uniq
end
assert_utf8!(str) click to toggle source

@private

# File lib/rouge/lexer.rb, line 284
# Verifies that a string carries one of the accepted encodings.
#
# @param str [String] the string to check
# @return [nil] when the encoding is US-ASCII, UTF-8 or ASCII-8BIT
# @raise [EncodingError] for any other encoding
def assert_utf8!(str)
  return if %w[US-ASCII UTF-8 ASCII-8BIT].include?(str.encoding.name)

  # Report every name of the offending encoding (canonical name plus aliases).
  encoding_names = str.encoding.names.join(',')
  raise EncodingError.new(
    "Bad encoding: #{encoding_names}. Please convert your string to UTF-8."
  )
end
continue_lex(*a, &b) click to toggle source

In case continue_lex is called statically, we simply begin a new lex from the beginning, since there is no state.

@see continue_lex

# File lib/rouge/lexer.rb, line 29
# Class-level form of continue_lex. All positional arguments and the block
# are forwarded unchanged to `lex` on the same receiver, so the lex simply
# starts from the beginning.
def continue_lex(*a, &b)
  lex(*a, &b)
end
debug_enabled?() click to toggle source
# File lib/rouge/lexer.rb, line 204
# Reports whether the @debug_enabled instance variable is currently defined
# on the receiver (its value is not inspected).
#
# @return [Boolean]
def debug_enabled?
  instance_variable_defined?(:@debug_enabled)
end
demo(arg=:absent) click to toggle source

Specify or get a small demo string for this lexer

# File lib/rouge/lexer.rb, line 123
# Specify or get a small demo string for this lexer.
#
# With an argument, stores it as the demo. Without one, returns the stored
# demo; the file named by `demo_file` is read (text mode, BOM-aware UTF-8)
# only when no demo has been stored yet, and the result is memoized.
# Previously the getter re-read the file on every call and thereby discarded
# a demo that had been set explicitly.
#
# @param arg [String, Symbol] the demo text, or :absent (the default) to read
# @return [String] the demo text
def demo(arg=:absent)
  return @demo = arg unless arg == :absent

  @demo ||= File.read(demo_file, mode: 'rt:bom|utf-8')
end
demo_file(arg=:absent) click to toggle source

Specify or get the path name containing a small demo for this lexer (can be overridden by {demo}).

# File lib/rouge/lexer.rb, line 116
# Specify or get the path of the file holding this lexer's demo.
#
# With an argument, wraps it in a Pathname and stores it. Without one,
# returns the stored path; the default `<this directory>/demos/<tag>` is
# computed only when no path has been stored. Previously the getter always
# recomputed the default and overwrote an explicitly configured path.
#
# @param arg [String, Pathname, Symbol] the path, or :absent (the default) to read
# @return [Pathname] the demo file path
def demo_file(arg=:absent)
  return @demo_file = Pathname.new(arg) unless arg == :absent

  @demo_file ||= Pathname.new(File.join(__dir__, 'demos', tag))
end
desc(arg=:absent) click to toggle source

Specify or get this lexer’s description.

# File lib/rouge/lexer.rb, line 98
# Reads or writes this lexer's description.
#
# @param arg [Object] the new description, or :absent (the default) to read
# @return [Object] the stored description (nil if none has been set)
def desc(arg=:absent)
  return @desc if arg == :absent

  @desc = arg
end
detect?(text) click to toggle source

@abstract

Return true if there is an in-text indication (such as a shebang or DOCTYPE declaration) that this lexer should be used.

@param [TextAnalyzer] text

the text to be analyzed, with a couple of handy methods on it,
like {TextAnalyzer#shebang?} and {TextAnalyzer#doctype?}
# File lib/rouge/lexer.rb, line 501
# Default detection hook: ignores +text+ and always answers false.
# Per the accompanying documentation, lexers override this to look for
# in-text indications such as a shebang or DOCTYPE declaration.
def self.detect?(text)
  false
end
detectable?() click to toggle source

Determine if a lexer has a method named :detect? defined in its singleton class.

# File lib/rouge/lexer.rb, line 210
# Tells whether the receiver itself (not an ancestor) defines a singleton
# method named :detect?. The answer is computed once and cached in
# @detectable, including when it is false.
#
# @return [Boolean]
def detectable?
  unless defined?(@detectable)
    @detectable = singleton_methods(false).include?(:detect?)
  end

  @detectable
end
disable_debug!() click to toggle source
# File lib/rouge/lexer.rb, line 200
# Removes the @debug_enabled instance variable when it is defined;
# otherwise does nothing.
#
# @return [Object, nil] the removed value, or nil if the variable was not set
def disable_debug!
  return unless instance_variable_defined?(:@debug_enabled)

  remove_instance_variable(:@debug_enabled)
end
enable_debug!() click to toggle source
# File lib/rouge/lexer.rb, line 196
# Sets the @debug_enabled instance variable to true on the receiver.
def enable_debug!
  @debug_enabled = true
end
filenames(*fnames) click to toggle source

Specify a list of filename globs associated with this lexer.

If a filename glob is associated with more than one lexer, this can cause a Guesser::Ambiguous error to be raised in various guessing methods. These errors can be avoided by disambiguation. Filename globs are disambiguated in one of two ways. Either the lexer will define a `self.detect?` method (intended for use with shebangs and doctypes) or a manual rule will be specified in Guessers::Disambiguation.

@example

class Ruby < Lexer
  filenames '*.rb', '*.ruby', 'Gemfile', 'Rakefile'
end
# File lib/rouge/lexer.rb, line 269
# Appends the given filename globs to this lexer's list.
#
# @param fnames [Array<String>] globs to add (may be empty)
# @return [Array<String>] every glob recorded so far
def filenames(*fnames)
  @filenames ||= []
  @filenames.concat(fnames)
end
find(name) click to toggle source

Given a lexer name as a string, return the lexer class registered under that name, or nil if there is none. @param [String] name @return [Class<Rouge::Lexer>,nil]

# File lib/rouge/lexer.rb, line 36
# Looks up a registered lexer by name.
#
# @param name [#to_s] the name to look up; converted to a String first
# @return [Object, nil] the registry entry for that name, or nil
def find(name)
  key = name.to_s
  registry[key]
end
find_fancy(str, code=nil, additional_options={}) click to toggle source

Find a lexer, with fancy shiny features.

  • The string you pass can include CGI-style options

    Lexer.find_fancy('erb?parent=tex')
    
  • You can pass the special name ‘guess’ so we guess for you, and you can pass a second argument of the code to guess by

    Lexer.find_fancy('guess', "#!/bin/bash\necho Hello, world")
    

    If the code matches more than one lexer then Guesser::Ambiguous is raised.

This is used in the Redcarpet plugin as well as Rouge’s own markdown lexer for highlighting internal code blocks.

# File lib/rouge/lexer.rb, line 57
# Finds and instantiates a lexer from a "fancy" specification string.
#
# @param str [String, nil] a lexer name, optionally followed by `?` and
#   CGI-style options; `'guess'` or nil asks for a guess based on +code+
# @param code [String, nil] source text handed to `guess` when guessing
# @param additional_options [Hash] options merged underneath the parsed ones
# @return [Object, nil] a new lexer instance, or nil when no class was found
def find_fancy(str, code=nil, additional_options={})
  # Plain name: no option string to parse and no guessing requested.
  if str && str != 'guess' && !str.include?('?')
    klass = find(str)
    return klass && klass.new(additional_options)
  end

  name, query = str ? str.split('?', 2) : [nil, '']

  # Turn the CGI-style query into a hash: a bare key becomes true, a single
  # value is unwrapped, and repeated keys keep the full list of values.
  parsed = CGI.parse(query || '').each_with_object({}) do |(key, vals), acc|
    acc[key.to_s] =
      if vals.size == 0
        true
      elsif vals.size == 1
        vals[0]
      else
        vals
      end
  end

  # Parsed options take precedence over the caller-supplied ones.
  opts = additional_options.merge(parsed)

  klass = case name
  when 'guess', nil
    self.guess(:source => code, :mimetype => opts['mimetype'])
  when String
    self.find(name)
  end

  klass && klass.new(opts)
end
guess(info={}, &fallback) click to toggle source

Guess which lexer to use based on a hash of info.

@option info :mimetype

A mimetype to guess by

@option info :filename

A filename to guess by

@option info :source

The source itself, which, if guessing by mimetype or filename
fails, will be searched for shebangs, <!DOCTYPE ...> tags, and
other hints.

@param [Proc] fallback called if multiple lexers are detected.

If omitted, Guesser::Ambiguous is raised.

@see Lexer.detect? @see Lexer.guesses @return [Class<Rouge::Lexer>]

# File lib/rouge/lexer.rb, line 171
# Picks a single lexer from the candidates produced by `guesses(info)`.
#
# @param info [Hash] passed through to `guesses`
# @param fallback [Proc] called with the candidate list when more than one
#   lexer matches
# @return [Object] Lexers::PlainText when nothing matches, the sole candidate
#   when exactly one matches, otherwise whatever +fallback+ returns
# @raise [Guesser::Ambiguous] when several candidates match and no fallback
#   block was given
def guess(info={}, &fallback)
  candidates = guesses(info)

  case candidates.size
  when 0
    Lexers::PlainText
  when 1
    candidates.first
  else
    raise Guesser::Ambiguous.new(candidates) unless fallback

    fallback.call(candidates)
  end
end
guess_by_filename(fname) click to toggle source
# File lib/rouge/lexer.rb, line 188
# Shorthand for `guess` with only a :filename hint.
#
# @param fname [String] the filename to guess by
# @return [Object] whatever `guess` returns
def guess_by_filename(fname)
  guess(filename: fname)
end
guess_by_mimetype(mt) click to toggle source
# File lib/rouge/lexer.rb, line 184
# Shorthand for `guess` with only a :mimetype hint.
#
# @param mt [String] the mimetype to guess by
# @return [Object] whatever `guess` returns
def guess_by_mimetype(mt)
  guess(mimetype: mt)
end
guess_by_source(source) click to toggle source
# File lib/rouge/lexer.rb, line 192
# Shorthand for `guess` with only a :source hint.
#
# @param source [String] the source text to guess by
# @return [Object] whatever `guess` returns
def guess_by_source(source)
  guess(source: source)
end
guesses(info={}) click to toggle source

Guess which lexer to use based on a hash of info.

This accepts the same arguments as Lexer.guess, but will never throw an error. It will return a (possibly empty) list of potential lexers to use.

# File lib/rouge/lexer.rb, line 139
# Builds the ordered list of guessers implied by +info+ and runs them
# against all registered lexers.
#
# Guessers are appended after any supplied in info[:guessers] (the supplied
# list is copied, not mutated), in this order: mimetype, custom glob mapping,
# filename, modeline, source, disambiguation.
#
# @param info [Hash] may contain :mimetype, :filename, :source,
#   :custom_globs and :guessers
# @return [Object] the result of Guesser.guess(guessers, Lexer.all)
def guesses(info={})
  mimetype = info[:mimetype]
  filename = info[:filename]
  source = info[:source]
  custom_globs = info[:custom_globs]

  guessers = (info[:guessers] || []).dup

  guessers.push(Guessers::Mimetype.new(mimetype)) if mimetype

  if filename
    # Custom glob mappings are consulted before the built-in filename globs.
    guessers.push(Guessers::GlobMapping.by_pairs(custom_globs, filename)) if custom_globs
    guessers.push(Guessers::Filename.new(filename))
  end

  if source
    guessers.push(Guessers::Modeline.new(source))
    guessers.push(Guessers::Source.new(source))
    # Disambiguation needs both the filename and the source.
    guessers.push(Guessers::Disambiguation.new(filename, source)) if filename
  end

  Guesser.guess(guessers, Lexer.all)
end
lex(stream, opts={}, &b) click to toggle source

Lexes `stream` with the given options. The lex is delegated to a new instance.

@see lex

# File lib/rouge/lexer.rb, line 21
# Lexes +stream+ with a freshly constructed instance.
#
# @param stream [Object] the input handed to the instance's `lex`
# @param opts [Hash] options passed to `new`
# @param b [Proc] optional block forwarded to the instance's `lex`
# @return [Object] whatever the instance's `lex` returns
def lex(stream, opts={}, &b)
  lexer = new(opts)
  lexer.lex(stream, &b)
end
mimetypes(*mts) click to toggle source

Specify a list of mimetypes associated with this lexer.

@example

class Html < Lexer
  mimetypes 'text/html', 'application/xhtml+xml'
end
# File lib/rouge/lexer.rb, line 279
# Appends the given mimetypes to this lexer's list.
#
# @param mts [Array<String>] mimetypes to add (may be empty)
# @return [Array<String>] every mimetype recorded so far
def mimetypes(*mts)
  @mimetypes ||= []
  @mimetypes.concat(mts)
end
new(opts={}) click to toggle source

Create a new lexer with the given options. Individual lexers may specify extra options. The only current globally accepted option is `:debug`.

@option opts :debug

Prints debug information to stdout.  The particular info depends
on the lexer in question.  In regex lexers, this will log the
state stack at the beginning of each step, along with each regex
tried and each stream consumed.  Try it, it's pretty useful.
# File lib/rouge/lexer.rb, line 312
# Stores the given options with every key converted to a String, then
# decides whether debugging is active for this instance.
#
# @param opts [Hash] lexer options; keys may be Strings or Symbols
def initialize(opts={})
  @options = opts.each_with_object({}) do |(key, value), normalized|
    normalized[key.to_s] = value
  end

  # Debugging needs both the class-wide switch and the 'debug' option.
  @debug = Lexer.debug_enabled? && bool_option('debug')
end
option(name, desc) click to toggle source
# File lib/rouge/lexer.rb, line 110
# Records the documentation string for one option.
#
# @param name [#to_s] the option name; stored as a String key
# @param desc [Object] the description to store in `option_docs`
# @return [Object] +desc+
def option(name, desc)
  key = name.to_s
  option_docs[key] = desc
end
option_docs() click to toggle source
# File lib/rouge/lexer.rb, line 106
# Returns this class's option documentation table, creating it on first use
# as an InheritableHash built from the superclass's `option_docs`.
#
# @return [InheritableHash] the memoized table
def option_docs
  return @option_docs if @option_docs

  @option_docs = InheritableHash.new(superclass.option_docs)
end
tag(t=nil) click to toggle source

Used to specify or get the canonical name of this lexer class.

@example

class MyLexer < Lexer
  tag 'foo'
end

MyLexer.tag # => 'foo'

Lexer.find('foo') # => MyLexer
# File lib/rouge/lexer.rb, line 234
# Reads or sets the canonical name of this lexer class.
#
# @param t [#to_s, nil] the new tag, or nil (the default) to read
# @return [String, nil] the stored tag when reading; when setting, the
#   result of registering this class under the new tag
def tag(t=nil)
  if t.nil?
    @tag
  else
    @tag = t.to_s
    # Make this class discoverable under its canonical name.
    Lexer.register(@tag, self)
  end
end
title(t=nil) click to toggle source

Specify or get this lexer’s title. Meant to be human-readable.

# File lib/rouge/lexer.rb, line 90
# Specify or get this lexer's human-readable title.
#
# With an argument, stores it as the title (replacing any earlier value).
# Without one, returns the stored title, falling back to the capitalized
# tag, which is then memoized.
#
# Earlier behavior used `@title ||= t` for both paths, so an explicit title
# was silently ignored once any title had been memoized, and the getter
# evaluated `tag.capitalize` even when a title was already stored.
#
# @param t [String, nil] the new title, or nil (the default) to read
# @return [String] the title
def title(t=nil)
  return @title = t unless t.nil?

  # Only consult the tag when no title has been stored yet.
  @title ||= tag.capitalize
end

Protected Class Methods

register(name, lexer) click to toggle source

@private

# File lib/rouge/lexer.rb, line 217
# Adds a lexer to the registry under the given name.
#
# @param name [#to_s] the lookup name; stored as a String key
# @param lexer [Object] the lexer to register
# @return [Object] +lexer+
def register(name, lexer)
  # A memoized lexer list would be stale after this change, so clear it
  # (only if it has ever been set).
  @all = nil if instance_variable_defined?(:@all)

  key = name.to_s
  registry[key] = lexer
end

Private Class Methods

registry() click to toggle source
# File lib/rouge/lexer.rb, line 295
# Returns the name-to-lexer Hash held in @registry, creating an empty one
# on first use.
#
# @return [Hash]
def registry
  @registry = {} unless @registry
  @registry
end

Public Instance Methods

as_bool(val) click to toggle source
# File lib/rouge/lexer.rb, line 319
# Interprets an option value as a boolean.
#
# nil, false, 0, '0', 'false' and 'off' are false. For an Array the last
# element decides (an empty Array is true). Everything else is true.
#
# @param val [Object] the raw option value
# @return [Boolean]
def as_bool(val)
  return val.empty? || as_bool(val.last) if val.is_a?(Array)

  ![nil, false, 0, '0', 'false', 'off'].include?(val)
end
as_lexer(val) click to toggle source
# File lib/rouge/lexer.rb, line 347
# Coerces an option value into a lexer instance.
#
# * Array: the last element is coerced.
# * Lexer subclass: instantiated with this lexer's @options.
# * Lexer instance: returned as is.
# * String: looked up with Lexer.find and, if found, instantiated with
#   this lexer's @options.
#
# @param val [Object] the raw option value
# @return [Object, nil] a lexer instance, or nil when +val+ cannot be coerced
def as_lexer(val)
  if val.is_a?(Array)
    as_lexer(val.last)
  elsif val.is_a?(Class) && val < Lexer
    val.new(@options)
  elsif Lexer === val
    val
  elsif String === val
    found = Lexer.find(val)
    found && found.new(@options)
  end
end
as_list(val) click to toggle source
# File lib/rouge/lexer.rb, line 336
# Coerces an option value into a flat list of strings.
#
# Strings are split on commas, Arrays are coerced element by element and
# flattened, and any other value yields an empty list.
#
# @param val [Object] the raw option value
# @return [Array<String>]
def as_list(val)
  return val.split(',') if val.is_a?(String)
  return [] unless val.is_a?(Array)

  val.flat_map { |item| as_list(item) }
end
as_string(val) click to toggle source
# File lib/rouge/lexer.rb, line 330
# Coerces an option value into a String.
#
# For an Array the last element is used (repeatedly, if it is itself an
# Array). nil and false yield nil; anything else is converted with #to_s.
#
# @param val [Object] the raw option value
# @return [String, nil]
def as_string(val)
  val = val.last while val.is_a?(Array)

  val.to_s if val
end
as_token(val) click to toggle source
# File lib/rouge/lexer.rb, line 360
# Coerces an option value into a token.
#
# For an Array the last element is used (repeatedly, if it is itself an
# Array). A value matched by `Token ===` is returned unchanged; anything
# else is looked up with `Token[...]`.
#
# @param val [Object] the raw option value
# @return [Object] the token
def as_token(val)
  val = val.last while val.is_a?(Array)

  Token === val ? val : Token[val]
end
bool_option(name, &default) click to toggle source
# File lib/rouge/lexer.rb, line 370
# Reads an option as a boolean.
#
# @param name [#to_s] the option name
# @param default [Proc] optional block supplying the value when the option
#   was not given
# @return [Object] `as_bool` of the stored value when the option is present;
#   otherwise the block's result, or false when no block was given
def bool_option(name, &default)
  key = name.to_s

  return as_bool(@options[key]) if @options.key?(key)
  return false unless default

  default.call
end
continue_lex(string, &b) click to toggle source

Continue the lex from the current state without resetting

# File lib/rouge/lexer.rb, line 455
# Streams tokens for +string+ via #stream_tokens and yields them to the
# block, merging runs of consecutive chunks that share the same token.
#
# @param string [Object] the input handed to #stream_tokens
# @yield [token, chunk] one pair per run of same-token chunks
# @return an enumerator over the same pairs when no block is given
def continue_lex(string, &b)
  return enum_for(:continue_lex, string, &b) unless block_given?

  # consolidate consecutive tokens of the same type
  last_token = nil
  last_val = nil
  stream_tokens(string) do |tok, val|
    # Empty chunks are dropped entirely; they neither emit nor break a run.
    next if val.empty?

    if tok == last_token
      # Same token as the pending run: extend the pending chunk in place.
      # NOTE(review): `<<` mutates the chunk object yielded by #stream_tokens,
      # so this presumably relies on those chunks being unfrozen - confirm.
      last_val << val
      next
    end

    # Token changed: flush the pending run (if any) and start a new one.
    b.call(last_token, last_val) if last_token
    last_token = tok
    last_val = val
  end

  # Flush the final pending run.
  b.call(last_token, last_val) if last_token
end
hash_option(name, defaults, &val_cast) click to toggle source
# File lib/rouge/lexer.rb, line 396
# Collects a hash-valued option, consuming the entries it uses from @options.
#
# Values come from three places, later ones overriding earlier ones:
# 1. a copy of +defaults+;
# 2. the Hash stored under +name+ (ignored if it is not a Hash), with its
#    keys converted to Strings;
# 3. every option whose key starts with `name[key]`, e.g. `vars[foo]`.
#
# The bracketed form is matched from the start of the key. Previously the
# pattern was unanchored, so a key such as `x-vars[foo]` was consumed as an
# entry of the `vars` option.
#
# @param name [#to_s] the option name
# @param defaults [Hash] initial values; not modified
# @param val_cast [Proc] optional block applied to every collected value
#   (defaults are not cast)
# @return [Hash] the combined hash
def hash_option(name, defaults, &val_cast)
  name = name.to_s
  out = defaults.dup

  base = @options.delete(name)
  base = {} unless base.is_a?(Hash)
  base.each { |k, v| out[k.to_s] = val_cast ? val_cast.call(v) : v }

  # Iterate over a snapshot of the keys because matching entries are deleted.
  @options.keys.each do |key|
    match = /\A(\w+)\[(\w+)\]/.match(key)
    next unless match && match[1] == name

    value = @options.delete(key)
    out[match[2]] = val_cast ? val_cast.call(value) : value
  end

  out
end
lex(string, opts=nil, &b) click to toggle source

Given a string, yield [token, chunk] pairs. If no block is given, an enumerator is returned.

@option opts :continue

Continue the lex from the previous state (i.e. don't call #reset!)

@note The use of :continue => true has been deprecated. A warning is

issued if run with `$VERBOSE` set to true.

@note The use of arbitrary `opts` has never been supported, but we

previously ignored them with no error. We now warn unconditionally.
# File lib/rouge/lexer.rb, line 432
# Lexes +string+ from a clean state, yielding [token, chunk] pairs.
#
# @param string [String] the text to lex
# @param opts [Hash, nil] not supported; any key other than :continue
#   triggers a warning, and a truthy :continue (deprecated) skips the reset
#   and goes straight to #continue_lex
# @yield [token, chunk]
# @return an enumerator when no block is given, otherwise whatever
#   #continue_lex returns
def lex(string, opts=nil, &b)
  # Treat "no options" as an empty hash so the checks below read linearly.
  given = opts || {}

  unsupported = given.keys - [:continue]
  unless unsupported.empty?
    # improper use of options hash
    warn('Improper use of Lexer#lex - this method does not receive options.' +
         ' This will become an error in a future version.')
  end

  if given[:continue]
    warn '`lex :continue => true` is deprecated, please use #continue_lex instead'
    return continue_lex(string, &b)
  end

  return enum_for(:lex, string) unless block_given?

  # Validate the encoding and clear previous state before a fresh lex.
  Lexer.assert_utf8!(string)
  reset!

  continue_lex(string, &b)
end
lexer_option(name, &default) click to toggle source
# File lib/rouge/lexer.rb, line 384
# Removes the named option from @options and coerces it with #as_lexer.
#
# @param name [#to_s] the option name
# @param default [Proc] optional block; when the option is absent it is
#   called with the option's key and its result is coerced instead
# @return [Object] the result of #as_lexer
def lexer_option(name, &default)
  raw = @options.delete(name.to_s, &default)
  as_lexer(raw)
end
list_option(name, &default) click to toggle source
# File lib/rouge/lexer.rb, line 388
# Removes the named option from @options and coerces it with #as_list.
#
# @param name [#to_s] the option name
# @param default [Proc] optional block; when the option is absent it is
#   called with the option's key and its result is coerced instead
# @return [Object] the result of #as_list
def list_option(name, &default)
  raw = @options.delete(name.to_s, &default)
  as_list(raw)
end
reset!() click to toggle source

@abstract

Called by #lex immediately before a new lex begins, so that state left over from a previous lex is cleared. The default implementation is a no-op.

# File lib/rouge/lexer.rb, line 418
# State-reset hook. #lex calls it right before handing the input to
# #continue_lex; this base implementation intentionally does nothing.
def reset!
end
stream_tokens(stream, &b) click to toggle source

@abstract

Yield `[token, chunk]` pairs, given a prepared input stream. This must be implemented.

@param [StringScanner] stream

the stream
# File lib/rouge/lexer.rb, line 489
# Abstract tokenizing hook: this base implementation always raises a
# RuntimeError with the message 'abstract' and never yields.
# #continue_lex calls it with a block expecting [token, chunk] pairs.
def stream_tokens(stream, &b)
  raise 'abstract'
end
string_option(name, &default) click to toggle source
# File lib/rouge/lexer.rb, line 380
# Removes the named option from @options and coerces it with #as_string.
#
# @param name [#to_s] the option name
# @param default [Proc] optional block; when the option is absent it is
#   called with the option's key and its result is coerced instead
# @return [Object] the result of #as_string
def string_option(name, &default)
  raw = @options.delete(name.to_s, &default)
  as_string(raw)
end
tag() click to toggle source

delegated to {Lexer.tag}

# File lib/rouge/lexer.rb, line 478
# Instance-level accessor that returns whatever the receiver's class
# reports as its `tag`.
def tag
  self.class.tag
end
token_option(name, &default) click to toggle source
# File lib/rouge/lexer.rb, line 392
# Removes the named option from @options and coerces it with #as_token.
#
# @param name [#to_s] the option name
# @param default [Proc] optional block; when the option is absent it is
#   called with the option's key and its result is coerced instead
# @return [Object] the result of #as_token
def token_option(name, &default)
  raw = @options.delete(name.to_s, &default)
  as_token(raw)
end