class Shoes::Highlighter::Syntax::Ruby

A tokenizer for the Ruby language. It recognizes all common syntax (and some less common syntax) but because it is not a true lexer, it will make mistakes on some ambiguous cases.

Constants

KEYWORDS

The list of all identifiers recognized as keywords.

Public Instance Methods

setup() click to toggle source

Perform Ruby-specific setup.

# File lib/shoes/highlighter/lang/ruby.rb, line 16
# Reset the tokenizer state that is specific to Ruby source.
#
# * @selector       - false: the previous token was not a lone "."
# * @allow_operator - false: a "/" seen next starts a regex, not a division
# * @heredocs       - empty queue of heredocs still waiting for their body
#
# Returns the (empty) heredoc queue, as the last assignment.
def setup
  @selector = @allow_operator = false
  @heredocs = []
end
step() click to toggle source

Step through a single iteration of the tokenization process.

# File lib/shoes/highlighter/lang/ruby.rb, line 23
# Step through a single iteration of the tokenization process.
#
# Exactly one construct at the current scan position is classified and
# handed to +start_group+ / +scan_delimited_region+. Three pieces of state
# carry context from one call to the next:
#
# * @selector       - true right after a single "."; the following word is
#                     then a method name (:ident), never a keyword/constant.
# * @allow_operator - true when the previous token can end an expression, so
#                     a "/" is the division operator rather than a regex.
# * @heredocs       - heredocs announced with "<<" on the current line; each
#                     body is scanned when the next line break is reached.
def step
  case
  when bol? && check(/=begin/)
    # An unterminated =begin block runs to the end of the input; without the
    # fallback nothing would be consumed and nil would be passed on.
    start_group(:comment, scan_until(/^=end#{EOL}/) || scan_until(/\Z/))
  when bol? && check(/__END__#{EOL}/)
    start_group(:comment, scan_until(/\Z/))
  else
    case
    when check(/def\s+/)
      start_group :keyword, scan(/def\s+/)
      start_group :method,  scan_until(/(?=[;(\s]|#{EOL})/)
    when check(/class\s+/)
      start_group :keyword, scan(/class\s+/)
      start_group :class,  scan_until(/(?=[;\s<]|#{EOL})/)
    when check(/module\s+/)
      start_group :keyword, scan(/module\s+/)
      start_group :module,  scan_until(/(?=[;\s]|#{EOL})/)
    when check(/::/)
      start_group :punct, scan(/::/)
    when check(/:"/)
      start_group :symbol, scan(/:/)
      scan_delimited_region :symbol, :symbol, "", true
      @allow_operator = true
    when check(/:'/)
      start_group :symbol, scan(/:/)
      scan_delimited_region :symbol, :symbol, "", false
      @allow_operator = true
    when scan(/:[_a-zA-Z@$][$@\w]*[=!?]?/)
      start_group :symbol, matched
      @allow_operator = true
    when scan(/\?(\\[^\n\r]|[^\\\n\r\s])/)
      start_group :char, matched
      @allow_operator = true
    when scan(/(__FILE__|__LINE__|true|false|nil|self)[?!]?(?!\w)/)
      # The (?!\w) lookahead keeps identifiers that merely start with one of
      # these words (selfish, nil_value, ...) out of this branch.
      # After a "." or with a ?/! suffix the word is a method name.
      method_name = @selector || matched.end_with?("?", "!")
      start_group(method_name ? :ident : :constant, matched)
      @selector = false
      @allow_operator = true
    when scan(/0([bB][01]+|[oO][0-7]+|[dD][0-9]+|[xX][0-9a-fA-F]+)/)
      start_group :number, matched
      @allow_operator = true
    else
      case peek(2)
      when "%r"
        scan_delimited_region :punct, :regex, scan(/../), true
        @allow_operator = true
      when "%w", "%q"
        scan_delimited_region :punct, :string, scan(/../), false
        @allow_operator = true
      when "%s"
        scan_delimited_region :punct, :symbol, scan(/../), false
        @allow_operator = true
      when "%W", "%Q", "%x"
        scan_delimited_region :punct, :string, scan(/../), true
        @allow_operator = true
      when /%[^\sa-zA-Z0-9]/
        scan_delimited_region :punct, :string, scan(/./), true
        @allow_operator = true
      when "<<"
        # "<<" directly after a word character is treated as the shift/append
        # operator, not as the start of a heredoc.
        saw_word = (chunk[-1, 1] =~ /[\w!?]/)
        start_group :punct, scan(/<</)
        if saw_word
          @allow_operator = false
          return
        end

        float_right = scan(/-/)
        append "-" if float_right
        if (type = scan(/['"]/))
          append type
          delim = scan_until(/(?=#{type})/)
          if delim.nil?
            # No closing quote for the heredoc tag: swallow the rest.
            append scan_until(/\Z/)
            return
          end
        else
          delim = scan(/\w+/) or return
        end
        start_group :constant, delim
        start_group :punct, scan(/#{type}/) if type
        # The body is scanned later, when the end of this line is reached.
        @heredocs << [float_right, type, delim]
        @allow_operator = true
      else
        case peek(1)
        when /[\n\r]/
          if @heredocs.empty?
            start_group :normal, scan(/\s+/)
          else
            scan_heredoc(*@heredocs.shift)
          end
          @allow_operator = false
        when /\s/
          start_group :normal, scan(/\s+/)
        when "#"
          start_group :comment, scan(/#[^\n\r]*/)
        when /[A-Z]/
          start_group(@selector ? :ident : :constant, scan(/\w+/))
          @allow_operator = true
        when /[a-z_]/
          word = scan(/\w+[?!]?/)
          if !@selector && KEYWORDS.include?(word)
            start_group :keyword, word
            @allow_operator = false
          else
            # Plain identifier. This must be an unconditional branch so that
            # the flag never depends on what start_group happens to return.
            start_group :ident, word
            @allow_operator = true
          end
          @selector = false
        when /\d/
          start_group :number,
                      scan(/[\d_]+(\.[\d_]+)?([eE][\d_]+)?/)
          @allow_operator = true
        when '"'
          scan_delimited_region :punct, :string, "", true
          @allow_operator = true
        when '/'
          if @allow_operator
            start_group :punct, scan(%r{/})
            @allow_operator = false
          else
            scan_delimited_region :punct, :regex, "", true
            @allow_operator = true
          end
        when "'"
          scan_delimited_region :punct, :string, "", false
          @allow_operator = true
        when "."
          dots = scan(/\.{1,3}/)
          start_group :punct, dots
          # Only a single dot is a method selector; ".." / "..." are ranges.
          @selector = (dots.length == 1)
        when /[@]/
          start_group :attribute, scan(/@{1,2}\w*/)
          @allow_operator = true
        when /[$]/
          start_group :global, scan(/\$/)
          start_group :global, scan(/\w+|./) if check(/./)
          @allow_operator = true
        when /[-!?*\/+=<>(\[\{}:;,&|%]/
          start_group :punct, scan(/./)
          @allow_operator = false
        when /[)\]]/
          start_group :punct, scan(/./)
          @allow_operator = true
        else
          # all else just falls through this, to prevent
          # infinite loops...
          append getch
        end
      end
    end
  end
end

Private Instance Methods

scan_delimited_region(delim_group, inner_group, starter, exprs, delim = nil, heredoc = false) click to toggle source

Scan a delimited region of text. This handles the simple cases (strings delimited with quotes) as well as the more complex cases of %-strings and here-documents.

  • delim_group is the group to use to classify the delimiters of the region

  • inner_group is the group to use to classify the contents of the region

  • starter is the text to use as the starting delimiter

  • exprs is a boolean flag indicating whether the region is an interpolated string or not

  • delim is the text to use as the delimiter of the region. If nil, the next character will be treated as the delimiter.

  • heredoc is either false, meaning the region is not a heredoc, or :flush (meaning the delimiter must be flushed left), or :float (meaning the delimiter doesn’t have to be flush left).

# File lib/shoes/highlighter/lang/ruby.rb, line 199
# Scan a delimited region of text: quoted strings, %-literals, regexes and
# heredoc bodies.
#
# delim_group - group used for the delimiters themselves
# inner_group - group (and region) used for the text between the delimiters
# starter     - text emitted before the opening delimiter (e.g. "%w" or "")
# exprs       - true when the region is interpolated: backslash escapes are
#               consumed generically and "#{", "#$", "#@" start an expression
# delim       - closing delimiter; when nil the next character is read as the
#               opening delimiter and its closing partner is derived from it
# heredoc     - false, or :flush (terminator at column 0), or :float
#               (terminator may be indented)
#
# Raises RuntimeError if the scanner matches something that is neither an
# escape, the delimiter, nor an interpolation start.
def scan_delimited_region(delim_group, inner_group, starter, exprs,
                          delim = nil, heredoc = false)
  unless delim
    start_group delim_group, starter
    delim = scan(/./)
    # End of line/input right after the starter: there is no delimiter, so
    # there is no region to scan (and nil cannot be escaped below).
    return if delim.nil?
    append delim

    # Bracket-style openers close with their mirrored partner.
    delim = case delim
            when '{' then '}'
            when '(' then ')'
            when '[' then ']'
            when '<' then '>'
            else delim
            end
  end

  start_region inner_group

  terminator =
    if heredoc
      indent = heredoc == :float ? '\s*' : ''
      # "\\s" (not "\s", which is a literal space in a double-quoted string)
      # so that any trailing whitespace after the terminator is tolerated.
      "(^#{indent}#{Regexp.escape(delim)}\\s*?)#{EOL}"
    else
      Regexp.escape(delim)
    end
  pattern = "\\\\|#{terminator}"
  pattern += "|#(\\$|@@?|\\{)" if exprs
  pattern = Regexp.new(pattern)

  loop do
    start_pos = pos
    if scan_until(pattern).nil?
      # Unterminated region: everything that is left belongs to it.
      start_group inner_group, scan_until(/\Z/)
      break
    end

    # Plain text between the previous stop and this match.
    text = pre_match[start_pos..-1]
    start_group inner_group, text unless text.empty?

    case matched.strip
    when "\\"
      if exprs
        start_group :escape, "\\"
        c = getch
        append c if c # nil when the backslash is the last character
        case c
        when 'x'
          digits = scan(/[a-fA-F0-9]{1,2}/)
          append digits if digits
        when /[0-7]/
          append scan(/[0-7]{0,2}/)
        end
      else
        # Non-interpolated strings only know \' and \\ as escapes.
        case peek(1)
        when "'"
          scan(/./)
          start_group :escape, "\\'"
        when "\\"
          scan(/./)
          start_group :escape, "\\\\"
        else
          start_group inner_group, "\\"
        end
      end
    when delim
      end_region inner_group
      start_group delim_group, matched
      break
    when /^#/
      do_highlight = (option(:expressions) == :highlight)
      start_region :expr if do_highlight
      start_group :expr, matched
      case matched[1]
      when '{'
        # Collect everything up to the matching "}", tracking nested braces.
        depth = 1
        content = ""
        while depth > 0
          brace_start = pos
          if scan_until(/[\{}]/).nil?
            content << scan_until(/\Z/)
            break
          end
          depth += (matched == "{" ? 1 : -1)
          content << pre_match[brace_start..-1]
          content << matched if depth > 0
        end
        # Only emit the closing brace when one was really consumed; an
        # unterminated "#{" must not add text that is not in the source.
        closed = depth.zero?
        if do_highlight
          subtokenize "ruby", content
          start_group :expr, "}" if closed
        else
          append(closed ? content + "}" : content)
        end
      when '$', '@'
        # "#$!" and friends have no word characters after the sigil.
        name = scan(/\w+/)
        append name if name
      end
      end_region :expr if do_highlight
    else raise "unexpected match on #{matched}"
    end
  end
end
scan_heredoc(float, type, delim) click to toggle source

Scan a heredoc beginning at the current position.

  • float indicates whether the delimiter may be floated to the right

  • type is nil, a single quote, or a double quote

  • delim is the delimiter to look for

# File lib/shoes/highlighter/lang/ruby.rb, line 307
# Scan the body of a heredoc that starts at the current position.
#
# float - truthy when the terminator may be indented ("<<-" form)
# type  - nil, "'" or '"': the quote used around the heredoc tag
# delim - the terminator text to look for
#
# The body is interpolated unless the tag was single-quoted.
def scan_heredoc(float, type, delim)
  interpolated = (type != "'")
  placement = float ? :float : :flush
  scan_delimited_region(:constant, :string, "", interpolated, delim, placement)
end