class Shoes::Highlighter::Syntax::Ruby
A tokenizer for the Ruby language. It recognizes all common syntax (and some less common syntax), but because it is not a true lexer, it will make mistakes on some ambiguous cases.
Constants
- KEYWORDS
The list of all identifiers recognized as keywords.
Public Instance Methods
Perform ruby-specific setup
# File lib/shoes/highlighter/lang/ruby.rb, line 16
#
# Perform ruby-specific setup by resetting the state that #step carries
# from one token to the next.
def setup
  # @selector is raised by #step right after a single "." so the following
  # word is classified as an identifier; @allow_operator tells #step whether
  # a "/" is an operator or the start of a regex. Both start out false.
  @selector = @allow_operator = false

  # Heredocs announced by "<<" whose bodies have not been scanned yet.
  @heredocs = []
end
Step through a single iteration of the tokenization process.
# File lib/shoes/highlighter/lang/ruby.rb, line 23
#
# Step through a single iteration of the tokenization process.
#
# Each call looks at the text at the current scan position, classifies the
# next piece of it (comment, keyword, symbol, number, string, heredoc, ...)
# and hands it to start_group / append / scan_delimited_region.
#
# State carried between calls:
# * @selector       - true right after a single "."; the next word is then
#                     an identifier even if it looks like a keyword/constant.
# * @allow_operator - true when the previous token could end an expression,
#                     so a following "/" is an operator rather than a regex.
# * @heredocs       - [marker, quote, delimiter] triples queued by "<<" and
#                     consumed at the next line break.
def step
  # Pseudo-constants. The \b keeps identifiers that merely start with one
  # of these words (nil_value, selfish, true_count) out of this branch.
  pseudo = /(__FILE__|__LINE__|true|false|nil|self)\b[?!]?/

  case
  when bol? && check(/=begin/)
    # An unterminated =begin block extends to the end of the input. Without
    # the fallback scan_until yields nil and nothing is consumed, so a
    # caller that loops until end of input would never advance.
    start_group(:comment, scan_until(/^=end#{EOL}/) || scan_until(/\Z/))
  when bol? && check(/__END__#{EOL}/)
    start_group(:comment, scan_until(/\Z/))
  else
    case
    when check(/def\s+/)
      start_group :keyword, scan(/def\s+/)
      start_group :method, scan_until(/(?=[;(\s]|#{EOL})/)
    when check(/class\s+/)
      start_group :keyword, scan(/class\s+/)
      start_group :class, scan_until(/(?=[;\s<]|#{EOL})/)
    when check(/module\s+/)
      start_group :keyword, scan(/module\s+/)
      start_group :module, scan_until(/(?=[;\s]|#{EOL})/)
    when check(/::/)
      start_group :punct, scan(/::/)
    when check(/:"/)
      start_group :symbol, scan(/:/)
      scan_delimited_region :symbol, :symbol, "", true
      @allow_operator = true
    when check(/:'/)
      start_group :symbol, scan(/:/)
      scan_delimited_region :symbol, :symbol, "", false
      @allow_operator = true
    when scan(/:[_a-zA-Z@$][$@\w]*[=!?]?/)
      start_group :symbol, matched
      @allow_operator = true
    when scan(/\?(\\[^\n\r]|[^\\\n\r\s])/)
      start_group :char, matched
      @allow_operator = true
    when check(pseudo)
      # After a "." or with a trailing ?/! this is a method name.
      if @selector || matched.end_with?('?', '!')
        start_group :ident, scan(pseudo)
      else
        start_group :constant, scan(/(__FILE__|__LINE__|true|false|nil|self)/)
      end
      @selector = false
      @allow_operator = true
    when scan(/0([bB][01]+|[oO][0-7]+|[dD][0-9]+|[xX][0-9a-fA-F]+)/)
      start_group :number, matched
      @allow_operator = true
    else
      case peek(2)
      when "%r"
        scan_delimited_region :punct, :regex, scan(/../), true
        @allow_operator = true
      when "%w", "%q"
        scan_delimited_region :punct, :string, scan(/../), false
        @allow_operator = true
      when "%s"
        scan_delimited_region :punct, :symbol, scan(/../), false
        @allow_operator = true
      when "%W", "%Q", "%x"
        scan_delimited_region :punct, :string, scan(/../), true
        @allow_operator = true
      when /%[^\sa-zA-Z0-9]/
        scan_delimited_region :punct, :string, scan(/./), true
        @allow_operator = true
      when "<<"
        # "<<" directly after a word character is treated as an operator
        # (shift/append), not as the start of a heredoc.
        saw_word = (chunk[-1, 1] =~ /[\w!?]/)
        start_group :punct, scan(/<</)
        if saw_word
          @allow_operator = false
          return
        end

        # "<<-" and "<<~" both allow the terminator to be indented.
        float_right = scan(/[-~]/)
        append float_right if float_right
        if (type = scan(/['"]/))
          append type
          delim = scan_until(/(?=#{type})/)
          if delim.nil?
            # No closing quote for the delimiter: emit the rest verbatim.
            append scan_until(/\Z/)
            return
          end
        else
          delim = scan(/\w+/)
          return unless delim
        end
        start_group :constant, delim
        start_group :punct, scan(/#{type}/) if type
        @heredocs << [float_right, type, delim]
        @allow_operator = true
      else
        case peek(1)
        when /[\n\r]/
          # A line break is where the body of a queued heredoc begins.
          if @heredocs.empty?
            start_group :normal, scan(/\s+/)
          else
            scan_heredoc(*@heredocs.shift)
          end
          @allow_operator = false
        when /\s/
          start_group :normal, scan(/\s+/)
        when "#"
          start_group :comment, scan(/#[^\n\r]*/)
        when /[A-Z]/
          start_group((@selector ? :ident : :constant), scan(/\w+/))
          @allow_operator = true
        when /[a-z_]/
          word = scan(/\w+[?!]?/)
          if !@selector && KEYWORDS.include?(word)
            start_group :keyword, word
            @allow_operator = false
          else
            # This used to be a bare `elsif`, which made the start_group
            # call the branch condition and the assignment below depend on
            # its return value.
            start_group :ident, word
            @allow_operator = true
          end
          @selector = false
        when /\d/
          # The exponent may carry a sign (1e-5, 2.5E+3).
          start_group :number, scan(/[\d_]+(\.[\d_]+)?([eE][-+]?[\d_]+)?/)
          @allow_operator = true
        when '"'
          scan_delimited_region :punct, :string, "", true
          @allow_operator = true
        when '/'
          if @allow_operator
            start_group :punct, scan(%r{/})
            @allow_operator = false
          else
            scan_delimited_region :punct, :regex, "", true
            @allow_operator = true
          end
        when "'"
          scan_delimited_region :punct, :string, "", false
          @allow_operator = true
        when "."
          dots = scan(/\.{1,3}/)
          start_group :punct, dots
          # Only a single dot is a method call; ".." and "..." are ranges.
          @selector = (dots.length == 1)
        when /[@]/
          start_group :attribute, scan(/@{1,2}\w*/)
          @allow_operator = true
        when /[$]/
          start_group :global, scan(/\$/)
          start_group :global, scan(/\w+|./) if check(/./)
          @allow_operator = true
        when /[-!?*\/+=<>(\[\{}:;,&|%]/
          start_group :punct, scan(/./)
          @allow_operator = false
        when /[)\]]/
          start_group :punct, scan(/./)
          @allow_operator = true
        else
          # all else just falls through this, to prevent
          # infinite loops...
          append getch
        end
      end
    end
  end
end
Private Instance Methods
Scan a delimited region of text. This handles the simple cases (strings delimited with quotes) as well as the more complex cases of %-strings and here-documents.
- delim_group is the group to use to classify the delimiters of the region
- inner_group is the group to use to classify the contents of the region
- starter is the text to use as the starting delimiter
- exprs is a boolean flag indicating whether the region is an interpolated string or not
- delim is the text to use as the delimiter of the region. If nil, the next character will be treated as the delimiter.
- heredoc is either false, meaning the region is not a heredoc, or :flush (meaning the delimiter must be flushed left), or :float (meaning the delimiter doesn't have to be flush left).
# File lib/shoes/highlighter/lang/ruby.rb, line 199
#
# Scan a delimited region of text. This handles the simple cases (strings
# delimited with quotes) as well as the more complex cases of %-strings and
# here-documents.
#
# * +delim_group+ is the group to use to classify the delimiters of the region
# * +inner_group+ is the group to use to classify the contents of the region
# * +starter+ is the text to use as the starting delimiter
# * +exprs+ is a boolean flag indicating whether the region is an
#   interpolated string or not
# * +delim+ is the text to use as the delimiter of the region. If +nil+, the
#   next character will be treated as the delimiter.
# * +heredoc+ is either +false+, meaning the region is not a heredoc, or
#   +:flush+ (the delimiter must be flushed left), or +:float+ (the delimiter
#   doesn't have to be flush left).
#
# Raises RuntimeError if the scanner stops on text that is neither a
# backslash, the delimiter, nor an interpolation marker.
def scan_delimited_region(delim_group, inner_group, starter, exprs, delim = nil, heredoc = false)
  unless delim
    start_group delim_group, starter
    delim = scan(/./)
    # End of input or a line break right after the starter: there is no
    # delimiter, hence no region to scan.
    return if delim.nil?

    append delim
    # Bracket-style openers are closed by their counterpart.
    delim = case delim
            when '{' then '}'
            when '(' then ')'
            when '[' then ']'
            when '<' then '>'
            else delim
            end
  end

  start_region inner_group

  # The scanner stops at a backslash, at the closing delimiter and, for
  # interpolated regions, at "#{", "#$", "#@" or "#@@".
  terminator =
    if heredoc
      indent = heredoc == :float ? '\s*' : ''
      # NOTE: inside double quotes "\s" is a literal space, so this allows
      # trailing spaces (not tabs) after the heredoc delimiter.
      "(^#{indent}#{Regexp.escape(delim)}\s*?)#{EOL}"
    else
      Regexp.escape(delim)
    end
  pattern = "\\\\|#{terminator}"
  pattern += "|#(\\$|@@?|\\{)" if exprs
  items = Regexp.new(pattern)

  loop do
    consumed = scan_until(items)
    if consumed.nil?
      # Unterminated region: everything that is left belongs to it.
      start_group inner_group, scan_until(/\Z/)
      break
    end

    # Text between the previous stop and this match. It is taken from what
    # scan_until just returned (assumed to be the consumed text, match
    # included) instead of slicing pre_match by pos, because pos is a byte
    # offset and goes out of step with character indexes on multibyte input.
    text = consumed[0, consumed.length - matched.length]
    start_group inner_group, text unless text.empty?

    case matched.strip
    when "\\"
      if exprs
        start_group :escape, "\\"
        escaped = getch
        # getch is nil when the backslash is the last character of the input.
        if escaped
          append escaped
          case escaped
          when 'x'
            hex = scan(/[a-fA-F0-9]{1,2}/)
            append hex if hex
          when /[0-7]/
            append scan(/[0-7]{0,2}/)
          end
        end
      else
        # Non-interpolated regions only know \' and \\ as escapes.
        case peek(1)
        when "'"
          scan(/./)
          start_group :escape, "\\'"
        when "\\"
          scan(/./)
          start_group :escape, "\\\\"
        else
          start_group inner_group, "\\"
        end
      end
    when delim
      end_region inner_group
      start_group delim_group, matched
      break
    when /^#/
      do_highlight = (option(:expressions) == :highlight)
      start_region :expr if do_highlight
      start_group :expr, matched
      case matched[1]
      when '{'
        depth = 1
        content = +""
        closed = true
        while depth > 0
          piece = scan_until(/[\{}]/)
          if piece.nil?
            # The interpolation is never closed; take the rest as-is.
            content << scan_until(/\Z/)
            closed = false
            break
          end
          depth += (matched == "{" ? 1 : -1)
          content << piece[0, piece.length - matched.length]
          content << matched if depth > 0
        end
        # Only emit the closing brace when it was actually found.
        if do_highlight
          subtokenize "ruby", content
          start_group :expr, "}" if closed
        else
          append(closed ? content + "}" : content)
        end
      when '$', '@'
        # "#@" or "#$" may be followed by something that is not a name.
        name = scan(/\w+/)
        append name if name
      end
      end_region :expr if do_highlight
    else
      fail "unexpected match on #{matched}"
    end
  end
end
Scan a heredoc beginning at the current position.
- float indicates whether the delimiter may be floated to the right
- type is nil, a single quote, or a double quote
- delim is the delimiter to look for
# File lib/shoes/highlighter/lang/ruby.rb, line 307
#
# Scan a heredoc beginning at the current position.
#
# * +float+ indicates whether the delimiter may be floated to the right
# * +type+ is +nil+, a single quote, or a double quote
# * +delim+ is the delimiter to look for
def scan_heredoc(float, type, delim)
  # Only a single-quoted delimiter switches interpolation off.
  interpolated = type != "'"
  placement = float ? :float : :flush
  scan_delimited_region(:constant, :string, "", interpolated, delim, placement)
end