class Omnitest::Psychic::Code2Doc::CodeSegmenter
This class was extracted from the [Rocco](rtomayko.github.com/rocco/) project which was in turn based on the [Docco](jashkenas.github.com/docco/).
Constants
- DEFAULT_OPTIONS
Public Class Methods
new(options = {})
click to toggle source
# File lib/omnitest/psychic/code2doc/code_segmenter.rb, line 17 def initialize(options = {}) @options = DEFAULT_OPTIONS.merge options @options[:comment_chars] = generate_comment_chars if @options[:comment_chars].is_a? String end
Public Instance Methods
comment(lines)
click to toggle source
# File lib/omnitest/psychic/code2doc/code_segmenter.rb, line 151 def comment(lines) lines.map do | line | "#{@options[:comment_chars][:single]} #{line}" end.join "\n" end
normalize_leading_spaces(sections)
click to toggle source
Normalizes documentation whitespace by checking for leading whitespace, removing it, and then removing the same amount of whitespace from each succeeding line. That is:
def func(): """ Comment 1 Comment 2 """ print "omg!"
should yield a comment block of ‘Comment 1nComment 2` and code of `def func():n print “omg!”`
# File lib/omnitest/psychic/code2doc/code_segmenter.rb, line 138 def normalize_leading_spaces(sections) sections.map do |section| if section.any? && section[0].any? leading_space = section[0][0].match("^\s+") if leading_space section[0] = section[0].map { |line| line.sub(/^#{leading_space.to_s}/, '') } end end section end end
segment(source_code)
click to toggle source
Parse the raw file source_code into a list of two-tuples. Each tuple has the form ‘[docs, code]` where both elements are arrays containing the raw lines parsed from the input file, comment characters stripped.
# File lib/omnitest/psychic/code2doc/code_segmenter.rb, line 27 def segment(source_code) # rubocop:disable all sections, docs, code = [], [], [] lines = source_code.split("\n") # The first line is ignored if it is a shebang line. We also ignore the # PEP 263 encoding information in python sourcefiles, and the similar ruby # 1.9 syntax. lines.shift if lines[0] =~ /^\#\!/ lines.shift if lines[0] =~ /coding[:=]\s*[-\w.]+/ && %w(python rb).include?(@options[:language]) # To detect both block comments and single-line comments, we'll set # up a tiny state machine, and loop through each line of the file. # This requires an `in_comment_block` boolean, and a few regular # expressions for line tests. We'll do the same for fake heredoc parsing. in_comment_block = false in_heredoc = false single_line_comment, block_comment_start, block_comment_mid, block_comment_end = nil, nil, nil, nil unless @options[:comment_chars][:single].nil? single_line_comment = Regexp.new("^\\s*#{Regexp.escape(@options[:comment_chars][:single])}\\s?") end unless @options[:comment_chars][:multi].nil? block_comment_start = Regexp.new("^\\s*#{Regexp.escape(@options[:comment_chars][:multi][:start])}\\s*$") block_comment_end = Regexp.new("^\\s*#{Regexp.escape(@options[:comment_chars][:multi][:end])}\\s*$") block_comment_one_liner = Regexp.new("^\\s*#{Regexp.escape(@options[:comment_chars][:multi][:start])}\\s*(.*?)\\s*#{Regexp.escape(@options[:comment_chars][:multi][:end])}\\s*$") # rubocop:disable Metrics/LineLength block_comment_start_with = Regexp.new("^\\s*#{Regexp.escape(@options[:comment_chars][:multi][:start])}\\s*(.*?)$") block_comment_end_with = Regexp.new("\\s*(.*?)\\s*#{Regexp.escape(@options[:comment_chars][:multi][:end])}\\s*$") if @options[:comment_chars][:multi][:middle] block_comment_mid = Regexp.new("^\\s*#{Regexp.escape(@options[:comment_chars][:multi][:middle])}\\s?") end end unless @options[:comment_chars][:heredoc].nil? heredoc_start = Regexp.new("#{Regexp.escape(@options[:comment_chars][:heredoc])}(\\S+)$") end lines.each do |line| # If we're currently in a comment block, check whether the line matches # the _end_ of a comment block or the _end_ of a comment block with a # comment. if in_comment_block if block_comment_end && line.match(block_comment_end) in_comment_block = false elsif block_comment_end_with && line.match(block_comment_end_with) in_comment_block = false docs << line.match(block_comment_end_with).captures.first .sub(block_comment_mid || '', '') else docs << line.sub(block_comment_mid || '', '') end # If we're currently in a heredoc, we're looking for the end of the # heredoc, and everything it contains is code. elsif in_heredoc if line.match(Regexp.new("^#{Regexp.escape(in_heredoc)}$")) in_heredoc = false end code << line # Otherwise, check whether the line starts a heredoc. If so, note the end # pattern, and the line is code. Otherwise check whether the line matches # the beginning of a block, or a single-line comment all on it's lonesome. # In either case, if there's code, start a new section. else if heredoc_start && line.match(heredoc_start) in_heredoc = Regexp.last_match[1] code << line elsif block_comment_one_liner && line.match(block_comment_one_liner) if code.any? sections << [docs, code] docs, code = [], [] end docs << line.match(block_comment_one_liner).captures.first elsif block_comment_start && line.match(block_comment_start) in_comment_block = true if code.any? sections << [docs, code] docs, code = [], [] end elsif block_comment_start_with && line.match(block_comment_start_with) in_comment_block = true if code.any? sections << [docs, code] docs, code = [], [] end docs << line.match(block_comment_start_with).captures.first elsif single_line_comment && line.match(single_line_comment) if code.any? sections << [docs, code] docs, code = [], [] end docs << line.sub(single_line_comment || '', '') else code << line end end end sections << [docs, code] if docs.any? || code.any? normalize_leading_spaces(sections) end
Private Instance Methods
generate_comment_chars()
click to toggle source
# File lib/omnitest/psychic/code2doc/code_segmenter.rb, line 159 def generate_comment_chars @_commentchar ||= if COMMENT_STYLES[@options[:language]] COMMENT_STYLES[@options[:language]] else { single: @options[:comment_chars], multi: nil, heredoc: nil } end end