module SrcML

Wrapper for the srcML command-line interface, with functions specifically directed at extracting method names and calculating diffs between files.

Constants

LANGUAGES

Hash of supported languages.

The file extension must map to a known format for srcML

SRCML

name of srcml command

Public Class Methods

ast(path,revision: FALSE) click to toggle source

Calculates the AST of the given file

@param [String] path the path to the file @param [String] revision if specified, retrieves the file from the given git revision

@return [Nokogiri::XML::Document] an AST representation

# File lib/srcML/srcml.rb, line 89
# Calculates the AST of the given file by running the srcML command on it.
#
# @param path [String] the path to the file
# @param revision [String, false] if given, the file content is read from
#   `git show <revision>:<path>` instead of from the working tree
# @return [Nokogiri::XML::Document] the srcML output parsed as XML, with
#   comments and/or whitespace stripped when ignore_comments? /
#   ignore_whitespace? are set
# @raise [SrcML::UnsupportedLanguageError] if a revision is given and the
#   file extension is not a key of LANGUAGES
# @raise [SrcML::ParseError] if the srcML command exits unsuccessfully
#
# The default is the literal `false`: the `FALSE` constant was removed in
# Ruby 3.2, so using it as a default raises NameError there.
def self.ast(path,revision: false)
  if revision
    language = LANGUAGES[File.extname(path)]
    unless language
      raise SrcML::UnsupportedLanguageError, "Language in the file '#{path}' not supported (guessed language from file type)"
    end
    # bash is called explicitly to get support for process substitution.
    # The command, language and git object are handed over as positional
    # parameters ($0, $1, $2) instead of being interpolated into the script,
    # so a path or revision containing spaces or quotes cannot break or
    # alter the command line.
    # NOTE(review): assumes SRCML is a bare command name/path without
    # embedded arguments - confirm against the constant's definition.
    source, _stderr, status = Open3.capture3(
      'bash', '-c', '"$0" --language "$1" <(git show "$2")',
      SRCML, language, "#{revision}:#{path}"
    )
    unless status.success?
      raise SrcML::ParseError, "Failed to parse #{revision}:#{path} using the #{language} parser"
    end
  else
    # argument-list form: the path is passed as a single argument, so it
    # needs no quoting
    source, _stderr, status = Open3.capture3(SRCML, path)
    unless status.success?
      raise SrcML::ParseError, "Failed to parse #{path}, is srcml installed?"
    end
  end

  # turn into structured xml
  xml = Nokogiri::XML(source)

  if ignore_comments?
    xml.search('comment').each do |comment|
      # The newline + indentation directly before a comment is considered
      # "part of" the comment and is removed with it.
      # Only text siblings are edited: assigning `content` on an element
      # sibling would replace its child nodes with plain text.
      preceding = comment.previous_sibling
      if preceding && preceding.text?
        preceding.content = preceding.content.sub(/\n\s*\z/, "")
      end
      comment.remove
    end
  end

  if ignore_whitespace?
    xml.search("text()").each do |node|
      if node.content =~ /\S/
        # text with visible characters: drop every run of whitespace in it
        node.content = node.content.gsub(/[[:space:]]+/, "")
      else
        # whitespace-only text node: drop the node itself
        node.remove
      end
    end
  end

  xml
end
basename_qualify=(bool) click to toggle source
# File lib/srcML/srcml.rb, line 59
# Sets the basename_qualify flag (stored in @@basename_qualify).
#
# @param bool [Boolean] the new flag value
# @raise [ArgumentError] if +bool+ is neither true nor false
def self.basename_qualify=(bool)
  raise ArgumentError, "Must be boolean" unless [true, false].include?(bool)

  @@basename_qualify = bool
end
basename_qualify?() click to toggle source
# File lib/srcML/srcml.rb, line 64
# Returns the current value of the @@basename_qualify flag, as stored by
# basename_qualify=.
#
# When the flag is truthy, SrcML.methods uses File.basename(path) instead of
# the full path as the file part of each method key.
# NOTE(review): the initial value is presumably set elsewhere in the module.
def self.basename_qualify?
  @@basename_qualify
end
changed_methods(old,new) click to toggle source

Calculate the methods that differ between an old and a new version of a file, given the paths to both versions.

@param [String] old the path to the old file @param [String] new the path to the new file @return [Array<String>] the changed methods

# File lib/srcML/srcml.rb, line 239
# Calculates which methods differ between two files.
#
# Both files are run through SrcML.methods and the resulting hashes are
# compared with different_entries.
#
# @param old [String] the path to the old file
# @param new [String] the path to the new file
# @return [Array<String>] the keys of the methods that were added, removed
#   or whose hash differs
def self.changed_methods(old,new)
  different_entries(methods(old), methods(new))
end
changed_methods_git(path,revision) click to toggle source

Like changed_methods, but retrieves the file from git: calculates the methods of the file at path that changed between the parent of the given revision (revision~1) and the revision itself.

@param [String] path the path to the file @param [String] revision the revision to retrieve the file from @return [Array<String>] the changed methods

# File lib/srcML/srcml.rb, line 252
# Like changed_methods, but both versions of the file come from git:
# the file at +revision+ is compared against the file at +revision+~1.
#
# @param path [String] the path to the file
# @param revision [String] the revision to retrieve the file from
# @return [Array<String>] the keys of the methods that were added, removed
#   or whose hash differs
def self.changed_methods_git(path,revision)
  current  = methods(path, revision: revision)
  previous = methods(path, revision: revision + '~1')
  different_entries(previous, current)
end
different_entries(old,new) click to toggle source

Given two Hashes, returns all the keys that either have different values in the two hashes or are not in both hashes.

@param [Hash] old @param [Hash] new @return [Array<String>] the differing keys

# File lib/srcML/srcml.rb, line 210
# Given two Hashes, returns every key that maps to different values in the
# two hashes or that is present in only one of them.
#
# Keys from +new+ (added or changed) come first, in the iteration order of
# +new+; keys found only in +old+ follow.
#
# @param old [Hash]
# @param new [Hash]
# @return [Array] the differing keys
def self.different_entries(old,new)
  # keys that are new, or whose value differs from the old one
  added_or_changed = new.select { |key, value| !old.key?(key) || value != old[key] }.keys
  # keys that are only in old
  removed = old.keys - new.keys
  added_or_changed + removed
end
ignore_comments=(bool) click to toggle source
# File lib/srcML/srcml.rb, line 41
# Sets the ignore_comments flag (stored in @@ignore_comments).
#
# @param bool [Boolean] the new flag value
# @raise [ArgumentError] if +bool+ is neither true nor false
def self.ignore_comments=(bool)
  raise ArgumentError, "Must be boolean" unless [true, false].include?(bool)

  @@ignore_comments = bool
end
ignore_comments?() click to toggle source
# File lib/srcML/srcml.rb, line 46
# Returns the current value of the @@ignore_comments flag, as stored by
# ignore_comments=.
#
# When the flag is truthy, SrcML.ast removes all comment nodes from the
# parsed document before returning it.
# NOTE(review): the initial value is presumably set elsewhere in the module.
def self.ignore_comments?
  @@ignore_comments
end
ignore_whitespace=(bool) click to toggle source
# File lib/srcML/srcml.rb, line 50
# Sets the ignore_whitespace flag (stored in @@ignore_whitespace).
#
# @param bool [Boolean] the new flag value
# @raise [ArgumentError] if +bool+ is neither true nor false
def self.ignore_whitespace=(bool)
  raise ArgumentError, "Must be boolean" unless [true, false].include?(bool)

  @@ignore_whitespace = bool
end
ignore_whitespace?() click to toggle source
# File lib/srcML/srcml.rb, line 55
# Returns the current value of the @@ignore_whitespace flag, as stored by
# ignore_whitespace=.
#
# When the flag is truthy, SrcML.ast strips whitespace from text nodes and
# removes text nodes that contain only whitespace.
# NOTE(review): the initial value is presumably set elsewhere in the module.
def self.ignore_whitespace?
  @@ignore_whitespace
end
methods(path,revision: FALSE) click to toggle source

Returns the methods of the given file

If the method has any parameters, the parameter types are also returned with the method name

@param [String] path the path to the file @param [String] revision if specified, retrieves the file from the given git revision @return [Hash{String => Integer}] a hash mapping each fully qualified method name to the hash of the method's body

# File lib/srcML/srcml.rb, line 144
# Returns the methods of the given file, each mapped to a hash of its body.
#
# Keys have the form "<file>:<class>:<method>" ("<file>:<method>" when no
# class name was found). If a method has parameters, the text extracted for
# each parameter is appended to the method name as "(a,b,...)". When
# residuals? is set, an extra "<file>:@residuals" entry hashes whatever is
# left of the document once all function nodes are removed.
#
# The values come from String#hash, so they are only meaningful for
# comparison within one Ruby process.
#
# NOTE(review): when the file contains at least one class, only functions
# inside class nodes are collected - confirm this is intended.
#
# @param path [String] the path to the file
# @param revision [String, false] if given, the file is retrieved from this
#   git revision (see SrcML.ast)
# @return [Hash{String => Integer}] fully qualified method name => hash of
#   the method's block content
#
# The default is the literal `false`: the `FALSE` constant was removed in
# Ruby 3.2, so using it as a default raises NameError there.
def self.methods(path,revision: false)
  tree = ast(path,revision: revision)
  hashes = {}
  qualified_file = basename_qualify? ? File.basename(path) : path

  # split file based on class declarations; no classes => use the full ast
  partitions = tree.search("class")
  partitions = [tree] if partitions.empty?

  partitions.each do |partition|
    # if partitioned into classes, attempt to extract the class name;
    # the whole document is not a class, so nothing is looked up (or logged)
    # for it
    class_name = ''
    unless partition.document?
      if class_node = partition.at_css("/name")
        class_name = class_node.text
      else
        logger.debug "(#{qualified_file}) Found partitioned file but could not find classname for this partition at location \\name'. Context:\n###\n#{partition}\n###"
      end
    end

    partition.search("function").each do |function|
      name = function.at_css("/name")
      unless name
        # log the function node itself as context
        logger.debug "(#{qualified_file}) Could not identify function name at location '\\name'. Context:\n###\n#{function}\n###"
        next
      end

      # attempt to extract parameters
      parameters = []
      if parameter_list = function.at_css("/parameter_list")
        parameter_list.search("parameter").each do |param|
          # most specific structure first, falling back to looser matches
          parameter = param.at_css("decl type name name") ||
                      param.at_css("decl type name") ||
                      param.at_css("decl type") ||
                      param.at_css("type") ||
                      param.at_css("name")
          if parameter
            parameters << parameter.text
          else
            logger.debug "(#{qualified_file}) Function: #{name}. Nested structures: (decl type name name) or (decl type name) or (decl type) not found in parameter xml, ignoring this parameter. Context:\n###\n#{param}\n###"
          end
        end
      else
        logger.debug "(#{qualified_file}) Parameter list not found for Function: #{name}. Searched for structure '/parameter_list'. Context:\n###\n#{function}\n###"
      end

      method_name = parameters.empty? ? name.text : "#{name.text}(#{parameters.join(',')})"
      name_parts = class_name.empty? ? [qualified_file, method_name] : [qualified_file, class_name, method_name]
      fully_qualified_name = name_parts.join(':')

      if block = function.at_css("block")
        hashes[fully_qualified_name] = block.content.hash
      else
        logger.debug "(#{qualified_file}) No <block> (i.e. the function content) in the function xml. Function: #{name}."
      end
    end
  end

  if residuals?
    # add residuals entry
    # i.e., whats left of the code when all methods are removed
    tree.search("function").remove
    hashes[qualified_file + ':' + '@residuals'] = tree.content.hash
  end

  hashes
end
residuals=(bool) click to toggle source
# File lib/srcML/srcml.rb, line 68
# Sets the residuals flag (stored in @@residuals).
#
# @param bool [Boolean] the new flag value
# @raise [ArgumentError] if +bool+ is neither true nor false
def self.residuals=(bool)
  raise ArgumentError, "Must be boolean" unless [true, false].include?(bool)

  @@residuals = bool
end
residuals?() click to toggle source
# File lib/srcML/srcml.rb, line 73
# Returns the current value of the @@residuals flag, as stored by residuals=.
#
# When the flag is truthy, SrcML.methods adds a "<file>:@residuals" entry
# holding the hash of the document content left after all function nodes
# have been removed.
# NOTE(review): the initial value is presumably set elsewhere in the module.
def self.residuals?
  @@residuals
end