class Vcs2Json::Git
Constants
- FIELD_SEP
Generate separators between fields and commits
- META_DATA
Attributes
case_id[RW]
fine_grained[RW]
ignore[R]
number[RW]
Public Class Methods
new(opts)
click to toggle source
# File lib/vcs2json/git.rb, line 21
# Configures the extractor from an options hash.
#
# Keys read from +opts+: :case_id, :ignore, :before, :after, :number,
# :fine_grained, :logger_location, :logger_level, :ignore_comments,
# :ignore_whitespace and :residuals.
#
# If fine grained extraction is requested but the +srcml+ executable cannot
# be launched, a notice is written to $stderr and the extractor falls back
# to file level granularity.
def initialize(opts)
  # Order matters: :case_id has to be assigned before :ignore, because the
  # ignore setter uses the id to look up its entry in the ignore file.
  %i[case_id ignore before after number fine_grained].each do |option|
    public_send(:"#{option}=", opts[option])
  end

  # Logger configuration
  Logging.set_location(opts[:logger_location])
  Logging.set_level(opts[:logger_level])

  # SrcML configuration
  SrcML.ignore_comments = opts[:ignore_comments]
  SrcML.ignore_whitespace = opts[:ignore_whitespace]
  SrcML.residuals = opts[:residuals]

  # SrcML only needs to be available when fine grained mode is on
  return unless fine_grained

  begin
    Open3.capture3("srcml --version")
  rescue Errno::ENOENT
    $stderr.puts "SrcML is required for fine grained change history extraction, please install from www.srcml.com"
    $stderr.puts "Defaulting to file level"
    self.fine_grained = false
  end
end
Public Instance Methods
after()
click to toggle source
# File lib/vcs2json/git.rb, line 63
# Returns the stored lower date bound formatted as a git command line
# option (<tt>--after="..."</tt>), or an empty string when no bound is set.
def after
  return '' if @after.nil?

  %(--after="#{@after}")
end
after=(after)
click to toggle source
# File lib/vcs2json/git.rb, line 51
# Stores the lower date bound used to filter commits.
#
# The value is kept verbatim when Date.parse accepts it. Passing nil clears
# any previously stored bound (it used to be a silent no-op, which left a
# stale filter in place). A value Date.parse rejects is reported on STDERR
# and the bound is cleared.
def after=(after)
  # start from a clean slate so neither nil nor bad input leaves an old value
  @after = nil
  return if after.nil?

  begin
    # only used as a validity check; the original string is what gets stored
    Date.parse(after)
    @after = after
  rescue StandardError
    STDERR.puts "Invalid date --after=#{after}. Ignoring option."
  end
end
before()
click to toggle source
# File lib/vcs2json/git.rb, line 79
# Returns the stored upper date bound formatted as a git command line
# option (<tt>--before="..."</tt>), or an empty string when no bound is set.
def before
  return '' if @before.nil?

  %(--before="#{@before}")
end
before=(before)
click to toggle source
# File lib/vcs2json/git.rb, line 67
# Stores the upper date bound used to filter commits.
#
# The value is kept verbatim when Date.parse accepts it. Passing nil clears
# any previously stored bound (it used to be a silent no-op, which left a
# stale filter in place). A value Date.parse rejects is reported on STDERR
# and the bound is cleared.
def before=(before)
  # start from a clean slate so neither nil nor bad input leaves an old value
  @before = nil
  return if before.nil?

  begin
    # only used as a validity check; the original string is what gets stored
    Date.parse(before)
    @before = before
  rescue StandardError
    STDERR.puts "Invalid date --before=#{before}. Ignoring option."
  end
end
ignore=(path)
click to toggle source
# File lib/vcs2json/git.rb, line 232
# Loads the list of files to ignore for the current case id.
#
# The ignore file is a YAML mapping from case id to a list of file names.
# The first existing file among +path+ (when given), <tt>.evocignore</tt> in
# the working directory and <tt>.evocignore</tt> in the user's home
# directory is used. Progress and problems are reported on STDERR.
#
# +path+:: explicit location of an ignore file, or nil to use the defaults
#
# Returns the list stored in <tt>@ignore</tt>; it is empty when no file is
# found, no case id is set, or the file has no usable entry for the case id.
def ignore=(path)
  # File.exist? does not expand "~", so the home directory has to be resolved
  # explicitly; fall back to the literal path if it cannot be determined.
  home_location = begin
    File.expand_path('~/.evocignore')
  rescue ArgumentError
    '~/.evocignore'
  end
  default_locations = ["#{Dir.pwd}/.evocignore", home_location]
  paths = path.nil? ? default_locations : [path] + default_locations

  @ignore = []

  # first match wins
  ignore_path = paths.find { |candidate| File.exist?(candidate) }
  if ignore_path.nil?
    STDERR.puts ".evocignore not found. Tried #{paths}. All files will be used."
    return @ignore
  end
  STDERR.puts "Loading files to ignore from #{ignore_path}"

  if case_id.nil?
    STDERR.puts "Id in .evocignore not specified, not ignoring any files."
    return @ignore
  end

  # NOTE(review): YAML.load is kept as in the original; the ignore file is
  # presumably trusted local configuration - confirm before loading files
  # from other sources. The block form closes the file handle.
  entries = File.open(ignore_path) { |io| YAML.load(io) }

  # an empty or non-mapping file has no entry for any id
  if entries.is_a?(Hash) && entries.key?(case_id)
    listed = entries[case_id]
    unless listed.nil?
      STDERR.puts "Ignoring #{listed.size} files"
      @ignore = listed
    end
  else
    STDERR.puts "The id: '#{case_id}' not found in #{ignore_path}"
  end

  @ignore
end
parse()
click to toggle source
# File lib/vcs2json/git.rb, line 83
# Walks the git history of the current working directory and prints one JSON
# object per non-empty commit to $stdout, stopping once +number+ commits
# have been printed.
#
# For every commit the changed files are listed, ignored files are dropped,
# the commit metadata is fetched and cleaned, and the changes are collected
# either at file level or, in fine grained mode, through SrcML. Commits that
# end up with no changes are collected and their ids are printed to STDERR
# at the end, as is a warning when fewer than +number+ commits were found.
#
# Raises EncodingError when the commit metadata cannot be encoded as UTF-8.
def parse
  # keeps track of number of commits successfully parsed
  commit_counter = 0
  # keeps track of empty commits
  empty_commits = []

  ##########################
  # GET LIST OF COMMIT IDS #
  ##########################

  # getting the list of revision ids is cheap, so we get some extra in case
  # we are unable to parse the required amount in the first 'n' commits
  commit_ids = `git rev-list HEAD #{before} #{after} -n #{number * 10} --no-merges`.split

  ############################
  # ITERATE OVER EACH COMMIT #
  ############################

  commit_ids.each do |id|
    logger.debug "Parsing commit: #{id}"

    # get the changed files as [status, file_name] pairs; lines that do not
    # start with A, M or D are left as a single element and skipped below
    changed_files = `git log --pretty=format:'' --name-status #{id} -n 1`
                    .split("\n")
                    .map { |line| line.split(/(^[AMD])\s+/).reject(&:empty?) }

    # remove ignored files
    changed_files.reject! do |_status, file_name|
      ignored = ignore.include?(file_name)
      logger.debug "[IGNOREDEBUG] Ignored #{file_name} in commit #{id}" if ignored
      ignored
    end

    # no files in commit
    if changed_files.empty?
      empty_commits << id
      next
    end

    ##################
    # FETCH METADATA #
    ##################

    raw_commit = `git log --pretty=format:'#{META_DATA}' #{id} -n 1`
    commit = ''

    ##################
    # CLEAN RAW DATA #
    ##################

    begin
      # try encoding to utf8
      commit = raw_commit.encode('UTF-8', 'binary', invalid: :replace, undef: :replace, replace: '')
      # need to explicitly check if the encoding is valid for ruby <= 2.0
      # utf8 -> utf8 will not do anything even with invalid bytes
      # http://stackoverflow.com/questions/24036821/ruby-2-0-0-stringmatch-argumenterror-invalid-byte-sequence-in-utf-8
      unless commit.valid_encoding?
        # encode to utf16 first and then back to utf8
        commit.encode!("UTF-16be", invalid: :replace, undef: :replace, replace: '')
        commit.encode!('UTF-8', 'binary', invalid: :replace, undef: :replace, replace: '')
      end
    rescue ArgumentError
      raise EncodingError, "Unable to encode input as UTF-8"
    end

    ##############################
    # CONSTRUCT OUTPUT HASH/JSON #
    ##############################

    fields = commit.split(FIELD_SEP)
    output_hash = {
      sha: fields[0].delete("\n"), # remove astray newlines
      name: fields[1],
      email: fields[2],
      date: Time.parse(fields[3]),
      committer_name: fields[4],
      committer_email: fields[5],
      committer_date: Time.parse(fields[6]),
      message: fields[7],
      changes: []
    }
    changes = output_hash[:changes]

    #######################################
    # PARSE FILES FOR FINEGRAINED CHANGES #
    #######################################

    changed_files.each_with_index do |(status, file_name), index|
      # print progress
      STDERR.print "Parsing file #{index+1} Of #{changed_files.size} in commit #{commit_counter+1} of #{number} \r"

      # entries without both a status and a file name are ignored
      next if status.nil? || file_name.nil? || status.empty? || file_name.empty?

      unless fine_grained
        changes << file_name
        next
      end

      # add finer grained change info
      begin
        if status == 'A'
          # new file, all methods are new, no need to calculate diff
          SrcML.methods(file_name, revision: id).keys.each { |m| changes << m }
        else
          # calculate diffs
          SrcML.changed_methods_git(file_name, id).each { |m| changes << m }
        end
      rescue SrcML::UnsupportedLanguageError, SrcML::ParseError
        # fall back to file level for files SrcML cannot handle
        changes << file_name
      end
    end

    # Only add commits where at least one change was detected
    if changes.empty?
      # no changes detected in commit
      empty_commits << id
      next
    end

    ###########################
    # PRINT COMMIT TO $stdout #
    ###########################

    $stdout.puts output_hash.to_json

    # increase counter for number of commits successfully parsed
    commit_counter += 1

    ########################################
    # CHECK IF REQUESTED AMOUNT IS REACHED #
    ########################################

    break if commit_counter == number
  end

  # we may still lack commits after exhaustive search, notify user; report
  # the number of commits that were actually examined
  if commit_counter < number
    STDERR.puts "Asked for #{number} commits, only found #{commit_counter} non-empty commits in the last #{commit_ids.size} commits"
  end

  # print ids of empty commits to stderr
  unless empty_commits.empty?
    STDERR.puts "EMPTY COMMITS"
    STDERR.puts empty_commits
  end
end