class Evoc::Analyze
Attributes
opts[RW]
tx_store[RW]
Public Class Methods
new(opts)
click to toggle source
# File lib/evoc/analyze.rb, line 6
# Stores the options, sets the logging level and selects the transaction
# store to analyze.
#
# @param opts [Hash] reads :logger_level and :tx_store; when :tx_store is
#   nil a TxStore is built from :transactions, :case_id and :granularity
def initialize(opts)
  self.opts = opts
  Logging.set_level(opts[:logger_level])
  supplied_store = opts[:tx_store]
  self.tx_store =
    if supplied_store.nil?
      TxStore.new(path: opts[:transactions],
                  case_id: opts[:case_id],
                  granularity: opts[:granularity])
    else
      supplied_store
    end
end
Public Instance Methods
aggregator_range()
click to toggle source
# File lib/evoc/analyze.rb, line 379
# Prints a CSV table (header 'aggregator,length,range,y') to $stdout with
# the cg, dcg and dcg2 values of two aggregators for every list length from
# 1 to 1000.
#
# For each length two random, descending lists are generated:
# * values of 1 - rand(0.00001..1), aggregated as 'm_support' and
#   reported with range "0..1"
# * integers from rand(0..10), aggregated as 'm_hyper_coefficient' and
#   reported with range inf
def aggregator_range
  $stdout.puts 'aggregator,length,range,y'
  (1..1000).each do |length|
    list = Array.new(length) { 1 - rand(0.00001..1) }.sort.reverse
    list_inf = Array.new(length) { rand(0..10) }.sort.reverse
    aggregator = Evoc::InterestingnessMeasureAggregator.new('m_support', list)
    aggregator_inf = Evoc::InterestingnessMeasureAggregator.new('m_hyper_coefficient', list_inf)
    $stdout.puts "CG,#{length},\"0..1\",#{aggregator.cg}"
    $stdout.puts "DCG,#{length},\"0..1\",#{aggregator.dcg}"
    $stdout.puts "DCG2,#{length},\"0..1\",#{aggregator.dcg2}"
    $stdout.puts "CG,#{length},inf,#{aggregator_inf.cg}"
    $stdout.puts "DCG,#{length},inf,#{aggregator_inf.dcg}"
    $stdout.puts "DCG2,#{length},inf,#{aggregator_inf.dcg2}"
  end
end
all()
click to toggle source
Performs all of the numerical analyses
Prints to stdout
# File lib/evoc/analyze.rb, line 20
# Runs every numerical analysis listed below and writes two CSV rows to
# stdout: the analysis names, then the corresponding results in the same
# order.
def all
  analyses = %W(num_commits
                percent_method_changes_of_all_changes
                avg_changes_per_file
                avg_method_changes_per_parsable_file
                num_unique_changes
                average_changes_per_commit
                average_commits_per_specific_change
                time_span_in_years
                average_time_between_commits_in_minutes)
  CSV { |csv| csv << analyses }
  # The header is written before any analysis is computed.
  outcomes = analyses.map { |analysis| send(analysis) }
  CSV { |csv| csv << outcomes }
end
average_changes_per_commit()
click to toggle source
# File lib/evoc/analyze.rb, line 155
# @return [Float] the mean transaction size (sum of the sizes divided by the
#   number of transactions), rounded to two decimals
def average_changes_per_commit
  sizes = self.tx_store.map(&:size)
  total = sizes.reduce(:+)
  mean = total.to_f / sizes.size
  mean.round(2)
end
average_commits_per_specific_change()
click to toggle source
# File lib/evoc/analyze.rb, line 115
# @return [Float] the total number of transactions recorded across all
#   entries of tx_store.items, divided by the number of entries and rounded
#   to two decimals
def average_commits_per_specific_change
  items = self.tx_store.items
  commit_total = items.values.map(&:size).reduce(0, :+)
  average = commit_total.to_f / self.tx_store.items.size
  average.round(2)
end
average_time_between_commits_in_minutes()
click to toggle source
# File lib/evoc/analyze.rb, line 172
# @return the result of average_time_between_commits for the 'minutes'
#   unit, rounded to two decimals
def average_time_between_commits_in_minutes
  minutes = average_time_between_commits(unit: 'minutes')
  minutes.round(2)
end
avg_changes_per_file()
click to toggle source
# File lib/evoc/analyze.rb, line 82
# Average number of changed items per changed file.
#
# Item ids are translated to names through tx_store.int_2_name; the file of
# an item is the part of its name before the first ':' (or the whole name
# when it has no ':'). A file is counted once per transaction it appears in.
#
# @return [Float] total changed items divided by the number of
#   (transaction, file) groups, rounded to two decimals
def avg_changes_per_file
  file_pattern = /^(?<file>[^:]+?)(?::|$)/
  files_changed = 0
  total_changes = 0
  self.tx_store.each do |tx|
    names = tx.items.map { |item| self.tx_store.int_2_name[item] }
    by_file = names.group_by { |name| file_pattern.match(name)[:file] }
    files_changed += by_file.size
    # every name lands in exactly one group, so the group sizes sum to names.size
    total_changes += names.size
  end
  (total_changes.to_f / files_changed).round(2)
end
avg_method_changes_per_parsable_file()
click to toggle source
# File lib/evoc/analyze.rb, line 60
# Average number of method-level changes per file that has at least one.
#
# Items are grouped by file within each transaction (the file is the part of
# the item name before the first ':'). An item counts as a method change
# when its name contains a ':' that is not followed by '@residuals'. Files
# without any such item in a transaction are ignored.
#
# @return [Float] method changes divided by the number of (transaction,
#   file) groups containing a method change, rounded to two decimals
def avg_method_changes_per_parsable_file
  file_pattern = /^(?<file>[^:]+?)(?::|$)/
  method_pattern = /:(?!@residuals)/
  parsable_files_changed = 0
  method_changes = 0
  self.tx_store.each do |tx|
    names = tx.items.map { |item| self.tx_store.int_2_name[item] }
    names.group_by { |name| file_pattern.match(name)[:file] }.each_value do |changes|
      in_methods = changes.count { |change| change =~ method_pattern }
      next if in_methods.zero?

      parsable_files_changed += 1
      method_changes += in_methods
    end
  end
  (method_changes.to_f / parsable_files_changed).round(2)
end
commit_size()
click to toggle source
Dumps the commit sizes, one commit per line
# File lib/evoc/analyze.rb, line 135
# Dumps commit sizes as CSV to $stdout.
#
# When opts[:group] is truthy, prints a 'commit_size,frequency' table with
# one row per distinct size in ascending order. Otherwise prints a
# 'commit_size' header followed by the size of every transaction, one per
# line, while reporting progress on $stderr.
#
# All data rows go through $stdout, the same stream as the header, so
# redirecting $stdout captures the complete table.
def commit_size
  if self.opts[:group]
    $stdout.puts 'commit_size,frequency'
    self.tx_store.group_by(&:size).sort.each do |size, txes|
      $stdout.puts "#{size},#{txes.size}"
    end
  else
    $stdout.puts 'commit_size'
    self.tx_store.each_with_index do |tx, index|
      # "\r" keeps the progress report on a single terminal line
      $stderr.print "Dumping commit sizes: #{index+1} of #{self.tx_store.size} \r"
      $stdout.puts tx.size
    end
  end
  $stderr.puts "DONE "
end
commits()
click to toggle source
# File lib/evoc/analyze.rb, line 44
# Prints one JSON object per transaction to $stdout with the keys:
# sha::                  the transaction id
# index::                the transaction index
# num_changes::          number of items in the transaction
# items_touched_so_far:: number of distinct items seen up to and including
#                        this transaction
# moving_average::       changes so far divided by (index + 1), rounded to
#                        two decimals
def commits
  unique_items = Set.new
  changes_so_far = 0
  self.tx_store.each do |tx|
    unique_items.merge(tx.items)
    changes_so_far += tx.items.size
    # Convert to Float before dividing; Integer division would truncate the average.
    moving_average = (changes_so_far.to_f / (tx.index + 1)).round(2)
    data = {
      'sha' => tx.id,
      'index' => tx.index,
      'num_changes' => tx.items.size,
      'items_touched_so_far' => unique_items.size,
      'moving_average' => moving_average
    }
    $stdout.puts data.to_json
  end
end
create_dict()
click to toggle source
# File lib/evoc/analyze.rb, line 397
# Prints the contents of tx_store.int_2_name as CSV: an "id,name" header
# followed by one "id,name" line per entry.
def create_dict
  puts "id,name"
  self.tx_store.int_2_name.each do |item_id, item_name|
    puts [item_id, item_name].join(',')
  end
end
evolution()
click to toggle source
# File lib/evoc/analyze.rb, line 31
# Prints a CSV table (index, relevant_index, overlap) to stdout.
#
# For every transaction, looks up the indexes returned by
# tx_store.transactions_of_list for its items (strict: false), keeps those
# not greater than the transaction's own index, and emits one row per kept
# index. overlap is the number of items shared with that transaction,
# divided by the size of the current transaction and rounded to two
# decimals.
def evolution
  CSV { |csv| csv << %w(index relevant_index overlap) }
  self.tx_store.each do |tx|
    related = self.tx_store.transactions_of_list(tx.items, strict: false, identifier: :index)
    related.each do |earlier_index|
      next unless earlier_index <= tx.index

      earlier = self.tx_store.get_tx(id: earlier_index, id_type: :index)
      shared = (earlier.items & tx.items).size
      overlap = (shared / tx.size.to_f).round(2)
      CSV { |csv| csv << [tx.index, earlier_index, overlap] }
    end
  end
end
file_frequency()
click to toggle source
Prints a CSV-formatted table of the top N most frequent files. N is configured in opts.
# File lib/evoc/analyze.rb, line 122
# Prints a CSV table (file, frequency) to stdout for the opts[:top] entries
# of tx_store.items with the most transactions, most frequent first. Item
# ids are translated to names through tx_store.int_2_name.
def file_frequency
  CSV { |csv| csv << %W(file frequency) }
  counts = self.tx_store.items.map { |item, txes| [item, txes.size] }
  ranked = counts.sort_by { |_item, count| -count }
  ranked.take(self.opts[:top]).each do |item, count|
    CSV { |csv| csv << [self.tx_store.int_2_name[item], count] }
  end
end
measure_ranges()
click to toggle source
# File lib/evoc/analyze.rb, line 222
# Prints a CSV table to stdout that, for every measure in
# Evoc::Rule.p_measures + Evoc::Rule.measures (sorted), reports the smallest
# and largest value observed over a fixed set of hand-picked probability
# configurations, together with the configurations that produced them.
#
# Columns: measure, current_range, min, max, then P(A), P(B), P(A,B) of the
# minimum configuration and P(A), P(B), P(A,B) of the maximum configuration.
# current_range is taken from Evoc::InterestingnessMeasures.get_min/get_max
# when the measure is an Evoc::InterestingnessMeasure, and is "[0,1]"
# otherwise.
#
# Values that are nil, false or NaN are ignored; a measure without any
# usable value produces no row.
def measure_ranges
  # Disabled exhaustive sweep over the probability space, kept for reference:
  #rules = []
  #ab_range = ([0.00000001]+(0.0..1).step(0.01).to_a+[0.99999999]).map(&:rationalize)
  #ab_range.each do |ab|
  #  ab_zero = ((ab == 0) ? 0.0000001 : 0)
  #  a_range = (ab+ab_zero..1).step(0.01).map(&:rationalize)
  #  b_range = (ab+ab_zero..1).step(0.01).map(&:rationalize)
  #  # construct all rules starting from A
  #  a_range.each do |a|
  #    new_b_range = (ab..1-a+ab).step(0.01).map(&:rationalize)
  #    new_b_range.each do |b|
  #      if (a == 0) | (b == 0)
  #        # a or b cant be 0
  #        next
  #      else
  #        r = Evoc::Rule.new(lhs: [], rhs: [])
  #        r.set_p('p_A',a)
  #        r.set_p('p_B',b)
  #        r.set_p('p_AB',ab)
  #        rules << r
  #      end
  #    end
  #  end
  #  # construct all rules starting from B
  #  b_range.each do |b|
  #    new_a_range = (ab..1-b+ab).step(0.01).map(&:rationalize)
  #    new_a_range.each do |a|
  #      r = Evoc::Rule.new(lhs: [], rhs: [])
  #      r.set_p('p_A',a)
  #      r.set_p('p_B',b)
  #      r.set_p('p_AB',ab)
  #      rules << r
  #    end
  #  end
  #end

  # Each entry is [P(A), P(B), P(A,B)].
  configurations = [
    # A and B never change together, but do change
    [1.to_r/4, 1.to_r/4, 0],
    # A and B never change together, and A almost never change
    [1.to_r/1000000, 1.to_r/2, 0],
    # A and B never change together, and B almost never change
    [1.to_r/2, 1.to_r/1000000, 0],
    # A and B never change together, but change half of the time
    [1.to_r/2, 1.to_r/2, 0],
    # A and B never change together, and A and B almost never change
    [1.to_r/1000000, 1.to_r/1000000, 0],
    # A and B always change together
    [1.to_r/3, 1.to_r/3, 1.to_r/3],
    # A and B always change together, but rarely change
    [1.to_r/1000000, 1.to_r/1000000, 1.to_r/1000000],
    # A and B always change together, always
    [1, 1, 1],
    # A always changes, B rarely changes (and then together with A)
    [1, 1.to_r/10000000, 1.to_r/10000000],
    # B always changes, A rarely changes (and then together with B)
    [1.to_r/10000000, 1, 1.to_r/10000000],
    # A and B sometimes change together, B always
    [1.to_r/2, 1, 1.to_r/2],
    # A and B sometimes change together, A always
    [1, 1.to_r/2, 1.to_r/2],
    # A and B always change together, almost always
    [0.99999999.rationalize, 0.99999999.rationalize, 0.99999999.rationalize],
    # A and B always change together, half of the time
    [0.5.rationalize, 0.5.rationalize, 0.5.rationalize],
    # A and B each change 40% of the time, together 10% of the time
    [0.4.rationalize, 0.4.rationalize, 0.1.rationalize],
    # P(A) = 1/3, P(B) = 1/2, P(A,B) = 1/4
    [1.to_r/3, 1.to_r/2, 1.to_r/4]
  ]

  rules = configurations.map do |p_a, p_b, p_ab|
    rule = Evoc::Rule.new(lhs: [], rhs: [])
    rule.set_p('p_A', p_a)
    rule.set_p('p_B', p_b)
    rule.set_p('p_AB', p_ab)
    rule
  end

  measures = (Evoc::Rule.p_measures + Evoc::Rule.measures).sort
  # print header
  CSV { |csv| csv << [nil, nil, nil, nil, 'min_configuration', nil, nil, 'max_configuration', nil, nil] }
  CSV { |csv| csv << ['measure', 'current_range', 'min', 'max', 'P(A)', 'P(B)', 'P(A,B)', 'P(A)', 'P(B)', 'P(A,B)'] }

  measures.each do |m|
    values = {}
    rules.each do |rule|
      measure = rule.get_measure(m)
      value = measure.is_a?(Evoc::InterestingnessMeasure) ? measure.value : measure
      next unless value

      numeric = value.to_f
      # NaN is unequal to everything, itself included, so `!= Float::NAN`
      # can never filter it out; Float#nan? is the reliable check.
      next if numeric.nan?

      a = rule.p_A
      b = rule.p_B
      ab = rule.p_AB
      values["#{a},#{b},#{ab}"] = { m: measure, v: numeric, a: a, b: b, ab: ab }
    end
    # nothing to report when no configuration produced a usable value
    next if values.empty?

    min = values.values.min_by { |entry| entry[:v] }
    max = values.values.max_by { |entry| entry[:v] }
    current_range =
      if min[:m].is_a?(Evoc::InterestingnessMeasure)
        "[#{Evoc::InterestingnessMeasures.get_min(m)},#{Evoc::InterestingnessMeasures.get_max(m)}]"
      else
        "[0,1]"
      end
    # print row
    CSV do |csv|
      csv << [m, current_range, min[:v], max[:v],
              min[:a], min[:b], min[:ab],
              max[:a], max[:b], max[:ab]]
    end
  end
end
measure_values()
click to toggle source
# File lib/evoc/analyze.rb, line 199
# Prints a CSV table to stdout with the value of every measure in
# Evoc::Rule.p_measures + Evoc::Rule.measures (sorted) for opts[:number]
# randomly generated rules.
#
# Each rule is drawn from a random transaction with more than one item: the
# left hand side is a random sample of 1 to size-1 of its items and the
# right hand side is one of the remaining items. Measures without a value
# are written as empty fields.
def measure_values
  # generate some random rules with min sup 1
  candidates = self.tx_store.select { |tx| tx.size > 1 }
  measure_names = (Evoc::Rule.p_measures + Evoc::Rule.measures).sort
  # print header
  CSV { |csv| csv << measure_names }
  self.opts[:number].times do
    tx = candidates.sample
    lhs_size = rand(1..tx.size-1)
    lhs = tx.items.sample(lhs_size)
    rhs = (tx.items - lhs).sample
    rule = Evoc::Rule.new(lhs: lhs, rhs: rhs, tx_store: self.tx_store)
    row = CSV::Row.new([], [], false)
    measure_names.each do |name|
      measure = rule.get_measure(name)
      value = measure.is_a?(Evoc::InterestingnessMeasure) ? measure.value : measure
      row << (value ? value.to_f : nil)
    end
    # print row
    CSV { |csv| csv << row }
  end
end
median_changes_per_commit()
click to toggle source
Added but not used, as the data is not interesting. –LM
# File lib/evoc/analyze.rb, line 161
# @return [Float] the median transaction size; for an even number of
#   transactions, the mean of the two middle sizes
def median_changes_per_commit
  ordered = self.tx_store.map(&:size).sort
  count = ordered.length
  lower_middle = ordered[(count - 1) / 2]
  upper_middle = ordered[count / 2]
  (lower_middle + upper_middle) / 2.0
end
num_commits()
click to toggle source
# File lib/evoc/analyze.rb, line 111
# @return the size reported by the transaction store
def num_commits
  tx_store.size
end
num_unique_changes()
click to toggle source
# File lib/evoc/analyze.rb, line 151
# @return [Integer] the number of distinct items across all transactions
def num_unique_changes
  items_per_tx = self.tx_store.map { |tx| tx.items }
  distinct_items = items_per_tx.flatten.uniq
  distinct_items.size
end
percent_method_changes_of_all_changes()
click to toggle source
@return [Float] the percentage of changes that are method level
# File lib/evoc/analyze.rb, line 97
# Item names are looked up through tx_store.int_2_name. A change counts as
# file level when its name contains no ':' or ends in ':@residuals'; every
# other change counts as method level.
#
# @return [Float] the percentage of changes that are method level, rounded
#   to two decimals
def percent_method_changes_of_all_changes
  file_level = /^[^:]+?(\.[^:\/]+?)?(?::@residuals$|$)/
  total_changes = 0
  file_changes = 0
  self.tx_store.each do |tx|
    names = tx.items.map { |item| self.tx_store.int_2_name[item] }
    total_changes += names.size
    file_changes += names.count { |name| name =~ file_level }
  end
  ((1 - file_changes.to_f/total_changes)*100).round(2)
end
time_span_in_years()
click to toggle source
# File lib/evoc/analyze.rb, line 168
# @return the result of time_span for the 'years' unit
def time_span_in_years
  years = time_span(unit: 'years')
  years
end
uniqueness()
click to toggle source
# File lib/evoc/analyze.rb, line 177
# For every transaction, builds all queries consisting of its items minus
# one, and counts how many transactions in
# tx_store.clone_with_subset(0, tx.index) are returned for each query by a
# strict transactions_of_list lookup.
#
# `strict` is passed as a keyword, matching the call in #evolution.
# NOTE(review): TxStore#transactions_of_list is not visible from here;
# confirm it declares `strict:` as a keyword argument.
#
# @return [Hash{Integer => Integer}] number of hits => number of queries
#   that produced that many hits
def uniqueness
  result = {}
  self.tx_store.each do |tx|
    previous_history = self.tx_store.clone_with_subset(0, tx.index)
    tx.items.combination(tx.size - 1) do |query|
      hits = previous_history.transactions_of_list(query, strict: true).size
      result[hits] = result.fetch(hits, 0) + 1
    end
  end
  result
end
Private Instance Methods
average_time_between_commits(unit: 'hours')
click to toggle source
# File lib/evoc/analyze.rb, line 412
# Average time between consecutive transactions: the time between the dates
# of the first and the last transaction, divided by the number of gaps
# (size - 1).
#
# @param unit [String, Symbol] suffix of the TimeDifference reader to use,
#   e.g. 'hours' calls in_hours
# @return [Float] the average time in the requested unit
# @raise [RuntimeError] when the store holds fewer than two transactions
def average_time_between_commits(unit: 'hours')
  # RuntimeError instead of the bare Exception class, so that a plain
  # `rescue` (StandardError) can handle it; `rescue Exception` still matches.
  raise "History only contained 1 or 0 transactions" unless self.tx_store.size > 1

  t1 = self.tx_store.first.date
  t2 = self.tx_store.last.date
  total_time = TimeDifference.between(t1, t2).public_send("in_#{unit}")
  total_time.to_f / (self.tx_store.size - 1)
end
time_span(unit: 'years')
click to toggle source
# File lib/evoc/analyze.rb, line 406
# Time between the dates of the first and the last transaction in the store.
#
# @param unit [String] suffix of the TimeDifference reader to use, e.g.
#   'years' calls in_years
# @return whatever the selected TimeDifference reader returns
def time_span(unit: 'years')
  first_date = self.tx_store.first.date
  last_date = self.tx_store.last.date
  difference = TimeDifference.between(first_date, last_date)
  reader = difference.method('in_' + unit)
  reader.call
end