class Evoc::SVD
CLASS SVD
public fields:
index: hash of {index -> file}, where file is at "index" in the co_change_matrix and svd matrix
Attributes
co_change_matrix[RW]
s[RW]
u[RW]
v[RW]
Public Class Methods
new(tx_store = nil)
click to toggle source
# File lib/evoc/svd.rb, line 11
# Builds the co-change matrix and its singular value decomposition from a
# transaction store.
#
# @param tx_store [Evoc::TxStore, nil] source of transactions; when it is
#   not an Evoc::TxStore no matrix is computed and the index maps stay empty
def initialize(tx_store = nil)
  # Start with empty maps so the lookup helpers work on an instance that
  # was created without a transaction store.
  @index2item = {}
  @item2index = {}
  return unless tx_store.is_a?(Evoc::TxStore)

  @index2item, @item2index, @co_change_matrix = txstore_2_co_change_matrix(tx_store)
  @u, @s, @v = svd(@co_change_matrix)
end
Public Instance Methods
clusters(query,threshold = 0)
click to toggle source
Find the clusters in the current svd given a change-vector/query
threshold: the absolute value that an element in the U matrix must exceed
to be considered part of a cluster
# File lib/evoc/svd.rb, line 43
# Finds the clusters in the current svd for a change-vector/query.
#
# Every column of the U matrix yields one cluster, split by the sign of the
# element. Rows whose absolute value exceeds +threshold+ are either recorded
# as a query match (the row's item is in the query) or as a clustered item
# together with its U value.
#
# @param query [Enumerable] items to look for; items unknown to the index
#   are ignored when matching rows
# @param threshold [Numeric] absolute value an element of U must exceed to
#   count as part of a cluster
# @return [Array] two elements: the list of [column index, sign] pairs whose
#   query matches cover every distinct query item, and the hash of clusters
#   keyed by column index
def clusters(query,threshold = 0)
  clusters = {}
  perfect_match = []
  # Hash keyed by row index gives constant-time membership checks; query
  # items that are not in the index (nil) are dropped.
  query_indexes = {}
  query.each do |q_item|
    index = item2index(q_item)
    query_indexes[index] = true unless index.nil?
  end
  # Compare against the number of distinct query items so that a repeated
  # item in the query cannot make a perfect match impossible.
  query_size = query.uniq.size
  col_index = 0
  self.u.each_column do |col|
    cluster = {
      pos: { query_match: [], clustered: [] },
      neg: { query_match: [], clustered: [] }
    }
    clusters[col_index] = cluster
    col.each_with_index do |row_item, row_index|
      # Only rows above the threshold belong to the cluster.
      next unless row_item.abs > threshold

      sign = row_item > 0 ? :pos : :neg
      if query_indexes.key?(row_index)
        matched = cluster[sign][:query_match]
        matched << index2item(row_index)
        # Every distinct query item landed in this cluster: perfect match.
        perfect_match << [col_index, sign] if matched.size == query_size
      else
        cluster[sign][:clustered] << [index2item(row_index), row_item]
      end
    end
    col_index += 1
  end
  [perfect_match, clusters]
end
index2item(index)
click to toggle source
# File lib/evoc/svd.rb, line 26
# Looks up the item stored at a matrix index.
#
# @param index [Integer] row/column position in the co-change matrix
# @return [Object, nil] the item at that index, or nil when the index is
#   unknown or no index map has been built yet
def index2item(index)
  (@index2item || {})[index]
end
indexes()
click to toggle source
# File lib/evoc/svd.rb, line 18
# Lists every matrix index that has an item assigned.
#
# @return [Array] keys of the index-to-item map; empty when no map has been
#   built yet
def indexes
  (@index2item || {}).keys
end
item2index(item)
click to toggle source
# File lib/evoc/svd.rb, line 30
# Looks up the matrix index assigned to an item.
#
# @param item [Object] item to look up
# @return [Integer, nil] the item's index, or nil when the item is unknown
#   or no index map has been built yet
def item2index(item)
  (@item2index || {})[item]
end
items()
click to toggle source
# File lib/evoc/svd.rb, line 22
# Lists every item that has a matrix index assigned.
#
# @return [Array] keys of the item-to-index map; empty when no map has been
#   built yet
def items
  (@item2index || {}).keys
end
svd(co_change_matrix)
click to toggle source
# File lib/evoc/svd.rb, line 34
# Runs the singular value decomposition of the given matrix.
#
# @param co_change_matrix [#gesvd] matrix to decompose
# @return [Object] whatever +gesvd+ returns; the constructor destructures it
#   into u, s and v
def svd(co_change_matrix)
  co_change_matrix.gesvd
end
txstore_2_co_change_matrix(tx_store)
click to toggle source
Returns the co-change matrix of the currently loaded files. Each column/row combination specifies how many times the two files changed together, for example:
     f1  f2  f3
f1   2   1   1
f2   1   3   1
f3   1   1   1
# File lib/evoc/svd.rb, line 84
# Builds the co-change matrix for the items found in a transaction store.
#
# Cell (i, j) holds the number of transactions in which the items with
# index i and j both occur; the diagonal therefore holds the number of
# transactions containing the item itself. Indexes are assigned in the
# order items are first seen.
#
# @param tx_store [#each] yields transactions that respond to +items+
# @return [Array] three elements: the index-to-item hash, the item-to-index
#   hash and the co-change NMatrix
def txstore_2_co_change_matrix(tx_store)
  # item => { co-changed item => number of shared transactions }
  co_change_counts = Hash.new { |counts, item| counts[item] = Hash.new(0) }
  tx_store.each do |tx|
    tx_items = tx.items
    tx_items.each do |file_id|
      co_changed = co_change_counts[file_id]
      tx_items.each { |co_changed_file_id| co_changed[co_changed_file_id] += 1 }
    end
  end

  # Assign each item its matrix index in first-seen order.
  item2index = {}
  co_change_counts.each_key.with_index { |item, index| item2index[item] = index }
  index2item = item2index.invert

  # Generate the pairwise dependency weight matrix.
  n = co_change_counts.size
  co_change_matrix = NMatrix.new(n,0,dtype: :float64)
  co_change_counts.each do |item, co_changed|
    this_file = item2index[item]
    co_changed.each do |co_changed_file, sum_co_changes|
      co_change_matrix[item2index[co_changed_file], this_file] = sum_co_changes
    end
  end

  [index2item, item2index, co_change_matrix]
end