class Evoc::SVD

CLASS SVD

public fields:

index: 
   hash of {index -> file}, where file is at "index" in the co_change_matrix and svd matrix

Attributes

co_change_matrix[RW]
s[RW]
u[RW]
v[RW]

Public Class Methods

new(tx_store = nil) click to toggle source
# File lib/evoc/svd.rb, line 11
# Builds the SVD model from a transaction store.
#
# tx_store: when this is an Evoc::TxStore, the index/item lookup tables and
#           the co-change matrix are derived from it, and the matrix is then
#           decomposed into @u, @s and @v. For any other value (including
#           the default nil) no instance state is set up.
def initialize(tx_store = nil)
  return unless tx_store.is_a?(Evoc::TxStore)

  @index2item, @item2index, @co_change_matrix = txstore_2_co_change_matrix(tx_store)
  @u, @s, @v = svd(@co_change_matrix)
end

Public Instance Methods

clusters(query,threshold = 0) click to toggle source

Find the clusters in the current svd given a change-vector/query

threshold: the value that the absolute value of an element in the U matrix must exceed for that element to be considered part of a cluster
# File lib/evoc/svd.rb, line 43
# Finds the clusters in the current svd for a change-vector/query.
#
# query:     collection of items; items unknown to item2index are ignored
#            when looking for matching rows
# threshold: an element of a U column belongs to that column's cluster only
#            if its absolute value is strictly greater than this value
#
# Returns [perfect_match, clusters] where
#   clusters      maps each column index of U to
#                 {pos: {query_match: [...], clustered: [...]},
#                  neg: {query_match: [...], clustered: [...]}}
#                 query_match holds the query items found in the column,
#                 clustered holds [item, value] pairs for the other items;
#                 pos/neg is chosen by the sign of the element
#   perfect_match lists [column_index, sign] for every column/sign whose
#                 query_match reached the size of the query
def clusters(query, threshold = 0)
  # rows of U that correspond to the query items (unknown items dropped)
  wanted_rows = query.map { |q_item| item2index(q_item) }.compact
  by_column = {}
  perfect = []
  column_no = 0
  u.each_column do |column|
    bucket = { pos: { query_match: [], clustered: [] },
               neg: { query_match: [], clustered: [] } }
    by_column[column_no] = bucket
    column.each_with_index do |value, row|
      # elements at or below the threshold are not part of the cluster
      next unless value.abs > threshold

      polarity = value > 0 ? :pos : :neg
      side = bucket[polarity]
      if wanted_rows.include?(row)
        side[:query_match] << index2item(row)
        # every query item has now been seen on this side of the column
        perfect << [column_no, polarity] if side[:query_match].size == query.size
      else
        side[:clustered] << [index2item(row), value]
      end
    end
    column_no += 1
  end
  [perfect, by_column]
end
index2item(index) click to toggle source
# File lib/evoc/svd.rb, line 26
# Looks up the item stored under +index+ in the @index2item table.
# Returns whatever that table yields for the key.
def index2item(index)
  @index2item[index]
end
indexes() click to toggle source
# File lib/evoc/svd.rb, line 18
# Returns every index known to the model, i.e. the keys of the
# @index2item lookup table.
def indexes
  @index2item.keys
end
item2index(item) click to toggle source
# File lib/evoc/svd.rb, line 30
# Looks up the index stored for +item+ in the @item2index table.
# Returns whatever that table yields for the key.
def item2index(item)
  @item2index[item]
end
items() click to toggle source
# File lib/evoc/svd.rb, line 22
# Returns every item known to the model, i.e. the keys of the
# @item2index lookup table.
def items
  @item2index.keys
end
svd(co_change_matrix) click to toggle source
# File lib/evoc/svd.rb, line 34
# Decomposes the given matrix by delegating to its +gesvd+ method.
#
# co_change_matrix: any object responding to +gesvd+ (initialize passes the
#                   NMatrix built by txstore_2_co_change_matrix)
#
# Returns the result of +gesvd+ unchanged; initialize unpacks it as
# u, s, v. NOTE(review): presumably gesvd yields [u, s, v_transposed] —
# confirm against the NMatrix documentation.
def svd(co_change_matrix)
  # The former `u,s,v = ...` only created three unused locals; the value of
  # the method was, and still is, the object returned by gesvd.
  co_change_matrix.gesvd
end
txstore_2_co_change_matrix(tx_store) click to toggle source

Returns the co-change matrix of the currently loaded files. Each column/row combination specifies how many times the two files changed together, for example:

       f1 f2 f3
    f1  2  1  1
    f2  1  3  1
    f3  1  1  1

# File lib/evoc/svd.rb, line 84
# Builds the co-change matrix for every item found in +tx_store+.
#
# tx_store: enumerable of transactions; each transaction must respond to
#           +items+
#
# Returns [index2item, item2index, co_change_matrix] where
#   index2item       maps a matrix index to its item
#   item2index       maps an item to its matrix index (indexes follow the
#                    order in which items are first encountered)
#   co_change_matrix NMatrix (float64, initialised to 0) whose [i, j] entry
#                    is the number of transactions containing both item i
#                    and item j; the diagonal holds the number of
#                    transactions containing the item itself
def txstore_2_co_change_matrix(tx_store)
  # item => { other_item => number of transactions containing both }
  pair_counts = {}
  tx_store.each do |tx|
    tx.items.each do |file_id|
      row = (pair_counts[file_id] ||= {})
      tx.items.each do |other_id|
        row[other_id] = row.fetch(other_id, 0) + 1
      end
    end
  end

  # number the items in first-seen order and keep both lookup directions
  index2item = {}
  item2index = {}
  pair_counts.each_key.with_index do |item, position|
    index2item[position] = item
    item2index[item] = position
  end

  # fill the pairwise co-change counts into a square matrix
  size = pair_counts.size
  matrix = NMatrix.new(size, 0, dtype: :float64)
  pair_counts.each do |item, row|
    column = item2index[item]
    row.each do |other, count|
      matrix[item2index[other], column] = count
    end
  end

  [index2item, item2index, matrix]
end