class AgglomerativeClustering::Set

Attributes

linkage[R]

Public Class Methods

new(linkage) click to toggle source
# File lib/agglomerative_clustering/set.rb, line 6
# Creates a set whose clusters will be compared using +linkage+.
#
# linkage:: strategy object; elsewhere in this class it is sent
#           +calculate_distance(cluster_a, cluster_b)+.
def initialize(linkage)
  # Kept in the ivar exposed through the read-only +linkage+ attribute.
  @linkage = linkage
end

Public Instance Methods

cluster(total_clusters) click to toggle source
# File lib/agglomerative_clustering/set.rb, line 26
# Merges the pair reported by +distance_matrix.shortest_distance+ over and
# over until no more than +total_clusters+ clusters are left.
#
# total_clusters:: upper bound on the number of clusters to keep.
#
# Returns the +clusters+ array.
def cluster(total_clusters)
  merge_clusters(distance_matrix.shortest_distance) until clusters.size <= total_clusters
  clusters
end
clusters() click to toggle source
# File lib/agglomerative_clustering/set.rb, line 18
# Current clusters. On first use every entry of +points+ is wrapped in its
# own AgglomerativeClustering::Cluster; afterwards the same array is reused.
def clusters
  return @clusters if @clusters

  @clusters = points.map { |single_point| AgglomerativeClustering::Cluster.new(single_point) }
end
distance_matrix() click to toggle source
# File lib/agglomerative_clustering/set.rb, line 22
# Distance matrix for this set; built through +build_distance_matrix+ on
# first access and cached in +@distance_matrix+ from then on.
def distance_matrix
  return @distance_matrix if @distance_matrix

  @distance_matrix = build_distance_matrix
end
find_outliers(percentage_of_clusters, distance) click to toggle source
# File lib/agglomerative_clustering/set.rb, line 37
# Flags every point that lies farther than +distance+ from more than
# +percentage_of_clusters+ percent of the other points, removes the flagged
# points from +points+, and rebuilds the distance matrix.
#
# percentage_of_clusters:: threshold in percent (e.g. 50 for "more than
#                          half"); Integer or Float.
# distance::               a pairwise distance above this counts as "far".
#
# Returns the unique outliers collected so far, including those found by
# earlier calls.
def find_outliers(percentage_of_clusters, distance)
  matrix = distance_matrix.matrix
  other_points = matrix.row_count - 1

  # Tallies are keyed by row index, which is only meaningful for the current
  # matrix, so counts left over from a previous call must not leak in.
  count_hash.clear
  matrix.each_with_index do |value, row, _column|
    count_hash[row] = count_hash.fetch(row, 0) + 1 if value > distance
  end

  # With fewer than two points there is nothing to be far away from (and the
  # ratio below would have a zero denominator).
  if other_points > 0
    count_hash.each do |row, far_count|
      # Cross-multiplied form of far_count / other_points > percentage / 100.
      # Plain Integer division would truncate both ratios to 0 or 1.
      set_outliers << points[row] if far_count * 100 > percentage_of_clusters * other_points
    end
  end

  flagged = outliers
  points.reject! { |point| flagged.include?(point) }
  @distance_matrix = build_distance_matrix
  outliers
end
outliers() click to toggle source
# File lib/agglomerative_clustering/set.rb, line 33
# Points flagged by +find_outliers+ so far, with duplicates dropped.
# Returns a new array; the underlying +set_outliers+ list is left untouched.
def outliers
  flagged = set_outliers
  flagged.uniq
end
points() click to toggle source
# File lib/agglomerative_clustering/set.rb, line 10
# Mutable list of the points held by this set; starts out empty and is
# created on first access.
def points
  @points = [] unless @points
  @points
end
push(point) click to toggle source
# File lib/agglomerative_clustering/set.rb, line 14
# Appends +point+ to this set.
#
# Returns the +points+ array, so calls can be chained.
def push(point)
  points.push(point)
end

Private Instance Methods

add_cluster(new_cluster) click to toggle source
# File lib/agglomerative_clustering/set.rb, line 69
# Appends +new_cluster+ to +clusters+ and has the distance matrix extended
# for it (via +update_distance_matrix+, which receives the new index).
#
# Returns +new_cluster+.
def add_cluster(new_cluster)
  clusters.push(new_cluster)
  appended_at = clusters.size - 1
  update_distance_matrix(appended_at)
  new_cluster
end
build_distance_matrix() click to toggle source
# File lib/agglomerative_clustering/set.rb, line 88
# Builds a square matrix of pairwise +euclidean_distance+ values between all
# +points+, each rounded to two decimals, and wraps it in a DistanceMatrix.
def build_distance_matrix
  size = points.size
  rounded_distances = Matrix.build(size, size) do |i, j|
    euclidean_distance(points[i], points[j]).round(2)
  end
  DistanceMatrix.new(rounded_distances)
end
count_hash() click to toggle source
# File lib/agglomerative_clustering/set.rb, line 84
# Lazily created Hash used by +find_outliers+ to tally, per matrix row, how
# many distances exceed the threshold.
def count_hash
  @count_hash = {} unless @count_hash
  @count_hash
end
merge_clusters(indexes) click to toggle source
# File lib/agglomerative_clustering/set.rb, line 52
# Replaces the two clusters addressed by +indexes+ with their merge.
#
# indexes:: two-element array of positions in +clusters+ (as produced by
#           +distance_matrix.shortest_distance+); either order is accepted.
#
# Returns the merged cluster (the result of +add_cluster+).
def merge_clusters(indexes)
  index1, index2 = indexes
  new_cluster = clusters[index1].merge(clusters[index2])

  # Removing the lower position shifts the higher one down by one. Sorting
  # first keeps that adjustment correct even if the pair arrives reversed.
  low, high = [index1, index2].minmax
  remove_cluster(low)
  remove_cluster(high - 1)
  add_cluster(new_cluster)
end
remove_cluster(index) click to toggle source
# File lib/agglomerative_clustering/set.rb, line 75
# Drops the cluster at +index+ from +clusters+ and removes the matching edge
# from the distance matrix.
#
# Returns whatever +distance_matrix.remove_edge+ returns.
def remove_cluster(index)
  clusters.slice!(index)
  matrix = distance_matrix
  matrix.remove_edge(index)
end
set_outliers() click to toggle source
# File lib/agglomerative_clustering/set.rb, line 80
# Raw, lazily created list of flagged points. It may contain duplicates;
# +outliers+ exposes the de-duplicated view.
def set_outliers
  @set_outliers = [] unless @set_outliers
  @set_outliers
end
update_distance_matrix(new_cluster) click to toggle source
# File lib/agglomerative_clustering/set.rb, line 60
# Adds an edge to the distance matrix for the cluster at position
# +new_cluster+.
#
# new_cluster:: despite the name, this is an index into +clusters+.
#
# The edge holds the +linkage+ distance from that cluster to every cluster in
# +clusters+ (itself included). Returns the distance matrix.
def update_distance_matrix(new_cluster)
  edge = clusters.map do |other|
    linkage.calculate_distance(clusters[new_cluster], other)
  end
  distance_matrix.add_edge(edge)
  distance_matrix
end