class AgglomerativeClustering::Set
Attributes
linkage[R]
Public Class Methods
new(linkage)
click to toggle source
# File lib/agglomerative_clustering/set.rb, line 6
# Creates a set that uses +linkage+ when distances to a newly merged
# cluster are calculated.
#
# linkage - object stored as-is and exposed through the +linkage+ reader.
def initialize(linkage)
  @linkage = linkage
end
Public Instance Methods
cluster(total_clusters)
click to toggle source
# File lib/agglomerative_clustering/set.rb, line 26
# Repeatedly merges the pair of clusters reported by
# +distance_matrix.shortest_distance+ until no more than +total_clusters+
# clusters remain.
#
# total_clusters - Integer number of clusters to stop at.
#
# Returns the Array of remaining clusters.
def cluster(total_clusters)
  # A merge needs two clusters, so never go below one cluster even when
  # a target smaller than one is requested.
  target = [total_clusters, 1].max
  merge_clusters(distance_matrix.shortest_distance) while clusters.size > target
  clusters
end
clusters()
click to toggle source
# File lib/agglomerative_clustering/set.rb, line 18
# Returns the memoized Array of clusters. On first use it is built with one
# AgglomerativeClustering::Cluster per entry in +points+.
def clusters
  return @clusters if @clusters

  @clusters = points.map { |point| AgglomerativeClustering::Cluster.new(point) }
end
distance_matrix()
click to toggle source
# File lib/agglomerative_clustering/set.rb, line 22
# Returns the memoized distance matrix, building it with
# +build_distance_matrix+ on first use.
def distance_matrix
  return @distance_matrix if @distance_matrix

  @distance_matrix = build_distance_matrix
end
find_outliers(percentage_of_clusters, distance)
click to toggle source
# File lib/agglomerative_clustering/set.rb, line 37
# Flags points that are far away from most other points, removes them from
# +points+, and rebuilds the distance matrix from the points that remain.
#
# A point is an outlier when the share of the other points lying farther
# than +distance+ from it is strictly greater than +percentage_of_clusters+
# percent.
#
# percentage_of_clusters - Numeric threshold on a 0-100 scale.
# distance               - Numeric; entries of the distance matrix above
#                          this value count as "far".
#
# Returns the de-duplicated Array of every outlier recorded so far.
def find_outliers(percentage_of_clusters, distance)
  matrix = distance_matrix.matrix
  other_points = matrix.row_count - 1
  # With fewer than two points there is nothing to compare against (and the
  # ratio below would divide by zero).
  return outliers unless other_points.positive?

  # Start from a clean tally so counts from an earlier call cannot leak in.
  count_hash.clear
  matrix.each_with_index do |value, row, _column|
    count_hash[row] ||= 0
    count_hash[row] += 1 if value > distance
  end

  # Float arithmetic on both sides: Integer division would truncate the
  # ratio and the threshold to whole numbers.
  threshold = percentage_of_clusters / 100.0
  count_hash.each do |row, far_count|
    set_outliers << points[row] if far_count.fdiv(other_points) > threshold
  end

  points.reject! { |point| outliers.include?(point) }
  @distance_matrix = build_distance_matrix
  # The memoized clusters were built from the old point list; drop them so
  # they are rebuilt to line up with the new matrix.
  @clusters = nil
  outliers
end
outliers()
click to toggle source
# File lib/agglomerative_clustering/set.rb, line 33
# Returns a new Array holding the entries of +set_outliers+ with
# duplicates removed.
def outliers
  set_outliers.uniq
end
points()
click to toggle source
# File lib/agglomerative_clustering/set.rb, line 10
# Returns the Array of points in this set, created empty on first access.
# The same Array instance is returned on every call.
def points
  return @points if @points

  @points = []
end
push(point)
click to toggle source
# File lib/agglomerative_clustering/set.rb, line 14
# Appends +point+ to the set.
#
# point - the object to append to +points+.
#
# Returns the +points+ Array.
def push(point)
  points.push(point)
end
Private Instance Methods
add_cluster(new_cluster)
click to toggle source
# File lib/agglomerative_clustering/set.rb, line 69
# Appends +new_cluster+ to +clusters+ and extends the distance matrix for it.
#
# new_cluster - the cluster to append.
#
# Returns +new_cluster+.
def add_cluster(new_cluster)
  clusters.push(new_cluster)
  # The cluster was just appended, so its index is the last one.
  last_index = clusters.length - 1
  update_distance_matrix(last_index)
  new_cluster
end
build_distance_matrix()
click to toggle source
# File lib/agglomerative_clustering/set.rb, line 88
# Builds a square matrix with one row and one column per point. Each entry
# is +euclidean_distance+ between the two points, rounded to two decimals.
#
# Returns a DistanceMatrix wrapping that matrix.
def build_distance_matrix
  size = points.size
  distances = Matrix.build(size, size) do |row, column|
    euclidean_distance(points[row], points[column]).round(2)
  end
  DistanceMatrix.new(distances)
end
count_hash()
click to toggle source
# File lib/agglomerative_clustering/set.rb, line 84
# Returns the memoized Hash used by +find_outliers+ to tally, per matrix
# row, how many entries exceed the distance threshold. Created empty on
# first access.
def count_hash
  return @count_hash if @count_hash

  @count_hash = {}
end
merge_clusters(indexes)
click to toggle source
# File lib/agglomerative_clustering/set.rb, line 52
# Merges the two clusters at the given positions into one, removes both
# originals, and appends the merged cluster.
#
# indexes - two-element Array of positions in +clusters+, in either order.
#
# Returns the merged cluster.
# Raises ArgumentError when both positions are the same.
def merge_clusters(indexes)
  index1, index2 = indexes
  raise ArgumentError, 'cannot merge a cluster with itself' if index1 == index2

  new_cluster = clusters[index1].merge(clusters[index2])
  # Remove the higher position first so the lower one is not shifted by the
  # first removal; this works whichever order the pair arrives in.
  lower, higher = [index1, index2].minmax
  remove_cluster(higher)
  remove_cluster(lower)
  add_cluster(new_cluster)
end
remove_cluster(index)
click to toggle source
# File lib/agglomerative_clustering/set.rb, line 75
# Deletes the cluster at +index+ and removes the matching edge from the
# distance matrix.
#
# index - Integer position in +clusters+.
#
# Returns whatever +distance_matrix.remove_edge+ returns.
def remove_cluster(index)
  clusters.delete_at(index)
  distance_matrix.remove_edge(index)
end
set_outliers()
click to toggle source
# File lib/agglomerative_clustering/set.rb, line 80
# Returns the memoized Array that +find_outliers+ appends flagged points
# to. It may contain duplicates; +outliers+ returns the de-duplicated view.
def set_outliers
  return @set_outliers if @set_outliers

  @set_outliers = []
end
update_distance_matrix(new_cluster)
click to toggle source
# File lib/agglomerative_clustering/set.rb, line 60
# Adds an edge to the distance matrix holding the distance from one cluster
# to every cluster currently in +clusters+ (itself included).
#
# new_cluster - despite its name, the Integer index in +clusters+ of the
#               cluster to measure from.
#
# Returns the distance matrix.
def update_distance_matrix(new_cluster)
  # Look the source cluster up once instead of on every iteration.
  source = clusters[new_cluster]
  distances = clusters.map { |cluster| linkage.calculate_distance(source, cluster) }
  distance_matrix.add_edge(distances)
  distance_matrix
end