class TLearn::K_Means
Attributes
c_list[RW]
data_list[RW]
k[RW]
Public Instance Methods
calc_dist(v, cluster)
click to toggle source
# File lib/t_learn/k_means.rb, line 48
# Mean absolute difference between a data vector and a cluster's vector.
#
# For every index i of +v+ the absolute difference between
# <tt>cluster.vec[i]</tt> and <tt>v[i]</tt> is accumulated, and the total is
# divided by the number of components of +v+.
#
# @param v [Array<Numeric>] the data point
# @param cluster [#vec] object whose +vec+ is indexable by the same indices as +v+
# @return [Float] the averaged absolute difference; 0.0 when +v+ is empty
def calc_dist(v, cluster)
  # An empty vector would otherwise evaluate 0.0 / 0, i.e. NaN, which never
  # compares as smaller than any running minimum.
  return 0.0 if v.empty?

  total = v.each_with_index.sum(0.0) { |v_x, i| (cluster.vec[i] - v_x).abs }
  total / v.size
end
change_clusters_center?()
click to toggle source
# File lib/t_learn/k_means.rb, line 56
# Tells whether at least one cluster in +@cluster_list+ reports that its
# center changed (via the cluster's own +change_center?+).
#
# @return [Boolean] true as soon as one cluster answers truthy, false otherwise
#   (including when the cluster list is empty)
def change_clusters_center?
  @cluster_list.any?(&:change_center?)
end
fit(data_list, k)
click to toggle source
# File lib/t_learn/k_means.rb, line 18
# Clusters +data_list+ into +k+ clusters.
#
# After +init+ has built the cluster list, each round:
# 1. empties every cluster's member list,
# 2. hands every data point to the cluster with the smallest +calc_dist+
#    (the first such cluster wins ties),
# 3. records a +format_for_log+ snapshot in the history,
# 4. lets every cluster recompute its center,
# and stops once no cluster reports a changed center.
#
# @param data_list [Array<Array<Numeric>>] the points to cluster
# @param k [Integer] number of clusters
# @return [Hash] +:result+ is the final +format_for_log+ snapshot, +:history+
#   is the list of snapshots taken in each round before centers were updated
def fit(data_list, k)
  init(data_list, k)
  history = []
  loop do
    @cluster_list.each(&:reset_v_list)
    @data_list.each do |d|
      # Start from infinity rather than a fixed number: with a finite
      # sentinel, points farther than the sentinel from every cluster kept
      # the -1 index and were silently put into the last cluster.
      min_dist = Float::INFINITY
      min_cluster_id = -1
      @cluster_list.each do |c|
        dist = calc_dist(d, c)
        next unless dist < min_dist

        min_cluster_id = c.id
        min_dist = dist
      end
      # Cluster ids are used as positions in @cluster_list.
      @cluster_list[min_cluster_id].add_v(d)
    end
    history.push(format_for_log)
    @cluster_list.each(&:calc_center)
    break unless change_clusters_center?
  end
  { result: format_for_log, history: history }
end
format_for_log()
click to toggle source
# File lib/t_learn/k_means.rb, line 44
# Builds a snapshot of the current clusters by collecting each cluster's
# +format_hash+ value, in cluster-list order.
#
# @return [Array] one +format_hash+ result per cluster in +@cluster_list+
def format_for_log
  @cluster_list.map(&:format_hash)
end
init(data_list, k=2)
click to toggle source
# File lib/t_learn/k_means.rb, line 9
# Stores the data set and builds the +k+ initial clusters.
#
# The data is cut into +k+ contiguous groups whose sizes differ by at most
# one, and group n is handed to cluster n. (Slicing with +each_slice(k)+
# would instead produce groups of k elements, so only the first k*k points
# would reach a cluster and small data sets would leave clusters without a
# group.)
#
# @param data_list [Array<Array<Numeric>>] the points to cluster; the first
#   point's size is taken as the dimensionality
# @param k [Integer] number of clusters (default 2)
# @return [Array<Cluster>] the newly built cluster list
# @raise [ArgumentError] if +k+ is not a positive integer
def init(data_list, k = 2)
  raise ArgumentError, "k must be a positive integer" unless k.is_a?(Integer) && k.positive?

  @data_list = data_list
  @dim = data_list[0].size
  @k = k

  # The first `extra` groups take one additional element so every point is used.
  base, extra = @data_list.size.divmod(@k)
  offset = 0
  @cluster_list = Array.new(@k) do |n|
    len = base + (n < extra ? 1 : 0)
    # With fewer points than clusters a group would be empty; pass nil in
    # that case, which is what a cluster received before when no slice was
    # left for it.
    group = len.zero? ? nil : @data_list[offset, len]
    offset += len
    Cluster.new(n, nil, group, @dim)
  end
end