class TLearn::K_Means

Attributes

c_list[RW]
data_list[RW]
k[RW]

Public Instance Methods

calc_dist(v, cluster) click to toggle source
# File lib/t_learn/k_means.rb, line 48
# Mean absolute coordinate difference between a point and a cluster's vector.
#
# For every position i of +v+, accumulates |cluster.vec[i] - v[i]| and then
# divides the total by v.size.
#
# v       - enumerable of numeric coordinates (must respond to
#           each_with_index and size).
# cluster - object whose +vec+ can be indexed by the positions of +v+.
#
# Returns the averaged distance as a Float (NaN when +v+ is empty, because
# 0.0 is divided by 0).
def calc_dist(v, cluster)
  total = v.each_with_index.inject(0.0) do |acc, (coord, idx)|
    acc + (cluster.vec[idx] - coord).abs
  end
  total / v.size
end
change_clusters_center?() click to toggle source
# File lib/t_learn/k_means.rb, line 56
# Reports whether at least one cluster in @cluster_list says its center
# changed (i.e. its +change_center?+ returns a truthy value).
#
# Returns true as soon as one such cluster is found, false otherwise
# (including when @cluster_list is empty).
def change_clusters_center?
  @cluster_list.any? { |cluster| cluster.change_center? }
end
fit(data_list, k) click to toggle source
# File lib/t_learn/k_means.rb, line 18
# Clusters +data_list+ into +k+ clusters.
#
# After calling init(data_list, k), repeats the following until
# change_clusters_center? returns false:
#   1. empty every cluster's member list (reset_v_list),
#   2. add each data point to the cluster with the smallest calc_dist
#      (the first such cluster wins on ties),
#   3. record a format_for_log snapshot in the history,
#   4. let every cluster recompute its center (calc_center).
#
# data_list - collection of points; each point is passed to calc_dist/add_v.
# k         - number of clusters, forwarded to init.
#
# Returns a Hash with
#   :result  - format_for_log output after the last iteration,
#   :history - one format_for_log snapshot per iteration, taken after the
#              assignment step and before the centers are recomputed.
def fit(data_list, k)
  init(data_list, k)
  history = []
  loop do
    @cluster_list.each { |c| c.reset_v_list }
    @data_list.each do |d|
      # Seed the search with the first cluster instead of a fixed numeric
      # sentinel, so points farther than any hard-coded bound are still
      # assigned to their genuinely nearest cluster.
      min_dist = nil
      min_cluster_id = nil
      @cluster_list.each do |c|
        dist = calc_dist(d, c)
        next unless min_dist.nil? || dist < min_dist

        min_cluster_id = c.id
        min_dist = dist
      end
      # Clusters are looked up by id, which is used as the index into
      # @cluster_list.
      @cluster_list[min_cluster_id].add_v(d)
    end

    history.push(format_for_log)
    @cluster_list.each { |c| c.calc_center }
    break unless change_clusters_center?
  end

  { result: format_for_log, history: history }
end
format_for_log() click to toggle source
# File lib/t_learn/k_means.rb, line 44
# Builds a loggable snapshot of the current clustering.
#
# Returns an Array holding the +format_hash+ value of every cluster in
# @cluster_list, in list order.
def format_for_log
  @cluster_list.map(&:format_hash)
end
init(data_list, k=2) click to toggle source
# File lib/t_learn/k_means.rb, line 9
# Stores the input data and builds the initial list of clusters.
#
# Sets @data_list, @dim (size of the first data point), @k and
# @cluster_list. Cluster n is created as
# Cluster.new(n, nil, <n-th slice of the data>, @dim).
#
# data_list - indexable collection of points; data_list[0] must respond
#             to size.
# k         - number of clusters to create (default 2).
#
# Returns the new @cluster_list.
#
# NOTE(review): each_slice(k) yields slices of k points each, not k slices;
# when the data produces fewer than k slices the trailing clusters receive
# nil as their slice — confirm this is what Cluster expects.
def init(data_list, k=2)
  @data_list = data_list
  seed_slices = @data_list.each_slice(k).to_a
  @dim = data_list[0].size
  @k = k
  @cluster_list = @k.times.map do |idx|
    Cluster.new(idx, nil, seed_slices[idx], @dim)
  end
end