class Cluda::Kmeans

Constants

DEFAULT_OPTS

Public Class Methods

classify(list, opts = {}) click to toggle source

Classify the points using KMeans as the clustering algorithm

Example:

>> points = [
{ x: 1, y: 1},
{ x: 2, y: 1},
{ x: 1, y: 2},
{ x: 2, y: 2},
{ x: 4, y: 6},
{ x: 5, y: 7},
{ x: 5, y: 6},
{ x: 5, y: 5},
{ x: 6, y: 6},
{ x: 6, y: 5}
]
>> Cluda::Kmeans.classify( points, k: 1, distance_method: 'euclidean', be_smart: true, max_iterations: 50)

Arguments:

list:                       (Array [Hash] )
k:                          (Numeric) *optional*
centroids:                  (Array) *optional*
distance_method:            (String) *optional*
[If you want CluDA to be smart you have to specify the centroids ]
be_smart:                   (Boolean) *optional*
margin_distance_percentage: (Numeric) *optional* [Between 0 and 1]
max_iterations:             (Numeric) *optional*
# File lib/cluda/kmeans.rb, line 42
def classify(list, opts = {})
  Cluda.validate(list)
  Cluda.validate_centroids(opts[:centroids]) if opts[:be_smart]

  config = generate_config(list, opts)

  raise Cluda::InvalidDistanceMethod unless Cluda.valid_class?(config[:distance_method])

  calculate_clusters(list, **config)
end
generate_config(list, opts) click to toggle source
# File lib/cluda/kmeans.rb, line 53
def generate_config(list, opts)
  config = DEFAULT_OPTS.merge(opts)

  centroids_present?(config) ? process_centroids(config) : initialize_centroids(list, config)

  config[:margin] = config[:be_smart] ? config[:median_centroid] * config[:margin_distance_percentage] : 0

  config
end
initialize_centroids(list, config) click to toggle source
# File lib/cluda/kmeans.rb, line 75
def initialize_centroids(list, config)
  return config if list.empty? || config[:k] > list.size

  config[:centroids] = list.shuffle(random: Random.new(rand(0...config[:k])))[0...config[:k]]

  config
end
nearest_centroid(point, centroids, klass = Cluda::Euclidean) click to toggle source
# File lib/cluda/kmeans.rb, line 83
def nearest_centroid(point, centroids, klass = Cluda::Euclidean)
  return nil if centroids.empty?

  Cluda.validate(point)

  nearest_centroid = centroids[0]
  min_distance = klass.distance(point, nearest_centroid)

  centroids.each do |centroid|
    new_distance = klass.distance(point, centroid)
    if new_distance < min_distance
      min_distance = new_distance
      nearest_centroid = centroid
    end
  end

  [nearest_centroid, min_distance]
end
process_centroids(config) click to toggle source
# File lib/cluda/kmeans.rb, line 63
def process_centroids(config)
  config[:centroids].each do |point|
    if config[:median_centroid].nil? || config[:median_centroid] < point[:median]
      config[:median_centroid] = point[:median]
    end

    point.delete_if { |k, _| !%i[x y].include? k }
  end

  config
end

Private Class Methods

assign_points_to_centroids(list, centroids, klass, config) click to toggle source
# File lib/cluda/kmeans.rb, line 151
def assign_points_to_centroids(list, centroids, klass, config)
  list.each_with_object({}) do |point, cluster|
    centroid, distance = nearest_centroid(point, centroids, klass)

    if config[:be_smart] && distance > (config[:median_centroid] + config[:margin])
      config[:median_centroid] = distance
      centroids << point
      create_centroid(point, cluster)
      centroid = point
      distance = 0
    end

    cluster[centroid] ||= []
    cluster[centroid] << point.merge(distance: distance)
  end
end
calculate_clusters(list, centroids:, distance_method:, **config) click to toggle source
# File lib/cluda/kmeans.rb, line 104
def calculate_clusters(list, centroids:, distance_method:, **config)
  cluster = {}

  previous_centroids = nil
  klass = Cluda.const_get(distance_method.downcase.capitalize)

  config[:max_iterations].times do
    break if previous_centroids == centroids

    cluster = assign_points_to_centroids(list, centroids, klass, config)

    previous_centroids = centroids
    centroids = move_centroids(cluster)
  end

  cluster
end
centroids_present?(opts) click to toggle source
# File lib/cluda/kmeans.rb, line 122
def centroids_present?(opts)
  !(opts[:centroids].nil? || opts[:centroids].empty?)
end
create_centroid(centroid, output) click to toggle source
# File lib/cluda/kmeans.rb, line 132
def create_centroid(centroid, output)
  output[centroid] = []
end
get_key_values(points, key) click to toggle source
# File lib/cluda/kmeans.rb, line 136
def get_key_values(points, key)
  points.map { |point| point[key] }
end
init_cluster(centroids) click to toggle source
# File lib/cluda/kmeans.rb, line 126
def init_cluster(centroids)
  centroids.each_with_object({}) do |centroid, memo|
    memo[centroid] = []
  end
end
move_centroids(output) click to toggle source
# File lib/cluda/kmeans.rb, line 140
def move_centroids(output)
  output.map do |(_key, value)|
    next if value.empty?

    x = Cluda.median(get_key_values(value, :x))
    y = Cluda.median(get_key_values(value, :y))

    { x: x, y: y }
  end.compact
end