class AprendizajeMaquina::KmeansClustering

Public Class Methods

new(num_of_cluster_centroids,dataset_matrix) click to toggle source
# File lib/aprendizaje_maquina/clustering.rb, line 3
# Stores the dataset, caches its dimensions and seeds the initial centroids.
#
# num_of_cluster_centroids - how many centroids to create.
# dataset_matrix           - object responding to +column_count+ and
#                            +row_count+ (presumably a Matrix with one
#                            sample per row; confirm with callers).
def initialize(num_of_cluster_centroids, dataset_matrix)
  @num_of_cluster_centroids = num_of_cluster_centroids
  @dataset_matrix = dataset_matrix
  @num_columns, @num_rows = dataset_matrix.column_count, dataset_matrix.row_count
  # Must run last: init_cluster_centroids reads the instance variables set above.
  @cluster_centroids = init_cluster_centroids
end

Public Instance Methods

cluster(num) click to toggle source
# File lib/aprendizaje_maquina/clustering.rb, line 15
# Returns whatever is stored in the instance variable "@cluster_<num>".
#
# num - cluster index; the clustering step stores one such variable per
#       centroid, numbered from 0.
#
# Returns the stored value, or nil when that variable was never set.
def cluster(num)
  instance_variable_get("@cluster_#{num}")
end
fit(iterations) click to toggle source
# File lib/aprendizaje_maquina/clustering.rb, line 11
# Public entry point for training: delegates to the private +clustering+
# method.
#
# iterations - passed unchanged to +clustering+, which calls +times+ on it.
#
# Returns whatever +clustering+ returns.
def fit(iterations)
        clustering(iterations)
end
predict(vector) click to toggle source
# File lib/aprendizaje_maquina/clustering.rb, line 19
# Finds the centroid closest to +vector+.
#
# vector - value from which each centroid can be subtracted, giving an
#          object that responds to +r+ (presumably a Vector; confirm
#          with callers).
#
# Returns the zero-based index of the nearest centroid (the first one on a
# tie), or nil when there are no centroids.
def predict(vector)
  distances = @cluster_centroids.map { |centroid| (vector - centroid).r }
  distances.index(distances.min)
end

Private Instance Methods

array_to_vector(array) click to toggle source
# File lib/aprendizaje_maquina/clustering.rb, line 30
# Converts +array+ into a Vector that owns a copy of the elements, so later
# mutation of +array+ does not affect the returned vector.
#
# array - the elements to wrap.
#
# Returns a new Vector.
def array_to_vector(array)
  # The second positional argument is Vector.elements' +copy+ flag. The
  # previous `copy = true` spelling silently created an unused local.
  Vector.elements(array, true)
end
clustering(iterations) click to toggle source
# File lib/aprendizaje_maquina/clustering.rb, line 62
# Runs +iterations+ rounds of assign-then-update over the dataset.
#
# Each round:
#   1. assigns every dataset row to the centroid at the smallest distance
#      (the first centroid wins ties),
#   2. stores the rows of cluster i in the instance variable "@cluster_<i>",
#   3. replaces each centroid with +media+ of its assigned rows.
#
# Every row is assigned individually, so identical rows each contribute to
# the mean. A centroid that receives no rows keeps its previous position,
# because +media+ of an empty list is not a usable centroid.
#
# iterations - number of rounds; must respond to +times+.
#
# Returns the value of +iterations.times+.
def clustering(iterations)
  iterations.times do
    members = Array.new(@cluster_centroids.length) { [] }

    # Assignment step: distances are measured against the centroids as they
    # stood at the start of this round.
    @num_rows.times do |row_index|
      row = @dataset_matrix.row(row_index)
      distances = @cluster_centroids.map { |centroid| (row - centroid).r }
      nearest = distances.index(distances.min)
      next if nearest.nil? # no centroids at all: nothing to assign to

      members[nearest] << row
    end

    # Update step.
    members.each_with_index do |group, index|
      set("@cluster_#{index}", group)
      @cluster_centroids[index] = media(group) unless group.empty?
    end
  end
end
get(instance_variable_name) click to toggle source
# File lib/aprendizaje_maquina/clustering.rb, line 58
# Reads an instance variable of this object by name.
#
# instance_variable_name - variable name including the leading "@"; callers
#                          in this class pass "@cluster_<index>".
#
# Returns the result of +instance_variable_get+ for that name.
def get(instance_variable_name)
        instance_variable_get(instance_variable_name)
end
init_cluster_centroids() click to toggle source
# File lib/aprendizaje_maquina/clustering.rb, line 43
# Builds the starting centroids. For each centroid, one random value is
# drawn per dataset column, bounded by that column's minimum and maximum.
#
# Returns an Array of @num_of_cluster_centroids items, each produced by
# +array_to_vector+ from @num_columns random coordinates.
def init_cluster_centroids
  Array.new(@num_of_cluster_centroids) do
    coordinates = Array.new(@num_columns) do |column_index|
      column = @dataset_matrix.column(column_index)
      rand(column.min..column.max)
    end
    array_to_vector(coordinates)
  end
end
media(array) click to toggle source
# File lib/aprendizaje_maquina/clustering.rb, line 35
# Arithmetic mean of the elements of +array+.
#
# array - list whose elements support +; the combined total must accept
#         being multiplied by a Float on the left.
#
# Returns +array+ itself when it is empty (no error is raised); otherwise
# the folded total scaled by 1.0 / array.length.
def media(array)
  return array if array.empty?

  total = array.reduce(:+)
  1.0 / array.length * total
end
set(instance_variable_name,instance_variable_value) click to toggle source
# File lib/aprendizaje_maquina/clustering.rb, line 54
# Writes an instance variable of this object by name.
#
# instance_variable_name  - variable name including the leading "@"; the
#                           clustering step passes "@cluster_<index>".
# instance_variable_value - value to store.
#
# Returns the result of +instance_variable_set+.
def set(instance_variable_name,instance_variable_value)
        instance_variable_set(instance_variable_name,instance_variable_value)
end