class Spark::Mllib::GaussianMixtureModel

GaussianMixtureModel

A clustering model derived from the Gaussian Mixture Model method.

Examples:

# NOTE(review): presumably makes the MLlib names used below (DenseVector,
# GaussianMixture) available unqualified — confirm against Spark::Mllib.import.
Spark::Mllib.import

# Six two-dimensional sample points.
data = [
  DenseVector.new([-0.1, -0.05]),
  DenseVector.new([-0.01, -0.1]),
  DenseVector.new([0.9, 0.8]),
  DenseVector.new([0.75, 0.935]),
  DenseVector.new([-0.83, -0.68]),
  DenseVector.new([-0.91, -0.76])
]

# Train on the parallelized sample points. The positional 3 is presumably the
# number of mixture components — confirm against GaussianMixture.train.
model = GaussianMixture.train($sc.parallelize(data), 3, convergence_tol: 0.0001, max_iterations: 50, seed: 10)

# Run predict over the same points and call collect on the result.
labels = model.predict($sc.parallelize(data)).collect

Attributes

gaussians[R]
k[R]
weights[R]

Public Class Methods

new(weights, gaussians) click to toggle source
# File lib/spark/mllib/clustering/gaussian_mixture.rb, line 29
# Build a model from its mixture weights and its component gaussians.
#
# weights::   collection of mixture weights; its size is stored as +k+
# gaussians:: collection of component gaussians
def initialize(weights, gaussians)
  @weights, @gaussians = weights, gaussians
  # k is taken from the number of weights supplied.
  @k = @weights.size
end

Public Instance Methods

means() click to toggle source
# File lib/spark/mllib/clustering/gaussian_mixture.rb, line 50
# Returns the +mu+ of every gaussian, in the order of @gaussians.
# The list is built on first use and cached in @means afterwards.
def means
  return @means if @means

  @means = @gaussians.map { |gaussian| gaussian.mu }
end
predict(rdd) click to toggle source

Find the cluster in which each point of 'rdd' has maximum membership in this model.

# File lib/spark/mllib/clustering/gaussian_mixture.rb, line 37
# Hard cluster assignment for every point of +rdd+.
#
# The per-point memberships come from predict_soft; each one is then mapped
# to the index of its largest entry.
#
# rdd:: must be a Spark::RDD
#
# Raises ArgumentError when +rdd+ is not a Spark::RDD.
def predict(rdd)
  raise ArgumentError, 'Argument must be a RDD.' unless rdd.is_a?(Spark::RDD)

  memberships = predict_soft(rdd)
  # The mapping function is handed to the RDD as source text.
  memberships.map('lambda{|x| x.index(x.max)}')
end
predict_soft(rdd) click to toggle source

Find the membership of each point in 'rdd' to all mixture components.

# File lib/spark/mllib/clustering/gaussian_mixture.rb, line 46
# Soft assignment: forwards +rdd+ together with this model's weights, means
# and sigmas to 'predictSoftGMM' on a fresh RubyMLLibAPI through Spark.jb,
# and returns whatever that call returns.
def predict_soft(rdd)
  bridge = Spark.jb
  api = RubyMLLibAPI.new
  bridge.call(api, 'predictSoftGMM', rdd, weights, means, sigmas)
end
sigmas() click to toggle source
# File lib/spark/mllib/clustering/gaussian_mixture.rb, line 54
# Returns the +sigma+ of every gaussian, in the order of @gaussians.
# The list is built on first use and cached in @sigmas afterwards.
def sigmas
  return @sigmas if @sigmas

  @sigmas = @gaussians.map { |gaussian| gaussian.sigma }
end