class PCA

Attributes

components[R]
explained_variance[R]
explained_variance_ratio[R]
mean[RW]
singular_values[R]
std[RW]

Public Class Methods

new(opts = {}) click to toggle source
# File lib/pca.rb, line 7
# Stores the configuration read from +opts+.
#
# opts - options Hash:
#        :components - kept as @n_components; slice_n uses it to limit how
#                      many columns/elements are retained (nil keeps all).
#        :scale_data - kept as @scale_data; when truthy the data is also
#                      divided by the per-column standard deviation.
def initialize(opts = {})
  @scale_data = opts[:scale_data]
  @n_components = opts[:components]
end

Public Instance Methods

components=(c) click to toggle source
# File lib/pca.rb, line 37
# Replaces the stored components with +c+.
#
# +c+ is first converted with ensure_matrix. It is then transposed, passed
# through slice_n and transposed back, so the slice_n limit is applied to the
# rows of +c+ rather than to its columns.
def components=(c)
  by_column = ensure_matrix(c).transpose
  @components = slice_n(by_column).transpose
end
fit(x) click to toggle source
# File lib/pca.rb, line 12
# Fits the model to +x+.
#
# The data is normalised with prepare_data (which recomputes the stored mean
# and, when scaling is on, the stored std) and then handed to _fit.
#
# Returns self so calls can be chained.
def fit(x)
  _fit(prepare_data(x))
  self
end
fit_transform(x) click to toggle source
# File lib/pca.rb, line 23
# Fits the model to +x+ and projects that same data in one call.
#
# The data is normalised once with prepare_data; the normalised matrix is
# used both for _fit and for _transform.
#
# Returns the result of _transform.
def fit_transform(x)
  prepared = prepare_data(x)
  _fit(prepared)
  _transform(prepared)
end
inverse_transform(x) click to toggle source
# File lib/pca.rb, line 29
# Maps +x+ back through the stored components and reverses the normalisation.
#
# +x+ is converted with ensure_matrix and multiplied by @components. The
# product is then un-scaled (only when @scale_data is truthy) and has the
# stored mean added back, in that order — the reverse of prepare_data.
#
# Returns the restored matrix.
def inverse_transform(x)
  restored = ensure_matrix(x) * @components
  undo_scale(restored) if @scale_data
  undo_mean_normalize(restored)
  restored
end
transform(x) click to toggle source
# File lib/pca.rb, line 18
# Projects +x+ using an already fitted model.
#
# prepare_data is told to reuse the stored mean/std instead of recomputing
# them, so +x+ is normalised with the values saved earlier.
#
# Returns the result of _transform.
def transform(x)
  _transform(prepare_data(x, use_saved_mean_and_std: true))
end

Private Instance Methods

_fit(x) click to toggle source
# File lib/pca.rb, line 54
# Computes and stores the model from already normalised data +x+.
#
# Builds (x' * x) / rows, runs SV_decomp on it (destructured as u, v, s; v is
# not used) and stores, each limited by slice_n:
#   @explained_variance       - s**2 / rows
#   @explained_variance_ratio - the above divided by its sum
#   @singular_values          - s
#   @components               - the kept columns of u, transposed
#
# NOTE(review): s comes from decomposing the covariance-style matrix rather
# than x itself; confirm that s**2 / rows is the intended definition of
# explained variance.
def _fit(x)
  rows = x.size1
  covariance = (x.transpose * x) / rows
  u, _v, s = covariance.SV_decomp

  variance = (s**2) / rows
  variance_share = variance / variance.sum

  @explained_variance = slice_n(variance)
  @explained_variance_ratio = slice_n(variance_share)
  @singular_values = slice_n(s)
  @components = slice_n(u).transpose
end
_transform(x) click to toggle source
# File lib/pca.rb, line 67
# Projects already normalised data +x+ by multiplying it with the transpose
# of the stored components.
#
# Returns the product.
def _transform(x)
  loadings = @components.transpose
  x * loadings
end
calculate_mean(x) click to toggle source
# File lib/pca.rb, line 82
# Collects the mean of every column of +x+.
#
# Returns an Array with one entry per column, in column order.
def calculate_mean(x)
  (0...x.size2).map do |index|
    x.col(index).mean
  end
end
calculate_std(x) click to toggle source
# File lib/pca.rb, line 94
# Collects the standard deviation (as reported by the column's +sd+) of every
# column of +x+.
#
# Returns an Array with one entry per column, in column order.
def calculate_std(x)
  (0...x.size2).map do |index|
    x.col(index).sd
  end
end
ensure_matrix(x) click to toggle source
# File lib/pca.rb, line 71
# Coerces +x+ into a GSL::Matrix.
#
# - a GSL::Matrix is returned as the same object (no copy is made)
# - an Array is splatted into GSL::Matrix[]
# - anything else is asked to convert itself via +to_gm+
def ensure_matrix(x)
  case x
  when GSL::Matrix then x
  when Array then GSL::Matrix[*x]
  else x.to_gm
  end
end
mean_normalize(x) click to toggle source
# File lib/pca.rb, line 86
# Subtracts the stored mean of each column from that column of +x+, in place
# (via the column's +sub!+).
#
# Returns the column count, as Integer#times does; callers rely on the
# mutation of +x+, not on the return value.
def mean_normalize(x)
  x.size2.times do |index|
    column = x.col(index)
    column.sub!(@mean[index])
  end
end
prepare_data(x, opts = {}) click to toggle source
# File lib/pca.rb, line 43
# Converts +x+ to a matrix and normalises it: every column has its mean
# subtracted and, when @scale_data is truthy, is divided by its standard
# deviation.
#
# The caller's data is left untouched. mean_normalize and scale modify the
# matrix in place, and ensure_matrix returns a GSL::Matrix argument as the
# very same object, so in that case a copy is normalised instead.
#
# x    - data accepted by ensure_matrix.
# opts - options Hash:
#        :use_saved_mean_and_std - when truthy, reuse the stored @mean/@std
#                                  instead of recomputing them from +x+.
#
# Returns the normalised matrix.
def prepare_data(x, opts = {})
  matrix = ensure_matrix(x)
  # Only copy when we were handed the caller's own object; matrices freshly
  # built by ensure_matrix are already private to this call.
  matrix = matrix.clone if matrix.equal?(x)

  reuse_saved = opts[:use_saved_mean_and_std]

  @mean = calculate_mean(matrix) unless reuse_saved
  mean_normalize(matrix)

  if @scale_data
    # The std is taken after centring, from the already mean-normalised data.
    @std = calculate_std(matrix) unless reuse_saved
    scale(matrix)
  end

  matrix
end
scale(x) click to toggle source
# File lib/pca.rb, line 98
# Divides each column of +x+ by the stored standard deviation for that
# column, in place (via the column's +div!+).
#
# Returns the column count, as Integer#times does; callers rely on the
# mutation of +x+, not on the return value.
def scale(x)
  x.size2.times do |index|
    column = x.col(index)
    column.div!(@std[index])
  end
end
slice_n(x) click to toggle source
# File lib/pca.rb, line 106
# Limits +x+ to the configured number of components.
#
# When @n_components is not set, +x+ is returned unchanged. Otherwise:
# - a GSL::Matrix yields submatrix(nil, 0, @n_components)
# - a GSL::Vector yields x[0, @n_components]
# - any other object yields nil (no branch matches)
def slice_n(x)
  limit = @n_components
  return x unless limit

  case x
  when GSL::Matrix then x.submatrix(nil, 0, limit)
  when GSL::Vector then x[0, limit]
  end
end
undo_mean_normalize(x) click to toggle source
# File lib/pca.rb, line 90
# Adds the stored mean of each column back onto that column of +x+, in place
# (via the column's +add!+); the counterpart of mean_normalize.
#
# Returns the column count, as Integer#times does; callers rely on the
# mutation of +x+, not on the return value.
def undo_mean_normalize(x)
  x.size2.times do |index|
    column = x.col(index)
    column.add!(@mean[index])
  end
end
undo_scale(x) click to toggle source
# File lib/pca.rb, line 102
# Multiplies each column of +x+ by the stored standard deviation for that
# column, in place (via the column's +mul!+); the counterpart of scale.
#
# Returns the column count, as Integer#times does; callers rely on the
# mutation of +x+, not on the return value.
def undo_scale(x)
  x.size2.times do |index|
    column = x.col(index)
    column.mul!(@std[index])
  end
end