class Spark::Mllib::SparseVector

A sparse vector represented by an index array and a value array.

A sparse vector is a vector in which most of the elements are zero.

Example:

SparseVector.new(4, {1 => 1.0, 3 => 5.5}).values
# => [0, 1.0, 0, 5.5]

SparseVector.new(4, [[1, 3], [1.0, 5.5]]).values
# => [0, 1.0, 0, 5.5]

SparseVector.new(4, [1, 3], [1.0, 5.5]).values
# => [0, 1.0, 0, 5.5]

Attributes

indices[R]

Public Class Methods

new(arg1, arg2=nil, arg3=nil) click to toggle source
Calls superclass method Spark::Mllib::VectorAdapter::new
# File lib/spark/mllib/vector.rb, line 131
# Builds a sparse vector from a size plus index/value data.
#
# Accepted call shapes:
#   new(size, {index => value, ...})
#   new(size, [indices, values])
#   new(size, indices, values)
#
# arg1:: vector size; forwarded to the superclass constructor together with
#        the :sparse marker.
# arg2:: a Hash of index => value, a two-element Array holding the indices
#        Array and the values Array, or the indices Array itself.
# arg3:: the values Array; only consulted when arg2 is the indices Array.
def initialize(arg1, arg2=nil, arg3=nil)
  super(:sparse, arg1)

  if arg2.is_a?(Hash)
    @indices = arg2.keys
    @values = arg2.values
  elsif arg3.nil? && arg2.is_a?(Array) && arg2.size == 2 && arg2.all? { |part| part.is_a?(Array) }
    # Packed [indices, values] form. Without this branch @values would be nil
    # and the zip below would raise TypeError.
    @indices, @values = arg2
  else
    @indices = arg2
    @values = arg3
  end

  # Write every stored entry through the element setter so the underlying
  # storage reflects the non-zero positions.
  @indices.zip(@values).each do |index, value|
    self[index] = value
  end
end
parse(data) click to toggle source

Convert string to vector

SparseVector.parse("(5,[1,4],[3.0,5.0])")
# File lib/spark/mllib/vector.rb, line 151
# Builds a SparseVector from its textual form "(size,[indices],[values])",
# e.g. "(5,[1,4],[3.0,5.0])".
#
# Spaces are tolerated around the two top-level commas and inside the
# bracketed lists. Values may carry a sign and an exponent (for example
# "-1.5" or "2.0e-05"), so negative and very small floats can be read.
#
# data:: the String to parse.
#
# Returns a new SparseVector.
# Raises ArgumentError when the string does not contain the expected shape.
def self.parse(data)
  # The value list additionally accepts '-', '+', 'e' and 'E'; the index list
  # stays digits-only.
  match = data.match(/\(([0-9]+)[ ]*,[ ]*\[([0-9,. ]*)\][ ]*,[ ]*\[([0-9,. eE+\-]*)\]\)/)
  raise ArgumentError, 'Unknow format for SparseVector.' unless match

  size = match[1].to_i
  indices = match[2].split(',').map(&:to_i)
  values = match[3].split(',').map(&:to_f)

  SparseVector.new(size, indices, values)
end

Public Instance Methods

marshal_dump() click to toggle source
# File lib/spark/mllib/vector.rb, line 175
# Custom Marshal serialization: the vector is reduced to the three pieces
# needed to rebuild it.
#
# Returns an Array of the form [size, indices, values].
def marshal_dump
  [size].push(indices, values)
end
marshal_load(array) click to toggle source
# File lib/spark/mllib/vector.rb, line 179
# Custom Marshal deserialization: re-runs the constructor with the first
# three elements of the dumped array.
#
# array:: the Array produced by marshal_dump, i.e. [size, indices, values].
def marshal_load(array)
  # values_at pads missing positions with nil, matching plain index reads.
  initialize(*array.values_at(0, 1, 2))
end
to_s() click to toggle source

Convert vector to string

SparseVector.new(5, {1 => 3, 4 => 5}).to_s
# => "(5,[1,4],[3.0,5.0])"
# File lib/spark/mllib/vector.rb, line 171
# Renders the vector as "(size,[indices],[values])", with the indices and
# values each joined by commas and no spaces.
#
# Returns a String.
def to_s
  index_list = indices.join(',')
  value_list = values.join(',')
  format('(%s,[%s],[%s])', size, index_list, value_list)
end