class LrLinearRegression
Attributes
lambda[R]
mu[R]
normalize[R]
sigma[R]
theta[R]
x[R]
y[R]
Public Class Methods
new()
# File lib/lr_linear_regression.rb, line 8
def initialize
  @mu = 0
  @sigma = 1
end
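A minimal instantiation sketch. The require path is an assumption inferred from the file path above and may differ in your install:

require 'lr_linear_regression' # assumed require path

model = LrLinearRegression.new
# @mu and @sigma start at 0 and 1, so normalization is a no-op
# until load_training_data computes the real statistics.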
Public Instance Methods
compute_cost(test_x = nil, test_y = nil)
Compute the mean squared cost / error function. If no test data is given, the cost is computed over the training data.
# File lib/lr_linear_regression.rb, line 35
def compute_cost test_x = nil, test_y = nil
  if not test_x.nil?
    test_x.each_index do |row|
      test_x[row].each_index do |i|
        test_x[row][i] = (test_x[row][i] - @mu[i]) / @sigma[i].to_f
      end
    end if @normalize
    test_x = test_x.map { |r| [1].concat(r) }
  end

  # per default use training data to compute cost if no data is given
  cost_x = test_x.nil? ? @x : Matrix.rows( test_x )
  cost_y = test_y.nil? ? @y : Matrix.rows( test_y.collect { |e| [e] } )

  # First use matrix multiplication and vector subtraction to find errors
  errors = (cost_x * @theta) - cost_y

  # Then square all errors
  errors = errors.map { |e| (e.to_f**2) }

  # Find the mean of the square errors
  mean_square_error = 0.5 * (errors.inject{ |sum, e| sum + e }.to_f / errors.row_size)

  return mean_square_error
end
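A hedged example of evaluating on held-out data, continuing from the examples below; all values are illustrative. Note that, as written, compute_cost normalizes test_x in place, so pass a copy if you need the raw values afterwards:

test_x = [ [1100, 2], [1800, 4] ]  # illustrative raw feature values
test_y = [ 180_000, 310_000 ]

puts model.compute_cost(test_x, test_y) # cost on the test set
puts model.compute_cost                 # no arguments: cost on training data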
load_training_data(x_data, y_data, normalize = true)
Loads and normalizes the training data; must be called prior to training.
Arguments:
x_data: (Two-dimensional array with the independent variables of your training data)
y_data: (Array with the dependent variables of your training data)
# File lib/lr_linear_regression.rb, line 17
def load_training_data x_data, y_data, normalize = true
  @normalize = normalize

  # normalize the x_data
  x_data = normalize_data( x_data ) if @normalize

  # add 1 column to our data
  x_data = x_data.map { |r| [1].concat(r) }

  # build our x Matrix & y Vector
  @x = Matrix.rows( x_data )
  @y = Matrix.rows( y_data.collect { |e| [e] } )

  @theta = Matrix.zero(@x.column_size, 1)
end
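A usage sketch continuing from the instantiation example above; the data values are illustrative only:

# Each row is one training example, e.g. [square_feet, bedrooms]
x_data = [ [1500, 3], [2000, 4], [850, 2], [1200, 3] ]
y_data = [ 250_000, 340_000, 140_000, 200_000 ]

model.load_training_data(x_data, y_data) # normalize defaults to true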
predict(data)
Makes a prediction based on your trained model. A training method (train_normal_equation or train_gradient_descent) must be called prior to making a prediction.
Arguments:
data: (Array of independent variables to base your prediction on)
# File lib/lr_linear_regression.rb, line 102
def predict data
  # normalize
  data.each_index do |i|
    data[i] = (data[i] - @mu[i]) / @sigma[i].to_f
  end if @normalize

  # add 1 column to prediction data
  data = [1].concat( data )

  # perform prediction
  prediction = (Matrix[data] * @theta)[0,0].to_f

  return prediction
end
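A short usage sketch with an illustrative input. Since predict writes the normalized values back into the argument array, pass a duplicate if you want to keep the raw values:

input = [1600, 3]                # illustrative raw feature values
puts model.predict(input.dup)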
train_gradient_descent( alpha = 0.01, iterations = 500, verbose = false )
Calculate the optimal theta using gradient descent.
Arguments:
alpha: Learning rate
iterations: Number of iterations to run gradient descent
verbose: If true, outputs the cost after each iteration; useful for tuning the learning rate (alpha) and iteration count
# File lib/lr_linear_regression.rb, line 82
def train_gradient_descent( alpha = 0.01, iterations = 500, verbose = false )
  0.upto( iterations ) do |i|
    @temp_theta = Array.new(@theta.row_size)
    0.upto(@theta.row_size-1) do |row|
      @temp_theta[row] = @theta[row,0] - alpha * compute_gradient(row)
    end
    @theta = Matrix.columns([@temp_theta])

    puts "Cost after #{i} iterations = #{compute_cost}" if verbose
  end
end
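A hedged training sketch; the alpha and iteration values are illustrative starting points, not recommendations:

model.train_gradient_descent(0.01, 500, true)
# verbose = true prints the cost after every iteration: a cost that
# grows suggests alpha is too large; a cost that barely moves suggests
# alpha is too small or more iterations are needed.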
train_normal_equation(l = 0)
Calculate the optimal theta using the normal equation. The optional argument l sets the regularization parameter lambda; the default of 0 gives an unregularized fit.
# File lib/lr_linear_regression.rb, line 63
def train_normal_equation l = 0
  @lambda = l
  lambda_matrix = Matrix.build(@theta.row_size,@theta.row_size) do |c,r|
    (( c == 0 && r == 0) || c != r) ? 0 : 1;
  end

  # Calculate the optimal theta using the regularized normal equation
  # theta = ( X' * X + lambda * L )^-1 * X' * y
  @theta = (@x.transpose * @x + @lambda * lambda_matrix ).inverse * @x.transpose * @y

  return @theta
end
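Usage sketch. The lambda_matrix built above is an identity matrix with the [0,0] entry zeroed, so the bias term is left unregularized; with l = 0 the whole term vanishes and this reduces to ordinary least squares:

theta = model.train_normal_equation      # ordinary least squares
theta = model.train_normal_equation(1.0) # ridge-style fit, lambda = 1.0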
Private Instance Methods
compute_gradient( parameter )
Compute the gradient of the cost function with respect to a single parameter (row of theta).
# File lib/lr_linear_regression.rb, line 148
def compute_gradient( parameter )
  # First use matrix multiplication and vector subtraction to find errors
  gradients = ((@x * @theta) - @y).transpose * @x.column(parameter)

  # Mean the gradient
  mean = gradients.inject{ |sum, e| sum + e } / gradients.size

  return mean
end
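For intuition, the same matrix algebra on a tiny hand-made example, using only Ruby's stdlib Matrix; all values are illustrative:

require 'matrix'

x     = Matrix[ [1, 2.0], [1, 4.0] ]   # two examples, bias column first
y     = Matrix[ [3.0], [5.0] ]
theta = Matrix[ [0.0], [0.0] ]

errors    = (x * theta) - y                # h(x) - y for each example
gradients = errors.transpose * x.column(1) # weight errors by feature 1
puts gradients.inject { |sum, e| sum + e } / gradients.size # => -26.0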
normalize_data(x_data, mu = nil, sigma = nil)
# File lib/lr_linear_regression.rb, line 120
def normalize_data(x_data, mu = nil, sigma = nil)
  row_size = x_data.size
  column_count = x_data[0].is_a?( Array ) ? x_data[0].size : 1

  x_norm = Array.new(row_size)
  @mu = Array.new(column_count)
  @sigma = Array.new(column_count)

  0.upto(column_count - 1) do |column|
    column_data = x_data.map{ |e| e[column] }
    @mu[column] = column_data.inject{ |sum, e| sum + e } / row_size
    @sigma[column] = (column_data.max - column_data.min)
  end

  0.upto(row_size-1) do |row|
    row_data = x_data[row]
    x_norm[row] = Array.new(column_count)
    row_data.each_index do |i|
      x_norm[row][i] = (row_data[i] - @mu[i]) / @sigma[i].to_f
    end
  end

  return x_norm
end
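This is mean-and-range scaling: each column is shifted by its mean (mu) and divided by its range (sigma = max - min). A hand-worked illustration of one column, with illustrative values:

column = [10, 20, 30, 40]
mu     = column.inject { |sum, e| sum + e } / column.size # => 25
sigma  = column.max - column.min                          # => 30
column.map { |v| (v - mu) / sigma.to_f }
# => [-0.5, -0.1666..., 0.1666..., 0.5]

Note that the mu and sigma parameters in the signature are unused as written; the method always recomputes both from x_data.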