class TLearn::EM_Gaussian

Attributes

conv_list[RW]
data_list[RW]
k_num[RW]
log_likelihood[RW]
mu_list[RW]
pi_list[RW]
real_data_list[RW]

Public Instance Methods

calc_ave(k, nk) click to toggle source
# File lib/t_learn/em.rb, line 114
# Responsibility-weighted mean vector of mixture component k.
# nk is the effective number of points assigned to component k
# (the summed responsibilities) and acts as the normalizer.
def calc_ave(k, nk)
  Array.new(@dim) do |d|
    weighted = 0.0
    @data_list.each_with_index do |data, n|
      weighted += @gamma[n][k] * data[d]
    end
    weighted / nk
  end
end
calc_conv(k, nk) click to toggle source
# File lib/t_learn/em.rb, line 125
# Responsibility-weighted covariance matrix of mixture component k.
# Entries that normalize to exactly 0.0 are replaced by 0.1 to keep the
# matrix invertible (same safeguard as before).
def calc_conv(k, nk)
  conv = Array.new(@dim) { Array.new(@dim, 0) }
  @data_list.each_with_index do |data, n|
    @dim.times do |i|
      @dim.times do |j|
        conv[i][j] += @gamma[n][k] * (data[i] - @mu_list[k][i]) * (data[j] - @mu_list[k][j])
      end
    end
  end
  conv.map do |row|
    row.map do |v|
      scaled = v / nk
      scaled == 0.0 ? 0.1 : scaled
    end
  end
end
calc_first_ave_std(x) click to toggle source
# File lib/t_learn/em.rb, line 184
# Per-dimension mean and standard deviation of the data set x
# (an array of equal-length numeric vectors).
#
# Uses float division (fdiv) so Integer input no longer truncates the
# mean — the original `sum / x.size` did integer division for int data,
# silently skewing both ave and std. A std of exactly 0.0 is clamped to
# 0.1 (as before) so downstream covariance init stays non-singular.
#
# Returns {:ave_list => [...], :std_list => [...]}.
def calc_first_ave_std(x)
  dim = x[0].size
  n = x.size

  ave_list = Array.new(dim) do |i|
    x.reduce(0.0) { |sum, vec| sum + vec[i] }.fdiv(n)
  end

  std_list = Array.new(dim) do |i|
    var = x.reduce(0.0) { |sum, vec| sum + (ave_list[i] - vec[i])**2 }.fdiv(n)
    std = Math.sqrt(var)
    std == 0.0 ? 0.1 : std  # avoid a degenerate (zero-width) component
  end

  return {:ave_list => ave_list, :std_list => std_list}
end
calc_log_likelihood() click to toggle source
# File lib/t_learn/em.rb, line 142
# Log-likelihood of the data under the current mixture parameters:
#   sum_n log( sum_k pi_k * N(x_n | mu_k, conv_k) )
def calc_log_likelihood
  @data_list.reduce(0.0) do |total, data|
    mixture = 0.0
    @k_num.times do |k|
      mixture += @pi_list[k] * gauusian(data, @mu_list[k], @conv_list[k])
    end
    total + Math.log(mixture)
  end
end
create_log(cycle, likelihood) click to toggle source
# File lib/t_learn/em.rb, line 55
# Snapshot of the current parameters for the training history.
# NOTE: clone is shallow — the nested arrays are still shared with the
# live state, so later in-place mutation would show up in old snapshots.
def create_log(cycle, likelihood)
  {
    :cycle => cycle,
    :likelihood => likelihood,
    :mu => @mu_list.clone,
    :conv => @conv_list.clone,
    :pi_list => @pi_list.clone
  }
end
e_step() click to toggle source
# File lib/t_learn/em.rb, line 89
# E step: recompute the responsibilities
#   gamma[n][k] = pi_k * N(x_n|mu_k,conv_k) / sum_j pi_j * N(x_n|mu_j,conv_j)
#
# Each weighted density is now computed once per (n, k) and reused for
# both the normalizer and the numerator — the original evaluated the
# Gaussian twice per pair, doubling the cost of the E step.
def e_step()
  @data_list.each_with_index{|data, n|
    # weighted[k] = pi_k * N(x_n | mu_k, conv_k)
    weighted = Array.new(@k_num){|k|
      @pi_list[k] * gauusian(data, @mu_list[k], @conv_list[k])
    }
    denominator = weighted.sum
    @k_num.times { |k|
      @gamma[n][k] = weighted[k] / denominator
    }
  }
end
fit(data_list, k_num) click to toggle source
# File lib/t_learn/em.rb, line 65
# Run EM on data_list with k_num components until the log-likelihood
# change between consecutive iterations drops below 1e-6.
# Returns the per-cycle history of parameter snapshots (see create_log).
def fit(data_list, k_num)
  init(data_list, k_num)
  history = []
  cycle = 0
  last_likelihood = calc_log_likelihood()
  loop do
    e_step()
    m_step()
    likelihood = calc_log_likelihood()
    converged = (likelihood - last_likelihood).abs < 0.000001
    last_likelihood = likelihood
    puts "likelihood: #{likelihood}"
    history.push(create_log(cycle, likelihood))
    cycle += 1
    break if converged
  end
  puts "===================================="
  puts "pi : #{ @pi_list }"
  puts "mu : #{ @mu_list}"
  puts "conv : #{ @conv_list}"
  return history
end
gauusian(x, mu, sigma) click to toggle source

Gaussian (normal) distribution — the method name keeps the original `gauusian` spelling for API compatibility

# File lib/t_learn/em.rb, line 157
# Gaussian density of x under (mu, sigma). For one-dimensional data the
# scalar normal pdf is evaluated directly; otherwise evaluation is
# delegated to the multivariate form.
def gauusian(x, mu, sigma)
  return gauusian_over_2dim(x, mu, sigma) if @dim > 1

  value = x[0]
  mean = mu[0]
  var = sigma[0][0]
  norm = 1.0 / (Math.sqrt(2.0 * Math::PI) * Math.sqrt(var))
  norm * Math.exp(-((value - mean)**2) / (2.0 * var))
end
gauusian_over_2dim(x, mu, conv) click to toggle source

Gaussian distribution, multivariate version for two or more dimensions

# File lib/t_learn/em.rb, line 174
# Multivariate Gaussian density N(x | mu, conv) for @dim >= 2, using the
# standard determinant/inverse formula.
def gauusian_over_2dim(x, mu, conv)
  x_vec = Matrix[x]
  mu_vec = Matrix[mu]
  cov = Matrix[*conv]
  diff = x_vec - mu_vec
  norm = 1.0 / (((2.0 * Math::PI)**(@dim / 2.0)) * (cov.det**0.5))
  exponent = (-1.0 / 2.0) * (diff * cov.inverse * diff.transpose)[0, 0]
  norm * Math.exp(exponent)
end
ini_ave(ave_list) click to toggle source
# File lib/t_learn/em.rb, line 27
# Random initial mean vector: each component of ave_list scaled by an
# independent uniform random factor in [0, 1).
def ini_ave(ave_list)
  Array.new(@dim) { |i| ave_list[i] * rand() }
end
ini_conv(std_list) click to toggle source
# File lib/t_learn/em.rb, line 35
# Initial diagonal covariance matrix built from per-dimension stds:
# row i is produced by make_array(i, std_list[i]).
def ini_conv(std_list)
  Array.new(@dim) { |i| make_array(i, std_list[i]) }
end
init(data_list, k_num) click to toggle source
# File lib/t_learn/em.rb, line 14
# Initialize the EM state for data_list with k_num mixture components.
#
# The random mixing coefficients are now normalized to sum to 1. The
# original left them unnormalized, so the very first calc_log_likelihood
# in fit() evaluated an invalid mixture (weights not a distribution);
# later iterations were unaffected only because m_step recomputes pi.
def init(data_list, k_num)
  @k_num = k_num
  @data_list = data_list
  @dim = @data_list[0].size
  # @data_list = scale(@data_list)
  data_ave_std = calc_first_ave_std(@data_list)
  # Deep copy: keep the raw data safe from any later in-place edits.
  @real_data_list = Marshal.load(Marshal.dump(@data_list))
  @mu_list = Array.new(@k_num).map{ini_ave(data_ave_std[:ave_list])}
  @conv_list = Array.new(@k_num).map{ini_conv(data_ave_std[:std_list])}
  raw_pi = Array.new(@k_num) { rand() }
  pi_total = raw_pi.sum
  @pi_list = raw_pi.map { |p| p / pi_total }
  @gamma = Array.new(@data_list.size).map{Array.new(@k_num, 0)}
end
m_step() click to toggle source
# File lib/t_learn/em.rb, line 101
# M step: re-estimate each component's mean, covariance and mixing
# coefficient from the current responsibilities in @gamma.
def m_step()
  @k_num.times do |k|
    # nk = effective number of points assigned to component k
    nk = @data_list.each_index.reduce(0.0) { |sum, n| sum + @gamma[n][k] }
    @mu_list[k] = calc_ave(k, nk)
    @conv_list[k] = calc_conv(k, nk)
    @pi_list[k] = nk / @data_list.size
  end
end
make_array(i, std) click to toggle source
# File lib/t_learn/em.rb, line 43
# Row i of a diagonal covariance matrix: std**2 at column i, 0.0 elsewhere.
def make_array(i, std)
  Array.new(@dim) { |col| col == i ? std**2 : 0.0 }
end
scale(x) click to toggle source
# File lib/t_learn/em.rb, line 211
# Z-score standardization: returns a copy of x with each dimension
# shifted to mean 0 and scaled to std 1. (Currently unused: the call in
# init is commented out.)
#
# Fixes two defects in the original:
# - Integer input no longer truncates the mean (float division via fdiv).
# - A constant dimension (std == 0.0) no longer yields NaN/Infinity: the
#   divisor is clamped to 1.0, so its scaled values come out as 0.0.
#   This mirrors the zero-std safeguard already present in
#   calc_first_ave_std.
def scale(x)
  dim = x[0].size
  n = x.size

  ave_list = Array.new(dim) do |i|
    x.reduce(0.0) { |sum, vec| sum + vec[i] }.fdiv(n)
  end

  std_list = Array.new(dim) do |i|
    std = Math.sqrt(x.reduce(0.0) { |sum, vec| sum + (ave_list[i] - vec[i])**2 }.fdiv(n))
    std == 0.0 ? 1.0 : std  # constant dimension: deviations are 0 anyway
  end

  return x.map do |vec|
    vec.each_with_index.map { |data, j| (data - ave_list[j]) / std_list[j] }
  end
end