class AdaBoost::FeaturesAnalyzer

Public Class Methods

new(y_index)
# File lib/adaboost/features_analyzer.rb, line 10
# Creates an analyzer bound to the position of the label inside each sample.
#
# @param y_index [Integer] index of the label value within a sample row.
#   analyze also uses this value as the number of feature columns, i.e. the
#   features are the entries at indices 0...y_index.
def initialize(y_index)
  @y_index = y_index
end

Public Instance Methods

analyze(samples)
# File lib/adaboost/features_analyzer.rb, line 14
# Computes per-feature summary statistics and the label distribution of
# +samples+.
#
# Every sample is an indexable row: the entries at indices 0...@y_index are
# treated as feature values and the entry at @y_index as the label. A label
# equal to -1 is counted as negative; any other label is counted as positive.
#
# For each feature the minimum, maximum, sum, mean (avg), range (rng), sample
# variance (vrn, divided by n - 1) and standard deviation (std) are filled in.
# When only one sample is given the variance and standard deviation are
# reported as 0.0 instead of the NaN that dividing by n - 1 = 0 would yield.
#
# @param samples [Array<Array<Numeric>>] rows to analyze; only the first row's
#   size is validated against @y_index.
# @return [Analyze] object carrying +statistics+ (one FeatureStatistic per
#   feature, in column order) and +distribution+ (negative/positive counts).
# @raise [ArgumentError] if +samples+ is empty, if @y_index is below 1, or if
#   the first sample is too short to hold the features plus the label.
def analyze(samples)
  number_of_samples = samples.size
  raise ArgumentError, 'At least one sample is needed to analyze.' if number_of_samples < 1

  number_of_features = @y_index
  sample_size = samples[0].size
  if number_of_features < 1 || sample_size < 2 || sample_size <= @y_index
    raise ArgumentError, 'At least 1 feature is needed to analyze.'
  end

  distribution = Distribution.new(0, 0)
  # Start min/max at the extremes so the first observed value replaces them.
  statistics = Array.new(number_of_features) do
    FeatureStatistic.new(Float::MAX, -Float::MAX, 0, 0, 0, 0)
  end

  # First pass: label counts plus per-feature min, max and running sum.
  samples.each do |sample|
    if sample[@y_index] == -1
      distribution.negative += 1
    else
      distribution.positive += 1
    end
    statistics.each_with_index do |statistic, i|
      feature_value = sample[i]
      statistic.min = feature_value if feature_value < statistic.min
      statistic.max = feature_value if feature_value > statistic.max
      statistic.sum += feature_value
    end
  end

  statistics.each do |statistic|
    statistic.avg = statistic.sum / number_of_samples.to_f
    statistic.rng = (statistic.max - statistic.min).abs
  end

  # Second pass: accumulate squared deviations from the mean.
  samples.each do |sample|
    statistics.each_with_index do |statistic, i|
      statistic.vrn += (statistic.avg - sample[i])**2
    end
  end

  # Sample variance divides by (n - 1); clamp the divisor to 1 so a single
  # sample gives 0.0 rather than 0.0 / 0.0 = NaN.
  divisor = [number_of_samples - 1, 1].max.to_f
  statistics.each do |statistic|
    statistic.vrn /= divisor
    statistic.std = Math.sqrt(statistic.vrn)
  end

  result = Analyze.new
  result.statistics = statistics
  result.distribution = distribution
  result
end
relations(x, y, samples, statistics)
# File lib/adaboost/features_analyzer.rb, line 70
# Measures how the values at indices +x+ and +y+ vary together across
# +samples+.
#
# The covariance is the sum of the products of each value's deviation from its
# mean (taken from statistics[x].avg and statistics[y].avg), divided by
# samples.size - 1. The correlation is that covariance divided by the product
# of statistics[x].std and statistics[y].std.
#
# @param x [Integer] index of the first variable in each sample and in +statistics+
# @param y [Integer] index of the second variable in each sample and in +statistics+
# @param samples [Array] rows whose entries at +x+ and +y+ respond to to_f
# @param statistics [Array] per-index objects exposing +avg+ and +std+
# @return [VariableRelations] built from x, y, the covariance and the correlation
def relations(x, y, samples, statistics)
  # Plain left-to-right accumulation starting from 0.0.
  deviation_products = samples.inject(0.0) do |total, row|
    total + (row[x].to_f - statistics[x].avg) * (row[y].to_f - statistics[y].avg)
  end

  covariance = deviation_products / (samples.size - 1).to_f
  std_product = (statistics[x].std * statistics[y].std).to_f
  correlation = covariance / std_product

  VariableRelations.new(x, y, covariance, correlation)
end