class FisherClassifier::Classifier

Public Class Methods

new(config) click to toggle source
# File lib/fisher_classifier/classifier.rb, line 6
# Creates a classifier bound to +config+.
#
# The object is stored as-is in @config; the method_missing defined in this
# class later sends it +respond_to?+, +call+ and +get+.
#
# @param config the configuration/storage object to delegate to
def initialize(config)
  @config = config
end

Public Instance Methods

classify(text) click to toggle source
# File lib/fisher_classifier/classifier.rb, line 17
# Picks the category whose Fisher score for +text+ is highest.
#
# The search starts from +default_category+ with +fisher_threshold+ as the
# score to beat, so a category only wins when its score is strictly greater
# than the threshold and than every category seen before it (on ties the
# earlier category is kept).
#
# NOTE(review): get_features, default_category, fisher_threshold and
# categories are not defined in the visible source; presumably they are
# resolved through method_missing against @config -- confirm.
#
# @param text the input handed to +get_features+
# @return the winning category, or +default_category+ when none beats the
#   threshold
def classify(text)
  features = get_features(text)
  winner = default_category
  top_score = fisher_threshold

  categories.each do |candidate|
    score = fisher_prob(candidate, features)
    next unless score > top_score

    winner = candidate
    top_score = score
  end

  winner
end
train(text, category) click to toggle source
# File lib/fisher_classifier/classifier.rb, line 10
# Records +text+ as a training example for +category+.
#
# For every feature returned by +get_features+, the feature counter and the
# category counter are both incremented, in that order.
#
# NOTE(review): inc_category runs once per feature, not once per text, so the
# category count grows with the number of features (and stays untouched for a
# text without features). Confirm this is the intended counting model.
#
# @param text the training input handed to +get_features+
# @param category the category the text belongs to
# @return whatever +get_features(text).each+ returns
def train(text, category)
  tokens = get_features(text)

  tokens.each do |token|
    inc_feature(token, category)
    inc_category(category)
  end
end

Private Instance Methods

category_prob(category, feature) click to toggle source
# File lib/fisher_classifier/classifier.rb, line 73
# Share of +feature+'s probability mass that belongs to +category+:
# feature_prob for this category divided by the sum of feature_prob over all
# categories.
#
# A zero feature_prob is returned unchanged, which also avoids calling
# feature_freqsum (and dividing) in that case.
#
# @param category the category being scored
# @param feature the feature being scored
# @return [Numeric] the zero value from feature_prob, or the normalised ratio
def category_prob(category, feature)
  in_category = feature_prob(feature, category)

  if in_category.zero?
    in_category
  else
    in_category / feature_freqsum(feature, category)
  end
end
feature_freqsum(feature, category) click to toggle source
# File lib/fisher_classifier/classifier.rb, line 80
# Adds up feature_prob(feature, c) over every category c.
#
# The +category+ argument is accepted but not used by the computation.
#
# @param feature the feature whose per-category probabilities are summed
# @param category unused
# @return [Numeric, nil] the sum, or nil when +categories+ is empty
def feature_freqsum(feature, category)
  categories
    .map { |name| feature_prob(feature, name) }
    .reduce { |total, prob| total + prob }
end
feature_in_all_categories(feature) click to toggle source
# File lib/fisher_classifier/classifier.rb, line 68
# Total of features_count(feature, c) across every category c.
#
# @param feature the feature to total
# @return [Numeric, nil] the combined count, or nil when +categories+ is empty
def feature_in_all_categories(feature)
  per_category = categories.map { |name| features_count(feature, name) }
  per_category.inject { |total, count| total + count }
end
feature_prob(feature, category) click to toggle source
# File lib/fisher_classifier/classifier.rb, line 54
# Ratio of features_count(feature, category) to category_count(category).
#
# When the category count is zero that zero is returned as-is, so no division
# takes place and features_count is not consulted.
#
# @param feature the feature to look up
# @param category the category to look up
# @return [Numeric] the zero category count, or a Float ratio
def feature_prob(feature, category)
  total = category_count(category)

  if total.zero?
    total
  else
    features_count(feature, category) / total.to_f
  end
end
fisher_factor(probs_multiply) click to toggle source
# File lib/fisher_classifier/classifier.rb, line 44
# Converts a product of probabilities into the statistic -2 * ln(product).
#
# @param probs_multiply [Numeric] the product of probabilities
# @return [Float] -2 times the natural logarithm of the argument
def fisher_factor(probs_multiply)
  log_of_product = Math.log(probs_multiply)
  -2 * log_of_product
end
fisher_prob(category, features) click to toggle source
# File lib/fisher_classifier/classifier.rb, line 36
# Fisher score of +category+ for a list of features.
#
# The per-feature weighted probabilities are multiplied (probs_multiply),
# turned into a statistic (fisher_factor) and passed to invchi2 with
# features.size * 2 as the degrees of freedom.
#
# @param category the category being scored
# @param features a collection of features; must respond to +size+
# @return the value produced by +invchi2+
def fisher_prob(category, features)
  product = probs_multiply(features, category)
  chi = fisher_factor(product)
  degrees_of_freedom = features.size * 2

  invchi2(chi, degrees_of_freedom)
end
invchi2(chi, df) click to toggle source
# File lib/fisher_classifier/classifier.rb, line 85
# Upper-tail probability of the chi-square distribution for an even number of
# degrees of freedom, capped at 1.0.
#
# For even +df+ the tail has the closed form
#
#   P(X > chi) = exp(-m) * sum(m**i / i!  for i in 0 .. df/2 - 1),  m = chi / 2
#
# The i = 0 term seeds the sum, so the loop must add exactly df/2 - 1 further
# terms. An inclusive upper bound of df/2 would add one term too many and
# yield the tail for df + 2 degrees of freedom.
#
# @param chi [Numeric] the chi-square statistic
# @param df [Integer] degrees of freedom (expected to be even)
# @return [Float] the tail probability, never greater than 1.0
def invchi2(chi, df)
  half_chi = chi / 2.0
  term = Math.exp(-half_chi)
  sum = term

  # Exclusive range: terms i = 1 .. df/2 - 1.
  (1...(df / 2)).each do |i|
    term *= half_chi / i
    sum += term
  end

  # Rounding can push the sum marginally above 1.
  [sum, 1.0].min
end
method_missing(key, *values, &block) click to toggle source
# File lib/fisher_classifier/classifier.rb, line 97
# Routes every otherwise-undefined method call to @config.
#
# If @config.respond_to?(key, values) is truthy the call is forwarded as
# @config.call(key, *values); otherwise @config.get(key) is returned. The
# block argument is accepted but never forwarded.
#
# NOTE(review): +values+ (an Array) is passed as the second argument of
# respond_to?; this presumably relies on @config defining its own
# respond_to? -- confirm against the config class.
#
# @param key [Symbol] name of the missing method
# @param values [Array] arguments of the original call
# @return the result of @config.call or @config.get
def method_missing(key, *values, &block)
  return @config.call(key, *values) if @config.respond_to?(key, values)

  @config.get(key)
end
probs_multiply(features, category) click to toggle source
# File lib/fisher_classifier/classifier.rb, line 48
# Product of weighted_prob(feature, category) over all +features+.
#
# @param features a collection of features
# @param category the category being scored
# @return [Numeric] the product, or 0 when +features+ is empty
def probs_multiply(features, category)
  features.map { |feature| weighted_prob(feature, category) }.inject(:*) || 0
end
weighted_prob(feature, category) click to toggle source
# File lib/fisher_classifier/classifier.rb, line 61
# Blends the observed category_prob with +assumed_prob+:
#
#   (weight * assumed_prob + totals * category_prob) / (weight + totals)
#
# where +totals+ is the feature's count across all categories. The fewer
# times a feature has been counted, the closer the result stays to
# +assumed_prob+.
#
# NOTE(review): weight and assumed_prob are not defined in the visible
# source; presumably they come from @config via method_missing -- confirm.
#
# @param feature the feature being scored
# @param category the category being scored
# @return [Float] the weighted probability
def weighted_prob(feature, category)
  observed = category_prob(category, feature)
  seen = feature_in_all_categories(feature)

  numerator = weight * assumed_prob + seen * observed
  denominator = (weight + seen).to_f

  numerator / denominator
end