class FisherClassifier::Classifier
Public Class Methods
new(config)
click to toggle source
# File lib/fisher_classifier/classifier.rb, line 6
# Creates a classifier bound to the given configuration object.
#
# @param config the object stored in @config; #method_missing forwards every
#   call this class does not define itself to it
def initialize(config)
  @config = config
end
Public Instance Methods
classify(text)
click to toggle source
# File lib/fisher_classifier/classifier.rb, line 17
# Picks the category whose Fisher probability for the features of +text+ is
# the highest one that strictly exceeds +fisher_threshold+.
#
# +get_features+, +default_category+, +fisher_threshold+ and +categories+ are
# not defined in this class (presumably resolved through #method_missing).
#
# @param text the input handed to +get_features+
# @return the winning category, or +default_category+ when no category scores
#   above the threshold; on equal scores the category seen first is kept
def classify(text)
  features = get_features(text)
  winner = default_category
  top_score = fisher_threshold

  categories.each do |category|
    score = fisher_prob(category, features)
    # Strict comparison: a score equal to the current best never replaces it.
    next unless score > top_score

    winner, top_score = category, score
  end

  winner
end
train(text, category)
click to toggle source
# File lib/fisher_classifier/classifier.rb, line 10
# Records one training example: every feature extracted from +text+ is
# counted for +category+, and the category itself is counted once.
#
# +get_features+, +inc_feature+ and +inc_category+ are not defined in this
# class (presumably resolved through #method_missing).
#
# @param text the training input handed to +get_features+
# @param category the label the text belongs to
# @return the feature collection returned by +get_features+
def train(text, category)
  features = get_features(text)
  features.each { |feature| inc_feature(feature, category) }

  # The category counter is the denominator in #feature_prob, so it must grow
  # once per trained text, not once per feature, and a text that yields no
  # features still counts as one example of the category.
  inc_category(category)

  features
end
Private Instance Methods
category_prob(category, feature)
click to toggle source
# File lib/fisher_classifier/classifier.rb, line 73
# Normalises the feature's probability in +category+ by the sum of its
# probabilities over all categories.
#
# @param category the category being scored
# @param feature the feature being looked up
# @return the unchanged zero value from #feature_prob when it is zero (which
#   also avoids dividing by a zero sum), otherwise the normalised ratio
def category_prob(category, feature)
  likelihood = feature_prob(feature, category)

  if likelihood.zero?
    likelihood
  else
    likelihood / feature_freqsum(feature, category)
  end
end
feature_freqsum(feature, category)
click to toggle source
# File lib/fisher_classifier/classifier.rb, line 80
# Adds up #feature_prob of +feature+ across every entry of +categories+.
#
# @param feature the feature being looked up
# @param category accepted but not used by this method
# @return the sum of the per-category probabilities, or nil when
#   +categories+ is empty
def feature_freqsum(feature, category)
  per_category = categories.map { |candidate| feature_prob(feature, candidate) }
  per_category.reduce { |running, prob| running + prob }
end
feature_in_all_categories(feature)
click to toggle source
# File lib/fisher_classifier/classifier.rb, line 68
# Totals +features_count+ for +feature+ over every entry of +categories+.
#
# @param feature the feature being looked up
# @return the summed count, or nil when +categories+ is empty
def feature_in_all_categories(feature)
  tallies = categories.map { |candidate| features_count(feature, candidate) }
  tallies.reduce { |running, tally| running + tally }
end
feature_prob(feature, category)
click to toggle source
# File lib/fisher_classifier/classifier.rb, line 54
# Ratio of +features_count+ for the feature in the category to
# +category_count+ of that category.
#
# @param feature the feature being looked up
# @param category the category being looked up
# @return the zero category count itself when the category count is zero
#   (+features_count+ is not consulted then), otherwise a Float ratio
def feature_prob(feature, category)
  total = category_count(category)
  total.zero? ? total : features_count(feature, category) / total.to_f
end
fisher_factor(probs_multiply)
click to toggle source
# File lib/fisher_classifier/classifier.rb, line 44
# Converts a product of probabilities into minus two times its natural
# logarithm.
#
# @param probs_multiply [Numeric] the product of probabilities
# @return [Float] -2 * ln(probs_multiply); Infinity when the product is 0
def fisher_factor(probs_multiply)
  log_of_product = Math.log(probs_multiply)
  -2 * log_of_product
end
fisher_prob(category, features)
click to toggle source
# File lib/fisher_classifier/classifier.rb, line 36
# Scores +category+ for +features+: the product from #probs_multiply is turned
# into a statistic by #fisher_factor and then passed to #invchi2 with twice
# the number of features as degrees of freedom.
#
# @param category the category being scored
# @param features the features of the text being classified
# @return the value produced by #invchi2
def fisher_prob(category, features)
  product = probs_multiply(features, category)
  statistic = fisher_factor(product)
  degrees_of_freedom = features.size * 2

  invchi2(statistic, degrees_of_freedom)
end
invchi2(chi, df)
click to toggle source
# File lib/fisher_classifier/classifier.rb, line 85
# Upper-tail probability of a chi-square distribution with an even number of
# degrees of freedom, evaluated with the closed-form series
#
#   exp(-m) * (1 + m/1! + m^2/2! + ... + m^(df/2 - 1)/(df/2 - 1)!),  m = chi / 2
#
# The series has exactly df/2 terms. The first one is the initial value of
# the sum, so the loop must add df/2 - 1 more (exclusive range).
#
# @param chi [Numeric] the chi-square statistic
# @param df [Integer] degrees of freedom; the series is only exact for even
#   values (#fisher_prob always passes features.size * 2)
# @return [Float] the probability, capped at 1.0
def invchi2(chi, df)
  m = chi / 2.0

  # An infinite statistic has tail probability 0; without this guard the
  # series would evaluate 0.0 * Infinity and yield NaN.
  return 0.0 if m == Float::INFINITY

  term = Math.exp(-m)
  sum = term

  (1...(df / 2)).each do |i|
    term *= m / i
    sum += term
  end

  [sum, 1.0].min
end
method_missing(key, *values, &block)
click to toggle source
# File lib/fisher_classifier/classifier.rb, line 97
# Forwards every call this class does not define to the configuration object:
# +@config.call(key, *values)+ when +@config+ reports that it responds to
# +key+, otherwise +@config.get(key)+ (any arguments are dropped then).
#
# NOTE(review): the second argument given to +respond_to?+ is the +values+
# Array, which is always truthy; with the standard Object#respond_to? that
# means "include private methods". Confirm whether @config overrides
# +respond_to?+ with its own two-argument meaning.
# NOTE(review): +block+ is accepted but never forwarded.
#
# @param key [Symbol] name of the missing method
# @param values [Array] arguments of the original call
# @return whatever +@config.call+ or +@config.get+ returns
def method_missing(key, *values, &block)
  return @config.get(key) unless @config.respond_to?(key, values)

  @config.call(key, *values)
end
probs_multiply(features, category)
click to toggle source
# File lib/fisher_classifier/classifier.rb, line 48
# Multiplies together the #weighted_prob of every feature for +category+.
#
# @param features the features of the text being classified
# @param category the category being scored
# @return the product of the weighted probabilities, or 0 when +features+ is
#   empty (there is nothing to multiply)
def probs_multiply(features, category)
  weighted = features.map { |feature| weighted_prob(feature, category) }
  product = weighted.reduce { |running, prob| running * prob }
  product || 0
end
weighted_prob(feature, category)
click to toggle source
# File lib/fisher_classifier/classifier.rb, line 61
# Blends the observed #category_prob with the configured +assumed_prob+:
#
#   (weight * assumed_prob + totals * current_prob) / (weight + totals)
#
# where +totals+ is the feature's count over all categories. +weight+ and
# +assumed_prob+ are not defined in this class (presumably resolved through
# #method_missing).
#
# @param feature the feature being looked up
# @param category the category being scored
# @return [Float] the weighted average
def weighted_prob(feature, category)
  observed = category_prob(category, feature)
  occurrences = feature_in_all_categories(feature)

  numerator = weight * assumed_prob + occurrences * observed
  denominator = (weight + occurrences).to_f

  numerator / denominator
end