class Yanbi::Fisher
Private Instance Methods
category_prob(cat, word)
click to toggle source
# File lib/bayes/fisher.rb, line 19
# Normalises word_prob(cat, word) by the sum of word_prob over every entry
# of @categories.
#
# @param cat  category key, passed through to word_prob
# @param word feature, passed through to word_prob
# @return [Float] 0.0 when the summed word_prob over @categories is zero,
#   otherwise word_prob(cat, word) divided by that sum
def category_prob(cat, word)
  # Seeding with 0.0 keeps the total (and so the result) a Float even when
  # every word_prob comes back as an Integer zero.
  total = @categories.sum(0.0) { |c| word_prob(c, word) }
  # Guard before dividing: with a zero total there is nothing to normalise
  # against, and no need to evaluate the numerator at all.
  return 0.0 if total.zero?

  word_prob(cat, word) / total
end
invchi2(chi, df)
click to toggle source
# File lib/bayes/fisher.rb, line 37
# Upper-tail probability of a chi-square statistic for an even number of
# degrees of freedom, evaluated with the closed-form series
#
#   exp(-m) * (1 + m + m^2/2! + ... + m^(df/2 - 1)/(df/2 - 1)!),  m = chi / 2
#
# @param chi [Numeric] chi-square statistic
# @param df  [Integer] degrees of freedom; only df / 2 is used, so an even
#   value is expected
# @return [Float] the series value capped at 1.0; 1.0 is also returned as a
#   fallback when the computation is not finite
def invchi2(chi, df)
  m = chi / 2.0
  # Explicit replacement for the former blanket `rescue 1.0`: an infinite or
  # NaN statistic would turn the series into NaN, so short-circuit to the same
  # fallback value instead of swallowing every exception.
  # NOTE(review): mathematically the tail probability tends to 0 as chi grows
  # without bound; 1.0 is kept here only to preserve the existing fallback.
  return 1.0 unless m.finite?

  term = Math.exp(-m)
  sum = term
  # Exclusive range: the series has df/2 terms in total and the i = 0 term is
  # already in `sum`, so the loop must stop at df/2 - 1.
  (1...df / 2).each do |i|
    term *= m / i
    sum += term
  end
  return 1.0 if sum.nan?

  [sum, 1.0].min
end
score(category, document)
click to toggle source
# File lib/bayes/fisher.rb, line 11
# Scores +document+ against +category+: combines weighted_prob of every
# distinct word into the statistic -2 * sum(log(p)) and passes it to invchi2
# with two degrees of freedom per distinct word.
#
# @param category key passed through to weighted_prob
# @param document object responding to #words with a collection of words
# @return whatever invchi2 returns for the combined statistic
def score(category, document)
  features = document.words.uniq
  # Sum the logs rather than taking the log of the product: the product of
  # many small probabilities underflows to 0.0 (giving an infinite statistic),
  # and an empty feature list has no product at all. The 0.0 seed makes an
  # empty document yield a statistic of zero instead of raising.
  log_sum = features.sum(0.0) { |word| Math.log(weighted_prob(word, category)) }
  invchi2(-2 * log_sum, features.count * 2)
end
weighted_prob(word, category, basicprob=nil, weight=1.0, ap=0.5)
click to toggle source
# File lib/bayes/fisher.rb, line 31
# Blends a word's probability for a category with an assumed prior, weighting
# the observed probability by how often the word occurs across all categories:
#
#   (weight * ap + totals * basicprob) / (weight + totals)
#
# @param word      feature to look up
# @param category  category key, passed to category_prob when basicprob is
#   not supplied
# @param basicprob observed probability to blend; computed with
#   category_prob(category, word) when nil
# @param weight    weight given to the assumed probability
# @param ap        assumed probability used when there is little or no evidence
# @return [Numeric] the blended probability
def weighted_prob(word, category, basicprob=nil, weight=1.0, ap=0.5)
  # Honour a caller-supplied value; previously the argument was always
  # overwritten, making the parameter dead.
  basicprob ||= category_prob(category, word)
  # Occurrences of the word summed over every category's count table
  # (missing entries count as 0 via nil.to_i).
  totals = @category_counts.sum { |_cat, counts| counts[word].to_i }
  ((weight * ap) + (totals * basicprob)) / (weight + totals)
end
word_prob(cat, word)
click to toggle source
# File lib/bayes/fisher.rb, line 26
# Count recorded for +word+ in +cat+ divided by the size recorded for +cat+.
#
# @param cat  key into @category_counts and @category_sizes
# @param word key into the count table of +cat+
# @return [Float] count / size; 0.0 when the word has no entry or the
#   category size is zero
def word_prob(cat, word)
  size = @category_sizes[cat].to_f
  # A zero-sized category cannot contribute any probability; returning early
  # also avoids the Integer 0 / 0 that used to raise ZeroDivisionError.
  return 0.0 if size.zero?

  counts = @category_counts[cat]
  count = counts.key?(word) ? counts[word].to_f : 0.0
  count / size
end