class Yanbi::Fisher

Private Instance Methods

category_prob(cat, word)
# File lib/bayes/fisher.rb, line 19
# Share of +word+'s probability mass that belongs to +cat+: the word's
# probability in +cat+ divided by the sum of its probabilities over every
# entry of @categories. Returns 0 when that sum is zero.
def category_prob(cat, word)
  numerator = word_prob(cat, word)

  # Accumulate left to right with plain + so the floating-point result is
  # exactly the running total over @categories.
  denominator = 0
  @categories.each { |other| denominator += word_prob(other, word) }

  denominator.zero? ? 0 : numerator / denominator
end
invchi2(chi, df)
# File lib/bayes/fisher.rb, line 37
# Upper-tail probability of the chi-square distribution: the chance that a
# chi-square variate with +df+ degrees of freedom exceeds +chi+.
#
# Uses the closed form that holds for even +df+:
#   exp(-m) * sum(m**i / i!  for i in 0 .. df/2 - 1),  with m = chi / 2
#
# chi:: the chi-square statistic (Numeric)
# df::  degrees of freedom (Integer; the formula assumes an even value)
#
# Returns a Float capped at 1.0. If the computation raises a StandardError
# (for example a NaN comparison after an infinite +chi+, or a non-numeric
# argument) the method falls back to 1.0.
def invchi2(chi, df)
  m = chi / 2.0
  term = Math.exp(-m)
  sum = term # the i = 0 term of the series

  # The i = 0 term is already in +sum+, so only i = 1 .. df/2 - 1 remain.
  # An inclusive range here would add one term too many.
  (1...(df / 2)).each do |i|
    term *= m / i
    sum += term
  end

  # Rounding can push the series marginally above 1; clamp it.
  [sum, 1.0].min
rescue StandardError
  1.0
end
score(category, document)
# File lib/bayes/fisher.rb, line 11
# Fisher score of +document+ for +category+.
#
# Combines the weighted probability of every distinct word in the document
# into the statistic -2 * sum(ln p) and passes it to invchi2 with two
# degrees of freedom per distinct word.
#
# category:: the category to score against
# document:: an object responding to +words+ with a list of words
#
# Returns whatever invchi2 returns for the combined statistic.
def score(category, document)
  features = document.words.uniq

  # Sum the logarithms instead of taking the logarithm of the product: the
  # product of many small probabilities underflows to 0.0, which would turn
  # the statistic into Infinity. Starting from 0.0 also gives an empty
  # document a well-defined statistic of 0 rather than a nil product.
  log_total = features.inject(0.0) do |acc, word|
    acc + Math.log(weighted_prob(word, category))
  end

  invchi2(-2 * log_total, features.count * 2)
end
weighted_prob(word, category, basicprob=nil, weight=1.0, ap=0.5)
# File lib/bayes/fisher.rb, line 31
# Probability of +word+ for +category+, blended with an assumed prior so
# that rarely seen words are pulled towards +ap+.
#
# word::      the word to look up
# category::  the category being evaluated
# basicprob:: precomputed base probability; when nil it is obtained from
#             category_prob(category, word)
# weight::    how many observations the assumed probability counts for
# ap::        the assumed probability used when there is little evidence
#
# Returns (weight * ap + totals * basicprob) / (weight + totals), where
# +totals+ is the count of +word+ summed over every entry of
# @category_counts.
def weighted_prob(word, category, basicprob=nil, weight=1.0, ap=0.5)
  # Honour a caller-supplied base probability; only compute it when absent.
  basicprob = category_prob(category, word) if basicprob.nil?

  # Each entry's last element is indexed by word; missing counts become 0
  # via to_i.
  totals = @category_counts.inject(0) { |sum, entry| sum + entry.last[word].to_i }

  ((weight * ap) + (totals * basicprob)) / (weight + totals)
end
word_prob(cat, word)
# File lib/bayes/fisher.rb, line 26
# Relative frequency of +word+ within +cat+: the word's count in
# @category_counts[cat] divided by @category_sizes[cat].
#
# cat::  key into @category_counts and @category_sizes
# word:: the word to look up
#
# Always returns a Float. A word with no recorded count, or a category
# whose size is zero, yields 0.0 instead of dividing by zero.
def word_prob(cat, word)
  size = @category_sizes[cat]
  # Guard first: with an Integer count of 0 and an Integer size of 0 the
  # division would raise ZeroDivisionError.
  return 0.0 if size.zero?

  @category_counts[cat].fetch(word, 0).to_f / size
end