class Bayes::Category

Constants

MIN_SCORE

Public Class Methods

new() click to toggle source
# File lib/bayes/category.rb, line 5
# Builds a new category in its empty state by delegating to #reset.
def initialize
  reset
end

Public Instance Methods

apply_weighting(coeff) click to toggle source
# File lib/bayes/category.rb, line 29
# Multiplies the weight of every word returned by #top_words (called with
# its default limit) by +coeff+, one word at a time through
# #apply_weighting_for.
#
# Returns the list of words that was iterated.
def apply_weighting(coeff)
  top_words.each { |term| apply_weighting_for(term, coeff) }
end
apply_weighting_for(word, coeff) click to toggle source
# File lib/bayes/category.rb, line 35
# Scales the stored weight of +word+ by +coeff+ and shifts the running
# total (@words_count) by the same difference, so the total stays in step
# with the per-word weights.
#
# Words with no stored weight are left alone and +nil+ is returned;
# otherwise the updated total is returned.
def apply_weighting_for(word, coeff)
  previous = @words[word]
  return unless previous

  scaled = previous * coeff
  @words[word] = scaled
  @words_count += scaled - previous
end
blank?() click to toggle source
# File lib/bayes/category.rb, line 63
# True when the running total of word weights is zero, i.e. the category
# currently holds no training weight.
def blank?
  @words_count.zero?
end
forget(text) click to toggle source
# File lib/bayes/category.rb, line 21
# Takes back the word counts of +text+ from this category (the inverse of
# #train).
#
# +text+ must respond to +word_hash+, which is iterated as
# word => occurrence count pairs.
#
# For each word, at most the weight currently stored is removed: a word the
# category never learned is skipped, and forgetting more occurrences than
# are stored simply removes the word. This keeps every stored weight
# positive and keeps @words_count in step with what was actually removed
# (negative weights would otherwise make #score_for call Math.log on a
# negative ratio).
#
# Returns the iterated word hash.
def forget(text)
  text.word_hash.each do |word, count|
    known = @words[word]
    next unless known # never learned this word: nothing to take back

    # Clamp to the stored weight; no `.to_i`, so fractional weights left by
    # #apply_weighting_for are not truncated.
    removed = [known, count].min
    remaining = known - removed

    if remaining > 0
      @words[word] = remaining
    else
      @words.delete(word)
    end
    @words_count -= removed
  end
end
reset() click to toggle source
# File lib/bayes/category.rb, line 9
# Discards everything the category has learned: the word => weight table
# is emptied and the running total of weights goes back to zero.
def reset
  # Per-word weights, keyed by word.
  @words = {}
  # Running total, adjusted alongside @words by #train, #forget and
  # #apply_weighting_for.
  @words_count = 0
end
score_for(words) click to toggle source
# File lib/bayes/category.rb, line 46
# Scores +words+ against this category as a sum of log-ratios.
#
# +words+ is either an Array of words or an object responding to
# +word_hash+, in which case the keys of that hash are used.
#
# Each word contributes log(weight / total), where +total+ is the running
# total of weights and a word with no stored weight uses MIN_SCORE (defined
# elsewhere in the class) as its weight. A word list with no truthy entries
# scores as a single unknown word.
#
# Returns -Float::INFINITY when the category's total weight is not
# positive.
def score_for(words)
  return -Float::INFINITY unless @words_count > 0

  words = words.word_hash.keys unless words.is_a? Array
  # Always divide by a Float: with Integer operands `/` truncates, which
  # would turn MIN_SCORE / total into 0 and its log into -Infinity.
  total = @words_count.to_f

  return Math.log(MIN_SCORE / total) unless words.any?

  words.map do |word|
    weight = @words[word] || MIN_SCORE
    Math.log(weight / total)
  end.inject(:+)
end
top_words(num = 100) click to toggle source
# File lib/bayes/category.rb, line 42
# Lists up to +num+ words (100 by default), ordered from the heaviest
# stored weight to the lightest.
def top_words(num = 100)
  ranked = @words.sort_by { |_term, weight| -weight }
  ranked[0, num].map(&:first)
end
train(text) click to toggle source
# File lib/bayes/category.rb, line 14
# Learns the words of +text+ for this category.
#
# +text+ must respond to +word_hash+, which is iterated as
# word => occurrence count pairs. Each count is added to the word's stored
# weight (starting from 0 for a new word) and to the running total.
#
# Returns the iterated word hash.
def train(text)
  text.word_hash.each do |word, count|
    # `|| 0` rather than `.to_i`: a weight made fractional by
    # #apply_weighting_for must not be truncated here, or the per-word
    # weights and @words_count drift apart.
    @words[word] = (@words[word] || 0) + count
    @words_count += count
  end
end