class OmniCat::Classifiers::Bayes
Attributes
k_value[RW]
Public Class Methods
new(bayes_hash = {})
click to toggle source
Calls superclass method
# File lib/omnicat/classifiers/bayes.rb, line 9
#
# Builds a classifier, optionally restoring state from a hash.
#
# ==== Parameters
#
# * +bayes_hash+ - Options hash, forwarded unchanged to the superclass
#   initializer. Every name/value pair under +:categories+ is wrapped in a
#   BayesInternals::Category and stored under its name. +:k_value+ sets the
#   k value and falls back to 1.0 when absent.
def initialize(bayes_hash = {})
  super(bayes_hash)

  # A missing :categories key behaves like an empty collection.
  bayes_hash.fetch(:categories, {}).each do |name, category|
    @categories[name] =
      ::OmniCat::Classifiers::BayesInternals::Category.new(category)
  end

  @k_value = bayes_hash[:k_value] || 1.0
end
Public Instance Methods
add_category(category_name)
click to toggle source
Adds a new classification category.
Parameters
-
category_name
- Name for category
Examples
# Create a classification category
bayes = Bayes.new
bayes.add_category("positive")
# File lib/omnicat/classifiers/bayes.rb, line 30
#
# Registers a new, empty classification category.
#
# ==== Parameters
#
# * +category_name+ - Name for the category
#
# ==== Raises
#
# * +StandardError+ - when a category with this name is already registered
def add_category(category_name)
  # Refuse duplicates before touching any counters.
  if category_exists?(category_name)
    raise StandardError,
          "Category with name '#{category_name}' is already exists!"
  end

  increment_category_count
  @categories[category_name] =
    ::OmniCat::Classifiers::BayesInternals::Category.new(name: category_name)
end
classify(doc_content)
click to toggle source
Classify the given document
Parameters
-
doc_content
- The document for classification
Returns
-
result
- OmniCat::Result object
Examples
# Classify a document
bayes.classify("good documentation")
# => an OmniCat::Result object
# File lib/omnicat/classifiers/bayes.rb, line 106
#
# Scores the document against every category and returns the result.
#
# ==== Parameters
#
# * +doc_content+ - The document for classification
#
# ==== Returns
#
# * +result+ - OmniCat::Result object, or +nil+ when +classifiable?+ is false
def classify(doc_content)
  return unless classifiable?

  doc = ::OmniCat::Doc.new(content: doc_content)
  result = ::OmniCat::Result.new

  @categories.each do |_name, category|
    score = Score.new(key: category.name, value: doc_probability(category, doc))
    result.add_score(score)
  end

  # The top-scoring category is handed to auto_train before the
  # percentages are calculated.
  winner = @categories[result.top_score.key]
  auto_train(winner, doc)

  result.calculate_percentages
  result
end
train(category_name, doc_content)
click to toggle source
Train the desired category with a document
Parameters
-
category_name
- Name of the category from added categories list -
doc_content
- Document text
Examples
# Train the desired category
bayes.train("positive", "clear documentation")
bayes.train("positive", "good, very well")
bayes.train("negative", "bad dog")
bayes.train("neutral", "how is the management gui")
# File lib/omnicat/classifiers/bayes.rb, line 54
#
# Adds a document to a category and folds its tokens into the counters.
#
# ==== Parameters
#
# * +category_name+ - Name of a category that has already been added
# * +doc_content+ - Document text
#
# ==== Raises
#
# * +StandardError+ - when the category does not exist
def train(category_name, doc_content)
  category_must_exist(category_name)

  doc = add_doc(category_name, doc_content)
  doc.tokens.each do |token, count|
    # The global counters are updated first: the unique-token bookkeeping
    # checks whether any category already holds this token.
    increment_token_counts(category_name, token, count)

    category_tokens = @categories[category_name].tokens
    category_tokens[token] = category_tokens[token].to_i + count
  end

  increment_doc_counts(category_name)
  update_priors
end
untrain(category_name, doc_content)
click to toggle source
Untrain the desired category with a document
Parameters
-
category_name
- Name of the category from added categories list -
doc_content
- Document text
Examples
# Untrain the desired category
bayes.untrain("positive", "clear documentation")
bayes.untrain("positive", "good, very well")
bayes.untrain("negative", "bad dog")
bayes.untrain("neutral", "how is the management gui")
# File lib/omnicat/classifiers/bayes.rb, line 79
#
# Removes a document from a category and subtracts its tokens from the
# counters.
#
# ==== Parameters
#
# * +category_name+ - Name of a category that has already been added
# * +doc_content+ - Document text
#
# ==== Raises
#
# * +StandardError+ - when the category does not exist, or when the document
#   is not found in it (raised by +remove_doc+)
def untrain(category_name, doc_content)
  category_must_exist(category_name)

  doc = remove_doc(category_name, doc_content)
  doc.tokens.each do |token, count|
    category_tokens = @categories[category_name].tokens
    category_tokens[token] = category_tokens[token].to_i - count
    category_tokens.delete(token) if category_tokens[token] == 0

    # The global counters are updated last: the unique-token bookkeeping
    # must see the token already removed from this category.
    decrement_token_counts(category_name, token, count)
  end

  decrement_doc_counts(category_name)
  update_priors
end
Private Instance Methods
add_doc(category_name, doc_content)
click to toggle source
nodoc
# File lib/omnicat/classifiers/bayes.rb, line 201
#
# Stores a document in the category, keyed by +generate_doc_key+. A document
# already stored under that key has +increment_count+ called on it instead
# of being replaced. Returns the stored document.
def add_doc(category_name, doc_content)
  doc_key = generate_doc_key(doc_content)
  docs = @categories[category_name].docs

  existing = docs[doc_key]
  if existing
    existing.increment_count
  else
    docs[doc_key] = ::OmniCat::Doc.new(content: doc_content)
  end

  docs[doc_key]
end
auto_train(category, doc)
click to toggle source
nodoc
# File lib/omnicat/classifiers/bayes.rb, line 125
#
# Trains +category+ with +doc+ according to +OmniCat.config.auto_train+:
#
# * +:continues+ - always train
# * +:unique+ - train only when the category's docs do not already contain
#   the key +doc.content_md5+
# * anything else - do nothing
def auto_train(category, doc)
  should_train =
    case ::OmniCat.config.auto_train
    when :continues then true
    when :unique then !category.docs.key?(doc.content_md5)
    else false
    end

  train(category.name, doc.content) if should_train
end
category_must_exist(category_name)
click to toggle source
nodoc
# File lib/omnicat/classifiers/bayes.rb, line 230
#
# Raises StandardError unless +category_exists?+ reports the category as
# known; returns +nil+ otherwise.
def category_must_exist(category_name)
  return if category_exists?(category_name)

  raise StandardError,
        "Category with name '#{category_name}' does not exist!"
end
decrement_token_counts(category_name, token, count)
click to toggle source
nodoc
# File lib/omnicat/classifiers/bayes.rb, line 147
#
# Subtracts +count+ occurrences of +token+ by delegating to
# +modify_token_counts+ with the negated amount.
def decrement_token_counts(category_name, token, count)
  negated = -1 * count
  modify_token_counts(category_name, token, negated)
end
decrement_unique_token_count(token)
click to toggle source
nodoc
# File lib/omnicat/classifiers/bayes.rb, line 164
#
# Asks +modify_unique_token_count+ to lower the unique-token counter by one
# for +token+.
def decrement_unique_token_count(token)
  step = -1
  modify_unique_token_count(token, step)
end
doc_probability(category, doc)
click to toggle source
nodoc
# File lib/omnicat/classifiers/bayes.rb, line 180
#
# Score of +doc+ for +category+: the category prior multiplied by the k value
# and by +token_probability+ of every token in the document.
def doc_probability(category, doc)
  likelihood = doc.tokens.inject(@k_value) do |product, (token, count)|
    product * token_probability(category, token, count)
  end

  category.prior * likelihood
end
generate_doc_key(doc_content)
click to toggle source
nodoc
# File lib/omnicat/classifiers/bayes.rb, line 225
#
# Returns the hexadecimal MD5 digest of +doc_content+, used as the key under
# which a document is stored.
def generate_doc_key(doc_content)
  hasher = Digest::MD5.new
  hasher.update(doc_content)
  hasher.hexdigest
end
increment_token_counts(category_name, token, count)
click to toggle source
nodoc
# File lib/omnicat/classifiers/bayes.rb, line 142
#
# Adds +count+ occurrences of +token+ by delegating to
# +modify_token_counts+ with the amount unchanged.
def increment_token_counts(category_name, token, count)
  amount = count
  modify_token_counts(category_name, token, amount)
end
increment_unique_token_count(token)
click to toggle source
nodoc
# File lib/omnicat/classifiers/bayes.rb, line 159
#
# Asks +modify_unique_token_count+ to raise the unique-token counter by one
# for +token+.
def increment_unique_token_count(token)
  step = 1
  modify_unique_token_count(token, step)
end
modify_token_counts(category_name, token, count)
click to toggle source
nodoc
# File lib/omnicat/classifiers/bayes.rb, line 152
#
# Applies a signed +count+ to the classifier-wide token counter and to the
# category's token counter. The unique-token counter is asked to move by one
# step in the direction of +count+ before either counter changes.
def modify_token_counts(category_name, token, count)
  direction =
    if count < 0
      -1
    else
      1
    end
  modify_unique_token_count(token, direction)

  @token_count += count
  @categories[category_name].token_count += count
end
modify_unique_token_count(token, uniq_token_addition)
click to toggle source
nodoc
# File lib/omnicat/classifiers/bayes.rb, line 169
#
# Adjusts the unique-token counter by +uniq_token_addition+, but only when no
# category currently holds +token+. If any category has it, the counter is
# left as it is. Returns the counter's value after the call.
def modify_unique_token_count(token, uniq_token_addition)
  held_somewhere = @categories.any? do |_, category|
    category.tokens.has_key?(token)
  end

  @unique_token_count += held_somewhere ? 0 : uniq_token_addition
end
remove_doc(category_name, doc_content)
click to toggle source
nodoc
# File lib/omnicat/classifiers/bayes.rb, line 212
#
# Calls +decrement_count+ on the document stored for +doc_content+ in the
# category and drops it from the category once its count reaches zero.
# Returns the document. Raises StandardError when no such document is stored.
def remove_doc(category_name, doc_content)
  doc_key = generate_doc_key(doc_content)
  docs = @categories[category_name].docs
  doc = docs[doc_key]

  raise StandardError, "Document is not found in #{category_name} documents!" unless doc

  doc.decrement_count
  docs.delete(doc_key) if doc.count == 0
  doc
end
token_probability(category, token, count)
click to toggle source
nodoc
# File lib/omnicat/classifiers/bayes.rb, line 189
#
# Probability contribution of +token+, seen +count+ times in the document,
# for +category+.
#
# * Token unknown to the category: k / (unique tokens * count)
# * Otherwise: count * (category token hits + k) /
#   (category token count + unique tokens)
#
# Both quotients use +fdiv+ so the result is a Float even when +k_value+ has
# been set to an Integer; plain +/+ would then truncate to 0.
def token_probability(category, token, count)
  token_hits = category.tokens[token].to_i

  if token_hits == 0
    @k_value.fdiv(@unique_token_count * count)
  else
    count * (token_hits + @k_value).fdiv(category.token_count + @unique_token_count)
  end
end
update_priors()
click to toggle source
nodoc
# File lib/omnicat/classifiers/bayes.rb, line 135
#
# Recomputes every category's prior as its share of all documents:
# category.doc_count / doc_count.
#
# When the total document count is zero, every prior is set to 0.0.
# Dividing by 0.0 would otherwise store NaN.
def update_priors
  total_docs = doc_count.to_f

  @categories.each do |_, category|
    category.prior = total_docs.zero? ? 0.0 : category.doc_count / total_docs
  end
end