class Classifier
Constants
- STOP_WORDS
- TOLERANCE
- UNKNOWN_WORD_PROBABILITY
- UNKNOWN_WORD_STRENGTH
Public Class Methods
new(positive_corpus, negative_corpus)
click to toggle source
# File lib/engine/classifier.rb, line 23
# Builds a classifier around the two corpora it consults for token
# statistics.
#
# positive_corpus - corpus stored as @positive_corpus.
# negative_corpus - corpus stored as @negative_corpus.
def initialize(positive_corpus, negative_corpus)
  @positive_corpus, @negative_corpus = positive_corpus, negative_corpus
end
Public Instance Methods
classify(sentence)
click to toggle source
# File lib/engine/classifier.rb, line 28
# Classifies a sentence token by token and returns a ClassificationResult.
#
# Every token that is not in STOP_WORDS gets a probability derived from its
# counts in the positive and negative corpora; that probability is recorded
# into the running products and also appended to the result as a
# TokenProbability. The overall probability and sentiment are set last.
#
# sentence - the text handed to Document for tokenisation.
#
# Returns the populated ClassificationResult.
def classify sentence
  # The running products live in instance variables, so they must be cleared
  # for every sentence; otherwise a second call on the same classifier would
  # keep multiplying onto the previous sentence's products.
  @total_probability = nil
  @inverse_total_probability = nil

  result = ClassificationResult.new sentence

  Document.new(sentence).each_token do |token|
    next if STOP_WORDS.include? token

    positive_count = @positive_corpus.token_count token
    negative_count = @negative_corpus.token_count token
    positive_total = @positive_corpus.entry_count
    negative_total = @negative_corpus.entry_count

    token_probability = calculate_probability(
      positive_count, positive_total, negative_count, negative_total)

    record_probability token_probability

    result.token_probabilities.push TokenProbability.new(
      token,
      token_probability,
      positive_total,
      positive_count,
      negative_total,
      negative_count,
      calculate_sentiment(token_probability)
    )
  end

  # Nothing recorded (empty sentence, only stop words): there are no products
  # to combine, so fall back to the probability used for unknown words
  # instead of dividing nil.
  result.overall_probability =
    @total_probability.nil? ? UNKNOWN_WORD_PROBABILITY : combine_probabilities
  result.sentiment = calculate_sentiment result.overall_probability
  result
end
Private Instance Methods
calculate_probability(positive_count, positive_total, negative_count, negative_total)
click to toggle source
# File lib/engine/classifier.rb, line 58
# Computes the probability for a single token from its corpus counts.
#
# The token's relative frequency in each corpus gives a raw positive share,
# which is then averaged with UNKNOWN_WORD_PROBABILITY: the prior is weighted
# by UNKNOWN_WORD_STRENGTH and the raw share by the number of occurrences.
#
# positive_count - occurrences of the token in the positive corpus.
# positive_total - size used to normalise positive_count.
# negative_count - occurrences of the token in the negative corpus.
# negative_total - size used to normalise negative_count.
#
# Returns the weighted probability.
def calculate_probability(positive_count, positive_total, negative_count, negative_total)
  positive_ratio = positive_count.to_f / positive_total
  negative_ratio = negative_count.to_f / negative_total

  raw_share = positive_ratio / (positive_ratio + negative_ratio)
  # 0.0 / 0.0 (token seen in neither corpus) yields NaN; treat it as 0.
  probability = raw_share.nan? ? 0 : raw_share

  occurrences = positive_count + negative_count
  weighted_prior = UNKNOWN_WORD_STRENGTH * UNKNOWN_WORD_PROBABILITY

  (weighted_prior + (occurrences * probability)) / (UNKNOWN_WORD_STRENGTH + occurrences)
end
calculate_sentiment(probability)
click to toggle source
# File lib/engine/classifier.rb, line 79
# Maps a probability onto a Sentiment constant.
#
# A probability at or below UNKNOWN_WORD_PROBABILITY - TOLERANCE is NEGATIVE,
# one at or above UNKNOWN_WORD_PROBABILITY + TOLERANCE is POSITIVE, and
# anything else is NEUTRAL.
#
# probability - the value to categorise.
#
# Returns Sentiment::NEGATIVE, Sentiment::POSITIVE or Sentiment::NEUTRAL.
def calculate_sentiment(probability)
  negative_ceiling = UNKNOWN_WORD_PROBABILITY - TOLERANCE
  positive_floor = UNKNOWN_WORD_PROBABILITY + TOLERANCE

  if probability <= negative_ceiling
    Sentiment::NEGATIVE
  elsif probability >= positive_floor
    Sentiment::POSITIVE
  else
    Sentiment::NEUTRAL
  end
end
combine_probabilities()
click to toggle source
# File lib/engine/classifier.rb, line 85
# Combines the recorded token probabilities into one overall probability.
#
# The result is the running product of the probabilities divided by the sum
# of that product and the running product of their complements.
#
# Returns the combined probability, or UNKNOWN_WORD_PROBABILITY when no
# probability has been recorded yet.
def combine_probabilities
  # Both accumulators stay nil until record_probability has accepted a value;
  # dividing nil would raise NoMethodError, so answer with the probability
  # used for unknown words instead.
  return UNKNOWN_WORD_PROBABILITY if @total_probability.nil? || @inverse_total_probability.nil?

  @total_probability / (@total_probability + @inverse_total_probability)
end
record_probability(probability)
click to toggle source
# File lib/engine/classifier.rb, line 69
# Folds one token probability into the two running products.
#
# @total_probability accumulates the product of the probabilities and
# @inverse_total_probability the product of their complements. NaN values are
# ignored.
#
# probability - the token probability to record.
#
# Returns the updated complement product, or nil when the value was skipped.
def record_probability(probability)
  return if probability.nan?

  # An unset or zero accumulator restarts from 1 before multiplying.
  @total_probability = 1 if @total_probability.nil? || @total_probability.zero?
  @inverse_total_probability = 1 if @inverse_total_probability.nil? || @inverse_total_probability.zero?

  @total_probability *= probability
  @inverse_total_probability *= 1 - probability
end