class Classifier

Constants

STOP_WORDS
TOLERANCE
UNKNOWN_WORD_PROBABILITY
UNKNOWN_WORD_STRENGTH

Public Class Methods

new(positive_corpus, negative_corpus) click to toggle source
# File lib/engine/classifier.rb, line 23
# Builds a classifier backed by two corpora.
#
# positive_corpus - object stored as the source of positive token/entry counts.
# negative_corpus - object stored as the source of negative token/entry counts.
#
# Both arguments are stored as-is; nothing is validated or copied here.
def initialize(positive_corpus, negative_corpus)
  @positive_corpus, @negative_corpus = positive_corpus, negative_corpus
end

Public Instance Methods

classify(sentence) click to toggle source
# File lib/engine/classifier.rb, line 28
# Classifies a sentence from the per-token probabilities of its tokens.
#
# Every token yielded by Document#each_token that is not in STOP_WORDS is
# looked up in both corpora, scored with calculate_probability, folded into
# the running products via record_probability, and appended to the result's
# token_probabilities list.
#
# sentence - the text to classify; passed to ClassificationResult.new and
#            Document.new unchanged.
#
# Returns the ClassificationResult with token_probabilities,
# overall_probability and sentiment filled in.
def classify(sentence)
  result = ClassificationResult.new(sentence)

  # Start each run from the neutral product. Without this the accumulators
  # keep the products of the previous call, so classifying a second sentence
  # on the same instance is skewed by the first one. Seeding with 1.0 also
  # means a sentence with no usable tokens combines to 0.5 instead of
  # failing on nil.
  @total_probability = 1.0
  @inverse_total_probability = 1.0

  Document.new(sentence).each_token do |token|
    next if STOP_WORDS.include?(token)

    positive_count = @positive_corpus.token_count(token)
    negative_count = @negative_corpus.token_count(token)

    token_probability = calculate_probability(
      positive_count, @positive_corpus.entry_count,
      negative_count, @negative_corpus.entry_count
    )

    record_probability(token_probability)

    result.token_probabilities.push(
      TokenProbability.new(
        token, token_probability, @positive_corpus.entry_count,
        positive_count, @negative_corpus.entry_count,
        negative_count, calculate_sentiment(token_probability)
      )
    )
  end

  result.overall_probability = combine_probabilities
  result.sentiment = calculate_sentiment(result.overall_probability)

  result
end

Private Instance Methods

calculate_probability(positive_count, positive_total, negative_count, negative_total) click to toggle source
# File lib/engine/classifier.rb, line 58
# Scores a token from how often it appears in each corpus.
#
# positive_count - occurrences of the token in the positive corpus.
# positive_total - size of the positive corpus, used to normalise the count.
# negative_count - occurrences of the token in the negative corpus.
# negative_total - size of the negative corpus, used to normalise the count.
#
# Returns the average of UNKNOWN_WORD_PROBABILITY (weighted by
# UNKNOWN_WORD_STRENGTH) and the observed positive share (weighted by the
# token's total number of occurrences).
def calculate_probability(positive_count, positive_total, negative_count, negative_total)
  occurrences = positive_count + negative_count
  positive_ratio = positive_count.to_f / positive_total
  negative_ratio = negative_count.to_f / negative_total

  # Share of the normalised frequency that belongs to the positive corpus.
  # A token absent from both corpora yields 0.0 / 0.0 (NaN); treat it as 0 so
  # the result falls back to the prior below.
  observed = positive_ratio / (positive_ratio + negative_ratio)
  observed = 0 if observed.nan?

  weighted_sum = (UNKNOWN_WORD_STRENGTH * UNKNOWN_WORD_PROBABILITY) + (occurrences * observed)
  weighted_sum / (UNKNOWN_WORD_STRENGTH + occurrences)
end
calculate_sentiment(probability) click to toggle source
# File lib/engine/classifier.rb, line 79
# Maps a probability onto a Sentiment constant.
#
# probability - numeric score to bucket.
#
# Returns Sentiment::NEGATIVE when the score is at or below
# UNKNOWN_WORD_PROBABILITY - TOLERANCE, Sentiment::POSITIVE when it is at or
# above UNKNOWN_WORD_PROBABILITY + TOLERANCE, and Sentiment::NEUTRAL otherwise.
def calculate_sentiment(probability)
  if probability <= (UNKNOWN_WORD_PROBABILITY - TOLERANCE)
    Sentiment::NEGATIVE
  elsif probability >= (UNKNOWN_WORD_PROBABILITY + TOLERANCE)
    Sentiment::POSITIVE
  else
    Sentiment::NEUTRAL
  end
end
combine_probabilities() click to toggle source
# File lib/engine/classifier.rb, line 85
# Combines the two running products into a single score.
#
# Returns the recorded product of token probabilities divided by the sum of
# that product and the product of their complements. Both accumulators must
# already hold numbers; nothing is defaulted here.
def combine_probabilities
  positive_product = @total_probability
  combined = positive_product + @inverse_total_probability
  positive_product / combined
end
record_probability(probability) click to toggle source
# File lib/engine/classifier.rb, line 69
# Folds one token probability into the running products.
#
# probability - Float score for a single token; NaN values are ignored.
#
# An accumulator that is nil or zero is re-seeded with 1 before multiplying.
# @total_probability is multiplied by the probability and
# @inverse_total_probability by its complement.
#
# Returns nil for NaN input, otherwise the updated inverse product.
def record_probability(probability)
  unless probability.nan?
    @total_probability = 1 if @total_probability.nil? || @total_probability == 0
    @inverse_total_probability = 1 if @inverse_total_probability.nil? || @inverse_total_probability == 0

    @total_probability *= probability
    @inverse_total_probability *= (1 - probability)
  end
end