class Spell::Spell

Public Class Methods

new(*args) click to toggle source
# File lib/spell/spell.rb, line 3
# Builds a spell checker around a dictionary.
#
# args[0] - the dictionary: an Array of words, or a Hash whose keys are the
#           words and whose values are their usage figures.
# args[1] - optional usage weight, stored as @alpha (Hash dictionaries
#           only; defaults to 0.3).
#
# Raises RuntimeError when more than three arguments are given, when a usage
# weight accompanies an Array dictionary, or when the first argument is
# neither an Array nor a Hash.
def initialize(*args)
  fail "Too many arguments given" if args.count > 3

  dictionary, usage_weight = args

  case dictionary
  when Hash
    @alpha = usage_weight || 0.3
  when Array
    fail "Word usage weights do not make sense with an Array" if usage_weight
  else
    fail "First argument must be an Array or Hash"
  end

  # Only reached once the dictionary has been validated above.
  @word_list = dictionary
end

Public Instance Methods

best_match(given_word) click to toggle source

Returns the closest matching word in the dictionary

# File lib/spell/spell.rb, line 18
# Finds the dictionary word that scores highest against +given_word+.
#
# Each dictionary word is scored by comparing its bigrams with those of
# +given_word+. When the dictionary is a Hash, the scores are additionally
# adjusted by word usage through apply_usage_weights.
#
# given_word - the (possibly misspelled) word to look up.
#
# Returns the best-scoring dictionary word, or nil when the dictionary
# contains no words.
def best_match(given_word)
  words = @word_list.is_a?(Array) ? @word_list : @word_list.keys

  # With nothing to compare against, max_by below would return nil and the
  # trailing .first would raise NoMethodError.
  return nil if words.empty?

  given_bigrams = bigramate(given_word)
  scored_pairs = words.map do |candidate|
    [candidate, bigram_compare(given_bigrams, bigramate(candidate))]
  end
  scores = Hash[scored_pairs]

  # Weight by word usage, which only exists for Hash dictionaries.
  scores = apply_usage_weights(scores) if @word_list.is_a?(Hash)

  scores.max_by { |_word, score| score }.first
end
compare(word1, word2) click to toggle source

Returns a value from 0.0 to 1.0 indicating how similar these two words are

# File lib/spell/spell.rb, line 43
# Scores the similarity of two words by splitting each into bigrams and
# handing both bigram lists to bigram_compare.
#
# Returns whatever bigram_compare returns for the two bigram lists.
def compare(word1, word2)
  first_bigrams = bigramate(word1)
  second_bigrams = bigramate(word2)

  bigram_compare(first_bigrams, second_bigrams)
end
spelled_correctly?(word) click to toggle source

Returns a boolean for whether or not 'word' is in the dictionary

# File lib/spell/spell.rb, line 34
# Reports whether +word+ is present in the dictionary.
#
# A single include? call serves both dictionary shapes: Array#include? scans
# the words, and Hash#include? tests the keys directly, so no intermediate
# keys array has to be built and searched on every lookup.
#
# Returns true or false.
def spelled_correctly?(word)
  @word_list.include?(word)
end

Private Instance Methods

apply_usage_weights(word_hash) click to toggle source

For each word, adjust its score by usage

v = s * (1 - a) + u * a, where v is the new value, a is @alpha, s is the bigram score (0..1), and u is the usage score (0..1).

# File lib/spell/spell.rb, line 89
# Blends every word's bigram score with how heavily the word is used:
#
#   v = s * (1 - a) + u * a
#
# where s is the bigram score, a is @alpha, and u is the word's entry in
# @word_list divided by the largest value in @word_list.
#
# word_hash - Hash mapping each word to its bigram score.
#
# Returns a new Hash mapping each word to its blended score.
def apply_usage_weights(word_hash)
  peak_usage = @word_list.values.max.to_f
  # Keep the division below defined when the largest usage is zero
  # (nil.to_f is also 0.0, which covers a dictionary without entries).
  peak_usage = 1 if peak_usage == 0

  word_hash.each_with_object({}) do |(word, bigram_score), blended|
    relative_usage = @word_list[word].to_f / peak_usage
    blended[word] = (bigram_score * (1 - @alpha)) + (relative_usage * @alpha)
  end
end
bigram_compare(word1_bigrams, word2_bigrams) click to toggle source

Returns a value from 0 to 1 for how likely these two words are to be a match

# File lib/spell/spell.rb, line 77
# Scores how well two bigram lists match: the number of matching bigrams
# (from num_matching) divided by the size of the longer list.
#
# word1_bigrams - Array of bigrams for the first word.
# word2_bigrams - Array of bigrams for the second word.
#
# Returns a Float; 0.0 when neither list contains any bigrams.
def bigram_compare(word1_bigrams, word2_bigrams)
  most_bigrams = [word1_bigrams.count, word2_bigrams.count].max

  # Two words shorter than two characters produce no bigrams at all. Dividing
  # 0.0 by 0 would yield NaN, which is not a usable score and cannot be
  # ordered against other scores, so report "no similarity" instead.
  return 0.0 if most_bigrams.zero?

  num_matching(word1_bigrams, word2_bigrams).to_f / most_bigrams
end
bigramate(word) click to toggle source

Returns an array of the word's bigrams (in order)

# File lib/spell/spell.rb, line 72
# Splits +word+ into its overlapping two-character slices, in order.
# A word shorter than two characters yields an empty Array.
def bigramate(word)
  pair_count = [word.length - 1, 0].max

  Array.new(pair_count) { |offset| word[offset, 2] }
end
num_matching(one_bigrams, two_bigrams, acc = 0) click to toggle source

Returns the number of matching bigrams between the two sets of bigrams

# File lib/spell/spell.rb, line 50
# Counts the bigrams shared by two bigram lists, walking both lists front to
# back so that matches are taken in order.
#
# At each step the head of each list is looked up in the other list. If
# neither head occurs in the other list, both heads are discarded. Otherwise
# the nearer of the two matches is counted and both lists are advanced past
# it.
#
# one_bigrams - Array of bigrams for the first word.
# two_bigrams - Array of bigrams for the second word.
# acc         - starting count that the matches are added to (default 0).
#
# Returns acc plus the number of matches found.
def num_matching(one_bigrams, two_bigrams, acc = 0)
  left = one_bigrams
  right = two_bigrams
  matches = acc

  until left.empty? || right.empty?
    left_hit = left.index(right[0])   # where right's head occurs in left
    right_hit = right.index(left[0])  # where left's head occurs in right

    if left_hit.nil? && right_hit.nil?
      # Neither head occurs in the other list: discard both.
      left = left.drop(1)
      right = right.drop(1)
    elsif left_hit && (right_hit.nil? || left_hit < right_hit)
      # right's head pairs with left[left_hit]; consume left through the match.
      # This branch must also be taken when it is the ONLY match: treating
      # that case as a tie would skip right_hit + 1 entries of right that were
      # never matched, making the count depend on argument order.
      matches += 1
      left = left.drop(left_hit + 1)
      right = right.drop(1)
    else
      # left's head pairs with right[right_hit]; consume right through the
      # match. right_hit is never nil here.
      matches += 1
      left = left.drop(1)
      right = right.drop(right_hit + 1)
    end
  end

  matches
end