module Spellchecker::DetectTypo

Constants

ABBREVIATION_LENGTH
ABBREVIATION_REGEXP
LENGTH_LIMIT
NUMBER_SHORTENING_SUFFIX
PROPER_NAME_REGEXP
SHORTENINGS

Public Instance Methods

abbreviation?(token) click to toggle source

@param token [Spellchecker::Tokenizer::Token]
@return [Boolean]

# File lib/spellchecker/detect_typo.rb, line 47
# Tells whether the token should be treated as an abbreviation.
#
# A token qualifies when its text matches ABBREVIATION_REGEXP, or when the
# text is no longer than ABBREVIATION_LENGTH characters and neither of its
# neighbouring tokens reports itself as a word.
#
# @param token [Spellchecker::Tokenizer::Token]
# @return [Boolean]
def abbreviation?(token)
  text = token.text

  return true if ABBREVIATION_REGEXP.match?(text)
  # Anything longer than the limit can only qualify through the regexp above.
  return false if text.length > ABBREVIATION_LENGTH

  # A short token counts only when it stands apart from words on both sides.
  !(token.prev.word? || token.next.word?)
end
call(token) click to toggle source

@param token [Spellchecker::Tokenizer::Token]
@return [Spellchecker::Mistake, nil]

# File lib/spellchecker/detect_typo.rb, line 17
# Builds a spelling Mistake for the token, or returns nil when the token is
# not reported.
#
# Nothing is reported when the text is shorter than LENGTH_LIMIT, when
# Dictionaries::TyposList has no correction for the token, when the text
# matches PROPER_NAME_REGEXP, when the token is an abbreviation or a
# shortening, when Dictionaries::EnglishWords contains the text (after
# Utils.replace_quote), or when a capitalised token is a proper noun.
#
# @param token [Spellchecker::Tokenizer::Token]
# @return [Spellchecker::Mistake, nil]
def call(token)
  text = token.text
  return if text.length < LENGTH_LIMIT

  suggestion = Dictionaries::TyposList.match_token(token)
  return unless suggestion
  return if PROPER_NAME_REGEXP.match?(text) ||
            abbreviation?(token) || shortening?(token)
  return if Dictionaries::EnglishWords.include?(Utils.replace_quote(text))

  if token.capital?
    return if proper_noun?(text)

    # Mirror the token's capitalisation on the first non-blank character.
    suggestion = suggestion.sub(/\S/, &:upcase)
  end

  Mistake.new(text: text, correction: suggestion,
              position: token.position, type: MistakeTypes::SPELLING)
end
proper_noun?(word) click to toggle source

@param word [String]
@return [Boolean]

# File lib/spellchecker/detect_typo.rb, line 39
# Tells whether the word appears in any of the proper-noun dictionaries:
# Dictionaries::HumanNames, Dictionaries::CompanyNames or
# Dictionaries::UsToponyms, consulted in that order.
#
# @param word [String]
# @return [Boolean]
def proper_noun?(word)
  # Each later dictionary is consulted only when the earlier ones missed.
  known = Dictionaries::HumanNames.include?(word)
  known ||= Dictionaries::CompanyNames.include?(word)
  known ||= Dictionaries::UsToponyms.include?(word)
  known
end
shortening?(token) click to toggle source

@param token [Spellchecker::Tokenizer::Token]
@return [Boolean]

# File lib/spellchecker/detect_typo.rb, line 57
# Tells whether the token should be treated as a shortening.
#
# That is the case when the text equals NUMBER_SHORTENING_SUFFIX and the
# previous token is a digit, or when the downcased token is listed in
# SHORTENINGS and the next token is a dot or a digit.
#
# @param token [Spellchecker::Tokenizer::Token]
# @return [Boolean]
def shortening?(token)
  numeric_suffix = token.text == NUMBER_SHORTENING_SUFFIX && token.prev.digit?
  return true if numeric_suffix
  return false unless SHORTENINGS.include?(token.downcased)

  # A listed shortening counts only when a dot or a digit follows it.
  follower = token.next
  return true if follower.dot? || follower.digit?

  false
end