module Spellchecker::DetectDuplicate

Constants

MIN_LENGTH
SKIP_PHRASES
SKIP_PHRASE_WORDS
SKIP_WORDS

Public Instance Methods

call(token) click to toggle source

@param token [Spellchecker::Tokenizer::Token] @return [Spellchecker::Mistake, nil]

# File lib/spellchecker/detect_duplicate.rb, line 37
# Checks whether +token+ starts a duplicated word or duplicated two-word
# phrase and, if so, builds a mistake describing it.
#
# NOTE(review): assumes +#next+ on each token returns a token-like object
# rather than nil at the end of the stream - confirm against the tokenizer.
#
# @param token [Spellchecker::Tokenizer::Token]
# @return [Spellchecker::Mistake, nil] nil when no duplicate is reported
def call(token)
  return if token.text.length < MIN_LENGTH || SKIP_WORDS.include?(token.downcased)

  second = token.next
  third = second.next
  fourth = third.next

  duplicate, replacement = find_duplicate(token, second, third, fourth)

  return unless duplicate
  return if SKIP_PHRASES.include?(replacement.downcase)
  return unless Dictionaries::EnglishWords.include?(second.text)

  # Each predicate below marks the match as one that must not be reported.
  exempt = skip_phrase?(token, second, third, fourth) ||
           repetition?(token, second, third, fourth) ||
           from_to_phrase?(token, second, third) ||
           quoted?(token, second, third, fourth)
  return if exempt

  Mistake.new(text: duplicate, correction: replacement,
              position: token.position, type: MistakeTypes::DUPLICATE)
end
find_duplicate(t1, t2, t3, t4) click to toggle source

@param t1 [Spellchecker::Tokenizer::Token] @param t2 [Spellchecker::Tokenizer::Token] @param t3 [Spellchecker::Tokenizer::Token] @param t4 [Spellchecker::Tokenizer::Token] @return [Array<String>, nil] the duplicated text followed by its correction, or nil when no duplicate is found

# File lib/spellchecker/detect_duplicate.rb, line 67
# Looks for a repeated word (t1 t2) or a repeated two-word phrase
# (t1 t2 t3 t4), comparing tokens by their downcased form.
#
# The repeated part must not be capitalized or a digit: t2 is checked for
# the single-word case, t3 for the two-word case.
#
# @param t1 [Spellchecker::Tokenizer::Token]
# @param t2 [Spellchecker::Tokenizer::Token]
# @param t3 [Spellchecker::Tokenizer::Token]
# @param t4 [Spellchecker::Tokenizer::Token]
# @return [Array<String>, nil] the duplicated text followed by its
#   correction, or nil when neither pattern matches
def find_duplicate(t1, t2, t3, t4)
  first_word = t1.downcased
  second_word = t2.downcased

  if first_word == second_word && !(t2.capital? || t2.digit?)
    return [[t1.text, t2.text].join(' '), t1.text]
  end

  pair_repeated = [first_word, second_word] == [t3.downcased, t4.downcased]
  return unless pair_repeated && !(t3.capital? || t3.digit?)

  words = [t1, t2, t3, t4].map(&:text)
  [words.join(' '), words.first(2).join(' ')]
end
from_to_phrase?(t1, t2, t3) click to toggle source
# File lib/spellchecker/detect_duplicate.rb, line 99
# Tells whether the tokens form a "from X to X" construction
# (e.g. "from time to time"): the token before t1 is "from", t2 is "to",
# and t1 and t3 share the same downcased form.
#
# @param t1 [Spellchecker::Tokenizer::Token]
# @param t2 [Spellchecker::Tokenizer::Token]
# @param t3 [Spellchecker::Tokenizer::Token]
# @return [Boolean]
def from_to_phrase?(t1, t2, t3)
  return false unless t1.prev.downcased == 'from'
  return false unless t2.downcased == 'to'

  t1.downcased == t3.downcased
end
quoted?(t1, _t2, t3, t4) click to toggle source

rubocop:enable Metrics/AbcSize

# File lib/spellchecker/detect_duplicate.rb, line 95
# Tells whether the candidate sits inside double quotes: the token before
# t1 is a double quote and either t3 or t4 is a double quote as well.
#
# @param t1 [Spellchecker::Tokenizer::Token]
# @param _t2 [Spellchecker::Tokenizer::Token] unused
# @param t3 [Spellchecker::Tokenizer::Token]
# @param t4 [Spellchecker::Tokenizer::Token]
# @return [Boolean]
def quoted?(t1, _t2, t3, t4)
  quote = '"'
  return false unless t1.prev.text == quote

  [t3, t4].any? { |candidate| candidate.text == quote }
end
repetition?(t1, t2, t3, t4) click to toggle source

rubocop:disable Metrics/AbcSize

# File lib/spellchecker/detect_duplicate.rb, line 83
# Tells whether the duplicate belongs to a longer run of repeated words, by
# comparing downcased forms of t1..t4, the token before t1 and the token
# after t4. Any one of these patterns is enough:
#
# 1. t1, t3 and the token after t4 are equal;
# 2. the token before t1, t2 and t4 are equal;
# 3. the token before t1, t1 and t3 are equal;
# 4. t1 equals t2 and also equals t3, the token before t1, or t4.
#
# @param t1 [Spellchecker::Tokenizer::Token]
# @param t2 [Spellchecker::Tokenizer::Token]
# @param t3 [Spellchecker::Tokenizer::Token]
# @param t4 [Spellchecker::Tokenizer::Token]
# @return [Boolean]
def repetition?(t1, t2, t3, t4)
  current = t1.downcased

  (current == t3.downcased && current == t4.next.downcased) ||
    (t1.prev.downcased == t2.downcased && t2.downcased == t4.downcased) ||
    (t1.prev.downcased == current && current == t3.downcased) ||
    (current == t2.downcased && [t3, t1.prev, t4].any? { |other| current == other.downcased })
end
skip_phrase?(t1, t2, t3, t4) click to toggle source
# File lib/spellchecker/detect_duplicate.rb, line 75
# Tells whether the match repeats a word listed in SKIP_PHRASE_WORDS:
# either t1 and t3 share a downcased form found in that list, or t2 and t4
# do.
#
# @param t1 [Spellchecker::Tokenizer::Token]
# @param t2 [Spellchecker::Tokenizer::Token]
# @param t3 [Spellchecker::Tokenizer::Token]
# @param t4 [Spellchecker::Tokenizer::Token]
# @return [Boolean]
def skip_phrase?(t1, t2, t3, t4)
  [[t1, t3], [t2, t4]].any? do |left, right|
    left.downcased == right.downcased && SKIP_PHRASE_WORDS.include?(left.downcased)
  end
end