module Spellchecker::DetectDuplicate
Constants
- MIN_LENGTH
- SKIP_PHRASES
- SKIP_PHRASE_WORDS
- SKIP_WORDS
Public Instance Methods
call(token)
click to toggle source
@param token [Spellchecker::Tokenizer::Token] @return [Spellchecker::Mistake, nil]
# File lib/spellchecker/detect_duplicate.rb, line 37
# Inspects the window of four tokens starting at +token+ and reports a
# duplicated word or word pair.
#
# @param token [Spellchecker::Tokenizer::Token] first token of the window
# @return [Spellchecker::Mistake, nil] a DUPLICATE mistake located at
#   +token.position+, or nil when nothing is reported
def call(token)
  # Cheap rejections first: text shorter than MIN_LENGTH or a word listed in SKIP_WORDS.
  return if token.text.length < MIN_LENGTH || SKIP_WORDS.include?(token.downcased)

  second = token.next
  third = second.next
  fourth = third.next
  window = [token, second, third, fourth]

  text, correction = find_duplicate(*window)
  return unless text
  return if SKIP_PHRASES.include?(correction.downcase)
  return unless Dictionaries::EnglishWords.include?(second.text)

  # The filters run in this order and stop at the first one that matches.
  ignored = skip_phrase?(*window) ||
            repetition?(*window) ||
            from_to_phrase?(token, second, third) ||
            quoted?(*window)
  return if ignored

  Mistake.new(text: text,
              correction: correction,
              position: token.position,
              type: MistakeTypes::DUPLICATE)
end
find_duplicate(t1, t2, t3, t4)
click to toggle source
@param t1 [Spellchecker::Tokenizer::Token] @param t2 [Spellchecker::Tokenizer::Token] @param t3 [Spellchecker::Tokenizer::Token] @param t4 [Spellchecker::Tokenizer::Token] @return [Array<String>, nil] a two-element array of the duplicated text and its suggested correction, or nil when no duplicate is found
# File lib/spellchecker/detect_duplicate.rb, line 67
# Looks for a doubled word (t1 t2) or a doubled word pair (t1 t2 t3 t4),
# comparing tokens by their downcased form.
#
# @return [Array<String>, nil] +[matched_text, correction]+, or nil when
#   neither pattern applies
def find_duplicate(t1, t2, t3, t4)
  # Doubled word: the second occurrence must be neither capital nor a digit.
  doubled_word = t1.downcased == t2.downcased && !(t2.capital? || t2.digit?)
  return ["#{t1.text} #{t2.text}", t1.text] if doubled_word

  # Doubled pair: the same restriction is applied to the third token.
  doubled_pair = t1.downcased == t3.downcased &&
                 t2.downcased == t4.downcased &&
                 !(t3.capital? || t3.digit?)
  return unless doubled_pair

  pair = "#{t1.text} #{t2.text}"
  ["#{pair} #{t3.text} #{t4.text}", pair]
end
from_to_phrase?(t1, t2, t3)
click to toggle source
# File lib/spellchecker/detect_duplicate.rb, line 99
# True when the tokens read "from X to X": the token before t1 is "from",
# t2 is "to", and t1 and t3 have the same downcased form.
#
# @return [Boolean]
def from_to_phrase?(t1, t2, t3)
  return false unless t1.prev.downcased == 'from'
  return false unless t2.downcased == 'to'

  t1.downcased == t3.downcased
end
quoted?(t1, _t2, t3, t4)
click to toggle source
rubocop:enable Metrics/AbcSize
# File lib/spellchecker/detect_duplicate.rb, line 95
# True when a double-quote token precedes t1 and either t3 or t4 is also a
# double-quote token. The second token is not inspected.
#
# @return [Boolean]
def quoted?(t1, _t2, t3, t4)
  quote = '"'
  return false unless t1.prev.text == quote

  [t3, t4].any? { |candidate| candidate.text == quote }
end
repetition?(t1, t2, t3, t4)
click to toggle source
rubocop:disable Metrics/AbcSize
# File lib/spellchecker/detect_duplicate.rb, line 83
# True when the match belongs to a longer run of repeated words, judged by
# also looking at the token before t1 and the token after t4. All
# comparisons use the downcased form.
#
# @return [Boolean]
def repetition?(t1, t2, t3, t4)
  first = t1.downcased
  second = t2.downcased
  first_is_third = first == t3.downcased

  # t1, t3 and the token after t4 are all the same word.
  return true if first_is_third && first == t4.next.downcased

  before = t1.prev.downcased
  # The token before t1, t2 and t4 are all the same word.
  return true if before == second && second == t4.downcased
  # The token before t1, t1 and t3 are all the same word.
  return true if before == first && first_is_third

  # A doubled word (t1 == t2) that also matches t3, the previous token or t4.
  first == second && (first_is_third || first == before || first == t4.downcased)
end
skip_phrase?(t1, t2, t3, t4)
click to toggle source
# File lib/spellchecker/detect_duplicate.rb, line 75
# True when a word repeated two positions apart (t1/t3 or t2/t4) is listed
# in SKIP_PHRASE_WORDS. Comparison uses the downcased form.
#
# @return [Boolean]
def skip_phrase?(t1, t2, t3, t4)
  [[t1, t3], [t2, t4]].any? do |left, right|
    word = left.downcased
    word == right.downcased && SKIP_PHRASE_WORDS.include?(word)
  end
end