module Spellchecker::DetectNgram

Constants

NGRAM_RANGE
SEPARATOR_REGEXP

Public Instance Methods

call(token) click to toggle source

@param token [Spellchecker::Tokenizer::Token]
@return [Spellchecker::Mistake, nil]

# File lib/spellchecker/detect_ngram.rb, line 12
# Looks for a known n-gram starting at +token+ and wraps it in a Mistake.
#
# The lookup is delegated to +find_ngram+; when it yields no correction the
# method returns nil. If the matched text begins with an ASCII capital
# letter, the first non-whitespace character of the correction is upcased
# so the suggestion keeps the capitalisation of the original text.
#
# @param token [Spellchecker::Tokenizer::Token]
# @return [Spellchecker::Mistake, nil]
def call(token)
  phrase, replacement = find_ngram(token)
  return unless replacement

  starts_capitalized = phrase.match?(/\A[A-Z]/)
  suggestion = starts_capitalized ? replacement.sub(/\S/) { |char| char.upcase } : replacement

  Mistake.new(
    text: phrase,
    correction: suggestion,
    position: token.position,
    type: MistakeTypes::GRAMMAR
  )
end
fetch_original_text(token, index) click to toggle source

@param token [Spellchecker::Tokenizer::Token]
@param index [Integer]
@return [String]

# File lib/spellchecker/detect_ngram.rb, line 42
# Rebuilds the original (non-normalized) text of a phrase that ends at
# +token+ by walking backwards through +prev+ links.
#
# Exactly <tt>index + 1</tt> tokens are collected: +token+ itself plus the
# +index+ tokens before it. Their +text+ values are joined with a single
# space in reading order.
#
# @param token [Spellchecker::Tokenizer::Token] last token of the phrase
# @param index [Integer] how many preceding tokens to include
# @return [String]
def fetch_original_text(token, index)
  words = []
  cursor = token

  (index + 1).times do
    # Prepend so the earliest token ends up first.
    words.unshift(cursor.text)
    cursor = cursor.prev
  end

  words.join(' ')
end
find_ngram(token) click to toggle source

@param token [Spellchecker::Tokenizer::Token]
@return [Array<(String, String)>, nil]

# File lib/spellchecker/detect_ngram.rb, line 25
# Searches for a known n-gram that begins at +token+.
#
# Starting from the normalized form of +token+, the method advances through
# +next+ tokens once per element of NGRAM_RANGE, appending each normalized
# form to the candidate word list and looking the list up in
# Dictionaries::NgramList. The search stops as soon as a token whose text
# matches SEPARATOR_REGEXP is reached.
#
# On a match the result is a two-element array: the original text of the
# matched phrase (rebuilt via +fetch_original_text+) and the correction.
# Every other outcome returns nil, including running out of NGRAM_RANGE.
# The previous implementation leaked its accumulator array when the loop
# ended without hitting <tt>NGRAM_RANGE.end</tt> (e.g. an exclusive or
# empty range), which callers would destructure as a bogus match.
#
# NOTE(review): assumes each NGRAM_RANGE element equals the number of tokens
# already stepped past the starting token — confirm against the constant.
#
# @param token [Spellchecker::Tokenizer::Token]
# @return [Array<(String, String)>, nil]
def find_ngram(token)
  words = [token.normalized]
  current = token

  NGRAM_RANGE.each do |offset|
    current = current.next

    # A separator ends the phrase; longer n-grams cannot span it.
    break if current.text.match?(SEPARATOR_REGEXP)

    words << current.normalized
    correction = Dictionaries::NgramList.match(words)

    return [fetch_original_text(current, offset), correction] if correction
  end

  nil
end