class Adapt::Helpers::Parser::English

English string normalization. (Source annotation: `noinspection RubyLiteralArrayInspection,RubyQuotedStringsInspection`.)

Constants

ARTICLES
CONTRACTION
EXPANSION
TEXT_NUMBERS

Public Class Methods

normalize(text, remove_articles: true)
# File lib/adapt/helpers/en_parser.rb, line 64

# Normalizes an English utterance word by word:
#
# * optionally drops words listed in ARTICLES,
# * replaces words listed in CONTRACTION with the EXPANSION entry at the
#   same index (e.g. "isn't" -> "is not"), capitalizing the expansion when
#   the original word's first character equals its own upcase,
# * replaces words listed in TEXT_NUMBERS with their index in that list,
#   rendered as a decimal string.
#
# All lookups are done on the downcased word. Words are split on whitespace
# and re-joined with single spaces.
#
# NOTE(review): assumes ARTICLES, CONTRACTION, EXPANSION and TEXT_NUMBERS are
# Arrays, with CONTRACTION and EXPANSION parallel — confirm against their
# definitions.
#
# @param text [String] the utterance to normalize
# @param remove_articles [Boolean] whether words in ARTICLES are dropped
# @return [String] the normalized utterance
def self.normalize(text, remove_articles: true)
  normalized = text.split.map do |word|
    lowered = word.downcase
    # `next` yields nil for this word; nils are removed by `compact` below.
    next if remove_articles && ARTICLES.include?(lowered)

    contraction_at = CONTRACTION.index(lowered)
    if contraction_at
      starts_upcased = word[0] == word[0].upcase
      expansion = EXPANSION[contraction_at]
      # Non-destructive `capitalize`: the string belongs to the EXPANSION
      # constant and must not be modified in place.
      word = starts_upcased ? expansion.capitalize : expansion
    end

    # The index is an Integer, so it has to be converted before joining.
    number = TEXT_NUMBERS.index(word.downcase)
    number ? number.to_s : word
  end

  normalized.compact.join(' ')
end