class Adapt::Helpers::Parser::English
noinspection RubyLiteralArrayInspection,RubyQuotedStringsInspection
English string normalization
Constants
- ARTICLES
- CONTRACTION
- EXPANSION
- TEXT_NUMBERS
Public Class Methods
normalize(text, remove_articles: true)
click to toggle source
# File lib/adapt/helpers/en_parser.rb, line 64
#
# Normalizes a piece of English text word by word:
#
# * optionally drops words listed in ARTICLES,
# * replaces a word found in CONTRACTION with the entry at the same
#   position in EXPANSION (e.g. "isn't" -> "is not"), capitalizing the
#   expansion when the original word's first character equals its own upcase,
# * replaces a word found in TEXT_NUMBERS with its position in that list,
#   rendered as a decimal string.
#
# All lookups are case-insensitive (the word is downcased before matching).
#
# @param text [String] the text to normalize; it is split on whitespace
# @param remove_articles [Boolean] when true, words in ARTICLES are dropped
# @return [String] the processed words joined by single spaces
def self.normalize(text, remove_articles: true)
  tokens = text.split.each_with_object([]) do |word, kept|
    lowered = word.downcase
    next if remove_articles && ARTICLES.include?(lowered)

    # Expand common contractions, e.g. "isn't" -> "is not"
    contraction_at = CONTRACTION.index(lowered)
    if contraction_at
      expansion = EXPANSION[contraction_at]
      # Use the non-mutating `capitalize`: `capitalize!` would modify the
      # string stored inside EXPANSION and corrupt it for later calls.
      word = word[0] == word[0].upcase ? expansion.capitalize : expansion
      lowered = word.downcase
    end

    # The index is an Integer; convert it so it can be joined as text.
    number = TEXT_NUMBERS.index(lowered)
    kept << (number ? number.to_s : word)
  end

  tokens.join(' ').strip
end