module Normalizer

Constants

ALEF
ALEF_MADDA
ALEF_WITH_HAMZA_ABOVE
ALEF_WITH_HAMZA_BELOW
ARABIC_ALEF_MAKSOURA
ARABIC_KAF
ARABIC_YEH
CHARACTERS_MAPPINGS
DAMMA
DAMMATAN
DIACRITICS
FARSI_KEHEH
FARSI_YEH
FATHA
FATHATAN
KASRA
KASRATAN
SHADDA
SUKUN
TATWIL

Public Instance Methods

normalize() click to toggle source
# File lib/normalizer.rb, line 46
# Runs the normalization pipeline and returns the resulting word.
#
# The steps run in a fixed order: character mapping first, then
# diacritic removal. Each step is a private helper invoked for its
# side effect; the value returned is whatever +word+ yields afterwards.
#
# @return [Object] the value of +word+ after both steps have run
def normalize
  pipeline = %i[map_charachters remove_diacritics]
  pipeline.each { |step| send(step) }
  word
end

Private Instance Methods

map_charachters() click to toggle source
# File lib/normalizer.rb, line 54
# Replaces every occurrence of a mapped key in +word+ with its substitute
# and stores the result in @word.
#
# The mapping is whatever +filter_rules+ returns for CHARACTERS_MAPPINGS.
# filter_rules is defined outside this view; it is expected to return a
# Hash of String => String here (TODO confirm). When that result is empty
# the method returns early and @word is left untouched.
#
# @return [String, nil] the updated word, or nil when there are no rules
def map_charachters
  rules = filter_rules(CHARACTERS_MAPPINGS)
  return if rules.empty?

  # Regexp.union escapes each key, so keys such as "-", "^" or "]" are
  # matched literally instead of changing the meaning of a hand-built
  # character class (which could also delete unrelated characters, because
  # gsub substitutes "" for matches that are missing from the hash).
  @word = word.gsub(Regexp.union(rules.keys), rules)
end
remove_diacritics() click to toggle source
# File lib/normalizer.rb, line 61
# Deletes every diacritic character from +word+ and stores the result
# in @word.
#
# The characters to strip are whatever +filter_rules+ returns for
# DIACRITICS. filter_rules is defined outside this view; it is expected to
# return an Array of Strings here (TODO confirm). When that result is empty
# the method returns early and @word is left untouched.
#
# @return [String, nil] the updated word, or nil when there are no rules
def remove_diacritics
  rules = filter_rules(DIACRITICS)
  return if rules.empty?

  # Escape before interpolating so that characters with a special meaning
  # inside a character class ("^", "-", "]", "\") are treated literally
  # rather than negating the class, forming a range, or breaking the regexp.
  @word = word.gsub(/[#{Regexp.escape(rules.join)}]/, '')
end