module Normalizer
Constants
- ALEF
- ALEF_MADDA
- ALEF_WITH_HAMZA_ABOVE
- ALEF_WITH_HAMZA_BELOW
- ARABIC_ALEF_MAKSOURA
- ARABIC_KAF
- ARABIC_YEH
- CHARACTERS_MAPPINGS
- DAMMA
- DAMMATAN
- DIACRITICS
- FARSI_KEHEH
- FARSI_YEH
- FATHA
- FATHATAN
- KASRA
- KASRATAN
- SHADDA
- SUKUN
- TATWIL
Public Instance Methods
normalize()
click to toggle source
# File lib/normalizer.rb, line 46
# Runs the normalization steps in order and returns the resulting word.
#
# First applies the character mappings (map_charachters), then strips the
# diacritics (remove_diacritics). Both steps update the word in place via
# the @word instance variable; this method then reads it back through the
# `word` accessor.
#
# @return the value of `word` after both steps have run
def normalize
  map_charachters
  remove_diacritics
  word
end
Private Instance Methods
map_charachters()
click to toggle source
# File lib/normalizer.rb, line 54
# Replaces every occurrence of a mapping key in the word with its mapped value.
#
# The active rules are obtained from filter_rules(CHARACTERS_MAPPINGS) and are
# used as a Hash: keys are the strings to match, values are their replacements.
# When no rules remain after filtering, the word is left untouched.
#
# @return [String, nil] the updated word, or nil when there is nothing to apply
def map_charachters
  rules = filter_rules(CHARACTERS_MAPPINGS)
  return if rules.empty?

  # Regexp.union escapes every key, so keys containing regex metacharacters
  # ("-", "]", "^", "\") cannot corrupt the pattern, and multi-character keys
  # are matched as whole strings so the Hash lookup done by gsub succeeds.
  @word = word.gsub(Regexp.union(rules.keys), rules)
end
remove_diacritics()
click to toggle source
# File lib/normalizer.rb, line 61
# Deletes every occurrence of the active diacritic marks from the word.
#
# The active rules are obtained from filter_rules(DIACRITICS) and are used as
# a list of strings to strip. When no rules remain after filtering, the word
# is left untouched.
#
# @return [String, nil] the updated word, or nil when there is nothing to remove
def remove_diacritics
  rules = filter_rules(DIACRITICS)
  return if rules.empty?

  # Regexp.union escapes each entry, so an entry such as "-", "]" or "^"
  # cannot be misread as character-class syntax or raise a RegexpError.
  @word = word.gsub(Regexp.union(rules), '')
end