class ArStemmer
ArStemmer
is a Ruby port of Lucene’s ArabicStemmer class, with extensions.
Constants
- ALEF
- BEH
- FEH
- HEH
- KAF
- LAM
- NOON
- PREFIXES
- SUFFIXES
- TEH
- TEH_MARBUTA
- WAW
- YEH
Attributes
excepts[R]
onlys[R]
word[R]
Public Class Methods
new(word, options = {})
click to toggle source
# File lib/ar_stemmer.rb, line 48
# Builds a stemmer for a single word.
#
# @param word [String] the word to stem; a copy is stored so the caller's
#   string object is never the one held by the stemmer.
# @param options [Hash] optional rule filters:
#   * +:only+   - rule key(s) to apply exclusively
#   * +:except+ - rule key(s) to skip
#   Each may be a single key or an array of keys. When +:only+ is given,
#   +:except+ is ignored.
def initialize(word, options = {})
  @word = word.dup
  @onlys = []
  @excepts = []

  # Array() leaves arrays untouched and wraps a lone key, so `only: :foo`
  # works instead of failing later when the filter list is asked `.any?`.
  if options[:only]
    @onlys = Array(options[:only])
  elsif options[:except]
    @excepts = Array(options[:except])
  end
end
stem(word, options = {})
click to toggle source
# File lib/ar_stemmer.rb, line 42
# Convenience entry point: builds a stemmer for +word+ with the given
# +options+ and returns the result of its #stem call.
def self.stem(word, options = {})
  stemmer = new(word, options)
  stemmer.stem
end
Public Instance Methods
stem()
click to toggle source
# File lib/ar_stemmer.rb, line 60
# Runs prefix stripping, then suffix stripping, and returns the word as it
# stands afterwards.
def stem
  stem_prefix
  stem_suffix

  word
end
Private Instance Methods
ends_with_check_length(word, suffix)
click to toggle source
# File lib/ar_stemmer.rb, line 97
# Tells whether +suffix+ may be stripped from +word+.
#
# The word must be at least two characters longer than the suffix and must
# actually end with it.
#
# @param word [String]
# @param suffix [String]
# @return [Boolean]
def ends_with_check_length(word, suffix)
  # Too short: removing the suffix would leave fewer than two characters.
  return false if word.length < suffix.length + 2

  word.end_with?(suffix)
end
rules(rule_set)
click to toggle source
# File lib/ar_stemmer.rb, line 68
# Filters +rule_set+ (a Hash) by key and returns the values of the entries
# that survive, in the hash's own order.
#
# An entry is dropped when the +excepts+ list is in use and names its key,
# or when the +onlys+ list is in use and does not name its key.
def rules(rule_set)
  enabled = rule_set.select do |name, _rule|
    next false if excepts.any? && excepts.include?(name)

    !onlys.any? || onlys.include?(name)
  end

  enabled.values
end
starts_with_check_length(word, prefix)
click to toggle source
# File lib/ar_stemmer.rb, line 87
# Tells whether +prefix+ may be stripped from +word+.
#
# A single-character prefix ('waw', 'beh', ...) must leave at least three
# characters behind, so the word needs four or more. Longer prefixes only
# need to leave two characters behind.
#
# The previous single-character check used `word.length < 3`, which is the
# same condition as the general `prefix.length + 2` rule and therefore never
# had any effect. Lucene's ArabicStemmer uses `len < 4` for this case; that
# threshold is restored here so short words such as three-letter roots that
# merely begin with 'waw' are left intact.
#
# @param word [String]
# @param prefix [String]
# @return [Boolean]
def starts_with_check_length(word, prefix)
  min_length = prefix.length == 1 ? 4 : prefix.length + 2
  return false if word.length < min_length

  word.start_with?(prefix)
end
stem_prefix()
click to toggle source
# File lib/ar_stemmer.rb, line 75
# Removes the first applicable prefix from the word.
#
# Walks the enabled PREFIXES rules in order; on the first one accepted by
# starts_with_check_length the prefix is cut off, the shortened word is
# stored, and the method returns immediately, so at most one prefix is
# ever removed.
def stem_prefix
  rules(PREFIXES).each do |prefix|
    next unless starts_with_check_length(word, prefix)

    @word = word[prefix.length..-1]
    return @word
  end
end
stem_suffix()
click to toggle source
# File lib/ar_stemmer.rb, line 81
# Removes applicable suffixes from the word.
#
# Every enabled SUFFIXES rule is tried in order against the word as it
# currently stands. Unlike prefix stripping there is no early exit, so
# several suffixes can be removed in one pass.
def stem_suffix
  rules(SUFFIXES).each do |suffix|
    next unless ends_with_check_length(word, suffix)

    # Keep everything except the trailing suffix characters.
    @word = word[0, word.length - suffix.length]
  end
end