class ArStemmer

ArStemmer is a Ruby port of Lucene's ArabicStemmer class, with extensions.

github.com/apache/lucene-solr/blob/master/lucene/analysis/common/src/java/org/apache/lucene/analysis/ar/ArabicStemmer.java

Constants

ALEF
BEH
FEH
HEH
KAF
LAM
NOON
PREFIXES
SUFFIXES
TEH
TEH_MARBUTA
WAW
YEH

Attributes

excepts[R]
onlys[R]
word[R]

Public Class Methods

new(word, options = {}) click to toggle source
# File lib/ar_stemmer.rb, line 48
# Builds a stemmer for a single word.
#
# @param word [String] word to stem; a copy is stored, not the caller's object
# @param options [Hash] optional rule filter
#   :only   - rule key, or collection of rule keys, to apply exclusively
#   :except - rule key, or collection of rule keys, to skip; consulted only
#             when :only is absent (nil/false)
def initialize(word, options = {})
  @word = word.dup

  @onlys = []
  @excepts = []
  if options[:only]
    # Array() leaves arrays untouched and wraps a single bare key, so
    # `only: :some_rule` works the same as `only: [:some_rule]` instead of
    # failing later when the filter is queried with any?/include?.
    @onlys = Array(options[:only])
  elsif options[:except]
    @excepts = Array(options[:except])
  end
end
stem(word, options = {}) click to toggle source
# File lib/ar_stemmer.rb, line 42
# Convenience entry point: builds a stemmer for +word+ and runs it.
#
# @param word [String] word handed to the constructor
# @param options [Hash] passed through unchanged to the constructor
# @return the result of calling #stem on the new instance
def self.stem(word, options = {})
  stemmer = new(word, options)
  stemmer.stem
end

Public Instance Methods

stem() click to toggle source
# File lib/ar_stemmer.rb, line 60
# Runs the full stemming pipeline on this instance's word.
#
# The prefix pass runs before the suffix pass; both update the stored word in
# place, so the value returned is the word after both passes.
#
# @return the stemmed word
def stem
  stem_prefix # leading affix first
  stem_suffix # then trailing affixes

  word
end

Private Instance Methods

ends_with_check_length(word, suffix) click to toggle source
# File lib/ar_stemmer.rb, line 97
# Tells whether +suffix+ may be stripped from +word+.
#
# The word must be at least two characters longer than the suffix (so at
# least two characters remain after stripping) and must end with it.
#
# @param word [String]
# @param suffix [String]
# @return [Boolean]
def ends_with_check_length(word, suffix)
  word.length >= suffix.length + 2 && word.end_with?(suffix)
end
rules(rule_set) click to toggle source
# File lib/ar_stemmer.rb, line 68
# Selects the rule values from +rule_set+ that are currently enabled.
#
# A rule is dropped when the except-list is in effect and names its key, or
# when the only-list is in effect and does not name its key. A list is "in
# effect" when `any?` is true for it.
#
# @param rule_set [Hash] rule key => rule value
# @return [Array] values of the surviving rules, in the hash's order
def rules(rule_set)
  blocked = excepts.any?
  restricted = onlys.any?

  kept = rule_set.select do |key, _rule|
    next false if blocked && excepts.include?(key)

    !restricted || onlys.include?(key)
  end
  kept.values
end
starts_with_check_length(word, prefix) click to toggle source
# File lib/ar_stemmer.rb, line 87
# Tells whether +prefix+ may be stripped from +word+.
#
# The word must reach a minimum length and start with the prefix. One-letter
# prefixes (the original comment names 'waw' and 'beh') need a word of at
# least 3 characters; any other prefix needs its own length plus 2.
#
# NOTE(review): upstream Lucene's startsWithCheckLength appears to use a
# stricter minimum for one-letter prefixes - confirm that 3 is intentional.
#
# @param word [String]
# @param prefix [String]
# @return [Boolean]
def starts_with_check_length(word, prefix)
  minimum = prefix.length == 1 ? 3 : prefix.length + 2
  word.length >= minimum && word.start_with?(prefix)
end
stem_prefix() click to toggle source
# File lib/ar_stemmer.rb, line 75
# Strips at most one prefix from the stored word.
#
# Candidates come from rules(PREFIXES) and are tried in order; the first one
# accepted by starts_with_check_length is cut off the front of @word and no
# further candidates are examined.
#
# Returns the shortened word when a prefix was removed, otherwise the list of
# candidate prefixes.
def stem_prefix
  candidates = rules(PREFIXES)
  matched = candidates.find { |prefix| starts_with_check_length(word, prefix) }
  return candidates unless matched

  @word = word[matched.length..-1]
end
stem_suffix() click to toggle source
# File lib/ar_stemmer.rb, line 81
# Strips suffixes from the stored word.
#
# Every candidate from rules(SUFFIXES) is tried in order against the current
# word; unlike a first-match search, the loop keeps going after a hit, so
# several suffixes can be removed in one pass.
#
# Returns the list of candidate suffixes.
def stem_suffix
  candidates = rules(SUFFIXES)
  candidates.each do |suffix|
    next unless ends_with_check_length(word, suffix)

    # Keep everything in front of the matched suffix.
    @word = word[0, word.length - suffix.length]
  end
end