class Sastrawi::Stemmer::Stemmer

Attributes

dictionary[R]
visitor_provider[R]

Public Class Methods

new(dictionary) click to toggle source
# File lib/sastrawi/stemmer/stemmer.rb, line 16
# Build a stemmer around the given dictionary.
#
# Stores the dictionary and creates a fresh visitor provider; both are
# later handed to the stemming context.
#
# @param dictionary [Object] word list used for look-ups
def initialize(dictionary)
  provider_class = Sastrawi::Stemmer::Context::Visitor::VisitorProvider

  @dictionary = dictionary
  @visitor_provider = provider_class.new
end

Public Instance Methods

plural?(word) click to toggle source
# File lib/sastrawi/stemmer/stemmer.rb, line 48
# Tell whether a word looks like a hyphenated plural.
#
# A trailing "-ku", "-mu", "-nya", "-lah", "-kah", "-tah" or "-pun" is
# ignored first, so that hyphen alone does not make the word plural; the
# word is plural when what remains still contains a hyphen.
#
# @param word [String] the word to inspect
# @return [Boolean] true when the word (minus such a suffix) contains "-"
def plural?(word)
  # First capture group = everything before the hyphenated suffix, or nil
  # when the word does not end in one of the listed suffixes.
  without_suffix = word[/^(.*)-(ku|mu|nya|lah|kah|tah|pun)$/, 1]

  (without_suffix || word).include?('-')
end
stem(text) click to toggle source

Stem a string to its base form

# File lib/sastrawi/stemmer/stemmer.rb, line 24
# Stem every word of a text.
#
# The text is normalized, split on whitespace, each word is passed through
# stem_word, and the results are joined back with single spaces.
#
# @param text [String] the text to stem
# @return [String] the stemmed words separated by single spaces
def stem(text)
  Sastrawi::Stemmer::Filter::TextNormalizer
    .normalize_text(text)
    .split(' ')
    .map { |token| stem_word(token) }
    .join(' ')
end
stem_plural_word(word) click to toggle source

Stem a plural word to its base form. Reference: Asian, J. (2007), “Effective Techniques for Indonesian Text Retrieval”, pp. 76–77.

# File lib/sastrawi/stemmer/stemmer.rb, line 61
# Stem a hyphenated (plural) word to its base form.
#
# The word is split at its last hyphen and both halves are stemmed as
# singular words. When the two roots agree, that root is returned;
# otherwise the word is returned unchanged.
#
# @param word [String] the hyphenated word
# @return [String] the shared root, or the original word when the halves
#   do not reduce to the same root (or when there is no hyphen)
def stem_plural_word(word)
  hyphen_split = /^(.*)-(.*)$/

  outer = hyphen_split.match(word)
  return word unless outer

  head, tail = outer.captures

  # A trailing particle such as "-nya" is not a half of its own: when the
  # rest still contains a hyphen, split there instead and keep the particle
  # attached to the second half ("a-b-nya" becomes "a" and "b-nya").
  if %w[ku mu nya lah kah tah pun].include?(tail)
    inner = hyphen_split.match(head)
    if inner
      head = inner[1]
      tail = "#{inner[2]}-#{tail}"
    end
  end

  head_root = stem_singular_word(head)
  tail_root = stem_singular_word(tail)

  # The second half is not in the dictionary and stemming left it untouched:
  # retry with a "me" prefix prepended.
  unknown_tail = !@dictionary.contains?(tail)
  tail_root = stem_singular_word("me#{tail}") if unknown_tail && tail_root == tail

  head_root == tail_root ? head_root : word
end
stem_singular_word(word) click to toggle source

Stem a singular word to its base form

# File lib/sastrawi/stemmer/stemmer.rb, line 93
# Stem a single (non-plural) word to its base form.
#
# Builds a stemming context for the word with this stemmer's dictionary and
# visitor provider, executes it, and returns the context's result.
#
# @param word [String] the word to stem
# @return [Object] whatever the executed context reports as its result
def stem_singular_word(word)
  Sastrawi::Stemmer::Context::Context
    .new(word, @dictionary, @visitor_provider)
    .tap(&:execute)
    .result
end
stem_word(word) click to toggle source

Stem a word to its base form

# File lib/sastrawi/stemmer/stemmer.rb, line 40
# Stem one word to its base form.
#
# Dispatches to stem_plural_word when plural? reports the word as plural,
# and to stem_singular_word otherwise.
#
# @param word [String] the word to stem
# @return the value returned by the chosen stemming method
def stem_word(word)
  plural?(word) ? stem_plural_word(word) : stem_singular_word(word)
end