module SloveneStemmer
Inspired by: snowball.tartarus.org/archives/snowball-discuss/0725.html
Constants
- ALPHABET
- CONSONANTS
- Stem
- VERSION
- VOWELS
- WORD_ENDINGS
Public Instance Methods
load_endings()
click to toggle source
# File lib/slovene_stemmer.rb, line 8
#
# Loads the list stored under the 'word_endings' key of
# config/slovene_stemmer.yml (resolved relative to this source file) and
# groups its entries by their length.
#
# Returns a Hash mapping each length to the Array of entries of that length.
# Raises RuntimeError when the file cannot be read or parsed, or when its
# content cannot be grouped (for example the 'word_endings' key is missing).
def load_endings
  config_path = File.expand_path("../../config/slovene_stemmer.yml", __FILE__)
  YAML.load_file(config_path)['word_endings'].group_by(&:length)
rescue StandardError => e
  # Name the file that is actually read, so the hint points at the right place.
  raise "Please provide a valid config/slovene_stemmer.yml file, #{e}"
end
stem(word)
click to toggle source
# File lib/slovene_stemmer.rb, line 21
#
# Reduces +word+ to its stem and returns the result of Stem#to_s.
#
# The stripped word is wrapped in a Stem, its symbols are removed, and then
# four trimming passes are run. In every pass:
# * for each (length, endings) group in WORD_ENDINGS, the last character is
#   dropped when the stem is longer than length + 3 and ends with one of the
#   group's endings;
# * the last character is dropped when the stem is longer than 6 and ends
#   with one of the CONSONANTS characters;
# * the last character is dropped when the stem is longer than 5 and ends
#   with one of the VOWELS characters.
#
# NOTE(review): Stem#ends_with? is presumably given a list of candidate
# suffixes here; confirm against the Stem class.
def stem(word)
  candidate = Stem.new(word.strip)
  candidate.remove_symbols!

  4.times do
    WORD_ENDINGS.each do |ending_length, endings|
      # Only trim when enough characters remain in front of the ending.
      if candidate.length > ending_length + 3 && candidate.ends_with?(endings)
        candidate.remove_last_char!
      end
    end

    candidate.remove_last_char! if candidate.length > 6 && candidate.ends_with?(CONSONANTS.chars)
    candidate.remove_last_char! if candidate.length > 5 && candidate.ends_with?(VOWELS.chars)
  end

  candidate.to_s
end