class XapianFu::StopperFactory

Public Class Methods

stop_words_filename(lang) click to toggle source

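Return the full path to the stop words file for the given language. The language name is converted to a string and lowercased, and the file is looked up as `<lang>.txt` in the `stopwords` directory next to this source file.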

   # File lib/xapian_fu/stopper_factory.rb
# Build the full path to the stop words file for +lang+.
#
# +lang+ is converted to a string and lowercased; the file is expected to be
# named "<lang>.txt" inside the "stopwords" directory next to this file.
def self.stop_words_filename(lang)
  basename = "#{lang.to_s.downcase}.txt"
  stopwords_dir = File.join(File.dirname(__FILE__), 'stopwords')
  File.join(stopwords_dir, basename)
end
stop_words_for(lang) click to toggle source

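Read and parse the stop words file for the given language, returning an array of lowercased words (the first word of each non-comment line). Raises UnsupportedStopperLanguage if no stop words file exists for the language.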

   # File lib/xapian_fu/stopper_factory.rb
32 def self.stop_words_for(lang)
33   raise UnsupportedStopperLanguage, lang.to_s unless File.exists?(stop_words_filename(lang))
34   words = []
35   # Open files with correct encoding in Ruby 1.9
36   open_args = [stop_words_filename(lang), "r"]
37   open_args << { :encoding => "UTF-8" } if String.new.respond_to? :encoding
38   open(*open_args) do |f|
39     while line = f.readline rescue nil
40       words << line.split(" ", 2).first.downcase.strip  unless line =~ /^ +|^$|^\|/
41     end
42   end
43   words
44 end
stopper_for(lang) click to toggle source

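Return a Xapian::SimpleStopper loaded with the stop words for the given language. If given an existing Xapian::Stopper, or false, it is returned unchanged. Stoppers are cached per language, so repeated calls for the same language return the same object.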

   # File lib/xapian_fu/stopper_factory.rb
 # Return a stopper for +lang+.
 #
 # An existing Xapian::Stopper, or +false+, is handed back untouched. Any
 # other value is treated as a language name: it is stringified, lowercased
 # and stripped, and a Xapian::SimpleStopper filled with that language's stop
 # words is built on first use and kept in @stoppers for later calls.
 def self.stopper_for(lang)
   case lang
   when Xapian::Stopper, false
     lang
   else
     key = lang.to_s.downcase.strip
     @stoppers[key] ||= stop_words_for(key).each_with_object(Xapian::SimpleStopper.new) do |word, stopper|
       stopper.add(word)
     end
   end
 end