class XapianFu::StopperFactory
Public Class Methods
stop_words_filename(lang)
click to toggle source
Return the full path to the stop words file for the given language
# File lib/xapian_fu/stopper_factory.rb
#
# Build the path to the stop words file for +lang+.
#
# +lang+ is converted with +to_s+ and lowercased, then given a ".txt"
# extension. The file is looked up in the "stopwords" directory that sits
# next to this source file. The path is returned whether or not the file
# actually exists.
def self.stop_words_filename(lang)
  basename = "#{lang.to_s.downcase}.txt"
  stopwords_dir = File.join(File.dirname(__FILE__), 'stopwords')
  File.join(stopwords_dir, basename)
end
stop_words_for(lang)
click to toggle source
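Read and parse the stop words file for the given language, returning an array of lowercased words. Only the first whitespace-separated token of each line is used; blank lines and lines beginning with a space or a "|" are skipped. Raises UnsupportedStopperLanguage if no stop words file exists for the language.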
# File lib/xapian_fu/stopper_factory.rb 32 def self.stop_words_for(lang) 33 raise UnsupportedStopperLanguage, lang.to_s unless File.exists?(stop_words_filename(lang)) 34 words = [] 35 # Open files with correct encoding in Ruby 1.9 36 open_args = [stop_words_filename(lang), "r"] 37 open_args << { :encoding => "UTF-8" } if String.new.respond_to? :encoding 38 open(*open_args) do |f| 39 while line = f.readline rescue nil 40 words << line.split(" ", 2).first.downcase.strip unless line =~ /^ +|^$|^\|/ 41 end 42 end 43 words 44 end
stopper_for(lang)
click to toggle source
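Return a stopper for the given language. A Xapian::Stopper is returned unchanged and false is returned as false; anything else is treated as a language name, for which a Xapian::SimpleStopper loaded with that language's stop words is built once and then reused on later calls.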
# File lib/xapian_fu/stopper_factory.rb
#
# Resolve +lang+ to a stopper.
#
# * A Xapian::Stopper instance is handed back untouched.
# * +false+ is handed back as +false+.
# * Anything else is converted with +to_s+, lowercased and stripped, and
#   used as a language name: a Xapian::SimpleStopper is filled with the
#   words from stop_words_for and memoized in @stoppers under that name,
#   so later calls for the same language return the same object.
#
# NOTE(review): @stoppers is assumed to be a Hash-like cache initialized
# elsewhere in this class - confirm.
def self.stopper_for(lang)
  # Same membership tests that `case lang when ...` would perform.
  return lang if Xapian::Stopper === lang
  return false if false === lang

  language = lang.to_s.downcase.strip
  @stoppers[language] ||= begin
    simple_stopper = Xapian::SimpleStopper.new
    stop_words_for(language).each do |stop_word|
      simple_stopper.add(stop_word)
    end
    simple_stopper
  end
end