class XapianDb::Indexer
The indexer creates a Xapian::Document from an object. The object must be an instance of a class that has a blueprint configuration. @author Gernot Kogler
Public Class Methods
new(database, document_blueprint)
click to toggle source
Constructor @param [XapianDb::Database] database The database to use (needed to build a spelling index) @param [XapianDb::DocumentBlueprint] document_blueprint The blueprint to use
# File lib/xapian_db/indexer.rb, line 13
# Set up an indexer bound to a database and a document blueprint.
# @param [XapianDb::Database] database The database to use (needed to build a spelling index)
# @param [XapianDb::DocumentBlueprint] document_blueprint The blueprint to use
def initialize(database, document_blueprint)
  @database = database
  @document_blueprint = document_blueprint
end
Public Instance Methods
build_document_for(obj)
click to toggle source
Build the document for an object. The object must respond to ‘xapian_id’. The configured adapter should implement this method. @return [Xapian::Document] The xapian document (see xapian.org/docs/sourcedoc/html/classXapian_1_1Document.html)
# File lib/xapian_db/indexer.rb, line 20
# Build the xapian document for an object.
# The object must respond to 'xapian_id'; its return value becomes the document data.
# @param obj The object to build a document for
# @return [Xapian::Document] The xapian document, with fields stored and text indexed
def build_document_for(obj)
  @obj = obj
  # The blueprint is looked up by the class name of the object
  @blueprint = DocumentBlueprint.blueprint_for(obj.class.name)

  @xapian_doc = Xapian::Document.new
  @xapian_doc.data = obj.xapian_id

  # Both helpers work on @obj, @blueprint and @xapian_doc set above
  store_fields
  index_text

  @xapian_doc
end
Private Instance Methods
get_values_to_index_from(obj)
click to toggle source
Get the values to index from an object
# File lib/xapian_db/indexer.rb, line 107
# Get the values to index from an object.
# @param obj An array, an object exposing an attributes hash, or any other object
# @return [Array] The values to index; nil entries are dropped
def get_values_to_index_from(obj)
  # Arrays are resolved recursively, element by element, into one flat list
  if obj.is_a?(Array)
    nested = obj.map { |element| get_values_to_index_from(element) }
    return nested.flatten.compact
  end

  # If the object responds to attributes and attributes is a hash, the attribute
  # values are used (works well for active_record and datamapper objects)
  if obj.respond_to?(:attributes) && obj.attributes.is_a?(Hash)
    return obj.attributes.values.compact
  end

  # The object is unknown and will be indexed by its to_s method;
  # if to_s returns nil, it is not indexed at all
  return [] if obj.to_s.nil?

  [obj]
end
index_text()
click to toggle source
Index all configured text methods
# File lib/xapian_db/indexer.rb, line 60
# Index all configured text methods of the current object into @xapian_doc.
# Relies on @obj, @blueprint and @xapian_doc being set beforehand.
def index_text
  generator = Xapian::TermGenerator.new
  generator.document = @xapian_doc

  # Stopper and spelling support are only configured when a stemmer is configured
  if XapianDb::Config.stemmer
    generator.stemmer = XapianDb::Config.stemmer
    generator.stopper = XapianDb::Config.stopper if XapianDb::Config.stopper

    # Enable the creation of a spelling dictionary if the database is not in memory
    build_spelling = @database.is_a?(XapianDb::PersistentDatabase) &&
                     XapianDb::Config.query_flags.include?(Xapian::QueryParser::FLAG_SPELLING_CORRECTION)
    if build_spelling
      generator.database = @database.writer
      generator.set_flags Xapian::TermGenerator::FLAG_SPELLING
    end
  end

  # Index the primary key as a unique term
  @xapian_doc.add_term("Q#{@obj.xapian_id}")

  # Index the class with the field name
  generator.index_text("#{@obj.class}".downcase, 1, "XINDEXED_CLASS")
  @xapian_doc.add_term("C#{@obj.class}")

  @blueprint.indexed_method_names.each do |method_name|
    options = @blueprint.options_for_indexed_method(method_name)

    # A configured block is evaluated in the context of the object;
    # otherwise the method with the configured name is called
    source = options.block ? @obj.instance_eval(&options.block) : @obj.send(method_name)
    next if source.nil?

    get_values_to_index_from(source).each do |value|
      text = value.to_s.downcase
      text = @blueprint.preprocess_terms.call(text) if @blueprint.preprocess_terms
      text = split(text) if XapianDb::Config.term_splitter_count > 0 && !options.no_split

      # Add value with field name
      generator.index_text(text, options.weight, "X#{method_name.upcase}") if options.prefixed
      # Add value without field name
      generator.index_text(text, options.weight)
    end
  end

  # Drop every term that is shorter than the configured minimum length.
  # NOTE(review): the filter runs over all terms of the document, which presumably
  # includes the "Q…" / "C…" terms added above - confirm this is intended.
  too_short = @xapian_doc.terms.select { |entry| entry.term.length < XapianDb::Config.term_min_length }
  too_short.each { |entry| @xapian_doc.remove_term(entry.term) }
end
split(terms)
click to toggle source
# File lib/xapian_db/indexer.rb, line 125
# Expand each whitespace separated term into its leading prefixes plus the term itself.
# The prefix lengths run from 1 to XapianDb::Config.term_splitter_count; a term shorter
# than a prefix length yields the whole term for that length.
# @param [String] terms The terms, separated by whitespace
# @return [String] The prefixes and terms, joined by a single space
def split(terms)
  expanded = terms.split(" ").flat_map do |word|
    prefixes = (1..XapianDb::Config.term_splitter_count).map { |length| word[0...length] }
    prefixes << word
  end
  expanded.join(" ")
end
store_fields()
click to toggle source
Store all configured fields
# File lib/xapian_db/indexer.rb, line 33
# Store all configured fields as values of @xapian_doc.
# Relies on @obj, @blueprint and @xapian_doc being set beforehand.
def store_fields
  # Class name of the object goes to position 0
  @xapian_doc.add_value(0, @obj.class.name)

  # Natural sort order goes to position 1; it is either a proc evaluated
  # in the context of the object or the name of a method to call
  sort_value = if @blueprint._natural_sort_order.is_a?(Proc)
                 @obj.instance_eval(&@blueprint._natural_sort_order)
               else
                 @obj.send(@blueprint._natural_sort_order)
               end
  @xapian_doc.add_value(1, sort_value.to_s)

  @blueprint.attribute_names.each do |attribute|
    # A block configured for the attribute wins over calling the attribute method
    reader = @blueprint.block_for_attribute(attribute)
    raw_value = reader ? @obj.instance_eval(&reader) : @obj.send(attribute)

    codec = XapianDb::TypeCodec.codec_for(@blueprint.type_map[attribute])
    encoded = codec.encode(raw_value)
    # Nothing is stored when the codec returns nil
    next if encoded.nil?

    @xapian_doc.add_value(DocumentBlueprint.value_number_for(attribute), encoded)
  end
end