module Mongoid::FullTextSearch::ClassMethods

Public Instance Methods

all_ngrams(str, config, bound_number_returned = true) click to toggle source
# File lib/mongoid/full_text_search.rb, line 205
# Builds the table of index/search terms (ngrams, and optionally short prefixes and
# full words) for a string, mapping each term to its score.
#
# str                   - the text to break up; nil yields an empty Hash.
# config                - fulltext configuration Hash. Keys read here: :remove_accents,
#                         :alphabet, :word_separators, :ngram_width, :max_ngrams_to_search,
#                         :apply_prefix_scoring_to_all_words, :index_short_prefixes,
#                         :index_full_words and :stop_words.
# bound_number_returned - when true, ngrams are sampled at a stride derived from
#                         :max_ngrams_to_search instead of at every position.
#
# Returns a Hash of term String => Float score.
def all_ngrams(str, config, bound_number_returned = true)
  return {} if str.nil?

  # Accent stripping is delegated to whichever helper library is loaded, if any.
  text = str
  if config[:remove_accents]
    if defined?(UnicodeUtils)
      text = UnicodeUtils.nfkd(text)
    elsif defined?(DiacriticsFu)
      text = DiacriticsFu.escape(text)
    end
  end

  # Keep only characters that belong to the alphabet or act as word separators.
  alphabet = config[:alphabet]
  separators = config[:word_separators]
  kept_chars = text.mb_chars.downcase.to_s.split('').select { |ch| alphabet[ch] || separators[ch] }
  filtered = kept_chars.join('')
  length = filtered.length
  width = config[:ngram_width]

  # When the number of ngrams is bounded, walk the string with a stride of
  # ceil((length - width) / max_ngrams_to_search), never smaller than 1.
  stride = 1
  if bound_number_returned
    stride = [((length - width).to_f / config[:max_ngrams_to_search]).ceil, 1].max
  end

  # An ngram at the start of the string (or, optionally, right after a word separator)
  # gets the higher "prefix" score; every other ngram gets the lower one. With the
  # square-root scoring, multiplying the scores of matching ngrams favours an exact
  # match over strings that merely contain, or are contained in, the query.
  word_start_score = Math.sqrt(1 + 1.0 / length)
  mid_word_score = Math.sqrt(2.0 / length)
  prefix_everywhere = config[:apply_prefix_scoring_to_all_words]

  # A repeated ngram keeps only its best score.
  best_scores = {}
  (0..length - width).step(stride) do |i|
    at_word_start = i == 0 || (prefix_everywhere && separators.key?(filtered[i - 1].chr))
    score = at_word_start ? word_start_score : mid_word_score
    gram = filtered[i...i + width]
    best_scores[gram] = score unless best_scores.key?(gram) && best_scores[gram] >= score
  end
  ngram_array = best_scores.map { |gram, score| { ngram: gram, score: score } }

  # The word list is only needed by the two optional passes below.
  words = nil
  if config[:index_short_prefixes] || config[:index_full_words]
    separator_class = separators.keys.map { |sep| Regexp.escape(sep) }.join
    words = filtered.split(Regexp.compile("[#{separator_class}]"))
  end

  whole_term_score = 1 + 1.0 / length

  # 'Short prefix' records: the first (ngram_width - 1) characters of each word.
  # Stop words are skipped unless the stop word is the entire filtered string.
  if config[:index_short_prefixes]
    prefix_length = width - 1
    prefixes_seen = {}
    words.each do |word|
      next if word.length < prefix_length
      prefix = word[0...prefix_length]
      next unless prefixes_seen[prefix].nil?
      next unless config[:stop_words][word].nil? || word == filtered
      ngram_array << { ngram: prefix, score: whole_term_score }
      prefixes_seen[prefix] = true
    end
  end

  # Full-word records, one per distinct word longer than one character, with the
  # same stop-word rule as above.
  if config[:index_full_words]
    words_seen = {}
    words.each do |word|
      next unless word.length > 1 && words_seen[word].nil?
      next unless config[:stop_words][word].nil? || word == filtered
      ngram_array << { ngram: word, score: whole_term_score }
      words_seen[word] = true
    end
  end

  # A term that shows up as more than one kind of record gets the sum of its scores.
  ngram_array.group_by { |record| record[:ngram] }.each_with_object({}) do |(term, records), totals|
    totals[term] = records.map { |record| record[:score] }.sum
  end
end
create_fulltext_indexes() click to toggle source
# File lib/mongoid/full_text_search.rb, line 70
# Ensures the database indexes exist for every fulltext index configured on this
# model. Does nothing when no fulltext configuration is present.
def create_fulltext_indexes
  index_configs = mongoid_fulltext_config
  return unless index_configs

  index_configs.each_pair { |idx_name, idx_config| fulltext_search_ensure_indexes(idx_name, idx_config) }
end
fulltext_search_ensure_indexes(index_name, config) click to toggle source
# File lib/mongoid/full_text_search.rb, line 77
# Brings the indexes of the ngram collection +index_name+ in line with +config+.
#
# Existing indexes that include 'ngram' but do not cover every expected key are
# dropped; then the 'fts_index' compound index and a 'document_id' index are created.
#
# index_name - name of the collection holding the ngram records.
# config     - fulltext configuration Hash; only config[:filters] is read here.
def fulltext_search_ensure_indexes(index_name, config)
  coll = collection.database[index_name]

  # The index-management method names differ between Mongoid versions.
  modern_driver = Mongoid::Compatibility::Version.mongoid5_or_newer?
  drop_method = modern_driver ? :drop_one : :drop
  create_method = modern_driver ? :create_one : :create

  # The order of filters matters when the same index is used from two or more
  # collections, so filter fields are always sorted by name.
  filter_keys = (config[:filters] || []).map { |key, _value| "filter_values.#{key}" }.sort
  correct_keys = ['ngram', 'score'] + filter_keys

  # The index definition may have changed since the last run: drop every ngram
  # index that does not contain all of the expected keys, and remember any filter
  # fields found on existing ngram indexes.
  known_filter_keys = filter_keys
  coll.indexes.each do |idef|
    keys = idef['key'].keys
    next unless keys.include?('ngram')

    known_filter_keys |= keys.select { |key| key.start_with?('filter_values.') }
    shared_keys = keys & correct_keys
    next if shared_keys == correct_keys

    Mongoid.logger.info "Dropping #{idef['name']} [#{shared_keys} <=> #{correct_keys}]" if Mongoid.logger
    coll.indexes.public_send(drop_method, idef['key'])
  end

  # Filter fields seen on existing indexes but absent from this config are folded
  # into the new index definition as well.
  filter_keys = known_filter_keys.sort if known_filter_keys.length > filter_keys.length
  index_definition = [['ngram', 1], ['score', -1]] + filter_keys.map { |key| [key, 1] }

  Mongoid.logger.info "Ensuring fts_index on #{coll.name}: #{index_definition}" if Mongoid.logger
  coll.indexes.public_send(create_method, Hash[index_definition], name: 'fts_index')

  Mongoid.logger.info "Ensuring document_id index on #{coll.name}" if Mongoid.logger
  coll.indexes.public_send(create_method, 'document_id' => 1) # to make removes fast
end
fulltext_search_in(*args) click to toggle source
# File lib/mongoid/full_text_search.rb, line 24
# Declares a fulltext index on the model.
#
# args - the fields (method names) whose values are indexed, optionally followed by
#        an options Hash that overrides the defaults below. With no fields given,
#        :to_s is indexed. The :index_name option names the index; otherwise a name
#        is generated from the model name and the number of indexes declared so far.
#
# Stores the resulting configuration under the index name and registers the
# callbacks that keep the ngram index in sync with the model.
def fulltext_search_in(*args)
  self.mongoid_fulltext_config = {} if mongoid_fulltext_config.nil?

  options = {}
  options = args.pop if args.last.is_a?(Hash)

  # The generated name depends on how many indexes exist *before* this one is added.
  index_name = options.fetch(:index_name) do
    format('mongoid_fulltext.index_%s_%s', name.downcase, mongoid_fulltext_config.count)
  end

  stop_word_list = %w[i a s t me my we he it am is be do an if
                      or as of at by to up in on no so our you him
                      his she her its who are was has had did the and
                      but for out off why how all any few nor not own
                      too can don now ours your hers they them what whom
                      this that were been have does with into from down over
                      then once here when both each more most some such only
                      same than very will just yours their which these those
                      being doing until while about after above below under
                      again there where other myself itself theirs having during
                      before should himself herself because against between through
                      further yourself ourselves yourselves themselves]

  defaults = {
    alphabet: 'abcdefghijklmnopqrstuvwxyz0123456789 ',
    word_separators: "-_ \n\t",
    ngram_width: 3,
    max_ngrams_to_search: 6,
    apply_prefix_scoring_to_all_words: true,
    index_full_words: true,
    index_short_prefixes: false,
    max_candidate_set_size: 1000,
    remove_accents: true,
    reindex_immediately: true,
    stop_words: stop_word_list.map { |word| [word, true] }.to_h
  }
  config = defaults.merge(options)

  config[:ngram_fields] = args.empty? ? [:to_s] : args

  # Turn the character strings into lookup tables (char => char) for fast membership tests.
  %i[alphabet word_separators].each do |setting|
    config[setting] = config[setting].split('').map { |ch| [ch, ch] }.to_h
  end

  mongoid_fulltext_config[index_name] = config

  before_save(:update_ngram_index) if config[:reindex_immediately]
  before_destroy :remove_from_ngram_index
end
instantiate_mapreduce_result(result) click to toggle source
# File lib/mongoid/full_text_search.rb, line 193
# Loads the document a single search result points at: result[:clazz] names the
# model class and result[:id] is passed to that class's +find+.
def instantiate_mapreduce_result(result)
  model_class = result[:clazz].constantize
  model_class.find(result[:id])
end
instantiate_mapreduce_results(results, options) click to toggle source
# File lib/mongoid/full_text_search.rb, line 197
# Turns raw search results into documents, discarding results whose document
# resolves to nil.
#
# results - enumerable of result records (each is handed to instantiate_mapreduce_result).
# options - Hash; when options[:return_scores] is truthy each entry of the returned
#           Array is a [document, score] pair, otherwise just the document.
def instantiate_mapreduce_results(results, options)
  unless options[:return_scores]
    return results.map { |result| instantiate_mapreduce_result(result) }.compact
  end

  scored = results.map { |result| [instantiate_mapreduce_result(result), result[:score]] }
  scored.reject { |document, _score| document.nil? }
end
remove_from_ngram_index() click to toggle source
# File lib/mongoid/full_text_search.rb, line 281
# Deletes, from every configured ngram index collection, all records whose 'class'
# field equals this model's name.
def remove_from_ngram_index
  mongoid_fulltext_config.each_key do |index_name|
    records = collection.database[index_name].find('class' => name)
    # The bulk-delete method is named differently across Mongoid versions.
    if Mongoid::Compatibility::Version.mongoid5_or_newer?
      records.delete_many
    else
      records.remove_all
    end
  end
end
update_ngram_index() click to toggle source
# File lib/mongoid/full_text_search.rb, line 292
# Re-indexes every document returned by +all+ by calling update_ngram_index on each.
def update_ngram_index
  all.each do |document|
    document.update_ngram_index
  end
end

Private Instance Methods

format_query_filter(operator, key, value) click to toggle source
# File lib/mongoid/full_text_search.rb, line 312
# Builds one query condition for a filter.
#
# operator - the query operator to apply (callers in this module pass '$in' or '$all').
# key      - filter name; the condition targets the "filter_values.<key>" field.
# value    - a single value or a (possibly nested) Array of values; always flattened
#            into a flat Array.
#
# Returns a two-element Array: [field name, { operator => values }].
def format_query_filter(operator, key, value)
  field = 'filter_values.%s' % key
  operands = [value].flatten
  [field, { operator => operands }]
end
map_query_filters(filters) click to toggle source

Takes a list of filters and maps them to the query conditions that are applied to the ngram index during a fulltext search.

# File lib/mongoid/full_text_search.rb, line 300
# Translates search filters into query conditions on the ngram index.
#
# filters - enumerable of (key, value) pairs. A Hash value must carry either :any
#           (matched with '$in') or :all (matched with '$all'); :any wins when both
#           are present. Any other value is matched with '$all'.
#
# Returns a Hash of field name => condition, as produced by format_query_filter.
# Raises UnknownFilterQueryOperator when a Hash value has neither :any nor :all.
def map_query_filters(filters)
  conditions = filters.map do |key, value|
    if !value.is_a?(Hash)
      format_query_filter('$all', key, value)
    elsif value.key?(:any)
      format_query_filter('$in', key, value[:any])
    elsif value.key?(:all)
      format_query_filter('$all', key, value[:all])
    else
      raise UnknownFilterQueryOperator, value.keys.join(','), caller
    end
  end
  conditions.to_h
end