class Searchkick::Index

Attributes

name[R]
options[R]

Public Class Methods

new(name, options = {}) click to toggle source
# File lib/searchkick/index.rb, line 5
# Wraps a single Elasticsearch index identified by +name+.
# +options+ (batch size, language, mappings, ...) are kept for later use.
def initialize(name, options = {})
  @name, @options = name, options
end

Public Instance Methods

alias_exists?() click to toggle source
# File lib/searchkick/index.rb, line 26
# True if an alias named +name+ exists on the cluster.
def alias_exists?
  client.indices.exists_alias(name: name)
end
bulk_delete(records) click to toggle source
# File lib/searchkick/index.rb, line 55
# Queues a bulk delete for every record that has an id.
def bulk_delete(records)
  deletable = records.reject { |record| record.id.blank? }
  Searchkick.queue_items(deletable.map { |record| {delete: record_data(record)} })
end
bulk_index(records) click to toggle source
# File lib/searchkick/index.rb, line 59
# Queues a bulk index (upsert) for all records, including their search data.
def bulk_index(records)
  items = records.map do |record|
    {index: record_data(record).merge(data: search_data(record))}
  end
  Searchkick.queue_items(items)
end
Also aliased as: import
clean_indices() click to toggle source

Remove old indices whose names start with the index name

# File lib/searchkick/index.rb, line 144
# Deletes timestamped indices ("#{name}_YYYYMMDDHHMMSS[SSS]") that no longer
# have any alias pointing at them; returns the names of the deleted indices.
def clean_indices
  aliases =
    begin
      client.indices.get_aliases
    rescue Elasticsearch::Transport::Transport::Errors::NotFound
      {}
    end
  pattern = /\A#{Regexp.escape(name)}_\d{14,17}\z/
  stale = aliases.select { |index_name, info| (info.empty? || info["aliases"].empty?) && index_name =~ pattern }.keys
  stale.each { |index_name| Searchkick::Index.new(index_name).delete }
  stale
end
create(options = {}) click to toggle source
# File lib/searchkick/index.rb, line 10
# Creates the index with the given body (settings and mappings).
def create(options = {})
  client.indices.create(index: name, body: options)
end
create_index(options = {}) click to toggle source

reindex

# File lib/searchkick/index.rb, line 136
# Builds and creates a fresh timestamped index ("#{name}_YYYYMMDDHHMMSSLLL")
# for zero-downtime reindexing, returning the new Index object.
def create_index(options = {})
  body = options[:index_options] || self.index_options
  new_index = Searchkick::Index.new("#{name}_#{Time.now.strftime('%Y%m%d%H%M%S%L')}", @options)
  new_index.create(body)
  new_index
end
delete() click to toggle source
# File lib/searchkick/index.rb, line 14
# Drops the index entirely.
def delete
  client.indices.delete(index: name)
end
exists?() click to toggle source
# File lib/searchkick/index.rb, line 18
# True if the index exists.
def exists?
  client.indices.exists(index: name)
end
import(records)
Alias for: bulk_index
import_scope(scope) click to toggle source
# File lib/searchkick/index.rb, line 188
# Imports every indexable record in +scope+ in batches of
# @options[:batch_size] (default 1000). Uses find_in_batches when the scope
# supports it; otherwise iterates a cursor (e.g. Mongoid) and flushes manually.
def import_scope(scope)
  batch_size = @options[:batch_size] || 1000

  # a model may define a custom scope to use for importing
  scope = scope.search_import if scope.respond_to?(:search_import)

  if scope.respond_to?(:find_in_batches)
    scope.find_in_batches(batch_size: batch_size) do |group|
      import(group.select(&:should_index?))
    end
  else
    # https://github.com/karmi/tire/blob/master/lib/tire/model/import.rb
    # cursor-based iteration with a manual buffer
    buffer = []
    scope.all.each do |record|
      buffer << record if record.should_index?
      if buffer.length == batch_size
        import(buffer)
        buffer = []
      end
    end
    import(buffer)
  end
end
index_options() click to toggle source
# File lib/searchkick/index.rb, line 212
# Builds the complete index creation body ({settings:, mappings:}) from
# @options. If :mappings is supplied without :merge_mappings, the
# user-provided settings/mappings are used verbatim; otherwise Searchkick's
# analyzers, token filters, and dynamic templates are generated, with
# version-specific mappings for Elasticsearch < 2.2 and < 5.0.
def index_options
  options = @options
  language = options[:language]
  # :language may be a callable resolved at index-creation time
  language = language.call if language.respond_to?(:call)

  if options[:mappings] && !options[:merge_mappings]
    settings = options[:settings] || {}
    mappings = options[:mappings]
  else
    # version switches: < 2.2 has no ignore_above support here;
    # < 5.0 uses "string"/not_analyzed instead of "text"/"keyword"
    below22 = Searchkick.server_below?("2.2.0")
    below50 = Searchkick.server_below?("5.0.0-alpha1")
    default_type = below50 ? "string" : "text"
    default_analyzer = below50 ? :default_index : :default
    keyword_mapping =
      if below50
        {
          type: default_type,
          index: "not_analyzed"
        }
      else
        {
          type: "keyword"
        }
      end

    keyword_mapping[:ignore_above] = 256 unless below22

    settings = {
      analysis: {
        analyzer: {
          searchkick_keyword: {
            type: "custom",
            tokenizer: "keyword",
            filter: ["lowercase"] + (options[:stem_conversions] == false ? [] : ["searchkick_stemmer"])
          },
          default_analyzer => {
            type: "custom",
            # character filters -> tokenizer -> token filters
            # https://www.elastic.co/guide/en/elasticsearch/guide/current/analysis-intro.html
            char_filter: ["ampersand"],
            tokenizer: "standard",
            # synonym should come last, after stemming and shingle
            # shingle must come before searchkick_stemmer
            filter: ["standard", "lowercase", "asciifolding", "searchkick_index_shingle", "searchkick_stemmer"]
          },
          searchkick_search: {
            type: "custom",
            char_filter: ["ampersand"],
            tokenizer: "standard",
            filter: ["standard", "lowercase", "asciifolding", "searchkick_search_shingle", "searchkick_stemmer"]
          },
          searchkick_search2: {
            type: "custom",
            char_filter: ["ampersand"],
            tokenizer: "standard",
            filter: ["standard", "lowercase", "asciifolding", "searchkick_stemmer"]
          },
          # https://github.com/leschenko/elasticsearch_autocomplete/blob/master/lib/elasticsearch_autocomplete/analyzers.rb
          searchkick_autocomplete_index: {
            type: "custom",
            tokenizer: "searchkick_autocomplete_ngram",
            filter: ["lowercase", "asciifolding"]
          },
          searchkick_autocomplete_search: {
            type: "custom",
            tokenizer: "keyword",
            filter: ["lowercase", "asciifolding"]
          },
          searchkick_word_search: {
            type: "custom",
            tokenizer: "standard",
            filter: ["lowercase", "asciifolding"]
          },
          searchkick_suggest_index: {
            type: "custom",
            tokenizer: "standard",
            filter: ["lowercase", "asciifolding", "searchkick_suggest_shingle"]
          },
          searchkick_text_start_index: {
            type: "custom",
            tokenizer: "keyword",
            filter: ["lowercase", "asciifolding", "searchkick_edge_ngram"]
          },
          searchkick_text_middle_index: {
            type: "custom",
            tokenizer: "keyword",
            filter: ["lowercase", "asciifolding", "searchkick_ngram"]
          },
          # reverse before/after the edge ngram indexes the *end* of the text
          searchkick_text_end_index: {
            type: "custom",
            tokenizer: "keyword",
            filter: ["lowercase", "asciifolding", "reverse", "searchkick_edge_ngram", "reverse"]
          },
          searchkick_word_start_index: {
            type: "custom",
            tokenizer: "standard",
            filter: ["lowercase", "asciifolding", "searchkick_edge_ngram"]
          },
          searchkick_word_middle_index: {
            type: "custom",
            tokenizer: "standard",
            filter: ["lowercase", "asciifolding", "searchkick_ngram"]
          },
          searchkick_word_end_index: {
            type: "custom",
            tokenizer: "standard",
            filter: ["lowercase", "asciifolding", "reverse", "searchkick_edge_ngram", "reverse"]
          }
        },
        filter: {
          searchkick_index_shingle: {
            type: "shingle",
            token_separator: ""
          },
          # lucky find http://web.archiveorange.com/archive/v/AAfXfQ17f57FcRINsof7
          searchkick_search_shingle: {
            type: "shingle",
            token_separator: "",
            output_unigrams: false,
            output_unigrams_if_no_shingles: true
          },
          searchkick_suggest_shingle: {
            type: "shingle",
            max_shingle_size: 5
          },
          searchkick_edge_ngram: {
            type: "edgeNGram",
            min_gram: 1,
            max_gram: 50
          },
          searchkick_ngram: {
            type: "nGram",
            min_gram: 1,
            max_gram: 50
          },
          searchkick_stemmer: {
            # use stemmer if language is lowercase, snowball otherwise
            # TODO deprecate language option in favor of stemmer
            type: language == language.to_s.downcase ? "stemmer" : "snowball",
            language: language || "English"
          }
        },
        char_filter: {
          # https://www.elastic.co/guide/en/elasticsearch/guide/current/custom-analyzers.html
          # &_to_and
          ampersand: {
            type: "mapping",
            mappings: ["&=> and "]
          }
        },
        tokenizer: {
          searchkick_autocomplete_ngram: {
            type: "edgeNGram",
            min_gram: 1,
            max_gram: 50
          }
        }
      }
    }

    # keep the test environment lightweight
    if Searchkick.env == "test"
      settings.merge!(number_of_shards: 1, number_of_replicas: 0)
    end

    if options[:similarity]
      settings[:similarity] = {default: {type: options[:similarity]}}
    end

    # user-provided settings win over the generated defaults
    settings.deep_merge!(options[:settings] || {})

    # synonyms (may be a callable resolved at index-creation time)
    synonyms = options[:synonyms] || []

    synonyms = synonyms.call if synonyms.respond_to?(:call)

    if synonyms.any?
      settings[:analysis][:filter][:searchkick_synonym] = {
        type: "synonym",
        # only multi-term rows are real synonym rules
        synonyms: synonyms.select { |s| s.size > 1 }.map { |s| s.join(",") }
      }
      # choosing a place for the synonym filter when stemming is not easy
      # https://groups.google.com/forum/#!topic/elasticsearch/p7qcQlgHdB8
      # TODO use a snowball stemmer on synonyms when creating the token filter

      # http://elasticsearch-users.115913.n3.nabble.com/synonym-multi-words-search-td4030811.html
      # I find the following approach effective if you are doing multi-word synonyms (synonym phrases):
      # - Only apply the synonym expansion at index time
      # - Don't have the synonym filter applied at search time
      # - Use directional synonyms where appropriate. You want to make sure that you're not injecting terms that are too general.
      settings[:analysis][:analyzer][default_analyzer][:filter].insert(4, "searchkick_synonym")
      settings[:analysis][:analyzer][default_analyzer][:filter] << "searchkick_synonym"

      %w(word_start word_middle word_end).each do |type|
        settings[:analysis][:analyzer]["searchkick_#{type}_index".to_sym][:filter].insert(2, "searchkick_synonym")
      end
    end

    if options[:wordnet]
      settings[:analysis][:filter][:searchkick_wordnet] = {
        type: "synonym",
        format: "wordnet",
        synonyms_path: Searchkick.wordnet_path
      }

      settings[:analysis][:analyzer][default_analyzer][:filter].insert(4, "searchkick_wordnet")
      settings[:analysis][:analyzer][default_analyzer][:filter] << "searchkick_wordnet"

      %w(word_start word_middle word_end).each do |type|
        settings[:analysis][:analyzer]["searchkick_#{type}_index".to_sym][:filter].insert(2, "searchkick_wordnet")
      end
    end

    # special_characters: false disables accent folding in every analyzer
    if options[:special_characters] == false
      settings[:analysis][:analyzer].each do |_, analyzer_settings|
        analyzer_settings[:filter].reject! { |f| f == "asciifolding" }
      end
    end

    mapping = {}

    # conversions
    if (conversions_field = options[:conversions])
      mapping[conversions_field] = {
        type: "nested",
        properties: {
          query: {type: default_type, analyzer: "searchkick_keyword"},
          count: {type: "integer"}
        }
      }
    end

    # per-feature field lists from options, normalized to string field names
    mapping_options = Hash[
      [:autocomplete, :suggest, :word, :text_start, :text_middle, :text_end, :word_start, :word_middle, :word_end, :highlight, :searchable, :only_analyzed]
        .map { |type| [type, (options[type] || []).map(&:to_s)] }
    ]

    word = options[:word] != false && (!options[:match] || options[:match] == :word)

    # build a multi-field mapping for every field mentioned by any feature
    mapping_options.values.flatten.uniq.each do |field|
      fields = {}

      if mapping_options[:only_analyzed].include?(field)
        fields[field] = {type: default_type, index: "no"}
      else
        fields[field] = keyword_mapping
      end

      if !options[:searchable] || mapping_options[:searchable].include?(field)
        if word
          fields["analyzed"] = {type: default_type, index: "analyzed", analyzer: default_analyzer}

          if mapping_options[:highlight].include?(field)
            fields["analyzed"][:term_vector] = "with_positions_offsets"
          end
        end

        mapping_options.except(:highlight, :searchable, :only_analyzed).each do |type, f|
          if options[:match] == type || f.include?(field)
            fields[type] = {type: default_type, index: "analyzed", analyzer: "searchkick_#{type}_index"}
          end
        end
      end

      mapping[field] =
        if below50
          {
            type: "multi_field",
            fields: fields
          }
        elsif fields[field]
          fields[field].merge(fields: fields.except(field))
        end
    end

    (options[:locations] || []).map(&:to_s).each do |field|
      mapping[field] = {
        type: "geo_point"
      }
    end

    (options[:unsearchable] || []).map(&:to_s).each do |field|
      mapping[field] = {
        type: default_type,
        index: "no"
      }
    end

    routing = {}
    if options[:routing]
      routing = {required: true}
      unless options[:routing] == true
        routing[:path] = options[:routing].to_s
      end
    end

    dynamic_fields = {
      # analyzed field must be the default field for include_in_all
      # http://www.elasticsearch.org/guide/reference/mapping/multi-field-type/
      # however, we can include the not_analyzed field in _all
      # and the _all index analyzer will take care of it
      "{name}" => keyword_mapping.merge(include_in_all: !options[:searchable])
    }

    dynamic_fields["{name}"][:ignore_above] = 256 unless below22

    unless options[:searchable]
      if options[:match] && options[:match] != :word
        dynamic_fields[options[:match]] = {type: default_type, index: "analyzed", analyzer: "searchkick_#{options[:match]}_index"}
      end

      if word
        dynamic_fields["analyzed"] = {type: default_type, index: "analyzed"}
      end
    end

    # http://www.elasticsearch.org/guide/reference/mapping/multi-field-type/
    multi_field =
      if below50
        {
          type: "multi_field",
          fields: dynamic_fields
        }
      else
        dynamic_fields["{name}"].merge(fields: dynamic_fields.except("{name}"))
      end

    mappings = {
      _default_: {
        _all: {type: default_type, index: "analyzed", analyzer: default_analyzer},
        properties: mapping,
        _routing: routing,
        # https://gist.github.com/kimchy/2898285
        dynamic_templates: [
          {
            string_template: {
              match: "*",
              match_mapping_type: "string",
              mapping: multi_field
            }
          }
        ]
      }
    }.deep_merge(options[:mappings] || {})
  end

  {
    settings: settings,
    mappings: mappings
  }
end
klass_document_type(klass) click to toggle source
# File lib/searchkick/index.rb, line 569
# Document type for a model class: the class's own document_type when it
# defines one, otherwise the underscored model name.
def klass_document_type(klass)
  return klass.document_type if klass.respond_to?(:document_type)
  klass.model_name.to_s.underscore
end
mapping() click to toggle source
# File lib/searchkick/index.rb, line 30
# Fetches the current mapping from Elasticsearch.
def mapping
  client.indices.get_mapping(index: name)
end
record_data(r) click to toggle source
# File lib/searchkick/index.rb, line 64
# Bulk-item metadata for a record: index, id, type, plus routing when the
# record provides it.
def record_data(r)
  metadata = {_index: name, _id: search_id(r), _type: document_type(r)}
  metadata[:_routing] = r.search_routing if r.respond_to?(:search_routing)
  metadata
end
refresh() click to toggle source
# File lib/searchkick/index.rb, line 22
# Forces a refresh so recent writes become searchable.
def refresh
  client.indices.refresh(index: name)
end
reindex_record(record) click to toggle source
# File lib/searchkick/index.rb, line 82
# Stores the record when it should be indexed; otherwise removes it,
# ignoring the case where the document is already gone.
def reindex_record(record)
  unless record.destroyed? || !record.should_index?
    store(record)
    return
  end

  begin
    remove(record)
  rescue Elasticsearch::Transport::Transport::Errors::NotFound
    # document was already absent — nothing to do
  end
end
reindex_record_async(record) click to toggle source
# File lib/searchkick/index.rb, line 94
# Reindexes in the background when no callback mode is set; otherwise inline.
def reindex_record_async(record)
  if Searchkick.callbacks_value.nil?
    klass_name = record.class.name
    record_id = record.id.to_s
    if defined?(Searchkick::ReindexV2Job)
      Searchkick::ReindexV2Job.perform_later(klass_name, record_id)
    else
      Delayed::Job.enqueue Searchkick::ReindexJob.new(klass_name, record_id)
    end
  else
    reindex_record(record)
  end
end
reindex_scope(scope, options = {}) click to toggle source

gist.github.com/jarosan/3124884 www.elasticsearch.org/blog/changing-mapping-with-zero-downtime/

# File lib/searchkick/index.rb, line 160
# Zero-downtime reindex:
# https://gist.github.com/jarosan/3124884
# http://www.elasticsearch.org/blog/changing-mapping-with-zero-downtime/
def reindex_scope(scope, options = {})
  skip_import = options[:import] == false

  clean_indices

  new_index = create_index(index_options: scope.searchkick_index_options)

  if alias_exists?
    # alias already in place: fill the new index first, then swap the alias
    # over and clean up the indices left behind
    new_index.import_scope(scope) unless skip_import
    swap(new_index.name)
    clean_indices
  else
    # no alias yet: drop any concrete index with this name, point the alias
    # at the new index, then import
    delete if exists?
    swap(new_index.name)
    new_index.import_scope(scope) unless skip_import
  end

  new_index.refresh

  true
end
remove(record) click to toggle source
# File lib/searchkick/index.rb, line 51
# Removes a single record via the bulk API.
def remove(record)
  bulk_delete([record])
end
retrieve(record) click to toggle source
# File lib/searchkick/index.rb, line 74
# Fetches the stored _source document for a record.
def retrieve(record)
  response = client.get(index: name, type: document_type(record), id: search_id(record))
  response["_source"]
end
search_model(searchkick_klass, term = nil, options = {}, &block) click to toggle source

search

# File lib/searchkick/index.rb, line 124
# Builds a Searchkick::Query for the model and either returns it
# (execute: false) or runs it. An optional block may modify the query body.
def search_model(searchkick_klass, term = nil, options = {}, &block)
  query = Searchkick::Query.new(searchkick_klass, term, options)
  yield(query.body) if block
  options[:execute] == false ? query : query.execute
end
similar_record(record, options = {}) click to toggle source
# File lib/searchkick/index.rb, line 106
# Finds records similar to +record+ using a query built from its stored
# fields (optionally restricted via options[:fields]), excluding the record
# itself from the results.
def similar_record(record, options = {})
  doc = retrieve(record).to_hash
  allowed = options[:fields] && options[:fields].map(&:to_s)
  doc.keep_if { |field, _| !allowed || allowed.include?(field) }
  like_text = doc.values.compact.join(" ")

  # TODO deep merge method
  options[:where] ||= {}
  options[:where][:_id] ||= {}
  options[:where][:_id][:not] = record.id.to_s
  options[:per_page] ||= 10
  options[:similar] = true

  # TODO use index class instead of record class
  search_model(record.class, like_text, options)
end
store(record) click to toggle source

record based

# File lib/searchkick/index.rb, line 47
# Stores a single record via the bulk API.
def store(record)
  bulk_index([record])
end
swap(new_name) click to toggle source
# File lib/searchkick/index.rb, line 34
# Atomically points the alias +name+ at +new_name+, removing it from any
# indices it previously pointed to. A single update_aliases call is used so
# there is no window where the alias is missing.
#
# Fix: the NotFound rescue previously returned {} (a Hash) where the success
# path yields an Array of index names; use [] so old_indices is always an
# Array.
def swap(new_name)
  old_indices =
    begin
      client.indices.get_alias(name: name).keys
    rescue Elasticsearch::Transport::Transport::Errors::NotFound
      # no alias yet — nothing to remove
      []
    end
  actions = old_indices.map { |old_name| {remove: {index: old_name, alias: name}} }
  actions << {add: {index: new_name, alias: name}}
  client.indices.update_aliases body: {actions: actions}
end
tokens(text, options = {}) click to toggle source

other

# File lib/searchkick/index.rb, line 565
# Runs +text+ through the index's analysis chain (customizable via +options+,
# e.g. analyzer:) and returns the resulting token strings.
def tokens(text, options = {})
  analysis = client.indices.analyze({text: text, index: name}.merge(options))
  analysis["tokens"].map { |token| token["token"] }
end

Protected Instance Methods

cast_big_decimal(obj) click to toggle source

Convert all BigDecimal values to floats due to github.com/rails/rails/issues/6033 (accepting a possible loss of precision).

# File lib/searchkick/index.rb, line 645
# Recursively converts BigDecimal values to floats due to
# github.com/rails/rails/issues/6033 — possible loss of precision :/
# Hashes are mutated in place; other enumerables are rebuilt.
def cast_big_decimal(obj)
  if obj.is_a?(BigDecimal)
    obj.to_f
  elsif obj.is_a?(Hash)
    obj.keys.each { |key| obj[key] = cast_big_decimal(obj[key]) }
    obj
  elsif obj.is_a?(Enumerable)
    obj.map { |item| cast_big_decimal(item) }
  else
    obj
  end
end
client() click to toggle source
# File lib/searchkick/index.rb, line 579
# Shared Elasticsearch client.
def client
  Searchkick.client
end
document_type(record) click to toggle source
# File lib/searchkick/index.rb, line 583
# Document type for a record: the record's own search_document_type when
# defined, otherwise derived from its class.
def document_type(record)
  return record.search_document_type if record.respond_to?(:search_document_type)
  klass_document_type(record.class)
end
location_value(value) click to toggle source
# File lib/searchkick/index.rb, line 632
# Normalizes a location value: [lat, lon] arrays become float [lon, lat]
# (Elasticsearch order), hashes become {lat:, lon:} floats, anything else
# passes through untouched.
def location_value(value)
  case value
  when Array
    value.map(&:to_f).reverse
  when Hash
    {lat: value[:lat].to_f, lon: value[:lon].to_f}
  else
    value
  end
end
search_data(record) click to toggle source
# File lib/searchkick/index.rb, line 596
# Builds the JSON-ready document for a record from its search_data,
# normalizing keys, conversions, suggest fields, locations, and BigDecimals.
def search_data(record)
  source = record.search_data
  options = record.class.searchkick_options

  # stringify keys; drop _id since search_id is used as the document id
  source = source.each_with_object({}) { |(key, value), out| out[key.to_s] = value }.except("_id")

  # conversions: {query => count} pairs become [{query:, count:}]
  conversions_field = options[:conversions]
  if conversions_field && source[conversions_field]
    source[conversions_field] = source[conversions_field].map { |query, count| {query: query, count: count} }
  end

  # hack to prevent generator field doesn't exist error
  (options[:suggest] || []).map(&:to_s).each do |field|
    source[field] ||= nil
  end

  # normalize location fields, handling both single and multiple locations
  (options[:locations] || []).map(&:to_s).each do |field|
    value = source[field]
    next unless value
    if !value.is_a?(Hash) && (value.first.is_a?(Array) || value.first.is_a?(Hash))
      # multiple locations
      source[field] = value.map { |loc| location_value(loc) }
    else
      source[field] = location_value(value)
    end
  end

  # mutates source in place (floats instead of BigDecimal)
  cast_big_decimal(source)

  source.as_json
end
search_id(record) click to toggle source
# File lib/searchkick/index.rb, line 591
# Document id for a record: search_document_id when defined, else record.id;
# numeric ids pass through, everything else is stringified.
def search_id(record)
  raw_id = record.respond_to?(:search_document_id) ? record.search_document_id : record.id
  raw_id.is_a?(Numeric) ? raw_id : raw_id.to_s
end