class Bunto::LunrJsSearch::Indexer

Public Class Methods

new(config = {}) click to toggle source
Calls superclass method
# File lib/bunto_lunr_js_search/indexer.rb, line 10
# Prepares the indexer: merges the 'lunr_search' section of the config over
# the built-in defaults, locates lunr.min.js, and creates an empty lunr index
# inside a V8 context.
#
# config - the configuration hash; only the 'lunr_search' key is read here,
#          the whole hash is also handed to the superclass.
#
# Raises RuntimeError when lunr.min.js is found neither next to the gem nor
# in the configured js_dir.
def initialize(config = {})
  super(config)

  defaults = {
    'excludes' => [],
    'strip_index_html' => false,
    'min_length' => 3,
    'stopwords' => 'stopwords.txt',
    'fields' => {
      'title' => 10,
      'categories' => 20,
      'tags' => 20,
      'body' => 1
    },
    'js_dir' => 'js'
  }
  # user-supplied settings win over the defaults (top-level keys only)
  lunr_config = defaults.merge(config['lunr_search'] || {})

  @js_dir = lunr_config['js_dir']

  # prefer the copy next to the gem, otherwise look in js_dir
  gem_lunr = File.join(File.dirname(__FILE__), "../../build/lunr.min.js")
  @lunr_path =
    if File.exist?(gem_lunr)
      gem_lunr
    else
      File.join(@js_dir, File.basename(gem_lunr))
    end
  raise "Could not find #{@lunr_path}" unless File.exist?(@lunr_path)

  js = V8::Context.new
  js.load(@lunr_path)
  # configuration callback handed to lunr(): document ref plus boosted fields
  js['indexer'] = proc do |this|
    this.ref('id')
    lunr_config['fields'].each_pair do |field_name, weight|
      this.field(field_name, { 'boost' => weight })
    end
  end
  @index = js.eval('lunr(indexer)')
  @lunr_version = js.eval('lunr.version')

  @docs = {}
  @excludes = lunr_config['excludes']

  # if the web host serves index.html as the default document, it can
  # optionally be dropped from the url
  @strip_index_html = lunr_config['strip_index_html']

  # stop word exclusion configuration
  @min_length = lunr_config['min_length']
  @stopwords_file = lunr_config['stopwords']
end

Public Instance Methods

generate(site) click to toggle source

Index all pages except those whose URL matches any pattern in the `excludes` config value or whose front matter sets `exclude_from_search`. The main content from each page is extracted and saved to disk as JSON.

# File lib/bunto_lunr_js_search/indexer.rb, line 55
# Builds the lunr.js search index for the given site and writes it to disk.
#
# Every item returned by pages_to_index is wrapped in a SearchEntry, added to
# the lunr index, and recorded (without its body) in the document map. The
# combined result is written to <site.dest>/<js_dir>/index.json. When lunr.js
# was loaded from a directory other than the site's JavaScript directory, the
# *.min.js files next to it are copied in as well. Every written file is
# appended to site.static_files.
#
# site - the site being generated; must respond to dest and static_files.
#
# Returns the Array of written file paths, relative to site.dest.
def generate(site)
  Bunto.logger.info "Lunr:", 'Creating search index...'

  @site = site
  # gather pages and posts
  items = pages_to_index(site)
  content_renderer = PageRenderer.new(site)
  # File.exist? rather than the File.exists? alias, which was removed in
  # Ruby 3.2. The answer does not change per item, so ask only once.
  strip_stopwords = File.exist?(@stopwords_file)

  items.each_with_index do |item, i|
    entry = SearchEntry.create(item, content_renderer)

    entry.strip_index_suffix_from_url! if @strip_index_html
    entry.strip_stopwords!(stopwords, @min_length) if strip_stopwords

    doc = {
      "id" => i,
      "title" => entry.title,
      "url" => entry.url,
      "date" => entry.date,
      "categories" => entry.categories,
      "tags" => entry.tags,
      "is_post" => entry.is_post,
      "body" => entry.body
    }

    @index.add(doc)
    # the body is handed to the index only; the stored document omits it
    doc.delete("body")
    @docs[i] = doc

    Bunto.logger.debug "Lunr:", (entry.title ? "#{entry.title} (#{entry.url})" : entry.url)
  end

  site_js = File.join(site.dest, @js_dir)
  FileUtils.mkdir_p(site_js)
  filename = File.join(@js_dir, 'index.json')

  total = {
    "docs" => @docs,
    "index" => @index.to_hash
  }

  File.write(File.join(site.dest, filename), JSON.dump(total))
  Bunto.logger.info "Lunr:", "Index ready (lunr.js v#{@lunr_version})"
  added_files = [filename]

  # If we're using the gem, add the lunr and search JS files to the _site
  if File.expand_path(site_js) != File.dirname(@lunr_path)
    extras = Dir.glob(File.join(File.dirname(@lunr_path), "*.min.js"))
    FileUtils.cp(extras, site_js)
    extras.map! { |min| File.join(@js_dir, File.basename(min)) }
    Bunto.logger.debug "Lunr:", "Added JavaScript to #{@js_dir}"
    added_files.push(*extras)
  end

  # Keep the written files from being cleaned by Bunto
  added_files.each do |added_file|
    site.static_files << SearchIndexFile.new(site, site.dest, "/", added_file)
  end
end

Private Instance Methods

output_ext(doc) click to toggle source
# File lib/bunto_lunr_js_search/indexer.rb, line 124
# Returns the output extension for the given page or document.
#
# A Bunto::Document is asked through a Bunto::Renderer built for the current
# site; anything else is asked for its output_ext directly.
def output_ext(doc)
  return doc.output_ext unless doc.is_a?(Bunto::Document)

  renderer = Bunto::Renderer.new(@site, doc)
  renderer.output_ext
end
pages_to_index(site) click to toggle source
# File lib/bunto_lunr_js_search/indexer.rb, line 132
# Collects the pages and documents that should go into the search index.
#
# An item is kept when it responds to output_ext, its output extension is
# '.html', its url matches none of the @excludes patterns, and its data does
# not set 'exclude_from_search'.
#
# site - the site; must respond to pages and documents.
#
# Returns an Array of copies (made with dup) of the items to index.
def pages_to_index(site)
  # compile the exclude patterns once instead of once per item and pattern
  exclude_patterns = @excludes.map { |pattern| Regexp.new(pattern) }

  # shallow-copy (dup) pages and documents (all collections, including posts)
  items = site.pages.map(&:dup) + site.documents.map(&:dup)

  # only process files that will be converted to .html and only non excluded files
  items.select! do |item|
    item.respond_to?(:output_ext) &&
      output_ext(item) == '.html' &&
      exclude_patterns.none? { |pattern| pattern.match?(item.url) }
  end
  items.reject! { |item| item.data['exclude_from_search'] }

  items
end
stopwords() click to toggle source

Load the stopwords file.

# File lib/bunto_lunr_js_search/indexer.rb, line 120
# Loads the stopword list: one entry per line of @stopwords_file, with
# surrounding whitespace stripped. The result is memoized in @stopwords.
#
# File.readlines is used instead of IO.readlines so that a configured path
# beginning with "|" is treated as a file name and never run as a command.
#
# Returns an Array of Strings.
def stopwords
  @stopwords ||= File.readlines(@stopwords_file).map(&:strip)
end