module Bm25::Utils

Public Class Methods

separate_document(document)
# File lib/bm25/utils.rb, line 19
# Splits a document into segments at sentence-level delimiters.
#
# The delimiters are the ideographic full stop (。), the ASCII period and
# comma, and the line-break characters CR and LF. Runs of consecutive
# delimiters never produce empty segments, because only non-empty runs of
# non-delimiter characters are matched.
#
# @param document [String] the text to split
# @return [Array<String>] the segments in order of appearance; empty when
#   the document contains nothing but delimiters (or is empty)
def separate_document(document)
  # Inside a character class `|` and a non-leading `^` are literal characters,
  # not alternation/negation, so they must not be listed here: otherwise text
  # such as "a|b" or "x^y" would be split on them as well.
  document.scan(/[^。.,\r\n]+/)
end
separate_words(document)
# File lib/bm25/utils.rb, line 7
# Tokenizes a document with MeCab and collects the surface form of each
# node that is not rejected by the validator.
#
# A fresh Natto::MeCab instance is created on every call. Each node yielded
# by the parser is passed to Bm25::Validator.validate_word; when that call
# returns a truthy value the node is skipped.
# NOTE(review): a truthy result presumably marks the node as one to exclude
# (e.g. a stop word or symbol) -- confirm against Bm25::Validator.
#
# @param document [String] the text to tokenize
# @return [Array] the surfaces of the retained nodes, in parse order
def separate_words(document)
  surfaces = []
  Natto::MeCab.new.parse(document) do |node|
    surfaces << node.surface unless Bm25::Validator.validate_word(node)
  end
  surfaces
end