module Bm25::Utils
Public Class Methods
separate_document(document)
click to toggle source
# File lib/bm25/utils.rb, line 19
# Splits a document into segments at sentence-level delimiters.
#
# A segment is a maximal run of characters containing none of the
# delimiters: the ideographic full stop ("。"), ".", ",", "\r" or "\n".
# Empty segments are never produced, and surrounding whitespace inside a
# segment is kept as-is (no stripping is performed).
#
# @param document [String] text to split
# @return [Array<String>] the segments in order of appearance; empty when
#   the document contains only delimiters or is empty
def separate_document(document)
  # Inside a character class "^" (after the first position) and "|" are
  # literal characters, not negation/alternation, so they must not be listed
  # here or text such as "a|b" or "x^2" would be split as well.
  document.scan(/[^。.,\r\n]+/)
end
separate_words(document)
click to toggle source
# File lib/bm25/utils.rb, line 7
# Collects the surface strings of the nodes MeCab yields for a document.
#
# A fresh Natto::MeCab instance parses the document; each yielded node is
# passed to Bm25::Validator.validate_word, and nodes for which that call
# returns a truthy value are left out of the result.
# NOTE(review): the validator is defined elsewhere; presumably a truthy
# result marks a word to exclude (e.g. a stop word) - confirm against it.
#
# @param document [String] text to tokenize
# @return [Array<String>] surfaces of the retained nodes, in parse order
def separate_words(document)
  surfaces = []
  Natto::MeCab.new.parse(document) do |node|
    surfaces << node.surface unless Bm25::Validator.validate_word(node)
  end
  surfaces
end