class Corpus
Public Class Methods
new()
click to toggle source
# File lib/engine/corpus.rb, line 5
#
# Creates an empty corpus.
#
# Occurrences are tracked in @tokens, a hash that maps each token to the
# number of times it has been added so far.
def initialize
  @tokens = {}
end
Public Instance Methods
add(document)
click to toggle source
# File lib/engine/corpus.rb, line 13
#
# Records every token of +document+ in the corpus.
#
# +document+ must respond to +each_token+ and yield one token per call to
# the block. Each yielded token has its stored count increased by one, so a
# token that occurs several times in the document is counted several times.
#
# Returns whatever +document.each_token+ returns.
def add(document)
  document.each_token do |token|
    # token_count supplies 0 for tokens that have not been seen yet.
    @tokens[token] = token_count(token) + 1
  end
end
entry_count()
click to toggle source
# File lib/engine/corpus.rb, line 9
#
# Returns the total number of token occurrences recorded in the corpus,
# i.e. the sum of all per-token counts (not the number of distinct tokens).
# Returns 0 for an empty corpus.
def entry_count
  # Fold over each_value directly so no intermediate array of counts is built.
  @tokens.each_value.inject(0, :+)
end
load_from_directory(directory)
click to toggle source
# File lib/engine/corpus.rb, line 19
#
# Adds every line of every "*.txt" file found directly inside +directory+
# to the corpus.
#
# Each file is read as UTF-8, one line at a time; every line (including its
# line terminator) is wrapped in Document.new and handed to #add.
#
# Returns nil.
def load_from_directory(directory)
  # Pass the directory as `base:` rather than interpolating it into the
  # pattern: glob metacharacters in the path ([], {}, *, ?) would otherwise
  # be interpreted as part of the pattern and match the wrong files or none.
  Dir.glob('*.txt', base: directory) do |name|
    path = File.join(directory, name)
    # File.foreach never treats a leading "|" as a command, unlike IO.foreach.
    File.foreach(path, encoding: Encoding::UTF_8) do |line|
      add Document.new(line)
    end
  end
end
token_count(token)
click to toggle source
# File lib/engine/corpus.rb, line 27
#
# Returns how many times +token+ has been recorded in the corpus, or 0 when
# the token has never been added.
def token_count(token)
  @tokens.fetch(token, 0)
end