class Rlid::FrequencyModel
Constants
- N
Attributes
ngram_frequency[R]
Public Class Methods
filename()
click to toggle source
# File lib/rlid/models/cosine_distance_model.rb, line 41
# Returns the base name associated with this model class.
#
# FIXME: should be "frequency3000". The value is left unchanged here;
# NOTE(review): presumably existing model files are stored under the current
# name, so confirm before renaming.
def self.filename
  "cosine_distance3000"
end
new(string, cutoff=3000)
click to toggle source
Calls superclass method
# File lib/rlid/models/cosine_distance_model.rb, line 10
# Creates a model by delegating to the superclass constructor with the
# class constant N inserted between the two arguments.
#
# string - passed through unchanged to the superclass.
# cutoff - passed through unchanged to the superclass (default: 3000).
def initialize(string, cutoff = 3000)
  super(string, N, cutoff)
end
Public Instance Methods
generate_model(ngram_count)
click to toggle source
# File lib/rlid/models/cosine_distance_model.rb, line 22
# Builds @ngram_frequency from raw n-gram counts: keeps the @cutoff most
# frequent n-grams and stores, for each of them, its count divided by the
# total count of the kept n-grams.
#
# ngram_count - a collection whose #to_a yields [ngram, count] pairs
#               (e.g. a Hash mapping n-gram to numeric count).
#
# Reads @cutoff (NOTE(review): presumably set by the superclass constructor;
# it is not assigned anywhere in this block).
#
# Returns the new @ngram_frequency Hash (ngram => relative frequency).
def generate_model(ngram_count)
  # Rank by descending count. The original position is used as a secondary
  # key so that n-grams with equal counts keep their input order; a plain
  # block sort is not stable, which made the selection at the cutoff
  # boundary arbitrary.
  ranked = ngram_count.to_a.sort_by.with_index do |(_ngram, count), index|
    [-count, index]
  end
  top = ranked[0...@cutoff] # only these entries are kept

  # Float accumulator so that the division below is never integer division.
  total = top.inject(0.0) { |sum, (_ngram, count)| sum + count }

  # key is the n-gram, value is its normalized frequency
  @ngram_frequency = top.each_with_object({}) do |(ngram, count), frequencies|
    # A zero total (every kept count is 0) would yield NaN; report 0.0 instead.
    frequencies[ngram] = total.zero? ? 0.0 : count / total
  end
end
load(file)
click to toggle source
# File lib/rlid/models/cosine_distance_model.rb, line 18
# Restores @ngram_frequency from a previously serialized model.
#
# file - an object responding to #read that returns Marshal-encoded data.
#
# Returns the deserialized object that was stored in @ngram_frequency.
#
# NOTE(review): Marshal.load can instantiate arbitrary objects; only call
# this with model files from a trusted origin.
def load(file)
  serialized = file.read
  @ngram_frequency = Marshal.load(serialized)
end
save(file)
click to toggle source
# File lib/rlid/models/cosine_distance_model.rb, line 14
# Serializes @ngram_frequency with Marshal and writes it to the given file.
#
# file - an object responding to #write.
#
# Returns whatever file.write returns.
def save(file)
  serialized = Marshal.dump(@ngram_frequency)
  file.write(serialized)
end