class Rlid::FrequencyModel

Constants

N

Attributes

ngram_frequency[R]

Public Class Methods

filename() click to toggle source
# File lib/rlid/models/cosine_distance_model.rb, line 41
# Base name under which this model is stored.
#
# @return [String] the fixed name "cosine_distance3000"
def self.filename
  # FIXME: should be frequency3000
  "cosine_distance3000"
end
new(string, cutoff=3000) click to toggle source
Calls superclass method
# File lib/rlid/models/cosine_distance_model.rb, line 10
# Creates a frequency model by delegating to the superclass initializer.
#
# string - forwarded unchanged as the first argument to super
#          (presumably the training text; confirm against the superclass).
# cutoff - forwarded unchanged as the third argument to super; defaults to
#          3000. NOTE(review): generate_model keeps only the first @cutoff
#          ngrams, so the superclass presumably stores this value in @cutoff
#          -- confirm, the superclass is not visible here.
def initialize(string, cutoff=3000)
  # N is this class's constant; its value is defined outside this excerpt.
  super(string, N, cutoff)
end

Public Instance Methods

generate_model(ngram_count) click to toggle source
# File lib/rlid/models/cosine_distance_model.rb, line 22
# Builds @ngram_frequency from raw ngram counts.
#
# The [ngram, count] pairs are ordered by descending count, only the first
# @cutoff pairs are kept, and every kept count is divided by the sum of the
# kept counts, so the stored values are relative frequencies among the
# retained ngrams.
#
# ngram_count - object whose #to_a yields [ngram, count] pairs
#               (e.g. a Hash mapping ngram to count).
#
# Returns the Hash stored in @ngram_frequency (ngram => relative frequency).
def generate_model(ngram_count)
  # Highest counts first; everything past @cutoff is discarded.
  ranked = ngram_count.to_a.sort { |(_, left), (_, right)| right <=> left }
  kept = ranked[0...@cutoff]

  # Float accumulator so the division below is never integer division.
  total = kept.inject(0.0) { |acc, (_ngram, count)| acc + count }

  # Key is the ngram, value is its share of the retained total.
  @ngram_frequency = kept.each_with_object({}) do |(ngram, count), frequencies|
    frequencies[ngram] = count / total
  end
end
load(file) click to toggle source
# File lib/rlid/models/cosine_distance_model.rb, line 18
# Restores @ngram_frequency from a previously saved model.
#
# file - readable IO-like object; its entire remaining content is read and
#        deserialized with Marshal.
#
# NOTE(review): Marshal.load can instantiate arbitrary objects, so this
# should only be fed model files from a trusted source.
#
# Returns the deserialized object now held in @ngram_frequency.
def load(file)
  serialized = file.read
  @ngram_frequency = Marshal.load(serialized)
end
save(file) click to toggle source
# File lib/rlid/models/cosine_distance_model.rb, line 14
# Persists the model by writing the Marshal serialization of
# @ngram_frequency to the given file.
#
# file - writable IO-like object.
#
# Returns whatever file.write returns (the byte count for standard IO).
def save(file)
  serialized = Marshal.dump(@ngram_frequency)
  file.write(serialized)
end