class Rlid::NGramModel

Subclasses should implement generate_model, filename, load and save.

Public Class Methods

language_models() click to toggle source
# File lib/rlid/models/model.rb, line 33
# Builds a hash (lang => model) with one loaded model per language.
#
# The receiver must respond to +filename+ (expected to be supplied by the
# concrete subclass). Language.each_file(filename) is expected to yield a
# (file, lang) pair per stored model -- its exact lookup rules live outside
# this method.
#
# Raises RuntimeError when +filename+ is not defined on the receiver.
def self.language_models
  unless defined?(filename)
    # Inside a class method +self+ already is the class; +self.class+ would
    # always render as "Class" and hide which model lacks the accessor.
    raise "#{self} should implement 'filename' accessor!"
  end

  models = {}
  Language.each_file(filename) do |file, lang|
    # Create an empty model (no training string) and fill it from disk.
    model = new(nil)
    model.load(file)
    models[lang] = model
  end
  models
end
new(string=nil, n=3, cutoff=300) click to toggle source
# File lib/rlid/models/model.rb, line 16
# Creates a model with n-gram size +n+ and the given +cutoff+.
#
# When +string+ is nil only the parameters are stored (the model is expected
# to be filled in later, e.g. by loading it). Otherwise every n-gram yielded
# by string.each_ngram(n) is counted and the resulting hash (ngram => count)
# is handed to generate_model.
def initialize(string=nil, n=3, cutoff=300)
  @n = n
  @cutoff = cutoff
  return if string.nil?

  # Occurrences of each n-gram; unseen n-grams start at zero.
  counts = Hash.new(0)
  string.each_ngram(@n) { |gram| counts[gram] += 1 }

  generate_model(counts)
end

Protected Instance Methods

generate_model(ngram_count) click to toggle source

Should be implemented in the subclass. ngram_count is a hash: ngram => count.

# File lib/rlid/models/model.rb, line 49
# Hook that concrete models override to build their internal representation.
#
# ngram_count is a hash: ngram => count.
#
# This base implementation always raises RuntimeError, naming the class that
# failed to override it. The parameter is intentionally unused here; Ruby
# does not warn about unused method parameters, so no dummy reference to it
# is needed.
def generate_model(ngram_count)
  raise "#{self.class} should be subclassed"
end