class Rlid::OrderedNGrams
A subclass should define the filename.
Constants
- N
Attributes
ngram_pos[R]
Public Class Methods
new(string, cutoff=300)
click to toggle source
Calls superclass method
Rlid::NGramModel::new
# File lib/rlid/models/ordered_ngrams.rb, line 9
# Creates the model by forwarding +string+, the class constant N and
# +cutoff+ (300 unless the caller overrides it) to the superclass
# constructor, in that order.
def initialize(string, cutoff = 300)
  super(string, N, cutoff)
end
Public Instance Methods
-(other)
click to toggle source
# File lib/rlid/models/ordered_ngrams.rb, line 38
# Rank-displacement distance between this model and +other+.
#
# For every n-gram ranked in +other+: if this model ranks it too, the
# absolute difference of the two positions is added; otherwise @cutoff is
# added as the maximum penalty. N-grams ranked only by this model do not
# contribute, so the measure is not symmetric.
#
# other - an OrderedNGrams instance.
#
# Returns the accumulated distance (0 when +other+ ranks no n-grams).
# Raises InvalidArgument when +other+ is not an OrderedNGrams.
def -(other)
  # NOTE(review): InvalidArgument is not a Ruby core class and is not defined
  # in the visible source; if the project does not define it, this line
  # raises NameError instead. Confirm, or switch to ArgumentError.
  raise InvalidArgument unless other.is_a?(OrderedNGrams)

  other.ngram_pos.inject(0) do |dist, (ngram, pos_other)|
    pos_self = ngram_pos[ngram]
    # An n-gram this model does not rank costs the maximum distance.
    dist + (pos_self.nil? ? @cutoff : (pos_self - pos_other).abs)
  end
end
generate_model(ngram_count)
click to toggle source
# File lib/rlid/models/ordered_ngrams.rb, line 30
# Builds @ngram_pos, a hash mapping each retained n-gram to its rank
# (0 = highest count), from +ngram_count+.
#
# ngram_count - a collection whose #to_a yields [ngram, count] pairs
#               (e.g. a Hash of n-gram => count).
#
# Only the first @cutoff entries of the ranking are kept. Entries with equal
# counts keep the relative order they had in +ngram_count+: Array#sort is not
# stable, so without the explicit index tie-break their ranks would depend on
# the sort implementation.
#
# Returns the array of retained [ngram, count] pairs, best first.
def generate_model(ngram_count)
  indexed = ngram_count.to_a.each_with_index.to_a
  ranked = indexed.sort do |(a, a_idx), (b, b_idx)|
    by_count = b[1] <=> a[1] # descending by count
    by_count == 0 ? a_idx <=> b_idx : by_count
  end
  top = ranked[0...@cutoff].map { |pair, _idx| pair }

  @ngram_pos = {} # key is the n-gram, value is its position
  top.each_with_index { |(ngram, _count), rank| @ngram_pos[ngram] = rank }
end
load(file)
click to toggle source
# File lib/rlid/models/ordered_ngrams.rb, line 19
# Rebuilds @ngram_pos from +file+, discarding any previous contents.
#
# file - an object responding to #each_line (one n-gram per line).
#
# The n-gram is the first N characters of each line; whatever follows on the
# line is ignored. The rank assigned to an n-gram is the zero-based index of
# its line, so the order of the lines is what defines the ranking.
def load(file)
  @ngram_pos = {}
  file.each_line.each_with_index do |line, pos|
    # A fixed-width slice yields the n-gram directly; no per-line regex needed.
    @ngram_pos[line[0, N]] = pos
  end
end
save(file)
click to toggle source
# File lib/rlid/models/ordered_ngrams.rb, line 13
# Writes the model to +file+, one "<ngram> <position>\n" line per n-gram.
#
# file - an object responding to #write.
#
# Lines are emitted in ascending position order. #load assigns ranks from
# line order and ignores the number written here, so the output order must
# match the ranking rather than whatever order the hash happens to iterate in.
#
# Returns @ngram_pos.
def save(file)
  @ngram_pos.sort_by { |_ngram, pos| pos }.each do |ngram, pos|
    file.write "#{ngram} #{pos}\n"
  end
  @ngram_pos
end