class Rlid::OrderedNGrams

A subclass should define the filename.

Constants

N

Attributes

ngram_pos[R]

Public Class Methods

new(string, cutoff=300) click to toggle source
Calls superclass method Rlid::NGramModel::new
# File lib/rlid/models/ordered_ngrams.rb, line 9
# Builds the model by forwarding to the superclass constructor, inserting
# this class's N constant between the two caller-supplied arguments.
#
# string - passed through unchanged as the first superclass argument.
# cutoff - passed through unchanged as the third superclass argument
#          (defaults to 300).
#
# NOTE(review): N is presumably the n-gram length and cutoff the number of
# top n-grams kept -- confirm against the superclass.
def initialize(string, cutoff = 300)
  super(string, N, cutoff)
end

Public Instance Methods

-(other) click to toggle source
# File lib/rlid/models/ordered_ngrams.rb, line 38
# Distance between this model and +other+.
#
# Walks every n-gram recorded in +other+ and accumulates:
# * the absolute difference between its position here and its position in
#   +other+, when this model also contains the n-gram;
# * @cutoff (used as the maximum distance) when this model lacks it.
#
# Only the n-grams of +other+ are visited, so the result is not necessarily
# symmetric: n-grams present here but absent from +other+ add nothing.
#
# other - another OrderedNGrams instance.
#
# Returns the accumulated distance (0 when +other+ has no n-grams).
# Raises InvalidArgument when +other+ is not an OrderedNGrams.
# NOTE(review): InvalidArgument is not a Ruby core exception class; it is
# presumably defined elsewhere in the project -- confirm.
def -(other)
  raise InvalidArgument unless other.is_a?(OrderedNGrams)

  other.ngram_pos.inject(0) do |total, (ngram, other_rank)|
    own_rank = ngram_pos[ngram]
    # A missing n-gram costs the maximum distance.
    total + (own_rank.nil? ? @cutoff : (own_rank - other_rank).abs)
  end
end
generate_model(ngram_count) click to toggle source
# File lib/rlid/models/ordered_ngrams.rb, line 30
# Rebuilds @ngram_pos from raw n-gram counts.
#
# The entries of +ngram_count+ are ordered by descending count, the first
# @cutoff of them are kept, and each kept n-gram is mapped to its 0-based
# position in that ordering.
#
# ngram_count - object whose #to_a yields [ngram, count] pairs (e.g. a Hash).
#
# Returns the Array of kept [ngram, count] pairs.
#
# The comparator looks only at the count, and Array#sort is not guaranteed to
# be stable, so the relative order of n-grams with equal counts is unspecified.
def generate_model(ngram_count)
  by_count_desc = ngram_count.to_a.sort { |a, b| b[1] <=> a[1] }
  top = by_count_desc[0...@cutoff]

  @ngram_pos = {} # n-gram => position
  top.each_with_index { |(ngram, _count), rank| @ngram_pos[ngram] = rank }
  top
end
load(file) click to toggle source
# File lib/rlid/models/ordered_ngrams.rb, line 19
# Rebuilds @ngram_pos by reading one n-gram per line from +file+.
#
# Each line is reduced to its first N characters and stored with the 0-based
# index of the line as its position; anything after those N characters
# (including any position that #save wrote) is discarded. A line with fewer
# than N characters before its newline does not match the pattern and is
# stored unchanged.
#
# file - object responding to #each_line (e.g. an IO).
def load(file)
  @ngram_pos = {}
  # N is a constant, so the pattern is built once rather than per line.
  leading_ngram = /^(.{#{N}}).*\n?/
  line_index = 0
  file.each_line do |line|
    @ngram_pos[line.gsub(leading_ngram, '\1')] = line_index
    line_index += 1
  end
end
save(file) click to toggle source
# File lib/rlid/models/ordered_ngrams.rb, line 13
# Writes the model to +file+, one line per n-gram, in the iteration order of
# @ngram_pos. Each line is the n-gram, four spaces, its position, and a
# newline.
#
# file - object responding to #write (e.g. an IO opened for writing).
def save(file)
  @ngram_pos.each_pair { |ngram, rank| file.write("#{ngram}    #{rank}\n") }
end