class PerseusMatch::PhraseTokenSet

Public Class Methods

tokenize(form, unknowns = false)
    # File lib/perseus_match/token_set.rb
# Builds a phrase token set for +form+ and memoizes it.
#
# +form+ is split into the pieces matched by PRINTABLE_CHAR_RE; each piece is
# handed to TokenSet.tokenize together with +unknowns+, and the results are
# wrapped via +new(form, token_sets)+.
#
# The cache is keyed on both +form+ and the truthiness of +unknowns+, so a
# call with a different +unknowns+ flag is never answered with an entry that
# was built for the other flag.
#
# NOTE(review): @tokens entries are now keyed by [form, flag] pairs rather
# than by +form+ alone -- confirm nothing else reads @tokens directly.
def tokenize(form, unknowns = false)
  cache = (@tokens ||= {})
  key   = [form, unknowns ? true : false]

  cache[key] ||= new(form, form.scan(PRINTABLE_CHAR_RE).map { |part|
    TokenSet.tokenize(part, unknowns)
  })
end

Public Instance Methods

distance(other)

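Returns the edit distance between this phrase's token sets and those of other. Assuming the per-token-set distance lies between 0 and 1, the result satisfies: (size1 - size2).abs <= distance <= [size1, size2].max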

    # File lib/perseus_match/token_set.rb
# Edit distance between this object's token sets and those of +other+.
#
# Adding or dropping a token set costs 1; replacing one token set by another
# costs whatever +token_set.distance(other_token_set)+ returns. If either side
# has no token sets, the size of the other side is returned.
def distance(other)
  rows, cols = token_sets, other.token_sets
  row_count, col_count = rows.size, cols.size

  return col_count if row_count == 0
  return row_count if col_count == 0

  # previous[j] is the cost of turning the rows handled so far into the
  # first j columns; it starts out as the cost of j pure insertions.
  previous = (0..col_count).to_a

  row_count.times do |row|
    row_set = rows[row]
    current = [row + 1]

    col_count.times do |col|
      substitution = previous[col] + row_set.distance(cols[col])

      # Candidate order is kept (insertion, deletion, substitution) so that
      # ties are resolved in favour of the same element.
      current << [previous[col + 1] + 1, current[col] + 1, substitution].min
    end

    previous = current
  end

  previous.last
end
excl(wcs)
    # File lib/perseus_match/token_set.rb
# Returns a new instance of the receiver's class, built from the same +form+
# and from the result of calling +excl(wcs)+ on every contained token set.
def excl(wcs)
  klass  = self.class
  phrase = form
  pruned = map do |member|
    member.excl(wcs)
  end

  klass.new(phrase, pruned)
end
forms()
    # File lib/perseus_match/token_set.rb
# Collects +forms+ from every contained token set; the result is memoized in
# @forms after the first call.
def forms
  @forms ||= map(&:forms)
end
incl(wcs)
    # File lib/perseus_match/token_set.rb
# Returns a new instance of the receiver's class, built from the same +form+
# and from the result of calling +incl(wcs)+ on every contained token set.
def incl(wcs)
  klass  = self.class
  phrase = form
  kept   = map do |member|
    member.incl(wcs)
  end

  klass.new(phrase, kept)
end
soundex()
    # File lib/perseus_match/token_set.rb
# Returns a new instance of the receiver's class, built from the same +form+
# and from the +soundex+ of every contained token set. The result is memoized
# in @soundex; +ensure_soundex!+ is invoked on every call, before the cache
# is consulted.
def soundex
  ensure_soundex!

  @soundex ||= begin
    klass  = self.class
    phrase = form
    klass.new(phrase, map(&:soundex))
  end
end