class PerseusMatch::PhraseTokenSet
Public Class Methods
tokenize(form, unknowns = false)
click to toggle source
# File lib/perseus_match/token_set.rb, line 259
#
# Builds (and caches) the phrase token set for +form+.
#
# +form+ is scanned with PRINTABLE_CHAR_RE; every match is tokenized via
# TokenSet.tokenize and the resulting list is handed to +new+ together
# with the original +form+.
#
# form::     the string to tokenize
# unknowns:: forwarded unchanged to TokenSet.tokenize (default: false)
#
# Returns the cached instance for +form+ if one exists, otherwise the
# newly built one.
#
# NOTE(review): the cache is keyed by +form+ only, so +unknowns+ has no
# effect once a result for +form+ has been stored -- confirm this is
# intended.
def tokenize(form, unknowns = false)
  @tokens ||= {}

  @tokens[form] ||= begin
    token_sets = form.scan(PRINTABLE_CHAR_RE).map do |part|
      TokenSet.tokenize(part, unknowns)
    end

    new(form, token_sets)
  end
end
Public Instance Methods
distance(other)
click to toggle source
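Returns the edit distance between this phrase and other, computed over their token sets. With size1 and size2 being the number of token sets in each phrase, the result satisfies: (size1 - size2).abs <= distance <= [size1, size2].max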
# File lib/perseus_match/token_set.rb, line 271
#
# Computes an edit distance between this object's +token_sets+ and those
# of +other+ by dynamic programming, keeping only a single row of costs.
#
# Insertions and deletions cost 1 each; substituting one token set for
# another costs whatever the first token set's own +distance+ to the
# second returns.
#
# other:: an object responding to +token_sets+
#
# Returns the size of the other sequence when either sequence is empty,
# otherwise the accumulated cost of the final cell.
def distance(other)
  ours, theirs = token_sets, other.token_sets
  rows, cols = ours.size, theirs.size

  # Against an empty sequence the distance is the other one's length.
  return cols if rows == 0
  return rows if cols == 0

  # row[j] holds the cost of reaching column j in the previous row.
  row = (0..cols).to_a
  result = nil

  rows.times do |i|
    current = ours[i]
    carry = i + 1 # cost of the leftmost cell of the row being filled

    cols.times do |j|
      penalty = current.distance(theirs[j])

      insertion = row[j + 1] + 1
      deletion = carry + 1
      substitution = row[j] + penalty

      result = [insertion, deletion, substitution].min

      # Store the cell to the left for the next row, then move right.
      row[j] = carry
      carry = result
    end

    row[cols] = result
  end

  result
end
excl(wcs)
click to toggle source
# File lib/perseus_match/token_set.rb, line 311
#
# Returns a new instance of the receiver's class, built from the same
# +form+ and the result of calling +excl+ with +wcs+ on every token set
# yielded by +map+. The receiver itself is not modified here.
#
# wcs:: passed through unchanged to each token set's +excl+
#       (presumably the word classes to exclude -- confirm)
def excl(wcs)
  source_form = form
  remaining = map { |member| member.excl(wcs) }

  self.class.new(source_form, remaining)
end
forms()
click to toggle source
# File lib/perseus_match/token_set.rb, line 303
#
# Returns the +forms+ of every token set yielded by +map+, one entry per
# token set. The result is memoised in @forms, so later calls return the
# same object.
def forms
  @forms ||= map do |member|
    member.forms
  end
end
incl(wcs)
click to toggle source
# File lib/perseus_match/token_set.rb, line 307
#
# Returns a new instance of the receiver's class, built from the same
# +form+ and the result of calling +incl+ with +wcs+ on every token set
# yielded by +map+. The receiver itself is not modified here.
#
# wcs:: passed through unchanged to each token set's +incl+
#       (presumably the word classes to keep -- confirm)
def incl(wcs)
  source_form = form
  selected = map { |member| member.incl(wcs) }

  self.class.new(source_form, selected)
end
soundex()
click to toggle source
# File lib/perseus_match/token_set.rb, line 315
#
# Returns a new instance of the receiver's class whose token sets are the
# +soundex+ results of the receiver's token sets, for the same +form+.
# The instance is memoised in @soundex.
#
# +ensure_soundex!+ runs on every call, before the memoised value is
# consulted (presumably it checks that soundex support is available --
# confirm against its definition).
def soundex
  ensure_soundex!

  @soundex ||= begin
    source_form = form
    encoded = map { |member| member.soundex }

    self.class.new(source_form, encoded)
  end
end