class PerseusMatch

Constants

DEFAULT_COEFF
DISTANCE_SPEC
Infinity
VERSION

Attributes

default_coeff[R]
distance_spec[R]
phrase[R]
target[R]
verbose[R]

Public Class Methods

check(*args) click to toggle source
   # File lib/perseus_match.rb
# Non-raising variant of +check!+.
#
# Forwards all arguments unchanged to +check!+ and returns its result.
# A CheckFailedError raised by +check!+ is swallowed and reported as
# +false+; any other exception propagates to the caller.
def check(*args)
  begin
    check!(*args)
  rescue CheckFailedError
    false
  end
end
check!(phrase, target, threshold = 0, operator = :>, pm_options = {}, attribute = :similarity) click to toggle source
   # File lib/perseus_match.rb
# Builds a match for +phrase+ and +target+ and compares one of its
# attributes against +threshold+.
#
# phrase, target:: passed to +new+ together with +pm_options+
# threshold::      right-hand operand of the comparison (default: 0)
# operator::       comparison method sent to the attribute value (default: +:>+)
# pm_options::     options hash passed through to +new+
# attribute::      method sent to the match object to obtain the value
#                  (default: +:similarity+)
#
# Returns a struct with the members +pm+, +value+, +threshold+ and
# +operator+ when the comparison holds.
#
# Raises CheckFailedError, constructed from the same four values, when the
# comparison does not hold.
def check!(phrase, target, threshold = 0, operator = :>, pm_options = {}, attribute = :similarity)
  pm    = new(phrase, target, pm_options)
  value = pm.send(attribute)

  unless value.send(operator, threshold)
    raise CheckFailedError.new(pm, value, threshold, operator)
  end

  # Create the result class once per receiver and reuse it. Building a
  # fresh anonymous Struct on every call allocates a new class each time
  # and makes results of separate calls instances of unrelated classes.
  @check_result_class ||= Struct.new(:pm, :value, :threshold, :operator)
  @check_result_class.new(pm, value, threshold, operator)
end
cluster(phrases, options = {}, pm_options = {}) click to toggle source
   # File lib/perseus_match.rb
# Builds a Cluster from +phrases+ and +pm_options+ and returns the result
# of calling +rank+ on it with +options+.
def cluster(phrases, options = {}, pm_options = {})
  clustered = Cluster.new(phrases, pm_options)
  clustered.rank(options)
end
distance(*args) click to toggle source
   # File lib/perseus_match.rb
# Convenience shortcut: instantiates a match with +args+ (forwarded
# unchanged to +new+) and returns its +distance+.
def distance(*args)
  pm = new(*args)
  pm.distance
end
match(phrases, pm_options = {}) click to toggle source
   # File lib/perseus_match.rb
# Returns a new List built from +phrases+ and +pm_options+.
def match(phrases, pm_options = {})
  list = List.new(phrases, pm_options)
  list
end
new(phrase, target, options = {}) click to toggle source
    # File lib/perseus_match.rb
 98 def initialize(phrase, target, options = {})
 99   @phrase = sanitize(phrase.to_s)
100   @target = sanitize(target.to_s)
101 
102   @default_coeff = options[:default_coeff] || DEFAULT_COEFF
103   @distance_spec = options[:distance_spec] || DISTANCE_SPEC
104 
105   @verbose = options[:verbose]
106 
107   @similarity = {}
108 end
tokenize(form, unknowns = false) click to toggle source
   # File lib/perseus_match.rb
# Tokenizes +form+.
#
# When <tt>TokenSet.file?(form)</tt> returns a truthy value, that value is
# handed to <tt>TokenSet.tokenize</tt>; otherwise +form+ itself goes to
# <tt>PhraseTokenSet.tokenize</tt>. +unknowns+ is passed through in both
# cases.
def tokenize(form, unknowns = false)
  file = TokenSet.file?(form)
  return TokenSet.tokenize(file, unknowns) if file

  PhraseTokenSet.tokenize(form, unknowns)
end

Public Instance Methods

distance() click to toggle source

0 <= distance <= Infinity

    # File lib/perseus_match.rb
# Returns the distance between phrase and target
# (0 <= distance <= Infinity).
#
# The value comes from +calculate_distance+ and is cached in +@distance+
# after the first call.
def distance
  @distance = calculate_distance unless @distance
  @distance
end
phrase_tokens() click to toggle source
    # File lib/perseus_match.rb
# Returns the result of the class-level +tokenize+ for +phrase+, cached in
# +@phrase_tokens+ after the first call.
def phrase_tokens
  @phrase_tokens = self.class.tokenize(phrase) unless @phrase_tokens
  @phrase_tokens
end
similarity(coeff = nil) click to toggle source

1 >= similarity >= 0

    # File lib/perseus_match.rb
# Returns the similarity of phrase and target (1 >= similarity >= 0).
#
# +coeff+ may be omitted or +nil+, in which case +default_coeff+ is used.
# Results of +normalize_distance+ are cached per coefficient.
def similarity(coeff = nil)
  key    = coeff || default_coeff
  cached = @similarity[key]
  return cached if cached

  @similarity[key] = normalize_distance(key)
end
target_tokens() click to toggle source
    # File lib/perseus_match.rb
# Returns the result of the class-level +tokenize+ for +target+, cached in
# +@target_tokens+ after the first call.
def target_tokens
  @target_tokens = self.class.tokenize(target) unless @target_tokens
  @target_tokens
end

Private Instance Methods

calculate_distance() click to toggle source
    # File lib/perseus_match.rb
# Computes the raw distance between the phrase and target token sets.
#
# Returns Infinity when the sets are disjoint (checked first) and 0 when
# they are +eql?+. Otherwise returns the sum, over every (options, weight)
# pair in +distance_spec+, of <tt>token_distance(options) * weight</tt>.
def calculate_distance
  phrase_set = phrase_tokens
  target_set = target_tokens

  if phrase_set.disjoint?(target_set)
    Infinity
  elsif phrase_set.eql?(target_set)
    0
  else
    total = 0
    distance_spec.each do |options, weight|
      total += token_distance(options) * weight
    end
    total
  end
end
normalize_distance(coeff) click to toggle source
    # File lib/perseus_match.rb
# Converts +distance+ into a similarity value for the given +coeff+.
#
# Returns 0 when phrase and target together contain no tokens. Otherwise
# returns <tt>1 / e^(distance / norm)</tt>, where +norm+ is the product of
# <tt>log(token_count ** sqrt(2))</tt>, +coeff+, +total_weight+ and e.
def normalize_distance(coeff)
  token_count = phrase_tokens.size + target_tokens.size
  return 0 if token_count == 0

  # The factors are multiplied left to right on purpose; reordering them
  # could change the floating-point result.
  scale = Math.log(token_count ** Math.sqrt(2))
  norm  = scale * coeff * total_weight * Math::E

  ratio = distance / norm
  1 / Math.exp(ratio)
end
sanitize(str) click to toggle source
    # File lib/perseus_match.rb
# Strips annotations from +str+ and returns the cleaned copy.
#
# First removes every parenthesised or bracketed group together with the
# whitespace in front of it, then cuts the string at the first "/" or ":"
# (including preceding whitespace) up to the end of that line.
def sanitize(str)
  without_groups = str.gsub(/\s*\(.*?\)|\s*\[.*?\]/, '')
  without_groups.sub(/\s*[\/:].*/, '')
end
token_distance(options = {}) click to toggle source
    # File lib/perseus_match.rb
# Computes the distance between the phrase and target tokens for a single
# set of +options+.
#
# Both token sets are first passed through +inclexcl+ with +options+, then
# sorted when <tt>options[:sort]</tt> is set, then converted with +soundex+
# when <tt>options[:soundex]</tt> is set. The result is
# <tt>tokens1.distance(tokens2)</tt>.
#
# When +verbose+ is truthy, the options, both token sets and the resulting
# distance are written via +warn+.
def token_distance(options = {})
  tokens1 = phrase_tokens.inclexcl(options)
  tokens2 = target_tokens.inclexcl(options)

  # Each step handles the phrase side before the target side.
  tokens1, tokens2 = tokens1.sort, tokens2.sort if options[:sort]
  tokens1, tokens2 = tokens1.soundex, tokens2.soundex if options[:soundex]

  result = tokens1.distance(tokens2)

  if verbose
    warn <<-EOT
#{options.inspect}:
  #{tokens1.inspect}
  #{tokens2.inspect}
=> #{result}
    EOT
  end

  result
end
total_weight() click to toggle source
    # File lib/perseus_match.rb
# Returns the sum of all weights in +distance_spec+, accumulated from 0.0
# and cached in +@total_weight+ after the first call.
def total_weight
  return @total_weight if @total_weight

  sum = 0.0
  distance_spec.each { |_options, weight| sum += weight }
  @total_weight = sum
end