class PerseusMatch
Constants
- DEFAULT_COEFF
- DISTANCE_SPEC
- Infinity
- VERSION
Attributes
default_coeff[R]
distance_spec[R]
phrase[R]
target[R]
verbose[R]
Public Class Methods
check(*args)
click to toggle source
# File lib/perseus_match.rb, line 69
#
# Non-raising variant of check!: forwards every argument to check! and
# returns its result, or +false+ when check! raises CheckFailedError.
def check(*args)
  begin
    check!(*args)
  rescue CheckFailedError
    false
  end
end
check!(phrase, target, threshold = 0, operator = :>, pm_options = {}, attribute = :similarity)
click to toggle source
# File lib/perseus_match.rb, line 75
#
# Builds a matcher for +phrase+ and +target+ (passing +pm_options+ to +new+),
# reads +attribute+ from it and compares that value against +threshold+ by
# sending +operator+ to the value.
#
# Returns a struct with the members +pm+, +value+, +threshold+ and
# +operator+ when the comparison is truthy.
#
# Raises CheckFailedError (constructed from the same four values) otherwise.
def check!(phrase, target, threshold = 0, operator = :>, pm_options = {}, attribute = :similarity)
  pm    = new(phrase, target, pm_options)
  value = pm.send(attribute)

  unless value.send(operator, threshold)
    raise CheckFailedError.new(pm, value, threshold, operator)
  end

  # Reuse a single Struct class rather than defining a fresh anonymous
  # class on every successful check.
  @check_result_struct ||= Struct.new(:pm, :value, :threshold, :operator)
  @check_result_struct.new(pm, value, threshold, operator)
end
cluster(phrases, options = {}, pm_options = {})
click to toggle source
# File lib/perseus_match.rb, line 65
#
# Creates a Cluster from +phrases+ and +pm_options+, then returns the result
# of calling +rank+ on it with +options+.
def cluster(phrases, options = {}, pm_options = {})
  clustered = Cluster.new(phrases, pm_options)
  clustered.rank(options)
end
distance(*args)
click to toggle source
# File lib/perseus_match.rb, line 57
#
# Convenience shortcut: instantiates a matcher with +args+ and returns its
# +distance+.
def distance(*args)
  matcher = new(*args)
  matcher.distance
end
match(phrases, pm_options = {})
click to toggle source
# File lib/perseus_match.rb, line 61
#
# Returns a new List built from +phrases+ and +pm_options+.
def match(phrases, pm_options = {})
  matches = List.new(phrases, pm_options)
  matches
end
new(phrase, target, options = {})
click to toggle source
# File lib/perseus_match.rb, line 98
#
# Stores the sanitized string forms of +phrase+ and +target+ and reads the
# settings from +options+:
#
# :default_coeff:: falls back to DEFAULT_COEFF when missing or falsy
# :distance_spec:: falls back to DISTANCE_SPEC when missing or falsy
# :verbose::       stored as given
#
# Also sets up an empty hash used by +similarity+ as a per-coefficient cache.
def initialize(phrase, target, options = {})
  @similarity = {}

  @verbose = options[:verbose]

  coeff = options[:default_coeff]
  spec  = options[:distance_spec]

  @default_coeff = coeff ? coeff : DEFAULT_COEFF
  @distance_spec = spec ? spec : DISTANCE_SPEC

  @phrase = sanitize(phrase.to_s)
  @target = sanitize(target.to_s)
end
tokenize(form, unknowns = false)
click to toggle source
# File lib/perseus_match.rb, line 86
#
# Tokenizes +form+. When TokenSet.file? returns a truthy value for +form+,
# that value is handed to TokenSet.tokenize; otherwise +form+ itself goes to
# PhraseTokenSet.tokenize. +unknowns+ is passed through unchanged.
def tokenize(form, unknowns = false)
  file = TokenSet.file?(form)
  return TokenSet.tokenize(file, unknowns) if file

  PhraseTokenSet.tokenize(form, unknowns)
end
Public Instance Methods
distance()
click to toggle source
0 <= distance <= Infinity
# File lib/perseus_match.rb, line 119
#
# Returns the distance between phrase and target, computing it through
# +calculate_distance+ on first use and caching a truthy result afterwards.
def distance
  return @distance if @distance

  @distance = calculate_distance
end
phrase_tokens()
click to toggle source
# File lib/perseus_match.rb, line 110
#
# Returns the tokens for +phrase+, obtained from the class-level +tokenize+
# and cached once a truthy result is available.
def phrase_tokens
  return @phrase_tokens if @phrase_tokens

  @phrase_tokens = self.class.tokenize(phrase)
end
similarity(coeff = nil)
click to toggle source
1 >= similarity >= 0
# File lib/perseus_match.rb, line 124
#
# Returns the similarity for +coeff+, using +default_coeff+ when +coeff+ is
# nil or false. Results come from +normalize_distance+ and are cached per
# coefficient.
def similarity(coeff = nil)
  coeff = default_coeff unless coeff # passed arg may be nil

  cached = @similarity[coeff]
  return cached if cached

  @similarity[coeff] = normalize_distance(coeff)
end
target_tokens()
click to toggle source
# File lib/perseus_match.rb, line 114
#
# Returns the tokens for +target+, obtained from the class-level +tokenize+
# and cached once a truthy result is available.
def target_tokens
  return @target_tokens if @target_tokens

  @target_tokens = self.class.tokenize(target)
end
Private Instance Methods
calculate_distance()
click to toggle source
# File lib/perseus_match.rb, line 135
#
# Computes the raw distance between the two token sets:
#
# * Infinity when +disjoint?+ reports the sets as disjoint,
# * 0 when +eql?+ reports them as equal,
# * otherwise the sum over +distance_spec+ of token_distance(options) * weight.
def calculate_distance
  if phrase_tokens.disjoint?(target_tokens)
    Infinity
  elsif phrase_tokens.eql?(target_tokens)
    0
  else
    total = 0
    distance_spec.each do |options, weight|
      total += token_distance(options) * weight
    end
    total
  end
end
normalize_distance(coeff)
click to toggle source
# File lib/perseus_match.rb, line 170
#
# Maps +distance+ onto a similarity value as 1 / exp(distance / norm), where
# norm = log(length ** sqrt(2)) * coeff * total_weight * e and +length+ is
# the combined size of both token sets.
#
# Returns 0 when both token sets are empty, and 1.0 when the distance is
# zero.
def normalize_distance(coeff)
  length = phrase_tokens.size + target_tokens.size
  return 0 if length == 0

  dist = distance

  # A zero distance always yields 1 / exp(0) == 1.0 for a non-zero norm.
  # Answering directly also covers a zero norm (e.g. coeff == 0), where
  # 0 / 0.0 would otherwise turn the result into NaN.
  return 1.0 if dist.zero?

  norm = Math.log(length ** Math.sqrt(2)) * coeff * total_weight * Math::E

  1 / Math.exp(dist / norm)
end
sanitize(str)
click to toggle source
# File lib/perseus_match.rb, line 131
#
# Returns a cleaned copy of +str+: every parenthesised or square-bracketed
# group (with the whitespace before it) is removed, then everything from the
# first slash or colon (with the whitespace before it) to the end of that
# line is cut off.
def sanitize(str)
  without_groups = str.gsub(/\s*\(.*?\)|\s*\[.*?\]/, '')
  without_groups.sub(/\s*[\/:].*/, '')
end
token_distance(options = {})
click to toggle source
# File lib/perseus_match.rb, line 144
#
# Computes the distance between the phrase and target token sets after
# filtering both through +inclexcl+ with +options+. When +options+ has a
# truthy :sort entry both sides are sorted; when it has a truthy :soundex
# entry both sides are converted via +soundex+. With +verbose+ set, the
# options, both token sets and the resulting distance are written via +warn+.
def token_distance(options = {})
  tokens1 = phrase_tokens.inclexcl(options)
  tokens2 = target_tokens.inclexcl(options)

  tokens1, tokens2 = tokens1.sort, tokens2.sort if options[:sort]
  tokens1, tokens2 = tokens1.soundex, tokens2.soundex if options[:soundex]

  distance = tokens1.distance(tokens2)

  # NOTE(review): the leading whitespace inside this heredoc was lost in the
  # extracted listing; the layout below is an assumption -- confirm against
  # lib/perseus_match.rb.
  if verbose
    warn <<-EOT
#{options.inspect}:
  #{tokens1.inspect}
  #{tokens2.inspect}
  => #{distance}
    EOT
  end

  distance
end
total_weight()
click to toggle source
# File lib/perseus_match.rb, line 179
#
# Returns the sum of all weights in +distance_spec+ as a Float, computed on
# first use and cached afterwards.
def total_weight
  return @total_weight if @total_weight

  sum_of_weights = 0.0
  distance_spec.each do |_, weight|
    sum_of_weights += weight
  end

  @total_weight = sum_of_weights
end