class String
Public Instance Methods
^(other)
click to toggle source
Jaro-Winkler similarity between this string and another string. @param [String] other the string to compare against @return [Float] similarity score, normalized between 0.0 (no match) and 1.0 (perfect match)
# File lib/music_videos.rb, line 182
# Jaro-Winkler similarity between the receiver and +other+, compared
# codepoint by codepoint.
#
# The Jaro part counts codepoints that occur in both strings within a
# sliding window, then penalises matched codepoints that appear in a
# different order. The Winkler part boosts the score for a shared prefix
# of up to four codepoints, using a scaling factor of 0.1.
#
# @param other [String] the string to compare against
# @return [Float] similarity between 0.0 (no match) and 1.0 (identical)
def ^(other)
  return 1.0 if self == other
  return 0.0 if empty? || other.empty?

  # Keep +short+ as the shorter sequence so the matching loop and the
  # prefix comparison never index past the end of +long+.
  short = codepoints.to_a
  long = other.codepoints.to_a
  short, long = long, short if short.size > long.size

  # Two codepoints only count as a match when their positions are at most
  # +window+ apart; clamp to zero so one-character strings stay well-defined.
  window = [long.size / 2 - 1, 0].max
  short_hits = Array.new(short.size, false)
  long_hits = Array.new(long.size, false)

  short.each_with_index do |char, i|
    first = [i - window, 0].max
    last = [i + window, long.size - 1].min
    (first..last).each do |j|
      next if long_hits[j] || long[j] != char

      # Each position in +long+ may be claimed by one match only.
      short_hits[i] = long_hits[j] = true
      break
    end
  end

  matches = short_hits.count(true)
  return 0.0 if matches.zero?

  # Transpositions: line up the matched codepoints of both strings in their
  # own order and count the positions that disagree; every swap shows up
  # twice, hence the (integer) halving.
  short_seq = short.zip(short_hits).select { |_, hit| hit }.map(&:first)
  long_seq = long.zip(long_hits).select { |_, hit| hit }.map(&:first)
  transpositions = short_seq.zip(long_seq).count { |a, b| a != b } / 2

  m = matches.to_f
  jaro = (m / short.size + m / long.size + (m - transpositions) / m) / 3

  # Winkler adjustment: length of the common prefix, capped at four. A full
  # four-codepoint match must yield 4, not 0.
  prefix = short.first(4).zip(long).take_while { |a, b| a == b }.size

  # Standard Winkler scaling factor (p) is 0.1.
  jaro + prefix * 0.1 * (1 - jaro)
end
clean_name()
click to toggle source
# File lib/music_videos.rb, line 174
# Returns a copy of the string with every parenthesised segment removed
# and leading/trailing whitespace stripped.
#
# The match is non-greedy, so each "(...)" pair is removed on its own; an
# unclosed "(" is left untouched. Whitespace inside the string is not
# collapsed, so removing a segment from the middle can leave two
# consecutive spaces.
#
# @return [String] the cleaned name
def clean_name
  gsub(/\(.*?\)/, '').strip
end