module JaroWinkler

Constants

DEFAULT_ADJ_TABLE
DEFAULT_OPTIONS
DEFAULT_THRESHOLD
DEFAULT_WEIGHT
VERSION

Public Class Methods

distance(*args)
/*
 * Entry point listed in the docs as JaroWinkler.distance(*args).
 *
 * Hands argc/argv/self straight to the shared `distance` helper, selecting
 * jaro_winkler_distance_from_codes as the function passed along with them
 * (presumably the scoring routine -- confirm against `distance`).
 *
 * NOTE(review): Ruby's C API passes argc as `int` to variadic (arity -1)
 * methods; `size_t` here presumably mirrors a prototype declared elsewhere
 * in the extension -- confirm before changing either side.
 */
VALUE rb_jaro_winkler_distance(size_t argc, VALUE *argv, VALUE self) {
  VALUE score = distance(argc, argv, self, jaro_winkler_distance_from_codes);
  return score;
}
jaro_distance(*args)
/*
 * Entry point listed in the docs as JaroWinkler.jaro_distance(*args).
 *
 * Hands argc/argv/self straight to the shared `distance` helper, selecting
 * jaro_distance_from_codes as the function passed along with them
 * (presumably the scoring routine -- confirm against `distance`).
 *
 * NOTE(review): Ruby's C API passes argc as `int` to variadic (arity -1)
 * methods; `size_t` here presumably mirrors a prototype declared elsewhere
 * in the extension -- confirm before changing either side.
 */
VALUE rb_jaro_distance(size_t argc, VALUE *argv, VALUE self) {
  VALUE score = distance(argc, argv, self, jaro_distance_from_codes);
  return score;
}

Private Class Methods

_distance(codes1, codes2, options = {})
# File lib/jaro_winkler/jaro_winkler_pure.rb, line 27
# Computes the Jaro-Winkler score for two arrays of character codes.
#
# codes1  - Array of character codes for the first string.
# codes2  - Array of character codes for the second string.
# options - Hash merged over DEFAULT_OPTIONS[:jaro_winkler]; this method
#           reads :weight and :threshold and forwards the merged Hash to
#           _jaro_distance.
#
# Returns the plain Jaro score when it is below options[:threshold];
# otherwise the Jaro score boosted by the length of the common prefix
# (at most four codes) scaled by options[:weight].
# Raises InvalidWeightError when options[:weight] is greater than 0.25.
def _distance(codes1, codes2, options = {})
  options = DEFAULT_OPTIONS[:jaro_winkler].merge options
  raise InvalidWeightError if options[:weight] > 0.25

  base_score = _jaro_distance(codes1, codes2, options)
  return base_score if base_score < options[:threshold]

  # Compare against the shorter sequence so the prefix scan never runs past it.
  shorter, longer = codes1.length > codes2.length ? [codes2, codes1] : [codes1, codes2]
  prefix_cap = [shorter.length, 4].min
  prefix = (0...prefix_cap).take_while { |idx| shorter[idx] == longer[idx] }.size

  base_score + prefix * options[:weight] * (1 - base_score)
end
_jaro_distance(codes1, codes2, options = {})
# File lib/jaro_winkler/jaro_winkler_pure.rb, line 45
# Computes the Jaro similarity of two arrays of character codes.
#
# codes1  - Array of character codes for the first string.
# codes2  - Array of character codes for the second string.
# options - Hash merged over DEFAULT_OPTIONS[:jaro]; this method reads:
#           :ignore_case - when truthy, codes in 97..122 are lowered by 32
#                          IN PLACE, so the arrays passed in are modified.
#           :adj_table   - when truthy, each unmatched code of the shorter
#                          sequence that DEFAULT_ADJ_TABLE pairs with some
#                          unmatched code of the longer one adds 0.3 to the
#                          match total.
#
# Returns a Float; 0.0 when either input is empty or no code matches.
def _jaro_distance(codes1, codes2, options = {})
  options = DEFAULT_OPTIONS[:jaro].merge options

  # Always scan from the shorter sequence into the longer one.
  short, long = codes1.length > codes2.length ? [codes2, codes1] : [codes1, codes2]
  short_len = short.length
  long_len = long.length
  return 0.0 if short_len == 0 || long_len == 0

  if options[:ignore_case]
    [short, long].each do |codes|
      codes.map! { |code| code.between?(97, 122) ? code - 32 : code }
    end
  end

  window = [long_len / 2 - 1, 0].max

  # Integers used as bit sets: bit n is 1 once position n has been matched.
  short_flags = 0
  long_flags = 0

  # Pair every code of the shorter sequence with the first still-unclaimed
  # equal code of the longer sequence that lies inside the search window.
  match_count = 0
  (0...short_len).each do |i|
    lo = [i - window, 0].max
    hi = [i + window, long_len - 1].min
    hit = (lo..hi).find { |j| long_flags[j] == 0 && short[i] == long[j] }
    next unless hit

    short_flags |= (1 << i)
    long_flags |= (1 << hit)
    match_count += 1
  end

  return 0.0 if match_count == 0

  # Line up the matched codes of both sides in their own order; every
  # position where they disagree counts toward the transposition total.
  matched_short = (0...short_len).select { |i| short_flags[i] == 1 }.map { |i| short[i] }
  matched_long = (0...long_len).select { |j| long_flags[j] == 1 }.map { |j| long[j] }
  transposition_count = matched_short.zip(matched_long).count { |a, b| a != b }

  # Credit unmatched codes that the adjustment table lists as a pair.
  similar_count = 0
  if options[:adj_table] && short_len > match_count
    free_long = (0...long_len).select { |j| long_flags[j] == 0 }
    (0...short_len).each do |i|
      next unless short_flags[i] == 0

      paired = free_long.any? do |j|
        DEFAULT_ADJ_TABLE[short[i].chr(Encoding::UTF_8)][long[j].chr(Encoding::UTF_8)]
      end
      similar_count += 3 if paired
    end
  end

  m = match_count.to_f
  t = transposition_count / 2 # integer division: half-transpositions round down
  m = similar_count / 10.0 + m if options[:adj_table]
  (m / short_len + m / long_len + (m - t) / m) / 3
end
validate!(str1, str2)
# File lib/jaro_winkler/jaro_winkler_pure.rb, line 130
# Ensures both arguments are Strings.
#
# str1 - first value to check.
# str2 - second value to check.
#
# Returns nil when both values are Strings.
# Raises TypeError, with a message naming the classes actually received,
# when either value is not a String.
def validate!(str1, str2)
  return if str1.is_a?(String) && str2.is_a?(String)

  raise TypeError, "expected two Strings, got #{str1.class} and #{str2.class}"
end