module Fias::Name::Extract

Constants

REWARD
SMALL_LETTER

Public Class Methods

extract(name) click to toggle source
# File lib/fias/name/extract.rb, line 5
# Splits a raw name into its parts.
#
# The name is normalised with +cleanup+, candidate matches are collected by
# +find+, scored by +assign_rates+, and a single best match is chosen by
# +pick_winner+.
#
# @param name [String, nil] raw name
# @return [nil] when +name+ is blank
# @return [Array] a one-element array holding the cleaned-up name when no
#   winner is picked; otherwise the result of +extract_name+ for the winner
def extract(name)
  return if name.blank?

  normalized = cleanup(name)
  rated = assign_rates(normalized, find(normalized))
  best = pick_winner(rated)

  best ? extract_name(normalized, best) : [normalized]
end

Private Class Methods

assign_rates(name, matches) click to toggle source
# File lib/fias/name/extract.rb, line 31
# Scores every candidate match against +name+.
#
# @param name [String] the name the matches were found in
# @param matches [Array] candidate matches
# @return [Array] one +rate_match+ result per match, in the original order
def assign_rates(name, matches)
  matches.map do |candidate|
    rate_match(name, candidate)
  end
end
border_proximity(name, match) click to toggle source
# File lib/fias/name/extract.rb, line 49
# Scores how close a match sits to either end of +name+.
#
# Two candidate scores are computed and the larger one is returned:
# * the number of characters from the start of capture group 1 to the end of
#   +name+, plus the +REWARD[:head]+ bonus (grows as the match moves towards
#   the beginning of the name);
# * the offset just past capture group 2 (grows as the match moves towards
#   the end of the name).
#
# @param name [String] the name that was searched
# @param match [MatchData] a match with at least two capture groups
# @return [Numeric] the larger of the two scores
def border_proximity(name, match)
  head_score = name.size - match.begin(1) + REWARD[:head]
  tail_score = match.end(2)

  # On a tie the head score is returned, as with [head, tail].max.
  tail_score > head_score ? tail_score : head_score
end
cleanup(name) click to toggle source
# File lib/fias/name/extract.rb, line 19
# Normalises whitespace: every run of whitespace becomes a single space and
# leading/trailing whitespace is dropped.
#
# @param name [String]
# @return [String] a new, normalised string
def cleanup(name)
  words = name.split(' ')
  words.join(' ').strip
end
ends_with_dot?(value) click to toggle source
# File lib/fias/name/extract.rb, line 55
# Numeric flag telling whether the last character of +value+ is a dot.
#
# An Integer is returned instead of a boolean because +rate_match+ multiplies
# the result by a reward weight.
#
# @param value [String]
# @return [Integer] 1 when +value+ ends with ".", otherwise 0 (including for
#   an empty string)
def ends_with_dot?(value)
  return 1 if value[-1] == '.'

  0
end
extract_name(name, winner) click to toggle source
# File lib/fias/name/extract.rb, line 70
# Separates the winning short name from the rest of +name+.
#
# Every occurrence of the winner's regexp in +name+ is replaced by a space and
# the remainder is normalised with +cleanup+.
#
# @param name [String] the cleaned-up name
# @param winner [MatchData] the winning match; capture group 2 holds the
#   matched short name
# @return [Array] +[name]+ when nothing is left after removing the match;
#   otherwise the remainder followed by the result of
#   +Canonical.canonical+ for the short name, flattened into one array
#   (NOTE(review): the shape of +Canonical.canonical+'s result is not visible
#   here - confirm)
def extract_name(name, winner)
  remainder = cleanup(name.gsub(winner.regexp, ' '))

  if remainder.strip.blank?
    [name]
  else
    [cleanup(remainder), Canonical.canonical(winner[2])].flatten
  end
end
find(name) click to toggle source
# File lib/fias/name/extract.rb, line 23
# Searches +name+ for every key of +Fias.config.index+.
#
# A key matches only when it is preceded by whitespace or a line start and
# followed by a dot, whitespace or a line end; matching is case-insensitive.
# The key's text is captured as group 2 of each match.
#
# @param name [String] the name to search
# @return [Array<MatchData>] one match per key that was found, in key order
def find(name)
  Fias.config.index.keys.each_with_object([]) do |query, found|
    pattern = /(\s|^)(#{Regexp.escape(query)})(\.|\s|$)/ui
    hit = name.match(pattern)
    found << hit if hit && hit[2]
  end
end
pick_winner(rates) click to toggle source
# File lib/fias/name/extract.rb, line 63
# Chooses the match with the strictly highest rate.
#
# @param rates [Array<Array>] +[rate, match]+ pairs
# @return [Object, nil] the match paired with the highest rate, or nil when
#   +rates+ is empty or the highest rate is shared by more than one pair
def pick_winner(rates)
  ordered = rates.sort_by(&:first).reverse
  top_rate, top_match = ordered.first

  # A shared top rate means there is no unambiguous winner.
  tied = ordered.drop(1).any? { |(other_rate, _)| top_rate == other_rate }
  tied ? nil : top_match
end
rate_match(name, match) click to toggle source
# File lib/fias/name/extract.rb, line 35
# Computes the rate of a single match.
#
# The rate is the sum of three terms - a dot bonus, a small-first-letter
# bonus and the border proximity - multiplied by 100, plus the length of the
# matched short name (capture group 2). Because of the multiplication the
# length only contributes to the low-order part of the rate.
#
# @param name [String] the name the match was found in
# @param match [MatchData] the match to rate
# @return [Array] +[rate, match]+
def rate_match(name, match)
  abbreviation = match[2]

  dot_bonus = ends_with_dot?(abbreviation) * REWARD[:dot]
  letter_bonus = starts_with_small_letter?(abbreviation) * REWARD[:small_letter]
  proximity = border_proximity(name, match)

  score = (dot_bonus + letter_bonus + proximity) * 100 + abbreviation.size
  [score, match]
end
starts_with_small_letter?(value) click to toggle source
# File lib/fias/name/extract.rb, line 59
# Numeric flag telling whether the first character of +value+ matches
# +SMALL_LETTER+.
#
# An Integer is returned instead of a boolean because +rate_match+ multiplies
# the result by a reward weight.
#
# @param value [String]
# @return [Integer] 1 when the first character matches, otherwise 0
#   (including for an empty string)
def starts_with_small_letter?(value)
  return 1 if value[0] =~ SMALL_LETTER

  0
end