class NameTamer::Name

Attributes

Public Class Methods

new(new_name, args = {}) click to toggle source
# File lib/name_tamer/name.rb, line 296
# Wrap a raw name string.
#
# new_name:: the name as received; +nil+ is stored as an empty string
# args::     options hash; only +:contact_type+ is consulted (through
#            contact_type_from)
#
# Every derived form starts out unset and is filled in on first use.
def initialize(new_name, args = {})
  @name = new_name || ''
  @contact_type = contact_type_from(args)

  # Caches for tidy_name, nice_name, simple_name and slug
  @tidy_name = @nice_name = @simple_name = @slug = nil

  # The two halves used while a "Last, First" name is being turned around
  @last_name = @remainder = nil

  @adfix_found = false
end

Public Instance Methods

array() click to toggle source
# File lib/name_tamer/name.rb, line 63
# Returns the slug split on SLUG_DELIMITER, i.e. one element per word of the
# slug. The split is performed once and the result is cached in @array.
def array
  @array ||= slug.split(SLUG_DELIMITER)
end
contact_type() click to toggle source
# File lib/name_tamer/name.rb, line 67
# Returns the contact type, using contact_type_best_effort so that a guess is
# returned when no type has been recorded. nice_name is called first only for
# its side effect: building it is the step that can infer the contact type.
def contact_type
  nice_name # make sure we've done the bit which infers contact_type
  contact_type_best_effort
end
contact_type=(new_contact_type) click to toggle source
# File lib/name_tamer/name.rb, line 72
# Record the contact type for this name.
#
# new_contact_type:: anything responding to +to_sym+; stored as a Symbol
#
# When a type is already recorded and the new one differs, a notice is
# printed to standard output before the stored value is replaced.
def contact_type=(new_contact_type)
  ct_as_sym = new_contact_type.to_sym
  overriding = !@contact_type.nil? && @contact_type != ct_as_sym

  puts "Changing contact type of #{@name} from #{@contact_type} to #{new_contact_type}" if overriding

  @contact_type = ct_as_sym
end
each_word(&block) click to toggle source

Useful method for iterating through the words in the name

# File lib/name_tamer/name.rb, line 83
# Iterate over the words of the name's slug.
#
# Delegates to #array so the slug is split and cached in exactly one place,
# rather than keeping a second copy of the same split.
#
# Yields each word to the block. Returns whatever Array#each returns: the
# array itself when a block is given, an Enumerator otherwise.
def each_word(&block)
  array.each(&block)
end
nice_name() click to toggle source
# File lib/name_tamer/name.rb, line 29
# Returns the presentable form of the name, building and caching it on the
# first call. The steps run in a fixed order on a copy of tidy_name; each one
# works on @nice_name (and the split-name state) in place.
def nice_name
  return @nice_name if @nice_name

  @nice_name = tidy_name.dup # work on a copy of the tidied name

  remove_adfixes # prefixes and suffixes: "Smith, John, Jr." -> "Smith, John"
  fixup_last_name_first # "Smith, John" -> "John Smith"
  fixup_mismatched_braces # "Ceres (AZ" -> "Ceres (AZ)"
  remove_adfixes # second pass: "Mr John Smith Jr." -> "John Smith"
  name_wrangle # proper name case and non-breaking spaces
  use_nonbreaking_spaces_in_compound_names

  @nice_name
end
simple_name() click to toggle source
# File lib/name_tamer/name.rb, line 44
# Returns the simplified form of the name, building and caching it on the
# first call. Starts from a copy of nice_name, applies the reduction steps in
# order, then converts whitespace to ASCII_SPACE via whitespace_to!.
def simple_name
  return @simple_name if @simple_name

  @simple_name = nice_name.dup # work on a copy of the nice name

  remove_initials # "John Q. Doe" -> "John Doe"
  remove_middle_names # "Philip Seymour Hoffman" -> "Philip Hoffman"
  remove_periods_from_initials # "J.P.R. Williams" -> "JPR Williams"
  standardize_words # "B&Q Intl" -> "B and Q International"

  @simple_name.whitespace_to!(ASCII_SPACE)

  @simple_name
end
slug() click to toggle source
# File lib/name_tamer/name.rb, line 59
# Returns the slug form of the name, e.g. "John Doe" -> "john-doe". It is
# produced by passing a copy of simple_name to NameTamer.parameterize and is
# cached in @slug after the first call.
def slug
  @slug ||= NameTamer.parameterize simple_name.dup # "John Doe" -> "john-doe"
end
tidy_name() click to toggle source
# File lib/name_tamer/name.rb, line 15
# Returns the cleaned-up form of the raw name, building and caching it on the
# first call. The steps run in a fixed order on a copy of +name+ and each one
# works on @tidy_name in place.
def tidy_name
  return @tidy_name if @tidy_name

  @tidy_name = name.dup # work on a copy of the name we've received

  unescape # Unescape percent-encoded characters and fix UTF-8 encoding
  remove_zero_width # remove zero-width characters
  tidy_spacing # " John   Smith " -> "John Smith"
  fix_encoding_errors # "Ren\u00c3\u00a9 Descartes" -> "Ren\u00e9 Descartes"
  consolidate_initials # "I. B. M." -> "I.B.M."

  @tidy_name
end

Private Instance Methods

consolidate_initials() click to toggle source

Remove spaces from groups of initials

# File lib/name_tamer/name.rb, line 132
# Remove spaces from groups of initials in @tidy_name ("I. B. M." -> "I.B.M.").
# Calls remove_spaces_from_initials! and then ensure_space_after_initials! on
# the result, returning whatever the second call returns.
# NOTE(review): both helpers are String extensions defined outside this
# section; presumably they mutate the receiver and return it - confirm.
def consolidate_initials
  without_inner_spaces = @tidy_name.remove_spaces_from_initials!
  without_inner_spaces.ensure_space_after_initials!
end
contact_type_best_effort() click to toggle source

If we don’t know the contact type, what’s our best guess?

# File lib/name_tamer/name.rb, line 325
# Returns the recorded contact type or, when none is recorded, a guess based
# on the raw name: a single word is assumed to be an organization, more than
# one word a person. The guess is not stored.
def contact_type_best_effort
  @contact_type || (@name.include?(ASCII_SPACE) ? :person : :organization)
end
contact_type_from(args) click to toggle source
# File lib/name_tamer/name.rb, line 311
# Extract a valid contact type from the options hash.
#
# args:: options hash; only +:contact_type+ is read
#
# Returns +:person+ or +:organization+, or +nil+ when the option is absent
# or names anything else.
#
# The value is normalised through +to_s.downcase+ whatever its class, so
# Symbols are treated case-insensitively exactly like Strings. The caller's
# object is never mutated, and only whitelisted values are turned into
# Symbols, so arbitrary input is never interned.
def contact_type_from(args)
  requested = args[:contact_type]
  return unless requested

  normalized = requested.to_s.downcase
  normalized.to_sym if %w[person organization].include?(normalized)
end
find_contact_type_and_parts(adfixes, name_part) click to toggle source
# File lib/name_tamer/name.rb, line 350
# Look for an adfix in +name_part+ and work out which contact type it implies.
#
# adfixes::   hash keyed by contact type (+:person+ / +:organization+) whose
#             values are handed to String#partition
# name_part:: the string to search
#
# Returns +[contact_type, parts]+ where +parts+ is the three-element result of
# String#partition. Sets @adfix_found to whether the middle element (the
# match) is non-empty.
def find_contact_type_and_parts(adfixes, name_part)
  guess = contact_type_best_effort
  split = name_part.partition(adfixes[guess])
  @adfix_found = !split[1].empty?

  if @contact_type || @adfix_found
    [guess, split]
  else
    # The contact type is indeterminate and the first attempt found nothing
    # diagnostic, so try again with the organization patterns
    split = name_part.partition(adfixes[:organization])
    @adfix_found = !split[1].empty?

    [:organization, split]
  end
end
find_first_usable_name(parts) click to toggle source
# File lib/name_tamer/name.rb, line 255
# Find the first part that still has content once FILTER_COMPAT matches are
# removed from it.
#
# parts:: array of strings (the words of a name)
#
# Returns +[part, rest]+ where +rest+ is a new array of everything after
# +part+. When no part is usable the result is +[nil, parts]+, which lets the
# caller tell "nothing usable" apart from a real name (and matches what
# find_last_usable_name returns in the same situation). +parts+ itself is
# never modified.
def find_first_usable_name(parts)
  index = parts.index { |part| !part.gsub(FILTER_COMPAT, '').empty? }
  return [nil, parts] if index.nil?

  [parts[index], parts.drop(index + 1)]
end
find_last_usable_name(parts) click to toggle source
# File lib/name_tamer/name.rb, line 268
# Returns the last part that still has content once FILTER_COMPAT matches are
# removed from it, or +nil+ when there is none. +parts+ is not modified.
def find_last_usable_name(parts)
  parts.reverse_each.find { |candidate| !candidate.gsub(FILTER_COMPAT, '').empty? }
end
fix_encoding_errors() click to toggle source
# File lib/name_tamer/name.rb, line 127
# Repairs encoding damage in @tidy_name by delegating to the
# fix_encoding_errors! String extension, and returns that call's result.
# NOTE(review): the extension is defined outside this section; presumably it
# mutates the receiver in place - confirm.
def fix_encoding_errors
  @tidy_name.fix_encoding_errors!
end
fixup_last_name_first() click to toggle source

Names in the form “Smith, John” need to be turned around to “John Smith”

# File lib/name_tamer/name.rb, line 166
# Names in the form "Smith, John" need to be turned around to "John Smith".
#
# This step only records the two halves in @last_name and @remainder; the
# name itself is reassembled later. Nothing happens for organizations or
# when splitting @nice_name on ', ' does not give exactly two pieces.
def fixup_last_name_first
  return if @contact_type == :organization

  pieces = @nice_name.split(', ')
  return if pieces.size != 2

  @last_name = pieces.first # Sometimes the last name alone is all caps and we can name-case it
  @remainder = pieces.last
end
fixup_mismatched_braces() click to toggle source

Sometimes we end up with mismatched braces after adfix stripping e.g. “Ceres (Ceres Holdings LLC)” -> “Ceres (Ceres Holdings”

# File lib/name_tamer/name.rb, line 179
# Sometimes we end up with mismatched braces after adfix stripping,
# e.g. "Ceres (Ceres Holdings LLC)" -> "Ceres (Ceres Holdings".
#
# Balances the round brackets in @nice_name by count: missing closing
# brackets are appended and missing opening brackets are prepended. As many
# brackets as are missing get added, so "A (B (C" becomes "A (B (C))".
#
# Returns the new @nice_name, or +nil+ when the counts already match.
def fixup_mismatched_braces
  surplus_open = @nice_name.count('(') - @nice_name.count(')')

  if surplus_open > 0
    @nice_name += ')' * surplus_open
  elsif surplus_open < 0
    @nice_name = ('(' * -surplus_open) + @nice_name
  end
end
name_case(lowercase) click to toggle source

Original Version of NameCase: Copyright © Mark Summerfield 1998-2008. All Rights Reserved. This module may be used/distributed/modified under the same terms as Perl itself: dev.perl.org/licenses/ (GPL).

Ruby Version: Copyright © Aaron Patterson 2006 NameCase is distributed under the GPL license.

Substantially modified for Xendata. Improved in several areas; it also now adds non-breaking spaces for compound names like “van der Pump”.

# File lib/name_tamer/name.rb, line 378
# Apply proper-name casing to a name that has already been downcased.
#
# lowercase:: the downcased name; it is copied, so the argument is untouched
#
# Runs the casing String extensions in a fixed order on the copy and returns
# the result of the last one.
# NOTE(review): the extensions are defined outside this section; presumably
# each mutates the receiver and returns it - confirm.
def name_case(lowercase)
  # We assume the name is passed already downcased
  capitalized = lowercase.dup.upcase_first_letter!.downcase_after_apostrophe!
  particles_fixed = capitalized.fix_mac!.fix_ff!.fix_name_modifiers!
  particles_fixed.upcase_initials!
end
name_wrangle() click to toggle source
# File lib/name_tamer/name.rb, line 190
# Fix the case of the name if it is all caps or all lowercase.
# A name that has been split (i.e. @last_name is set) is handled by
# name_wrangle_split_name, anything else by name_wrangle_single_name.
def name_wrangle
  @last_name.nil? ? name_wrangle_single_name : name_wrangle_split_name
end
name_wrangle_single_name() click to toggle source
# File lib/name_tamer/name.rb, line 199
# Re-case @nice_name when it is written in a single case.
#
# Organizations are only re-cased when all caps and longer than four
# characters; any other name is re-cased when it is entirely upper case or
# entirely lower case.
#
# Returns the new @nice_name, or +nil+ when nothing was changed.
def name_wrangle_single_name
  downcased = @nice_name.downcase
  all_caps = @nice_name == @nice_name.upcase

  needs_fix =
    if @contact_type == :organization
      all_caps && @nice_name.length > 4
    else
      all_caps || @nice_name == downcased
    end

  @nice_name = name_case(downcased) if needs_fix
end
name_wrangle_split_name() click to toggle source
# File lib/name_tamer/name.rb, line 213
# Re-case the last name when it is written in a single case, then rebuild
# @nice_name as "<remainder> <last name>".
#
# It's a person if we've split the name, so there is no organization logic.
# Returns the rebuilt @nice_name.
def name_wrangle_split_name
  downcased = @last_name.downcase
  single_case = @last_name == @last_name.upcase || @last_name == downcased

  @last_name = name_case(downcased) if single_case
  @nice_name = "#{@remainder} #{@last_name}"
end
remove_adfixes() click to toggle source

An adfix is either a prefix or a suffix

# File lib/name_tamer/name.rb, line 139
# An adfix is either a prefix or a suffix.
#
# Strips every suffix and then every prefix from the name. While the name is
# still in one piece both passes work on @nice_name; once it has been split,
# suffixes come off @last_name and prefixes come off @remainder.
def remove_adfixes
  if @last_name.nil?
    # Our name is still in one part, not two
    @nice_name = strip_adfixes_repeatedly(:suffix, @nice_name)
    @nice_name = strip_adfixes_repeatedly(:prefix, @nice_name)
  else
    # Our name is currently in two halves
    @last_name = strip_adfixes_repeatedly(:suffix, @last_name)
    @remainder = strip_adfixes_repeatedly(:prefix, @remainder)
  end
end

# Keep removing the outermost adfix of the given type from +name_part+ until
# remove_outermost_adfix reports (via @adfix_found) that none was found.
# Returns the stripped string.
def strip_adfixes_repeatedly(adfix_type, name_part)
  loop do
    name_part = remove_outermost_adfix(adfix_type, name_part)
    break unless @adfix_found
  end

  name_part
end
remove_initials() click to toggle source

Remove initials from personal names unless they are the only identifier. i.e. only remove initials if there’s also a proper name there

# File lib/name_tamer/name.rb, line 234
# Remove initials from personal names unless they are the only identifier,
# i.e. only remove initials if there's also a proper name there.
#
# Does nothing unless the recorded contact type is +:person+. The stripped
# version is kept only when it still contains a space.
def remove_initials
  return unless @contact_type == :person

  without_initials = @simple_name.gsub(/\b([a-z](?:\.*\s+|\.))/i, '')
  return unless without_initials.include?(ASCII_SPACE)

  @simple_name = without_initials
end
remove_middle_names() click to toggle source
# File lib/name_tamer/name.rb, line 243
# Reduce a person's name to its first and last usable words,
# e.g. "Philip Seymour Hoffman" -> "Philip Hoffman".
#
# Does nothing unless the recorded contact type is +:person+, or when neither
# a first nor a last usable word is found. When only one of the two is found
# it becomes the whole name.
def remove_middle_names
  return unless @contact_type == :person

  first_name, rest = find_first_usable_name(@simple_name.split)
  last_name = find_last_usable_name(rest)
  kept = [first_name, last_name]

  return if kept.none?

  @simple_name = kept.compact.join(' ')
end
remove_outermost_adfix(adfix_type, name_part) click to toggle source

We pass to this routine either prefixes or suffixes

# File lib/name_tamer/name.rb, line 336
# Remove one adfix from the outside of +name_part+.
#
# adfix_type:: key into ADFIX_PATTERNS (the callers pass +:prefix+ or +:suffix+)
# name_part::  the string to strip
#
# Returns +name_part+ itself when no adfix is found. Otherwise the contact
# type implied by the adfix is recorded and the text either side of the match
# is returned, minus any trailing comma that has been exposed.
def remove_outermost_adfix(adfix_type, name_part)
  implied_type, (before, _adfix, after) =
    find_contact_type_and_parts(ADFIX_PATTERNS[adfix_type], name_part)

  if @adfix_found
    # We've found a diagnostic adfix, so set the contact type
    self.contact_type = implied_type

    # One of the two sides is empty, depending on whether this was a prefix
    # or a suffix
    (before + after).gsub(/\s*,\s*$/, '')
  else
    name_part
  end
end
remove_periods_from_initials() click to toggle source
# File lib/name_tamer/name.rb, line 280
# Drops the periods from initials in @simple_name
# (e.g. "J.P.R. Williams" -> "JPR Williams") by delegating to the
# remove_periods_from_initials! String extension; returns that call's result.
# NOTE(review): the extension is defined outside this section; presumably it
# mutates the receiver in place - confirm.
def remove_periods_from_initials
  @simple_name.remove_periods_from_initials!
end
remove_zero_width() click to toggle source
# File lib/name_tamer/name.rb, line 116
# Removes zero-width characters from @tidy_name by passing ZERO_WIDTH_FILTER
# to the strip_unwanted! String extension; returns that call's result.
# NOTE(review): the extension and the filter are defined outside this
# section; presumably the receiver is mutated in place - confirm.
def remove_zero_width
  @tidy_name.strip_unwanted!(ZERO_WIDTH_FILTER)
end
standardize_words() click to toggle source
# File lib/name_tamer/name.rb, line 284
# Normalise symbols and abbreviations in @simple_name, in place
# (e.g. "B&Q Intl" -> "B and Q International").
#
# The substitutions are applied one after another in the order listed, then
# quotes and commercial marks are removed with strip_unwanted!. Returns the
# result of that last call.
def standardize_words
  substitutions = [
    [/ *& */, ' and '], # replace ampersand characters with ' and '
    [/ *\+ */, ' plus '], # replace plus signs with ' plus '
    [/\bintl\b/i, 'International'], # replace 'intl' with 'International'
    [/[־‐‑‒–—―−﹘﹣-]/, SLUG_DELIMITER] # replace Unicode dashes with the slug delimiter
  ]

  substitutions.each { |pattern, replacement| @simple_name.gsub!(pattern, replacement) }

  @simple_name.strip_unwanted!(/["“”™℠®©℗]/) # remove quotes and commercial decoration
end
tidy_spacing() click to toggle source
# File lib/name_tamer/name.rb, line 120
# Normalise the spacing of @tidy_name (" John   Smith " -> "John Smith").
#
# Calls space_around_comma!, strip_or_self! and whitespace_to!(ASCII_SPACE)
# in that order, each on the result of the previous call, and returns the
# result of the last one.
# NOTE(review): these are String extensions defined outside this section;
# presumably each mutates the receiver and returns it - confirm.
def tidy_spacing
  comma_spaced = @tidy_name.space_around_comma!
  trimmed = comma_spaced.strip_or_self!
  trimmed.whitespace_to!(ASCII_SPACE)
end
unescape() click to toggle source
# File lib/name_tamer/name.rb, line 112
# Unescape percent-encoded characters and fix UTF-8 encoding in @tidy_name.
#
# Calls ensure_safe!, safe_unescape! and unescape_html! in that order, each
# on the result of the previous call, and returns the result of the last one.
# NOTE(review): these are String extensions defined outside this section;
# presumably each mutates the receiver and returns it - confirm.
def unescape
  safe = @tidy_name.ensure_safe!
  percent_decoded = safe.safe_unescape!
  percent_decoded.unescape_html!
end
use_nonbreaking_spaces_in_compound_names() click to toggle source

Conjoin compound names with non-breaking spaces

# File lib/name_tamer/name.rb, line 222
# Conjoin compound names in @nice_name with non-breaking spaces.
#
# Calls nbsp_in_compound_name! and then nbsp_in_name_modifier! on the result,
# returning whatever the second call returns.
# NOTE(review): both are String extensions defined outside this section;
# presumably each mutates the receiver and returns it - confirm.
def use_nonbreaking_spaces_in_compound_names
  compounds_joined = @nice_name.nbsp_in_compound_name!
  compounds_joined.nbsp_in_name_modifier!
end