class DwcAgent::Cleaner

Public Class Methods

instance() click to toggle source
# File lib/dwc_agent/cleaner.rb, line 6
# Returns the cleaner cached under Thread.current[:dwc_agent_cleaner],
# building it with +new+ and caching it when no value is stored there yet.
def instance
  store = Thread.current
  store[:dwc_agent_cleaner] ||= new
end
new() click to toggle source
# File lib/dwc_agent/cleaner.rb, line 11
# Takes no arguments and performs no setup.
def initialize; end

Public Instance Methods

clean(parsed_namae) click to toggle source

Cleans the passed-in namae object from the parse method and re-organizes it to better match expected Darwin Core output.

@param parsed_namae [Namae::Name] a Namae object
@return [Namae::Name] a new Namae object

# File lib/dwc_agent/cleaner.rb, line 19
# Cleans the passed-in Namae object from the parse method and re-organizes it
# to better match expected Darwin Core output.
#
# The steps run in a fixed order: early rejections, in-place repairs on
# +parsed_namae+ (given/family swaps, re-casing, initials), then clean-up of
# stripped copies of each name part, from which the result is built.
#
# @param parsed_namae [Namae::Name] a Namae object; NOTE: the repair steps
#   assign to its given/family attributes and normalize_initials is called on
#   it, so the argument itself is modified
# @return [Namae::Name] a new Namae object; it is built with no attributes
#   whenever one of the rejection rules matches
def clean(parsed_namae)
  raw_given = parsed_namae.given
  raw_family = parsed_namae.family

  # Reject a given name equal to a GIVEN_BLACKLIST entry, ignoring case.
  # `== 0` (not `.zero?`) is deliberate: casecmp may return nil.
  return Namae::Name.new if raw_given && GIVEN_BLACKLIST.any? { |s| s.casecmp(raw_given) == 0 }

  # Reject a three-character family that contains exactly one period.
  return Namae::Name.new if raw_family && raw_family.length == 3 && raw_family.count(".") == 1

  # Reject a given name longer than 35 characters.
  return Namae::Name.new if raw_given && raw_given.length > 35

  # Reject a given name with three or more periods in which a period is
  # followed by a word of 4+ letters, whitespace, and letters ending in a period.
  if raw_given && raw_given.count(".") >= 3 &&
     raw_given.match?(/\.\s*[a-zA-Z]{4,}\s+[a-zA-Z]{1,}\./)
    return Namae::Name.new
  end

  # Reject when the display-order form of the name matches BLACKLIST.
  return Namae::Name.new if parsed_namae.display_order =~ BLACKLIST

  # A family made of periods plus at most three other characters, next to a
  # given name: exchange the two parts.
  if parsed_namae.given && parsed_namae.family &&
     parsed_namae.family.count(".") > 0 &&
     parsed_namae.family.length - parsed_namae.family.count(".") <= 3
    parsed_namae.family, parsed_namae.given = parsed_namae.given, parsed_namae.family
  end

  # A family of at most three characters that equals its own upcase, next to
  # a given name that does not end in a period: exchange the two parts.
  if parsed_namae.given && parsed_namae.family &&
     parsed_namae.family.length <= 3 &&
     parsed_namae.family == parsed_namae.family.upcase &&
     parsed_namae.given[-1] != "."
    parsed_namae.family, parsed_namae.given = parsed_namae.given, parsed_namae.family
  end

  # Re-case a period-free given name of four or more characters that is
  # written entirely in upper or entirely in lower case.
  if parsed_namae.given &&
     (parsed_namae.given == parsed_namae.given.upcase ||
      parsed_namae.given == parsed_namae.given.downcase) &&
     !parsed_namae.given.include?(".") &&
     parsed_namae.given.length >= 4
    parsed_namae.given = NameCase(parsed_namae.given)
  end

  # Close a given name that ends in ".X" (period, capital letter) with a period.
  if parsed_namae.given && parsed_namae.given.match?(/\.[A-Z]$/)
    parsed_namae.given += "."
  end

  # Re-case any given name that contains a letter followed by a period.
  if parsed_namae.given && parsed_namae.given.match?(/[A-Za-z]\./)
    parsed_namae.given = NameCase(parsed_namae.given)
  end

  # Reject a family equal to a FAMILY_BLACKLIST entry, ignoring case.
  if parsed_namae.family && FAMILY_BLACKLIST.any? { |s| s.casecmp(parsed_namae.family) == 0 }
    return Namae::Name.new
  end

  parsed_namae.normalize_initials

  # Work on stripped copies from here on. Safe navigation tolerates missing
  # (nil) parts without hiding unrelated errors the way `rescue nil` would.
  # The family additionally loses one period at the very end of the string
  # (removed before stripping).
  family = parsed_namae.family&.gsub(/\.\z/, '')&.strip
  given = parsed_namae.given&.strip
  particle = parsed_namae.particle&.strip
  appellation = parsed_namae.appellation&.strip
  suffix = parsed_namae.suffix&.strip
  title = parsed_namae.title&.strip

  # Put a space after every period when an initial is glued to a word of two
  # or more letters, e.g. "J.Robert" becomes "J. Robert".
  if given && given.match?(/[A-Z]\.[A-Za-z]{2,}/)
    given = given.gsub(".", ". ").strip
  end

  # A lone given name without periods is treated as the family.
  if family.nil? && given && !given.include?(".")
    family = given
    given = nil
  end

  # With a family but no given name, the particle is promoted to the given
  # name: its first "letter + period" is upcased and its first character
  # capitalized.
  if family && given.nil? && particle
    given = particle.sub(/[a-z]\./, &:upcase).sub(/^(.)/) { Regexp.last_match(1).capitalize }
    particle = nil
  end

  # Drop a particle that contains a period.
  particle = nil if particle && particle.include?(".")

  # Re-case a family written entirely in upper or entirely in lower case.
  if family && (family == family.upcase || family == family.downcase)
    family = NameCase(family)
  end

  # Reject a family that ends in a capital letter.
  return Namae::Name.new if family && family.match?(/[A-Z]$/)

  # Reject a family starting with two capitals when there is no given name.
  return Namae::Name.new if given.nil? && family && family.match?(/^[A-Z]{2}/)

  # Check both blacklists again, now against the cleaned values.
  return Namae::Name.new if family && FAMILY_BLACKLIST.any? { |s| s.casecmp(family) == 0 }
  return Namae::Name.new if given && GIVEN_BLACKLIST.any? { |s| s.casecmp(given) == 0 }

  attributes = {
    title: title,
    appellation: appellation,
    given: given,
    particle: particle,
    family: family,
    suffix: suffix
  }
  Namae::Name.new(attributes)
end