class SportDb::Import::CountryIndex
built-in countries for (quick starter) auto-add
Attributes
countries[R]
Public Class Methods
new( recs )
click to toggle source
# File lib/sportdb/formats/country/country_index.rb, line 11
## Sets up the (empty) lookup tables and then registers all
##  passed-in country records via add.
##
##   recs - country records; handed over to add as is
def initialize( recs )
  @countries         = []   ## all country records (appended to by add)
  @countries_by_code = {}   ## lookup by key or (downcased) code
  @countries_by_name = {}   ## lookup by normalized name

  add( recs )
end
Public Instance Methods
add( recs )
click to toggle source
Uses the included name helpers: strip_year( name ), has_year?( name ), strip_lang( name ) and normalize( name ).
# File lib/sportdb/formats/country/country_index.rb, line 28
## Registers country records in the index (auto-fill countries).
##
##   recs - enumerable of country records; every record must respond to
##          key, code, name and alt_names
##          e.g. { key:'af', code:'AFG', name:'Afghanistan'}
##
## Every record gets appended to @countries and becomes reachable
##  by its key, by its downcased code and by all of its (normalized) names.
##
## Note: on any conflict (duplicate key, code or name) an error message
##  gets printed and the process is terminated via exit 1.
##
## Relies on has_year?, strip_year, strip_lang and normalize -
##  helpers that are NOT defined in this listing.
##
## Returns recs (the result of recs.each).
def add( recs )
  recs.each do |rec|
    @countries << rec

    ## add codes lookups - key, code, ...
    if @countries_by_code[ rec.key ]
      puts "** !! ERROR !! country code (key) >#{rec.key}< already exists!!"
      exit 1
    end
    @countries_by_code[ rec.key ] = rec

    ## add code (only) if different from key
    code = rec.code.downcase
    if rec.key != code
      if @countries_by_code[ code ]
        puts "** !! ERROR !! country code >#{code}< already exists!!"
        exit 1
      end
      @countries_by_code[ code ] = rec
    end

    ## add all names (canonical name + alt names)
    names = [rec.name] + rec.alt_names

    ## check "hand-typed" names for year (auto-add)
    ##   check for year(s) e.g. (1887-1911), (-2013),
    ##                          (1946-2001,2013-) etc.
    names += names.select { |name| has_year?( name ) }
                  .map    { |name| strip_year( name ) }

    ## check for duplicates - simple check for now - fix/improve
    ##   todo/fix: (auto)remove duplicates - why? why not?
    duplicates = names.size - names.uniq.size
    if duplicates != 0
      puts "** !!! ERROR !!! - #{duplicates} duplicate name(s):"
      pp names
      pp rec
      exit 1
    end

    names.each do |raw_name|
      ## check lang codes e.g. [en], [fr], etc.
      ##   todo/check/fix: move strip_lang up in the chain - check for duplicates
      ##     (e.g. only lang code marker different etc.) - why? why not?
      name = strip_lang( raw_name )
      norm = normalize( name )

      ## check if country name already is included or is new country rec
      old_rec = @countries_by_name[ norm ]
      if old_rec
        puts "** !!! ERROR !!! - name conflict/duplicate - >#{name}< will overwrite >#{old_rec.name}< with >#{rec.name}<"
        exit 1
      end
      @countries_by_name[ norm ] = rec
    end
  end   ## each record
end
find( key )
click to toggle source
# File lib/sportdb/formats/country/country_index.rb, line 112
## Looks up a country by code first and - if that yields nothing -
##  by (normalized) name.
##
##   key - code or name; passed on unchanged to find_by_code / find_by_name
##
## Returns whatever the lookups return (nil if both come up empty).
def find( key )
  find_by_code( key ) || find_by_name( key )
end
Also aliased as: []
find_by_code( code )
click to toggle source
fix/todo: add find_by (alias for find_by_name/find_by_code)
# File lib/sportdb/formats/country/country_index.rb, line 102
## Looks up a country record by its key or code.
##
##   code - string or symbol; gets converted with to_s and
##          always downcased (e.g. AUT to aut etc.)
##
## Returns the entry stored in @countries_by_code or nil if there is none.
def find_by_code( code )
  @countries_by_code[ code.to_s.downcase ]
end
find_by_name( name )
click to toggle source
# File lib/sportdb/formats/country/country_index.rb, line 107
## Looks up a country record by name.
##
##   name - string or symbol; gets converted with to_s first and
##          then run through normalize (helper NOT defined in this listing)
##
## Returns the entry stored in @countries_by_name or nil if there is none.
def find_by_name( name )
  @countries_by_name[ normalize( name.to_s ) ]
end
parse( line )
click to toggle source
split/parse country line
split on bullet (•) first
split into name and code with regex - make code optional
Examples:
Österreich • Austria (at)
Österreich • Austria
Austria
Deutschland (de) • Germany

todo/check: support more formats - why? why not? e.g. Austria, AUT (i.e. with comma - why? why not?)
# File lib/sportdb/formats/country/country_index.rb, line 134
## Splits / parses a country line and resolves it to a single country.
##
##   line - string with one or more values separated by a bullet (•);
##          every value is a name or code, optionally followed by
##          a country code in brackets e.g. Austria (at)
##
## All values of a line must resolve (via find_by_code, find_by_name
##  or find) to the very same country.
##
## Note: on any unknown or conflicting value an error message gets printed
##  and the process is terminated via exit 1.
##
## Returns the matched country or nil if the line holds no values.
def parse( line )
  country = nil

  line.split( '•' ).each do |part|   ## use/support multi-lingual separator
    value = part.strip

    ## check for trailing country code e.g. (at), (eng), etc.
    ##   note: \z anchors at the very end of the value
    m = value.match( /[ ]+\((?<code>[a-z]{1,4})\)\z/ )
    if m   ## e.g. Austria (at)
      code = m[:code]
      name = m.pre_match.strip   ## everything before the bracketed code

      by_code = find_by_code( code )
      by_name = find_by_name( name )

      if by_code.nil?
        puts "** !!! ERROR !!! country - unknown code >#{code}< in line: #{line}"
        pp line
        exit 1
      end

      if by_name.nil?
        puts "** !!! ERROR !!! country - unknown name >#{name}< in line: #{line}"
        pp line
        exit 1
      end

      if by_code != by_name
        puts "** !!! ERROR !!! country - name and code do NOT match the same country:"
        pp line
        pp [by_code, by_name]
        exit 1
      end

      if country && country != by_code
        puts "** !!! ERROR !!! country - names do NOT match the same country:"
        pp line
        pp country
        pp [by_code, by_name]
        exit 1
      end

      country = by_code
    else
      ## just assume value is name or code
      candidate = find( value )

      if candidate.nil?
        puts "** !!! ERROR !!! country - unknown name or code >#{value}< in line: #{line}"
        pp line
        exit 1
      end

      if country && country != candidate
        puts "** !!! ERROR !!! country - names do NOT match the same country:"
        pp line
        pp country
        pp candidate
        exit 1
      end

      country = candidate
    end
  end

  country
end