class SportDb::Import::ClubHistoryReader

Constants

KEYWORD_LINE_RE

Recognized keywords (either spelling): RENAME/RENAMED, MOVE/MOVED, BANKRUPT/BANKRUPTED, REFORM/REFORMED, MERGE/MERGED. Open question: also allow + or ++ or +++ or ; as “inline” separators - why? why not?

Public Class Methods

new( txt ) click to toggle source
# File lib/sportdb/formats/team/club_reader_history.rb, line 23
# Creates a reader for the given club history text.
#
# txt - the text to parse; it is only stored here, parsing happens in #parse
def initialize(txt)
  @txt = txt
end
parse( txt ) click to toggle source
# File lib/sportdb/formats/team/club_reader_history.rb, line 19
# Convenience shortcut: builds a reader for +txt+ and parses it right away.
#
# txt - the club history text
#
# Returns whatever the instance-level #parse returns.
def self.parse(txt)
  reader = new(txt)
  reader.parse
end
read( path ) click to toggle source
# File lib/sportdb/formats/team/club_reader_history.rb, line 14
# Reads the file at +path+ as UTF-8 text and hands the content to parse.
#
# path - path of the file to read
#
# Returns whatever parse returns for the file's content.
#
# NOTE: naming is still open - rename to read_file or from_file etc. - why? why not?
def self.read(path)
  parse(File.read(path, encoding: 'utf-8'))
end

Public Instance Methods

catalog() click to toggle source
# File lib/sportdb/formats/team/club_reader_history.rb, line 10
# Shortcut for the catalog exposed by Import (used in #parse for the country lookup).
def catalog
  Import.catalog
end
parse() click to toggle source
# File lib/sportdb/formats/team/club_reader_history.rb, line 48
# Parses the outline text held in @txt into a flat list of club history records.
#
# The nodes come from OutlineReader.parse( @txt ). Each node is indexed as
# node[0] (node type: :h1..:h6 or :p) and node[1] (heading text or the
# paragraph's lines).
#
#   heading 1 - country (looked up via catalog.countries.parse)
#   heading 2 - season  (parsed via Season.parse)
#   paragraph - lines with history events; a line matching KEYWORD_LINE_RE
#               switches the "current" keyword, all following lines (also in
#               later paragraphs under the same heading) reuse that keyword
#
# Returns an Array of records; the shape depends on the (normalized) keyword:
#
#   [ 'BANKRUPT', season_key, [team, country_key] ]
#   [ 'RENAME' | 'REFORM' | 'MOVE', season_key, [team_old, country_key], [team_new, country_key] ]
#   [ 'MERGE', season_key, [[team, country_key], ...], [team_new, country_key] ]
#
# where season_key is last_season.key and country_key is last_country.key.
#
# Error handling: every error (unknown country, heading level > 2, missing
# country / season heading, missing or unknown keyword, wrong number of teams,
# unknown node type) prints a message and terminates the process via exit 1;
# nothing is raised to the caller.
def parse
  recs = []
  last_rec  = nil   ## note: assigned but never read in this method

  ## parser state carried over from node to node / line to line
  last_country = nil
  last_season  = nil
  last_keyword = nil
  last_teams   = []   ## teams collected so far for a pending MERGE

  OutlineReader.parse( @txt ).each do |node|
    if [:h1,:h2,:h3,:h4,:h5,:h6].include?( node[0] )
      heading_level  = node[0][1].to_i   ## second character of :h1..:h6 is the level digit
      heading        = node[1]

      puts "heading #{heading_level} >#{heading}<"


        if heading_level == 1
            ## assume country in heading; allow all "formats" supported by parse e.g.
            ##   Österreich • Austria (at)
            ##   Österreich • Austria
            ##   Austria
            ##   Deutschland (de) • Germany
            country = catalog.countries.parse( heading )
            ## check country code - MUST exist for now!!!!
            if country.nil?
              puts "!!! error [club history reader] - unknown country >#{heading}< - sorry - add country to config to fix"
              exit 1
            end
            puts "  country >#{heading}< => #{country.name}, #{country.key}"
            last_country = country
            last_season  = nil  ## reset "lower levels" - season & keyword
            last_keyword = nil
         elsif heading_level == 2
            ## assume season
            ## NOTE(review): no nil check on the result here - confirm that
            ##   Season.parse never returns nil for an unparsable heading
            season = Season.parse( heading )
            puts "  season >#{heading}< => #{season.key}"
            last_season  = season  ## remember season for the paragraphs that follow
            last_keyword = nil     ## reset "lower level" - keyword
         else
            puts "!!! ERROR [club history reader] - for now only heading 1 & 2 supported; sorry"
            exit 1
         end

    elsif node[0] == :p   ## paragraph with (text) lines
      ## a paragraph is only valid below a country (h1) AND a season (h2) heading
      if last_country.nil?
        puts "!!! ERROR [club history reader] - country heading 1 required, sorry"
        exit 1
      end
      if last_season.nil?
        puts "!!! ERROR [club history reader] - season heading 2 required, sorry"
        exit 1
      end

      lines = node[1]
      lines.each do |line|
        if m=line.match(KEYWORD_LINE_RE)   ## extract keyword and continue
          keyword = m[:keyword]
          line    = m[:text].strip    ## continue with the text captured by the regex only

          puts "    keyword #{keyword}"
          last_keyword = case keyword   ## "normalize" keywords
                         when 'BANKRUPT', 'BANKRUPTED'
                           'BANKRUPT'
                         when 'RENAME', 'RENAMED'
                           'RENAME'
                         when 'REFORM', 'REFORMED'
                           'REFORM'
                         when 'MOVE',   'MOVED'
                           'MOVE'
                         when 'MERGE',  'MERGED'
                           'MERGE'
                         else
                           puts "!!! ERROR [club history reader] - unexpected keyword >#{keyword}<; sorry - don't know how to normalize"
                           exit 1
                         end

          ## a new keyword line starts a fresh team collection; note that teams
          ##   collected for a MERGE that never got its ⇒ line are dropped here
          ##   without any error
          last_teams   = []
        end

        ## lines without a keyword of their own reuse the last keyword seen
        if last_keyword.nil?
          puts "!!! ERROR [club history reader] - line with keyword expected - got:"
          puts line
          exit 1
        end

        if    last_keyword == 'BANKRUPT'
           ## requires / expects one team in one line
           recs << [ last_keyword, last_season.key,
                     [ squish(line), last_country.key ]
                   ]
        elsif last_keyword == 'RENAME' ||
              last_keyword == 'REFORM' ||
              last_keyword == 'MOVE'
           ## requires / expects two teams in one line (separated by ⇒ or such)
           teams = line.split( '⇒' )
           if teams.size != 2
            puts "!!! ERROR [club history reader] - expected two teams - got:"
            pp teams
            exit 1
           end
           teams = teams.map {|team| squish(team.strip) }  ## strip leading / trailing whitespace and squish
           recs << [ last_keyword, last_season.key,
                     [ teams[0], last_country.key ],
                     [ teams[1], last_country.key ]
                   ]
        elsif last_keyword == 'MERGE'
            ## check if line starts with separator
            ## otherwise collect to be merged teams
            if line.start_with?( '⇒' )
              ## the separator line names the merged club and closes the record
              if last_teams.size < 2
                puts "!!! ERROR [club history reader] - expected two or more teams for MERGE - got:"
                pp last_teams
                exit 1
              end
              ## auto-add country to all teams
              teams = last_teams.map {|team| [team, last_country.key]}
              recs << [ last_keyword, last_season.key,
                        teams,
                        [ squish(line.sub('⇒','').strip), last_country.key ]
                      ]

              last_teams = []   ## ready for the next MERGE under the same keyword
            else
              last_teams << squish(line)
            end
        else
          puts "!!! ERROR [club history reader] - unknown keyword >#{last_keyword}<; cannot process; sorry"
          exit 1
        end
      end  # each line (in paragraph)
    else
      puts "** !!! ERROR [club history reader] - unknown line type:"
      pp node
      exit 1
    end
  end

  recs
end
squish( str ) click to toggle source

helper

# File lib/sportdb/formats/team/club_reader_history.rb, line 193
# Collapses every run of consecutive spaces in +str+ into one single space.
#
# Only the space character is handled - tabs and other whitespace are left
# untouched and leading / trailing spaces are collapsed but not removed.
#
# Returns a new String; +str+ itself is not modified.
def squish(str)
  str.squeeze(' ')
end