class SportDb::Import::WikiReader

Public Class Methods

new( txt ) click to toggle source
# File lib/sportdb/formats/team/wiki_reader.rb, line 30
def initialize( txt )
  @txt = txt
end
parse( txt ) click to toggle source
# File lib/sportdb/formats/team/wiki_reader.rb, line 26
def self.parse( txt )
  new( txt ).parse
end
read( path ) click to toggle source
# File lib/sportdb/formats/team/wiki_reader.rb, line 21
def self.read( path )   ## use - rename to read_file or from_file etc. - why? why not?
  txt = File.open( path, 'r:utf-8' ) { |f| f.read }
  parse( txt )
end

Public Instance Methods

catalog() click to toggle source
# File lib/sportdb/formats/team/wiki_reader.rb, line 18
def catalog() Import.catalog; end
parse() click to toggle source
# File lib/sportdb/formats/team/wiki_reader.rb, line 34
def parse
  recs = []
  last_country = nil  ## note: supports only one level of headings for now (and that is a country)

  @txt.each_line do |line|
    line = line.strip

    next if line.empty?
    next if line.start_with?( '#' )   ## skip comments too

    ## strip inline (until end-of-line) comments too
    ##  e.g Eupen        => KAS Eupen,    ## [de]
    ##   => Eupen        => KAS Eupen,
    line = line.sub( /#.*/, '' ).strip
    pp line


    next if line =~ /^={1,}$/          ## skip "decorative" only heading e.g. ========

     ## note: like in wikimedia markup (and markdown) all optional trailing ==== too
     ##  todo/check:  allow ===  Text  =-=-=-=-=-=   too - why? why not?
    if line =~ /^(={1,})       ## leading ======
                 ([^=]+?)      ##  text   (note: for now no "inline" = allowed)
                 =*            ## (optional) trailing ====
                 $/x
       heading_marker = $1
       heading_level  = $1.length   ## count number of = for heading level
       heading        = $2.strip

       puts "heading #{heading_level} >#{heading}<"

       if heading_level > 1
         puts "** !!! ERROR [wiki reader] !!! -  - headings level too deep - only top / one level supported for now; sorry"
         exit 1
       end

      ## assume country in heading; allow all "formats" supported by parse e.g.
      ##   Österreich • Austria (at)
      ##   Österreich • Austria
      ##   Austria
      ##   Deutschland (de) • Germany
      country = catalog.countries.parse( heading )
      ## check country code - MUST exist for now!!!!
      if country.nil?
        puts "!!! error [wiki reader] - unknown country >#{heading}< - sorry - add country to config to fix"
        exit 1
      end

      last_country = country
      pp last_country
    else
      ## strip and  squish (white)spaces
      #   e.g. New York FC      (2011-)  => New York FC (2011-)
      value = line.strip.gsub( /[ \t]+/, ' ' )

      ## normalize (allow underscore (-) - replace with space)
      ##  e.g. Cercle_Brugge_K.S.V. =>  Cercle Brugge K.S.V.
      value = value.gsub( '_', ' ' )

      if last_country.nil?
        puts "** !!! ERROR [wiki reader] !!! - country heading missing for club name; sorry - add country heading to fix"
        exit 1
      end

      rec = WikiClub.new( value, last_country )
      recs << rec
    end
  end  # each_line
  recs
end