module Fbref

Constants

MAX_HEADERS

Helper constants for vacuum - todo/fix: (re)use and make more generic - why? why not?

MIN_HEADERS

Public Class Methods

build( rows, league:, season: ) click to toggle source
# File lib/football-sources/fbref/build.rb, line 4
# Turn scraped schedule rows into flat match records.
#
# Every row is read through the keys :stage, :matchweek, :round, :date,
# :time, :team1, :team2, :score, :comments, :venue and :attendance.
#
# @param rows   [Array] rows to convert; each must respond to #[] with the keys above
# @param league [String] league key (NOT a league struct)
# @param season season value; passed through Season() before use
# @return [Array<Array>] one 13-column record per row, in the order
#   stage, round, date, time, team1, ft, ht, team2, et, pen,
#   venue, attendance, comments
# @raise [ArgumentError] if league is not a String
def self.build( rows, league:, season: )
  season = Season( season )  ## make sure we work with a season object (NOT string, integer, etc.)

  ## note: callers must hand over the league key, never a league struct
  unless league.is_a?( String )
    raise ArgumentError, "league key as string expected"
  end

  print "  #{rows.size} rows - build #{league} #{season}"
  print "\n"

  rows.map do |row|
    stage = row[:stage] || ''

    ## matchweek wins over round; blank values count as missing
    ## todo/check:  assert that only matchweek or round can be present NOT both!!
    round_key = %i[matchweek round].find { |key| row[key] && row[key].size > 0 }
    round     = round_key ? row[round_key] : ''

    ## the date arrives as a string e.g. 2019-10-25; parsing rejects anything else
    date = Date.strptime( row[:date], '%Y-%m-%d' )

    ht, ft, et, pen, comments = parse_score( row[:score], row[:comments] )

    [
      stage,
      round,
      date.strftime( '%Y-%m-%d' ),
      row[:time],
      row[:team1],
      ft,
      ht,
      row[:team2],
      et,                 # extra: incl. extra time
      pen,                # extra: incl. penalties
      row[:venue],
      row[:attendance],
      comments
    ]
  end
end
convert( league:, season: ) click to toggle source
# File lib/football-sources/fbref/convert.rb, line 3
# Convert the cached schedule page of one league season into a CSV file.
#
# Steps: load the page via Page::Schedule.from_cache, build match records,
# reformat the date column, drop unused columns via vacuum, then write the
# result below config.convert.out_dir.
#
# @param league [String] league key, also used in the output file name
# @param season season value; handed to the page cache, build and Season.parse
# @return whatever Cache::CsvMatchWriter.write returns
def self.convert( league:, season: )
  page = Page::Schedule.from_cache( league: league, season: season )
  puts page.title

  recs = build( page.matches, league: league, season: season )

  ## beautify the date column (index 2) e.g. 1993-08-07 => Sat Aug 7 1993
  recs.each do |rec|
    parsed = Date.strptime( rec[2], '%Y-%m-%d' )
    rec[2] = parsed.strftime( '%a %b %-d %Y' )
  end

  recs, headers = vacuum( recs )
  pp recs.first( 3 )

  season = Season.parse( season )
  path   = "#{config.convert.out_dir}/#{league}_#{season.to_path}.csv"
  puts "write #{path}..."

  Cache::CsvMatchWriter.write( path, recs, headers: headers )
end
parse_score( score_str, comments ) click to toggle source
# File lib/football-sources/fbref/build.rb, line 63
# Split a raw score cell into half-time, full-time, extra-time and penalty parts.
#
# Recognised inputs (after the unicode en dash is replaced by an ascii dash):
#   "(4) 1-1 (3)"  - penalty shoot-out; the bracketed numbers become the
#                    penalty score and the middle part the extra-time score
#   anything else  - taken as-is; stored as extra-time score when comments
#                    mention "extra time", otherwise as full-time score
#
# @param score_str [String, nil] raw score; nil or '' yields all-empty parts
# @param comments  [String, nil] match notes; returned unchanged
# @return [Array] [ht, ft, et, pen, comments]; ht is always '' and ft is '?'
#   whenever the score is known to include extra time
def self.parse_score( score_str, comments )
  ht  = ''
  ft  = ''
  et  = ''
  pen = ''

  score_str = score_str.to_s   ## tolerate a missing (nil) score cell

  unless score_str.empty?
    ## note: replace unicode "fancy" dash with ascii-dash
    #  check other columns too - possible in teams?
    score_str = score_str.gsub( /[–]/, '-' ).strip

    ## every number may have more than one digit; \A and \z anchor the
    ## whole string (not just one line of it)
    shootout = /\A\((?<pen1>[0-9]+)\)
                    [ ]+ (?<et1>[0-9]+) - (?<et2>[0-9]+) [ ]+
                  \((?<pen2>[0-9]+)\)\z/x.match( score_str )

    if shootout
      ft  = '?'
      et  = "#{shootout[:et1]}-#{shootout[:et2]}"
      pen = "#{shootout[:pen1]}-#{shootout[:pen2]}"
    elsif comments =~ /extra time/i
      ## "regular" looking score, but the notes say it includes extra time
      ft = '?'
      et = score_str
    else
      ## assume "regular" score e.g. 0-0
      ft = score_str
    end
  end

  [ht, ft, et, pen, comments]
end
vacuum( rows, headers: MAX_HEADERS, fixed_headers: MIN_HEADERS ) click to toggle source
# File lib/football-sources/fbref/convert.rb, line 54
# Strip columns that are blank (nil or empty) in every row.
#
# A column is kept when at least one row has a non-blank value in it, or
# when its header is listed in fixed_headers. Prints the per-column counts
# of non-blank cells via pp.
#
# @param rows          [Array<Array>] records; cells must be nil or respond to #empty?
# @param headers       [Array] header names, one per column position
# @param fixed_headers [Array] headers whose columns are kept even when blank
# @return [Array] [rows, headers] with the unused columns removed; rows is
#   the array passed in (not a copy) when nothing had to be removed
def self.vacuum( rows, headers: MAX_HEADERS, fixed_headers: MIN_HEADERS )
  ## count the non-blank cells per column
  counter = Array.new( headers.size, 0 )
  rows.each do |row|
    row.each_with_index do |col, idx|
      counter[idx] += 1  unless col.nil? || col.empty?
    end
  end

  pp counter

  ## sort columns into "keep" and "drop"
  ## note: collect into new arrays - do NOT clobber the headers passed in
  kept_headers  = []
  empty_indices = []

  counter.each_with_index do |num, idx|
    header = headers[ idx ]
    if num > 0 || fixed_headers.include?( header )
      kept_headers << header
    else
      empty_indices << idx
    end
  end

  unless empty_indices.empty?
    rows = rows.map do |row|
      row.reject.with_index { |_col, idx| empty_indices.include?( idx ) }
    end
  end

  [rows, kept_headers]
end