module Fbref
Constants
- MAX_HEADERS
Headers used by the vacuum helper. TODO/FIX: (re)use elsewhere and make more generic - why? why not?
- MIN_HEADERS
Public Class Methods
build( rows, league:, season: )
click to toggle source
# File lib/football-sources/fbref/build.rb, line 4
##
# Turn scraped schedule rows into flat match records.
#
# rows   - enumerable of rows indexed by symbol (:stage, :matchweek, :round,
#          :date, :time, :team1, :team2, :score, :comments, :venue, :attendance)
# league - league key; must be a String (NOT a league struct)
# season - anything accepted by Season(); only used for the progress message
#
# Returns an array with one record per row:
#   [stage, round, date, time, team1, ft, ht, team2, et, pen, venue, attendance, comments]
#
# Raises ArgumentError if league is not a String.
def self.build( rows, league:, season: )
  ## cast (ensure) season class (NOT string, integer, etc.)
  season = Season( season )

  ## note: do NOT pass in league struct! pass in key (string)
  raise ArgumentError, "league key as string expected" unless league.is_a?(String)

  print " #{rows.size} rows - build #{league} #{season}"
  print "\n"

  rows.map do |row|
    stage     = row[:stage] || ''
    matchweek = row[:matchweek]
    round_col = row[:round]

    ## todo/check: assert that only matchweek or round can be present NOT both!!
    ## matchweek wins over round; fall back to an empty string if both are blank
    round = if matchweek && matchweek.size > 0
              matchweek
            elsif round_col && round_col.size > 0
              round_col
            else
              ''
            end

    ## parse (and thereby validate) the date string e.g. 2019-10-25
    date = Date.strptime( row[:date], '%Y-%m-%d' )

    ht, ft, et, pen, comments = parse_score( row[:score], row[:comments] )

    [stage,
     round,
     date.strftime( '%Y-%m-%d' ),
     row[:time],
     row[:team1],
     ft,
     ht,
     row[:team2],
     et,    # extra: incl. extra time
     pen,   # extra: incl. penalties
     row[:venue],
     row[:attendance],
     comments]
  end
end
convert( league:, season: )
click to toggle source
# File lib/football-sources/fbref/convert.rb, line 3
##
# Convert the cached schedule page for a league/season into a match CSV file.
#
# Loads the page via Page::Schedule.from_cache, builds the match records,
# reformats the date column, strips unused columns via vacuum and writes the
# result to "<out_dir>/<league>_<season path>.csv" with Cache::CsvMatchWriter.
# Prints the page title, a sample of the first records and the output path.
def self.convert( league:, season: )
  schedule = Page::Schedule.from_cache( league: league, season: season )
  puts schedule.title

  matches = build( schedule.matches, league: league, season: season )

  ## reformat date / beautify e.g. Sat Aug 7 1993
  ## (the date is the third column of every record)
  matches.each do |match|
    parsed   = Date.strptime( match[2], '%Y-%m-%d' )
    match[2] = parsed.strftime( '%a %b %-d %Y' )
  end

  matches, headers = vacuum( matches )
  pp matches[0..2]

  season_obj = Season.parse( season )
  out_path   = "#{config.convert.out_dir}/#{league}_#{season_obj.to_path}.csv"
  puts "write #{out_path}..."
  Cache::CsvMatchWriter.write( out_path, matches, headers: headers )
end
parse_score( score_str, comments )
click to toggle source
# File lib/football-sources/fbref/build.rb, line 63
##
# Split a raw score cell into its parts.
#
# score_str - raw score text, e.g. "2–1" or "(4) 1–1 (3)" (penalty shootout);
#             nil or an empty string means "no score"
# comments  - match notes; when they mention "extra time" (case-insensitive)
#             a plain score is reported as the extra-time score
#
# Returns [ht, ft, et, pen, comments]:
#   ht       - always '' (this method never sets it)
#   ft       - full-time score, or '?' when only an extra-time score is known
#   et       - score incl. extra time, or ''
#   pen      - penalty shootout score, or ''
#   comments - passed through unchanged
def self.parse_score( score_str, comments )
  ft  = ''
  ht  = ''
  et  = ''
  pen = ''

  if score_str && score_str.size > 0
    ## note: replace unicode "fancy" dash with ascii-dash
    #  check other columns too - possible in teams?
    score_str = score_str.gsub( /[–]/, '-' ).strip

    ## penalty shootout format: (pen1) goals1-goals2 (pen2)
    ## note: every number may have more than one digit; the whole cell must match
    shootout = score_str.match( /\A\(([0-9]+)\) [ ]+ ([0-9]+) - ([0-9]+) [ ]+ \(([0-9]+)\)\z/x )

    if shootout
      ft  = '?'
      et  = "#{shootout[2]}-#{shootout[3]}"
      pen = "#{shootout[1]}-#{shootout[4]}"
    elsif comments =~ /extra time/i
      ## "regular" looking score, but the notes say it includes extra time
      ft = '?'
      et = score_str
    else
      ## assume "regular" score e.g. 0-0
      ft = score_str
    end
  end

  [ht, ft, et, pen, comments]
end
vacuum( rows, headers: MAX_HEADERS, fixed_headers: MIN_HEADERS )
click to toggle source
# File lib/football-sources/fbref/convert.rb, line 54
##
# Check for unused columns and strip/remove them.
#
# A column is unused when its value is nil or empty in every row. Unused
# columns are dropped from the rows and from the headers, unless the header
# is listed in fixed_headers (those are always kept).
#
# rows          - array of records (arrays); values must respond to empty? or be nil,
#                 and no record may have more columns than headers
# headers       - header name for every column position (default: MAX_HEADERS)
# fixed_headers - headers to keep even if the column is empty (default: MIN_HEADERS)
#
# Prints the per-column usage counter via pp.
#
# Returns [rows, headers] with the unused columns removed; the rows object is
# returned as-is when nothing had to be removed.
def self.vacuum( rows, headers: MAX_HEADERS, fixed_headers: MIN_HEADERS )
  ## count the non-empty values per column
  counter = Array.new( headers.size, 0 )
  rows.each do |row|
    row.each_with_index do |col, idx|
      counter[idx] += 1 unless col.nil? || col.empty?
    end
  end

  pp counter

  ## split into columns to keep (used or fixed) and columns to drop
  kept_headers  = []
  empty_indices = []
  counter.each_with_index do |num, idx|
    header = headers[ idx ]
    if num > 0 || fixed_headers.include?( header )
      kept_headers << header
    else
      empty_indices << idx
    end
  end

  unless empty_indices.empty?
    rows = rows.map do |row|
      row.reject.with_index { |_col, idx| empty_indices.include?( idx ) }
    end
  end

  [rows, kept_headers]
end