class Worldfootball::Page::Schedule

Constants

REF_SCORE_RE

TODO/check: rename these constants to use HREF instead of REF (e.g. HREF_SCORE_RE) - why? why not?

REF_SEASON_RE
REF_TEAM_RE

Public Class Methods

from_cache( slug ) click to toggle source
# File lib/webget-football/worldfootball/page_schedule.rb, line 8
## Build a schedule page for the given slug from the web cache.
##
## Resolves the slug to its schedule url (via Metal.schedule_url),
## reads the html stored for that url (via Webcache.read) and
## passes the html on to new.
def self.from_cache( slug )
  new( Webcache.read( Metal.schedule_url( slug ) ) )
end

Public Instance Methods

matches() click to toggle source
# File lib/webget-football/worldfootball/page_schedule.rb, line 16
 ## Parse the schedule table into a flat list of match records.
 ##
 ## Walks every <tr> of the first `div.data > table.standard_tabelle`:
 ##   - a row whose text names a round (Spieltag, Runde, Finale, ...)
 ##     becomes the current round
 ##   - every other row is assumed to be a match line with the cells
 ##     date | time | team1 | - | team2 | score | (live badge)
 ##
 ## A progress line gets printed to stdout for every row.
 ##
 ## Returns (and memoizes in @matches) an Array of Hashes with the keys
 ##   :round, :date (YYYY-MM-DD), :time, :team1, :team1_ref, :score,
 ##   :team2, :team2_ref, :report_ref
 ## where the refs are nil if the cell has no <a> link.
 def matches
   @matches ||= begin
     # <div class="data">
     # <table class="standard_tabelle" cellpadding="3" cellspacing="1">

     ## note: use > for direct child (no in-between elements)
     table = doc.css( 'div.data > table.standard_tabelle' ).first    ## get table
     trs   = table.css( 'tr' )

     ## row text that marks a round header (and NOT a match line)
     round_re = /Spieltag|
                 [1-9]\.[ ]Runde|
                 Qual\.[ ][1-9]\.[ ]Runde|  # see EL or CL Quali
                 Qualifikation|             # see CA Championship
                 Sechzehntelfinale|         # see EL
                 Achtelfinale|
                 Viertelfinale|
                 Halbfinale|
                 Finale|
                 Gruppe[ ][A-Z]|            # see CL
                 Playoffs                   # see EL Quali
                /x

     last_date_str = nil
     last_round    = nil
     rows          = []

     trs.each_with_index do |tr, idx|
       row_no   = idx + 1         ## 1-based row number (used for log output only)
       row_text = tr.text.strip   ## note: strip once and reuse

       if row_text =~ round_re
         puts
         print '[%03d] ' % row_no
         print "round >#{row_text}<"
         print "\n"

         last_round = row_text
       else   ## assume table row (tr) is match line
         tds = tr.css( 'td' )

         date_str  = squish( tds[0].text )
         time_str  = squish( tds[1].text )

         ## <td><a href="/teams/hibernian-fc/" title="Hibernian FC">Hibernian FC</a></td>
         ##  todo/check: check if tooltip title always equals text - why? why not?
         team1_anchor = tds[2].css( 'a' )[0]
         if team1_anchor  # note: <a> might be optional (and team name only be plain text)
           team1_str    = squish( team1_anchor.text )
           team1_ref    = norm_team_ref( team1_anchor[:href] )
         else
           team1_str    = squish( tds[2].text )
           team1_ref    = nil
           puts "!! WARN: no team1_ref for >#{team1_str}< found"
         end

         ##  <td> - </td>
         ##   the separator cell is used to assert the expected column layout
         vs_str = squish( tds[3].text )
         assert( vs_str == '-',  "- for vs. expected; got #{vs_str}")

         ## <td><a href="/teams/st-johnstone-fc/" title="St. Johnstone FC">St. Johnstone FC</a></td>
         team2_anchor = tds[4].css( 'a' )[0]
         if team2_anchor
           team2_str    = squish( team2_anchor.text )
           team2_ref    = norm_team_ref( team2_anchor[:href] )
         else
           team2_str    = squish( tds[4].text )
           team2_ref    = nil
           puts "!! WARN: no team2_ref for >#{team2_str}< found"
         end

         ## <a href="/spielbericht/premiership-2020-2021-hibernian-fc-st-johnstone-fc/" title="Spielschema Hibernian FC - St. Johnstone FC">-:-</a>
         score_anchor = tds[5].css( 'a' )[0]
         if score_anchor   ## note: score ref (match report) is optional!!!!
           score_str    = squish( score_anchor.text )
           score_ref    = norm_score_ref( score_anchor[:href] )
         else
           score_str    = squish( tds[5].text )
           score_ref    = nil
         end

         ##  todo - find a better way to check for live match
         ## check for live badge image
         ## <td>
         ##   <img src="https://s.hs-data.com/bilder/shared/live/2.png" /></a>
         ## </td>
         ## note: the badge cell, the <img> and its src attribute are all
         ##        optional - a missing one simply means "not live"
         badge_td  = tds[6]
         badge_img = badge_td ? badge_td.css( 'img' )[0] : nil
         badge_src = badge_img ? badge_img[:src] : nil
         if badge_src && badge_src.include?( '/live/' )
           puts "!! WARN: live match badge, resetting score from #{score_str} to -:-"
           score_str = '-:-'  # note: -:- gets replaced to ---
         end

         ## rows without a date of their own inherit the date of the row before
         date_str = last_date_str    if date_str.empty?

         print '[%03d]    ' % row_no
         print "%-10s | " % date_str
         print "%-5s | " % time_str
         print "%-22s | " % team1_str
         print "%-22s | " % team2_str
         print "%-10s | " % score_str
         print( score_ref || 'n/a' )
         print "\n"

         ## change  2:1 (1:1)  to 2-1 (1-1)
         score_str = score_str.gsub( ':', '-' )

         ## convert date from 25.10.2019 to 2019-10-25
         date     = Date.strptime( date_str, '%d.%m.%Y' )

         ## note: keep structure flat for now
         ##        (AND not nested e.g. team:{text:,ref:}) - why? why not?
         rows << { round:      last_round,
                   date:       date.strftime( '%Y-%m-%d' ),
                   time:       time_str,
                   team1:      team1_str,
                   team1_ref:  team1_ref,
                   score:      score_str,
                   team2:      team2_str,
                   team2_ref:  team2_ref,
                   report_ref: score_ref
                 }

         last_date_str = date_str
       end
     end # each tr (table row)

     rows
   end
 end
norm_score_ref( str ) click to toggle source
# File lib/webget-football/worldfootball/page_schedule.rb, line 221
## Extract the match report ref from a score link href.
##
## Returns capture group 1 of REF_SCORE_RE;
## if the href does not match, prints an error and exits with status 1.
def norm_score_ref( str )
  matched = REF_SCORE_RE.match( str )
  unless matched
    puts "!! ERROR: unexpected score href format >#{str}<"
    exit 1
  end

  matched[1]
end
norm_season_ref( str ) click to toggle source
# File lib/webget-football/worldfootball/page_schedule.rb, line 249
## Extract the season ref from a season option value / href.
##
## Returns capture group 1 of REF_SEASON_RE;
## if the value does not match, prints an error and exits with status 1.
def norm_season_ref( str )
  matched = REF_SEASON_RE.match( str )
  unless matched
    puts "!! ERROR: unexpected season href format >#{str}<"
    exit 1
  end

  matched[1]
end
norm_team_ref( str ) click to toggle source
# File lib/webget-football/worldfootball/page_schedule.rb, line 235
## Extract the team ref from a team link href.
##
## Returns capture group 1 of REF_TEAM_RE;
## if the href does not match, prints an error and exits with status 1.
def norm_team_ref( str )
  matched = REF_TEAM_RE.match( str )
  unless matched
    puts "!! ERROR: unexpected team href format >#{str}<"
    exit 1
  end

  matched[1]
end
rounds() click to toggle source
# File lib/webget-football/worldfootball/page_schedule.rb, line 183
## List all rounds found in matches (in order of first appearance).
##
## Returns (and memoizes in @rounds) an Array of Hashes with
##   :count  - number of matches in the round
##   :name   - round name (as stored in match[:round])
def rounds
  @rounds ||= matches.group_by { |match| match[:round] }
                     .map { |name, recs| { count: recs.size, name: name } }
end
seasons() click to toggle source
# File lib/webget-football/worldfootball/page_schedule.rb, line 197
 ## List all seasons offered in the <select name="saison" ...> dropdown.
 ##
 ## Returns (and memoizes in @seasons) an Array of Hashes with
 ##   :text  - squished option text
 ##   :ref   - option value normalized via norm_season_ref
 def seasons
   @seasons ||= begin
     dropdown = doc.css( 'select[name="saison"]').first

     dropdown.css( 'option' ).map do |opt|
       { text: squish( opt.text ),
         ref:  norm_season_ref( opt[:value] ) }
     end
   end
 end
teams() click to toggle source
# File lib/webget-football/worldfootball/page_schedule.rb, line 162
## List all teams found in matches (in order of first appearance).
##
## Returns (and memoizes in @teams) an Array of Hashes with
##   :count  - number of matches the team name shows up in
##   :name   - team name (text)
##   :ref    - team ref of the first appearance
##
## note: teams are indexed by name/text for now NOT ref - why? why not?
##  todo/check:  check/assert that name and ref are always equal - why? why not?
def teams
  @teams ||= begin
    sides = matches.flat_map do |match|
      [ { text: match[:team1], ref: match[:team1_ref] },
        { text: match[:team2], ref: match[:team2_ref] } ]
    end

    sides.group_by { |team| team[:text] }
         .map do |name, recs|
           { count: recs.size,
             name:  name,
             ref:   recs.first[:ref] }
         end
  end
end