class SportDb::MatchParser

Constants

HEADER_SEP_RE

split by or || or |||

or ++ or +++
or -- or ---
or // or ///

note: allow Final | First Leg as ONE name same as

           Final - First Leg or
           Final, First Leg
for cut-off always MUST be more than two chars

todo/check: find a better name than HEADER_SEP(ARATOR) - why? why not?

todo/fix: move to parser utils and add a method split_name or such?
ROUND_EXTRA_WORDS_RE

quick hack- collect all “fillwords” by language!!!!

  change later  and add to sportdb-langs!!!!

  strip all "fillwords" e.g.:
    Nachtrag/Postponed/Addition/Supplemento names

todo/change: find a better name for ROUND_EXTRA_WORDS - why? why not?

Public Class Methods

new( lines, teams, start ) click to toggle source
# File lib/sportdb/formats/match/match_parser.rb, line 20
def initialize( lines, teams, start )
  # for convenience split string into lines
  ##    note: removes/strips empty lines
  ## todo/check: change to text instead of array of lines - why? why not?
  @lines        = lines.is_a?( String ) ? read_lines( lines ) : lines

  @mapper_teams = TeamMapper.new( teams )
  @start        = start
end
parse( lines, teams, start: ) click to toggle source
# File lib/sportdb/formats/match/match_parser.rb, line 7
def self.parse( lines, teams, start: )
  ##  todo/fix: add support for txt and lines
  ##    check if lines_or_txt is an array or just a string
  ##   use teams: like start:  why? why not?
  parser = new( lines, teams, start )
  parser.parse
end

Public Instance Methods

find_date!( line, start: ) click to toggle source
# File lib/sportdb/formats/match/match_parser.rb, line 550
def find_date!( line, start: )
  ## NB: lets us pass in start_at/end_at date (for event)
  #   for auto-complete year

  # extract date from line
  # and return it
  # NB: side effect - removes date from line string
  DateFormats.find!( line, start: start )
end
find_group_name!( line ) click to toggle source
# File lib/sportdb/formats/match/match_parser.rb, line 118
def find_group_name!( line )
  ## group pos - for now support single digit e.g 1,2,3 or letter e.g. A,B,C or HEX
  ## nb:  (?:)  = is for non-capturing group(ing)

  ## fix:
  ##   get Group|Gruppe|Grupo from lang!!!! do NOT hardcode in place

  ## todo:
  ##   check if Group A:  or [Group A]  works e.g. : or ] get matched by \b ???
  regex = /\b
            (?:
              (Group | Gruppe | Grupo)
                 [ ]+
              (\d+ | [A-Z]+)
            )
          \b/x

  m = regex.match( line )

  return nil    if m.nil?

  name = m[0]

  logger.debug "   name: >#{name}<"

  line.sub!( name, '[GROUP.NAME]' )

  name
end
find_round_def_name!( line ) click to toggle source
# File lib/sportdb/formats/match/match_parser.rb, line 193
def find_round_def_name!( line )
  # assume everything before pipe (\) is the round name
  #  strip [ROUND.POS],  todo:?? [ROUND.NAME2]

  # todo/fix: add name2 w/  // or /  why? why not?
  #  -- strip / or / chars

  buf = line.dup
  logger.debug "  find_round_def_name! line-before: >>#{buf}<<"

  ## cut-off everything after (including) pipe (|)
  buf = buf[ 0...buf.index('|') ]
  buf.strip!

  logger.debug "  find_round_def_name! line-after: >>#{buf}<<"

  logger.debug "   name: >>#{buf}<<"
  line.sub!( buf, '[ROUND.NAME]' )

  buf
end
find_round_header_name!( line ) click to toggle source
# File lib/sportdb/formats/match/match_parser.rb, line 236
def find_round_header_name!( line )
  # assume everything left is the round name
  #  extract all other items first (round name2, round pos, group name n pos, etc.)

  buf = line.dup
  logger.debug "  find_round_header_name! line-before: >>#{buf}<<"


  parts = buf.split( HEADER_SEP_RE )
  buf = parts[0]

  buf.strip!    # remove leading and trailing whitespace

  logger.debug "  find_round_name! line-after: >>#{buf}<<"

  ### bingo - assume what's left is the round name

  logger.debug "   name: >>#{buf}<<"
  line.sub!( buf, '[ROUND.NAME]' )

  buf
end
find_score!( line ) click to toggle source
# File lib/sportdb/formats/match/match_parser.rb, line 297
def find_score!( line )
  # note: always call after find_dates !!!
  #  scores match date-like patterns!!  e.g. 10-11  or 10:00 etc.
  #   -- note: score might have two digits too

  ScoreFormats.find!( line )
end
find_status!( line ) click to toggle source
# File lib/sportdb/formats/match/match_parser.rb, line 305
def find_status!( line )
  StatusParser.find!( line )
end
parse() click to toggle source
# File lib/sportdb/formats/match/match_parser.rb, line 31
def parse
  @last_date    = nil
  @last_round   = nil
  @last_group   = nil

  @rounds  = {}
  @groups  = {}
  @matches = []

  @warns        = []    ## track list of warnings (unmatched lines)  too - why? why not?


  @lines.each do |line|
    if is_goals?( line )
      logger.debug "skipping matched goals line: >#{line}<"
    elsif is_round_def?( line )
      ## todo/fix:  add round definition (w begin n end date)
      ## todo: do not patch rounds with definition (already assume begin/end date is good)
      ##  -- how to deal with matches that get rescheduled/postponed?
      parse_round_def( line )
    elsif is_round?( line )
      parse_round_header( line )
    elsif is_group_def?( line ) ## NB: group goes after round (round may contain group marker too)
      ### todo: add pipe (|) marker (required)
      parse_group_def( line )
    elsif is_group?( line )
      ##  -- lets you set group  e.g. Group A etc.
      parse_group_header( line )
    elsif try_parse_game( line )
      # do nothing here
    elsif try_parse_date_header( line )
      # do nothing here
    else
      logger.warn "skipping line (no match found): >#{line}<"
      @warns << line
    end
  end # lines.each

  [@matches, @rounds.values, @groups.values]
end
parse_date_header( line ) click to toggle source
# File lib/sportdb/formats/match/match_parser.rb, line 561
def parse_date_header( line )
  # note: returns true if parsed, false if no match

  # line with NO teams  plus include date e.g.
  #   [Fri Jun/17]  or
  #   Jun/17  or
  #   Jun/17:   etc.

  @mapper_teams.map_teams!( line )
  teams = @mapper_teams.find_teams!( line )
  team1 = teams[0]
  team2 = teams[1]

  date = find_date!( line, start: @start )

  if date && team1.nil? && team2.nil?
    logger.debug( "date header line found: >#{line}<")
    logger.debug( "    date: #{date} with start: #{@start}")

    @last_date = date   # keep a reference for later use

    ###  quick "corona" hack - support seasons going beyond 12 month (see swiss league 2019/20 and others!!)
    ##    find a better way??
    ##  set @start date to full year (e.g. 1.1.) if date.year  is @start.year+1
    ##   todo/fix: add to linter to check for chronological dates!! - warn if NOT chronological
    ###  todo/check: just turn on for 2019/20 season or always? why? why not?

    ## todo/fix: add switch back to old @start_org
    ##   if year is date.year == @start.year-1    -- possible when full date with year set!!!
    if @start.month != 1
       if date.year == @start.year+1
         logger.debug( "!! hack - extending start date to full (next/end) year; assumes all dates are chronologigal - always moving forward" )
         @start_org = @start   ## keep a copy of the original (old) start date - why? why not? - not used for now
         @start = Date.new( @start.year+1, 1, 1 )
       end
    end

    true
  else
    false
  end
end
parse_game( line ) click to toggle source
# File lib/sportdb/formats/match/match_parser.rb, line 317
  def parse_game( line )
    logger.debug "parsing game (fixture) line: >#{line}<"

    ## split by geo (@) - remove for now
    ##   split into parts e.g. break using @ !!!
    values = line.split( '@' )
    line = values[0]


    @mapper_teams.map_teams!( line )   ### todo/fix: limit mapping to two(2) teams - why? why not?  might avoid matching @ Barcelona ??
    teams = @mapper_teams.find_teams!( line )
    team1 = teams[0]
    team2 = teams[1]

    ## note: if we do NOT find two teams; return false - no match found
    if team1.nil? || team2.nil?
      logger.debug "  no game match (two teams required) found for line: >#{line}<"
      return false
    end

    ## find (optional) match status e.g. [abandoned] or [replay] or [awarded]
    ##                                   or [cancelled] or [postponed] etc.
    status = find_status!( line )   ## todo/check: allow match status also in geo part (e.g. after @) - why? why not?

    ## pos = find_game_pos!( line )

      date      = find_date!( line, start: @start )

    ###
    # check if date found?
    #   note: ruby falsey is nil & false only (not 0 or empty array etc.)
    if date
      ### check: use date_v2 if present? why? why not?
      @last_date = date    # keep a reference for later use
    else
      date = @last_date    # no date found; (re)use last seen date
    end


    score = find_score!( line )

    logger.debug "  line: >#{line}<"


    round = nil
    if @last_round
      round = @last_round
    else
      ## find (first) matching round by date if rounds / matchdays defined
      ##   if not rounds / matchdays defined - YES, allow matches WITHOUT rounds!!!
      if @rounds.size > 0
        @rounds.values.each do |round_rec|
          ## note: convert date to date only (no time) with to_date!!!
          if (round_rec.start_date && round_rec.end_date) &&
             (date.to_date >= round_rec.start_date &&
             date.to_date <= round_rec.end_date)
            round = round_rec
            break
          end
        end
        if round.nil?
          puts "!! ERROR - no matching round found for match date:"
          pp date
          exit 1
        end
      end
    end


    ## todo/check: scores are integers or strings?

    ## todo/check: pass along round and group refs or just string (canonical names) - why? why not?

    @matches << Import::Match.new( date:    date,
                                   team1:   team1,  ## note: for now always use mapping value e.g. rec (NOT string e.g. team1.name)
                                   team2:   team2,  ## note: for now always use mapping value e.g. rec (NOT string e.g. team2.name)
                                   score:   score,
                                   round:   round       ? round.name       : nil,   ## note: for now always use string (assume unique canonical name for event)
                                   group:   @last_group ? @last_group.name : nil,   ## note: for now always use string (assume unique canonical name for event)
                                   status:  status )
    ### todo: cache team lookups in hash?

=begin
    team1 = Team.find_by_key!( team1_key )
    team2 = Team.find_by_key!( team2_key )

    @last_team1 = team1    # store for later use for goals etc.
    @last_team2 = team2


    if @round.nil?
      ## no round header found; calculate round from date

      ###
      ## todo/fix: add some unit tests for round look up
      #  fix: use date_v2 if present!! (old/original date; otherwise use date)

      #
      # fix: check - what to do with hours e.g. start_at use 00:00 and for end_at use 23.59 ??
      #  -- for now - remove hours (e.g. use end_of_day and beginnig_of_day)

      ##
      # note: start_at and end_at are dates ONLY (note datetime)
      #  - do NOT pass in hours etc. in query
      #  again use -->  date.end_of_day, date.beginning_of_day
      #  new: not working:  date.to_date, date.to_date
      #    will not find round if  start_at same as date !! (in theory hours do not matter)

      ###
      # hack:
      #  special case for sqlite3 (date compare not working reliable; use casts)
      #  fix: move to  adapter_name to activerecord_utils as sqlite? or similar?

      if ActiveRecord::Base.connection.adapter_name.downcase.starts_with?( 'sqlite' )
        logger.debug( "  [sqlite] using sqlite-specific query for date compare for rounds finder" )
        round = Round.where( 'event_id = ? AND (    julianday(start_at) <= julianday(?)'+
                                               'AND julianday(end_at)   >= julianday(?))',
                               @event.id, date.to_date, date.to_date).first
      else  # all other dbs (postgresql, mysql, etc.)
        round = Round.where( 'event_id = ? AND (start_at <= ? AND end_at >= ?)',
                             @event.id, date.to_date, date.to_date).first
      end

      pp round
      if round.nil?
        logger.warn( "  !!!! no round match found for date #{date}" )
        pp Round.all

        ###################################
        # -- try auto-adding matchday
        round = Round.new

        round_attribs = {
          event_id: @event.id,
          name: "Matchday #{date.to_date}",
          pos: 999001+@patch_round_ids_pos.length,   # e.g. 999<count> - 999001,999002,etc.
          start_at:  date.to_date,
          end_at:    date.to_date
        }

        logger.info( "  auto-add round >Matchday #{date.to_date}<" )
        logger.debug round_attribs.to_json

        round.update_attributes!( round_attribs )

        @patch_round_ids_pos << round.id   # todo/check - add just id or "full" record as now - why? why not?
      end

      # store pos for auto-number next round if missing
      #  - note: only if greater/bigger than last; use max
      #  - note: last_round_pos might be nil - thus set to 0
      if round.pos > 999000
        # note: do NOT update last_round_pos for to-be-patched rounds
      else
        @last_round_pos = [round.pos,@last_round_pos||0].max
      end

      ## note: will crash (round.pos) if round is nil
      logger.debug( "  using round #{round.pos} >#{round.name}< start_at: #{round.start_at}, end_at: #{round.end_at}" )
    else
      ## use round from last round header
      round = @round
    end


    ### check if games exists
    ##  with this teams in this round if yes only update
    game = Game.find_by_round_id_and_team1_id_and_team2_id(
                         round.id, team1.id, team2.id
    )

    game_attribs = {
      score1i:   scores[0],
      score2i:   scores[1],
      score1:    scores[2],
      score2:    scores[3],
      score1et:  scores[4],
      score2et:  scores[5],
      score1p:   scores[6],
      score2p:   scores[7],
      play_at:    date,
      play_at_v2: date_v2,
      postponed: postponed,
      knockout:  round.knockout,   ## note: for now always use knockout flag from round - why? why not??
      ground_id: ground.present? ? ground.id : nil,
      group_id:  @group.present? ? @group.id : nil
    }

    game_attribs[ :pos ] = pos   if pos.present?

    ####
    # note: only update if any changes (or create if new record)
    if game.present? &&
       game.check_for_changes( game_attribs ) == false
          logger.debug "  skip update game #{game.id}; no changes found"
    else
      if game.present?
        logger.debug "update game #{game.id}:"
      else
        logger.debug "create game:"
        game = Game.new

        more_game_attribs = {
          round_id: round.id,
          team1_id: team1.id,
          team2_id: team2.id
        }

        ## NB: use round.games.count for pos
        ##  lets us add games out of order if later needed
        more_game_attribs[ :pos ] = round.games.count+1   if pos.nil?

        game_attribs = game_attribs.merge( more_game_attribs )
      end

      logger.debug game_attribs.to_json
      game.update_attributes!( game_attribs )
    end

    @last_game = game   # store for later reference (e.g. used for goals etc.)
=end

    return true   # game match found
  end
parse_group_def( line ) click to toggle source
# File lib/sportdb/formats/match/match_parser.rb, line 100
def parse_group_def( line )
  logger.debug "parsing group def line: >#{line}<"

  @mapper_teams.map_teams!( line )
  teams = @mapper_teams.find_teams!( line )

  name = find_group_name!( line )

  logger.debug "  line: >#{line}<"

  ## todo/check/fix: add back group key - why? why not?
  group = Import::Group.new( name:  name,
                             teams: teams.map {|team| team.name } )

  @groups[ name ] = group
end
parse_group_header( line ) click to toggle source
# File lib/sportdb/formats/match/match_parser.rb, line 74
def parse_group_header( line )
  logger.debug "parsing group header line: >#{line}<"

  # note: group header resets (last) round  (allows, for example):
  #  e.g.
  #  Group Playoffs/Replays       -- round header
  #    team1 team2                -- match
  #  Group B:                     -- group header
  #    team1 team2 - match  (will get new auto-matchday! not last round)
  @last_round     = nil

  name = find_group_name!( line )

  logger.debug "    name: >#{name}<"
  logger.debug "  line: >#{line}<"

  group = @groups[ name ]
  if group.nil?
    puts "!! ERROR - no group def found for >#{name}<"
    exit 1
  end

  # set group for games
  @last_group = group
end
parse_round_def( line ) click to toggle source
# File lib/sportdb/formats/match/match_parser.rb, line 149
def parse_round_def( line )
  logger.debug "parsing round def line: >#{line}<"

  start_date = find_date!( line, start: @start )
  end_date   = find_date!( line, start: @start )

  # note: if end_date missing -- assume start_date is (==) end_at
  end_date = start_date  if end_date.nil?

  # note: - NOT needed; start_at and end_at are saved as date only (NOT datetime)
  #  set hours,minutes,secs to beginning and end of day (do NOT use default 12.00)
  #   e.g. use 00.00 and 23.59
  # start_at = start_at.beginning_of_day
  # end_at   = end_at.end_of_day

  # note: make sure start_at/end_at is date only (e.g. use start_at.to_date)
  #   sqlite3 saves datetime in date field as datetime, for example (will break date compares later!)
  start_date = start_date.to_date
  end_date   = end_date.to_date


  name  = find_round_def_name!( line )
  # NB: use extracted round name for knockout check
  knockout_flag = is_knockout_round?( name )


  logger.debug "    start_date: #{start_date}"
  logger.debug "    end_date:   #{end_date}"
  logger.debug "    name:    >#{name}<"
  logger.debug "    knockout_flag:   #{knockout_flag}"

  logger.debug "  line: >#{line}<"

  round = Import::Round.new( name:       name,
                             start_date: start_date,
                             end_date:   end_date,
                             knockout:   knockout_flag,
                             auto:       false )

  @rounds[ name ] = round
end
parse_round_header( line ) click to toggle source
# File lib/sportdb/formats/match/match_parser.rb, line 274
def parse_round_header( line )
  logger.debug "parsing round header line: >#{line}<"

  name = find_round_header_name!( line )

  logger.debug "  line: >#{line}<"

  name = name.sub( ROUND_EXTRA_WORDS_RE, '' )
  name = name.strip

  round = @rounds[ name ]
  if round.nil?    ## auto-add / create if missing
    ## todo/check: add num (was pos) if present - why? why not?
    round = Import::Round.new( name: name )
    @rounds[ name ] = round
  end

  ## todo/check: if pos match (MUST always match for now)
  @last_round = round
  @last_group = nil   # note: reset group to no group - why? why not?
end
try_parse_date_header( line ) click to toggle source
# File lib/sportdb/formats/match/match_parser.rb, line 544
def try_parse_date_header( line )
  # note: clone line; for possible test do NOT modify in place for now
  # note: returns true if parsed, false if no match
  parse_date_header( line.dup )
end
try_parse_game( line ) click to toggle source
# File lib/sportdb/formats/match/match_parser.rb, line 310
def try_parse_game( line )
  # note: clone line; for possible test do NOT modify in place for now
  # note: returns true if parsed, false if no match
  parse_game( line.dup )
end