class SportDb::Package
Constants
- CLUBS_HISTORY_RE
- CLUBS_PROPS_RE
todo/fix: rename to CLUBS too e.g. CLUBS_PROPS to reflect filename - why? why not?
- CLUBS_RE
clubs.txt or clubs_en.txt
remove support for en.clubs.txt - why? why not?
- CLUBS_WIKI_RE
- CLUB_PROPS_RE
- CONF_RE
todo/fix: make all regexes case-insensitive with /i option - why? why not?
e.g. .TXT and .txt yes!! use /i option!!!!!
- CSV_RE
add “generic” pattern to find all csv datafiles
- LEAGUES_RE
leagues.txt or leagues_en.txt
remove support for en.leagues.txt - why? why not?
- MATCH_CSV_RE
- MATCH_RE
note: if pattern includes directory add here
(otherwise move to more "generic" datafile) - why? why not?
- SEASON
- SEASONS_RE
seasons.txt or seasons_en.txt
remove support for br.seasons.txt - why? why not?
- SEASON_RE
todo/fix: change SEASON_RE to SEASON_KEY_RE (avoid confusion w/ SEASONS_RE for datafile?) - why? why not? !!!!!!!
season folder:
e.g. /2019-20 or
year-only e.g. /2019 or
/2016--france
- TEAMS_RE
teams.txt or teams_history.txt
Attributes
attr_reader :pack ## allow access to embedded (“low-level”) delegate package (or hide!?) - why? why not?
attr_reader :pack ## allow access to embedded (“low-level”) delegate package (or hide!?) - why? why not?
Public Class Methods
move class-level “static” finders to DirPackage
(they do NOT work for zip packages for now) - why? why not?
# File lib/sportdb/formats/package.rb, line 106
## Collect all datafiles below path whose (full) file path matches pattern.
##   returns an array of the matching file paths (empty if nothing matches)
##
##   note: prints (via pp) the list of candidates and
##         the list of matched datafiles along the way
def self.find( path, pattern )
  ## note: the glob also picks up files starting with dot (.) as candidates
  ##         (normally excluded with just *)
  candidates = Dir.glob( "#{path}/**/{*,.*}.*" )
  pp candidates

  datafiles = candidates.select { |candidate| pattern.match( candidate ) }
  pp datafiles
  datafiles
end
# File lib/sportdb/formats/package.rb, line 125
## find clubs datafiles below path - hands path and pattern
##   (CLUBS_RE unless passed in) on to find and returns its result
def self.find_clubs( path, pattern: CLUBS_RE )
  find( path, pattern )
end
# File lib/sportdb/formats/package.rb, line 127
## find clubs history datafiles below path - hands path and pattern
##   (CLUBS_HISTORY_RE unless passed in) on to find and returns its result
def self.find_clubs_history( path, pattern: CLUBS_HISTORY_RE )
  find( path, pattern )
end
# File lib/sportdb/formats/package.rb, line 126
## find clubs wiki datafiles below path - hands path and pattern
##   (CLUBS_WIKI_RE unless passed in) on to find and returns its result
def self.find_clubs_wiki( path, pattern: CLUBS_WIKI_RE )
  find( path, pattern )
end
# File lib/sportdb/formats/package.rb, line 141
## find conf datafiles below path - hands path and pattern
##   (CONF_RE unless passed in) on to find and returns its result
def self.find_conf( path, pattern: CONF_RE )
  find( path, pattern )
end
# File lib/sportdb/formats/package.rb, line 134
## find leagues datafiles below path - hands path and pattern
##   (LEAGUES_RE unless passed in) on to find and returns its result
def self.find_leagues( path, pattern: LEAGUES_RE )
  find( path, pattern )
end
# File lib/sportdb/formats/package.rb, line 144
## find match datafiles below path
##   format - 'csv' selects MATCH_CSV_RE;
##            anything else is treated as txt (for now) and selects MATCH_RE
##   returns the result of find for the selected pattern
def self.find_match( path, format: 'txt' )
  pattern = format == 'csv' ? MATCH_CSV_RE : MATCH_RE
  find( path, pattern )
end
# File lib/sportdb/formats/package.rb, line 137
## find seasons datafiles below path - hands path and pattern
##   (SEASONS_RE unless passed in) on to find and returns its result
def self.find_seasons( path, pattern: SEASONS_RE )
  find( path, pattern )
end
# File lib/sportdb/formats/package.rb, line 122
## find teams datafiles below path - hands path and pattern
##   (TEAMS_RE unless passed in) on to find and returns its result
def self.find_teams( path, pattern: TEAMS_RE )
  find( path, pattern )
end
# File lib/sportdb/formats/package.rb, line 129
## check path against CLUBS_RE
##   returns the result of CLUBS_RE.match( path )
def self.match_clubs( path )
  CLUBS_RE.match( path )
end
# File lib/sportdb/formats/package.rb, line 131
## check path against CLUBS_HISTORY_RE
##   returns the result of CLUBS_HISTORY_RE.match( path )
def self.match_clubs_history( path )
  CLUBS_HISTORY_RE.match( path )
end
# File lib/sportdb/formats/package.rb, line 132
## check path against pattern (CLUBS_PROPS_RE unless passed in)
##   returns the result of pattern.match( path )
def self.match_clubs_props( path, pattern: CLUBS_PROPS_RE )
  pattern.match( path )
end
# File lib/sportdb/formats/package.rb, line 130
## check path against CLUBS_WIKI_RE
##   returns the result of CLUBS_WIKI_RE.match( path )
def self.match_clubs_wiki( path )
  CLUBS_WIKI_RE.match( path )
end
# File lib/sportdb/formats/package.rb, line 142
## check path against CONF_RE
##   returns the result of CONF_RE.match( path )
def self.match_conf( path )
  CONF_RE.match( path )
end
# File lib/sportdb/formats/package.rb, line 135
## check path against LEAGUES_RE
##   returns the result of LEAGUES_RE.match( path )
def self.match_leagues( path )
  LEAGUES_RE.match( path )
end
# File lib/sportdb/formats/package.rb, line 138
## check path against SEASONS_RE
##   returns the result of SEASONS_RE.match( path )
def self.match_seasons( path )
  SEASONS_RE.match( path )
end
# File lib/sportdb/formats/package.rb, line 123
## check path against TEAMS_RE
##   returns the result of TEAMS_RE.match( path )
def self.match_teams( path )
  TEAMS_RE.match( path )
end
# File lib/sportdb/formats/package.rb, line 216
## Set up the package around an embedded ("low-level") delegate package.
##
##   path_or_pack - a Datafile::Package (used as is) or
##                  a (string) path to a directory or to a .zip file
##
##   note: the include and exclude filters start out unset (nil)
##   note: if the path does NOT exist or is neither a directory nor
##         a .zip file an error message gets printed and
##         the process exits with status 1
def initialize( path_or_pack )
  @include = nil
  @exclude = nil

  if path_or_pack.is_a?( Datafile::Package )
    @pack = path_or_pack
  else   ## assume it's a (string) path
    path = path_or_pack

    unless File.exist?( path )   ## file or directory
      puts "** !!! ERROR !!! file NOT found >#{path}<; cannot open package"
      exit 1
    end

    if File.directory?( path )
      @pack = Datafile::DirPackage.new( path )   ## delegate to "generic" package
    elsif File.file?( path ) && File.extname( path ).downcase == '.zip'
      ## note: extname includes the dot (.) e.g. .zip
      ##       compare the downcased extension to accept .ZIP, .Zip, etc. too
      @pack = Datafile::ZipPackage.new( path )
    else
      puts "** !!! ERROR !!! cannot open package - directory or file with .zip extension required"
      exit 1
    end
  end
end
Public Instance Methods
# File lib/sportdb/formats/package.rb, line 241
## Walk all entries of the embedded package matching pattern and
##   pass every entry that gets through filter on to the block.
##   note: filter lets you use the include/exclude filters
def each( pattern:, &blk )
  @pack.each( pattern: pattern ) do |entry|
    blk.call( entry )   if filter( entry )
  end
end
# File lib/sportdb/formats/package.rb, line 259
## pass the block on to each with the CLUB_PROPS_RE pattern
def each_club_props( &blk )
  each( pattern: CLUB_PROPS_RE, &blk )
end
# File lib/sportdb/formats/package.rb, line 262
## pass the block on to each with the CLUBS_RE pattern
def each_clubs( &blk )
  each( pattern: CLUBS_RE, &blk )
end
# File lib/sportdb/formats/package.rb, line 264
## pass the block on to each with the CLUBS_HISTORY_RE pattern
def each_clubs_history( &blk )
  each( pattern: CLUBS_HISTORY_RE, &blk )
end
# File lib/sportdb/formats/package.rb, line 263
## pass the block on to each with the CLUBS_WIKI_RE pattern
def each_clubs_wiki( &blk )
  each( pattern: CLUBS_WIKI_RE, &blk )
end
# File lib/sportdb/formats/package.rb, line 248
## pass the block on to each with the CONF_RE pattern
def each_conf( &blk )
  each( pattern: CONF_RE, &blk )
end
# File lib/sportdb/formats/package.rb, line 257
## pass the block on to each with the CSV_RE pattern
def each_csv( &blk )
  each( pattern: CSV_RE, &blk )
end
# File lib/sportdb/formats/package.rb, line 261
## pass the block on to each with the LEAGUES_RE pattern
def each_leagues( &blk )
  each( pattern: LEAGUES_RE, &blk )
end
# File lib/sportdb/formats/package.rb, line 249
## pass the block on to each with the match pattern for format
##   format - 'csv' selects MATCH_CSV_RE; anything else selects MATCH_RE
def each_match( format: 'txt', &blk )
  pattern = format == 'csv' ? MATCH_CSV_RE : MATCH_RE
  each( pattern: pattern, &blk )
end
# File lib/sportdb/formats/package.rb, line 256
## pass the block on to each with the MATCH_CSV_RE pattern
def each_match_csv( &blk )
  each( pattern: MATCH_CSV_RE, &blk )
end
# File lib/sportdb/formats/package.rb, line 266
## pass the block on to each with the SEASONS_RE pattern
def each_seasons( &blk )
  each( pattern: SEASONS_RE, &blk )
end
# File lib/sportdb/formats/package.rb, line 199 def filter( entry ) if @include if filter_clause( @include, entry ) ## todo/check: is include a reserved keyword???? true ## todo/check: check for exclude here too - why? why not? else false end else if @exclude && filter_clause( @exclude, entry ) false else true end end end
private helpers - like select: return true to keep an entry and false to skip it
# File lib/sportdb/formats/package.rb, line 188
## Check entry against a single filter.
##   filter - a String  (true if entry.name contains the string)  or
##            a Regexp  (true if entry.name matches)              or
##            anything else, assumed to respond to call (gets the entry passed in)
def filter_clause( filter, entry )
  case filter
  when String
    !entry.name.index( filter ).nil?
  when Regexp
    !filter.match( entry.name ).nil?
  else
    ## todo/check: pass in entry (and NOT entry.name) - why? why not?
    filter.call( entry )
  end
end
return all match datafile entries
# File lib/sportdb/formats/package.rb, line 270
## Collect every entry yielded by each_match (for format) into an array.
def match( format: 'txt' )
  [].tap do |entries|
    each_match( format: format ) { |entry| entries << entry }
  end
end
# File lib/sportdb/formats/package.rb, line 302
## Group all match datafile entries by (normalized) season key.
##   returns an array of [season_key, entries] pairs
##     sorted by season key - latest / newest first (and oldest last)
##
##   start - optional start season; entries of seasons with
##           a start year before the start season's start year get skipped
def match_by_season( format: 'txt', start: nil )   ## change/rename to by_season_key - why? why not?
  ## todo/note: in the future - season might be anything
  ##   (e.g. part of a filename and NOT a directory) - why? why not?

  ## note: folds all same seasons (even if in different directories)
  ##        into the same datafile list e.g.
  ##   ["1957/58",
  ##     ["1950s/1957-58/1-division1.csv",
  ##      "1950s/1957-58/2-division2.csv",
  ##      "1950s/1957-58/3a-division3n.csv",
  ##      "1950s/1957-58/3b-division3s.csv"]],
  ##  and
  ##   ["1957/58",
  ##     ["archives/1950s/1957-58/1-division1.csv",
  ##      "archives/1950s/1957-58/2-division2.csv",
  ##      "archives/1950s/1957-58/3a-division3n.csv",
  ##      "archives/1950s/1957-58/3b-division3s.csv"]],
  ##  should be together - why? why not?

  ####
  # Example package:
  # [["2012/13", ["2012-13/1-proleague.csv"]],
  #  ["2013/14", ["2013-14/1-proleague.csv"]],
  #  ["2014/15", ["2014-15/1-proleague.csv"]],
  #  ["2015/16", ["2015-16/1-proleague.csv"]],
  #  ["2016/17", ["2016-17/1-proleague.csv"]],
  #  ["2017/18", ["2017-18/1-proleague.csv"]]]

  ## todo/fix: (re)use a more generic filter instead of start for start of season only
  ## todo/fix: use a "generic" filter_season helper for easy reuse
  ##     filter_season( clause, season_key )
  ##   or better filter = SeasonFilter.new( clause )
  ##             filter.skip?   filter.include?( season_key )?
  ##             filter.before?( season_key )  etc.
  ##              find some good method names!!!!
  season_start = start ? Season( start ) : nil

  by_season = match( format: format ).each_with_object( {} ) do |entry, h|
    ## note: assume last directory in datafile path is the season part/key
    season_q = File.basename( File.dirname( entry.name ))
    season   = Season.parse( season_q )   ## normalize season

    ## skip seasons that begin before the (requested) start season
    next if season_start && season_start.start_year > season.start_year

    (h[ season.key ] ||= []) << entry
  end

  ## todo/fix: - add sort entries by name - why? why not?
  ## note: assume 1-,2- etc. gets us back sorted leagues
  ##  - use sort. (will not sort by default?)

  ## sort by season
  ##   latest / newest first (and oldest last)
  ##  note: returns an array (or keep hash) - why? why not?
  by_season.sort { |l,r| r[0] <=> l[0] }
end
todo/check: rename/change to match_by_dir - why? why not?
still in use somewhere? move to attic? use match_by_season and delete by_season_dir? - why? why not?
# File lib/sportdb/formats/package.rb, line 278
## Group all match datafile entries by the directory of the entry name.
##   returns an array of [season_path, entries] pairs e.g.
##
##  [["1950s/1956-57",
##     ["1950s/1956-57/1-division1.csv",
##      "1950s/1956-57/2-division2.csv",
##      "1950s/1956-57/3a-division3n.csv",
##      "1950s/1956-57/3b-division3s.csv"]],
##   ...]
def match_by_season_dir( format: 'txt' )
  ## todo/fix: - add sort entries by name - why? why not?
  ## note: assume 1-,2- etc. gets us back sorted leagues
  ##  - use sort. (will not sort by default?)
  match( format: format )
    .group_by { |entry| File.dirname( entry.name ) }
    .to_a    ## return as array (or keep hash) - why? why not?
end