class Rsssf::Repo
Public Class Methods
new( path, opts )
click to toggle source
# File lib/rsssf/repo.rb, line 29
## Remembers where the repo lives and the options to hand on to reports.
##
##   path - root folder of the repo; other methods build
##          "#{path}/tables/..." and "#{path}/README.md" from it
##   opts - passed through unchanged to the report classes (title etc.)
def initialize( path, opts )
  @repo_path, @opts = path, opts
end
Public Instance Methods
fetch_pages()
click to toggle source
# File lib/rsssf/repo.rb, line 35
## Reads <repo>/tables/config.yml and, for every entry, loads the page
## from rsssf.com (via Page.from_url) and saves it as
## <repo>/tables/<basename>.txt.
##
## Each config entry maps a season key (e.g. 2011-12 or 2011) to a page
## path (e.g. tablesd/duit2011.html); only the path is used here.
def fetch_pages
  puts "fetch_pages:"

  config = YAML.load_file( "#{@repo_path}/tables/config.yml")
  pp config

  base_url = 'http://rsssf.com'

  config.each do |_season, html_path|
    ## note: assumes extension is .html
    ##   e.g. tablesd/duit2011.html => duit2011
    name = File.basename( html_path, '.html' )

    Page.from_url( "#{base_url}/#{html_path}" )
        .save( "#{@repo_path}/tables/#{name}.txt" )
  end
end
make_pages_summary()
click to toggle source
# File lib/rsssf/repo.rb, line 59
## Collects one stat record (Page#build_stat) for every page in
## <repo>/tables/*.txt and saves a PageReport built from them
## as <repo>/tables/README.md.
def make_pages_summary
  tables_dir = "#{@repo_path}/tables"

  stats = Dir[ "#{tables_dir}/*.txt" ].map do |file|
    Page.from_file( file ).build_stat
  end

  ## @opts carries the title etc. for the report
  PageReport.new( stats, @opts ).save( "#{tables_dir}/README.md" )
end
make_schedules( cfg )
click to toggle source
# File lib/rsssf/repo.rb, line 97
## Writes one schedule file per page found in <repo>/tables/*.txt and
## returns the collected ScheduleStat records (for reporting e.g. README).
##
## cfg is asked for:
##   includes      - nil or something answering include?( year );
##                   pages whose year is not included get skipped
##   opts_for_year - a Hash (used as is for all years) or otherwise
##                   something to call with the year
##   dir_for_year  - nil (falls back to archive_dir_for_year) or
##                   something to call with the year
##   name          - file name (without .txt) of the schedule, e.g. 1-liga
##
## todo/check/fix (kept from original):
##   use source: prop in rsssf page - why? why not???
##   move year/season/basename into page ???
def make_schedules( cfg )
  Dir[ "#{@repo_path}/tables/*.txt" ].each_with_object( [] ) do |file, stats|
    ## every page is assumed to have at least a
    ##   basename (e.g. duit2014), year (e.g. 2014) and season (e.g. 2014-15)
    basename = File.basename( file, File.extname( file ) )
    year     = year_from_name( basename )
    season   = year_to_season( year )

    if cfg.includes && !cfg.includes.include?( year )
      puts " skipping #{basename}; not listed in includes"
      next
    end

    puts " reading >#{basename}<"
    page = Page.from_file( file )   ## sources assumed to be utf-8 already

    ## a Hash is constant for all years; anything else gets called per year
    find_opts = cfg.opts_for_year.is_a?( Hash ) ? cfg.opts_for_year : cfg.opts_for_year.call( year )
    pp find_opts

    schedule = page.find_schedule( find_opts )

    ## no custom dir => default archive layout (e.g. archive/1980s/1985-86)
    out_dir = cfg.dir_for_year.nil? ? archive_dir_for_year( year ) : cfg.dir_for_year.call( year )

    dest_path = "#{@repo_path}/#{out_dir}/#{cfg.name}.txt"
    puts " save to >#{dest_path}<"

    FileUtils.mkdir_p( File.dirname( dest_path ))
    schedule.save( dest_path )

    stats << ScheduleStat.new.tap do |rec|
      rec.path     = out_dir
      rec.filename = "#{cfg.name}.txt"   ## change to basename - why?? why not??
      rec.year     = year
      rec.season   = season
      rec.rounds   = schedule.rounds
    end
  end
end
make_schedules_summary( stats )
click to toggle source
# File lib/rsssf/repo.rb, line 74
## Saves a ScheduleReport as <repo>/README.md.
##
##   stats - records to report on; must be passed in for now
##           (e.g. the return value of make_schedules)
def make_schedules_summary( stats )
  readme_path = "#{@repo_path}/README.md"

  ## @opts carries the title etc. for the report
  ScheduleReport.new( stats, @opts ).save( readme_path )
end
patch_pages( patcher )
click to toggle source
# File lib/rsssf/repo.rb, line 81
## Runs a custom (repo/country-specific) patcher over every page in
## <repo>/tables, e.g. for adding/patching headings.
##
##   patcher - anything answering patch( txt, name, year ); whatever it
##             returns is handed back to patch_dir as the block result
def patch_pages( patcher )
  tables_dir = "#{@repo_path}/tables"

  patch_dir( tables_dir ) do |page_txt, page_name, page_year|
    puts "patching #{page_year} (#{page_name}) (#{@repo_path})..."
    ## note: keep as last expression - the block result is the patched text
    patcher.patch( page_txt, page_name, page_year )
  end
end
sanitize_pages()
click to toggle source
# File lib/rsssf/repo.rb, line 90
## For debugging/testing: (re)runs sanitize over all pages in <repo>/tables
## (already included in the html2txt filter by default).
def sanitize_pages
  tables_dir = "#{@repo_path}/tables"
  sanitize_dir( tables_dir )
end
Private Instance Methods
patch_dir( root ) { |txt, basename, year| ... }
click to toggle source
# File lib/rsssf/repo.rb, line 170
## Yields every *.txt file in root (latest year first) to the given block
## and writes the block's result back to the same file.
##
##   root - folder holding the page files
##
## The block gets ( txt, basename, year ) and must return the new text
## for the file. Returns the list of files in processing order.
def patch_dir( root )
  files = Dir[ "#{root}/*.txt" ]

  ## sort files by year (latest first);
  ##   sort_by works out the year once per file instead of on every comparison
  files = files.sort_by { |file| year_from_file( file ) }.reverse

  files.each do |file|
    txt      = File.read_utf8( file )          ## note: assumes already converted to utf-8
    basename = File.basename( file, '.txt' )   ## e.g. duit92.txt => duit92
    year     = year_from_name( basename )

    new_txt = yield( txt, basename, year )

    ## a block ending on e.g. a bang method (gsub! etc.) returns nil if
    ##  nothing matched; writing nil would truncate the page to an empty file
    if new_txt.nil?
      warn " skipping #{basename}; patch block returned nil (file left unchanged)"
      next
    end

    File.open( file, 'w' ) { |f| f.write( new_txt ) }
  end
end
sanitize_dir( root )
click to toggle source
# File lib/rsssf/repo.rb, line 197
## Replaces the content of every *.txt file in root with the result of
## running sanitize over it.
##
##   root - folder holding the page files
def sanitize_dir( root )
  Dir[ "#{root}/*.txt" ].each do |path|
    ## note: assumes file is already converted to utf-8
    cleaned = sanitize( File.read_utf8( path ) )

    File.open( path, 'w' ) { |out| out.write cleaned }
  end
end