class SportDb::OutlineReader
Constants
- HEADING_BLANK_RE
note: skip “decorative” only heading e.g. ========
todo/check: find a better name e.g. HEADING_EMPTY_RE or HEADING_LINE_RE or ???
- HEADING_RE
note: like in wikimedia markup (and markdown) all optional trailing ==== too
Public Class Methods
new( txt )
click to toggle source
# File lib/sportdb/formats/outline_reader.rb, line 16 def initialize( txt ) @txt = txt end
parse( txt )
click to toggle source
# File lib/sportdb/formats/outline_reader.rb, line 12 def self.parse( txt ) new( txt ).parse end
read( path )
click to toggle source
# File lib/sportdb/formats/outline_reader.rb, line 7 def self.read( path ) ## use - rename to read_file or from_file etc. - why? why not? txt = File.open( path, 'r:utf-8' ) {|f| f.read } parse( txt ) end
Public Instance Methods
parse()
click to toggle source
# File lib/sportdb/formats/outline_reader.rb, line 35 def parse outline=[] ## outline structure start_para = true ## start new para(graph) on new text line? @txt.each_line do |line| line = line.strip ## todo/fix: keep leading and trailing spaces - why? why not? if line.empty? ## todo/fix: keep blank line nodes?? and just remove comments and process headings?! - why? why not? start_para = true next end break if line == '__END__' next if line.start_with?( '#' ) ## skip comments too ## strip inline (until end-of-line) comments too ## e.g Eupen | KAS Eupen ## [de] ## => Eupen | KAS Eupen ## e.g bq Bonaire, BOE # CONCACAF ## => bq Bonaire, BOE line = line.sub( /#.*/, '' ).strip pp line ## todo/check: also use heading blank as paragraph "breaker" or treat it like a comment ?? - why? why not? next if HEADING_BLANK_RE.match( line ) # skip "decorative" only heading e.g. ======== ## note: like in wikimedia markup (and markdown) all optional trailing ==== too if m=HEADING_RE.match( line ) start_para = true heading_marker = m[:marker] heading_level = m[:marker].length ## count number of = for heading level heading = m[:text].strip puts "heading #{heading_level} >#{heading}<" outline << [:"h#{heading_level}", heading] else ## assume it's a (plain/regular) text line if start_para outline << [:p, [line]] start_para = false else node = outline[-1] ## get last entry if node[0] == :p ## assert it's a p(aragraph) node!!! node[1] << line ## add line to p(aragraph) else puts "!! ERROR - invalid outline state / format - expected p(aragraph) node; got:" pp node exit 1 end end end end outline end