class EventDb::EventReader::MarkdownParser
Constants
- DATE_ENTRY_RE
examples:
-
2015 @ Salzburg, Austria; Oct/17+18
-
2015 @ Brussels / Brussel / Bruxelles; Jan/31+Feb/1
-
2014 @ Porto de Galinhas, Pernambuco; Apr/24-27 (formerly: Abril Pro Ruby)
-
- LINK_ENTRY_RE
example:
-
[RubyWorld Conference - rubyworldconf](www.rubyworld-conf.org/en)
-
- MONTH_EN
- MONTH_EN_TO_MM
Public Class Methods
new( text )
click to toggle source
# File lib/eventdb/reader.rb, line 151
##
# Creates a parser for the given markdown source.
#
# text - the raw text to parse; it is stored as-is in @text and is
#        only read later when #parse is called.
def initialize( text )
  @text = text
end
parse( text )
click to toggle source
# File lib/eventdb/reader.rb, line 146
##
# Convenience shortcut: builds a parser for +text+ and runs it right away.
#
# text - the raw text handed on to ::new
#
# Returns whatever the instance-level #parse returns.
def self.parse( text )
  new( text ).parse
end
Public Instance Methods
find_title_and_link( line )
click to toggle source
helper
# File lib/eventdb/reader.rb, line 326
##
# Helper: extracts the event title and the first (markdown) link from a
# link entry line.
#
# Two title formats are tried, in this order:
#
# 1) "new" format - the line starts with a bold title e.g.
#      **European Ruby Konference - EuRuKo** (web: [euruko.org](http://euruko.org), t: [euruko](https://twitter.com/euruko)) - _since 2003_
# 2) "old" classic format - the line starts with a markdown link and the
#    link text is the title e.g.
#      [Oktoberfest ("Die Wiesn")](http://www.muenchen.de/veranstaltungen/oktoberfest.html)
#
# line - the list item text (nil is treated like a line without a title)
#
# Returns a two-element array [title, link]; link is nil (and a warning
# is printed) if the line has no markdown link.
#
# NOTE: if no title can be found an error is printed and the whole
# process is terminated via exit 1 (raises SystemExit).
def find_title_and_link( line )
  ## note: Regexp#match returns nil for a nil line (no exception)
  if (m = /^\*{2}([^*]+)\*{2}/.match( line ))     ## note: **title** must start line
    title = m[1]
    puts " adding (new/modern format) => #{title}"
  elsif (m = /^\[([^\]]+)\]/.match( line ))       ## note: [title](link) must start line
    title = m[1]
    puts " adding (old/classic format) => #{title}"
  else
    puts "*** !! ERROR !!: cannot find event title in #{line}"
    exit 1
  end

  ## try extract link - use first (markdown) link
  ## todo/fix: use shared markdown link regex!!!!!
  link_match = /\[[^\]]+\]\(([^\)]+)\)/.match( line )
  if link_match
    link = link_match[1]
    puts " => @ #{link}"
  else
    link = nil
    puts "*** !! WARN !!: cannot find event link in #{line}"
  end

  [title, link]
end
parse()
click to toggle source
# File lib/eventdb/reader.rb, line 210
##
# Walks the outline nodes of @text (as returned by OutlineReader.parse)
# and collects one Event per date entry.
#
# - Headings (h1..h6) are tracked on a stack; the headings below the
#   first one (the page title) get appended to every place,
#   innermost first.
# - A list item matching LINK_ENTRY_RE is remembered as the current
#   link entry (title and link source for the date entries that follow).
# - A list item matching DATE_ENTRY_RE becomes an Event built from the
#   remembered link entry, the place and the start/end dates.
# - Everything else is skipped (and logged on debug level).
#
# Parsing stops at the first heading starting with
# More, Calendar, Thanks or Meta.
#
# Returns an array of Event records (possibly empty).
#
# Raises RuntimeError if a heading is more than one level deeper than
# its predecessor. Date.new raises if an entry holds an invalid date.
def parse
  events = []
  stack  = []   ## header/heading stack; note: last_stack is stack.size; starts w/ 0
  last_link_entry = nil

  nodes = OutlineReader.parse( @text )
  nodes.each do |node|
    case node[0]
    when :h1, :h2, :h3, :h4, :h5, :h6
      heading = node[1]

      # stop when hitting >## More< or <Calendar> or <Thanks> etc. section
      #  note: must escape # e.g. #{2,} must be escaped to \#{2,}
      break if heading =~ /^(More|Calendar|Thanks|Meta)\b/

      # skip "pseudo" headings (for contribs etc.)
      ##  e.g. #### _Contributions welcome. Anything missing? Send in a pull request. Thanks._
      next if heading =~ /Contributions welcome\.|Anything Missing\?/

      level = node[0][1].to_i   ## e.g. :h3 => 3
      logger.debug " heading level: #{level}, title: >#{heading}<"

      level_diff = level - stack.size
      if level_diff > 0
        logger.debug "[EventReader] up +#{level_diff}"
        if level_diff > 1
          logger.error "fatal: level step must be one (+1) is +#{level_diff}"
          fail "[EventReader] level step must be one (+1) is +#{level_diff}"
        end
      else
        logger.debug "[EventReader] down #{level_diff}" if level_diff < 0
        ## drop the levels we left plus the old heading on the new level
        ##   (same level => drop just the old sibling heading)
        stack.pop( level_diff.abs + 1 )
      end
      stack.push( [level, heading] )
      logger.debug " stack: #{stack.inspect}"
    when :li   ## list item
      line = node[1]

      if LINK_ENTRY_RE.match( line )
        logger.debug " link entry: #{line}"
        last_link_entry = line
      elsif (m = DATE_ENTRY_RE.match( line ))
        year           = m[:year]
        start_month_en = m[:start_month_en]
        start_day      = m[:start_day]
        start_month    = MONTH_EN_TO_MM[ start_month_en ]
        start_date     = Date.new( year.to_i, start_month.to_i, start_day.to_i )

        end_month_en = m[:end_month_en] || start_month_en   # no end month; use same as start
        end_day      = m[:end_day]      || start_day        # no end day; single day event (use start day)
        end_month    = MONTH_EN_TO_MM[ end_month_en ]
        ## NOTE(review): the end date always reuses the start year; an entry
        ##   running across New Year would end before it starts - confirm
        end_date     = Date.new( year.to_i, end_month.to_i, end_day.to_i )

        logger.debug " date entry: #{line}"
        logger.debug " start_date: #{start_date}, year: #{year}, start_month_en: #{start_month_en}, start_month: #{start_month} start_day: #{start_day} => #{last_link_entry}"
        logger.debug " end_date: #{end_date}, end_month_en: #{end_month_en}, end_day_en: #{end_day}"

        s = StringScanner.new( line )
        s.skip_until( /@/ )
        ## get place (everything until ; (separator));
        ##   scan returns nil if there is nothing before the separator
        place = s.scan( /[^;]+/ ).to_s.strip
        logger.debug " place: #{place}, rest: >#{s.rest}<"

        ## todo/fix: make place uniform e.g. change
        ##   Vienna, Austria  =>
        ##   Vienna › Austria   - why? why not?

        ## note: cut of heading 1 (e.g. page title);
        ##   drop( 1 ) also copes with an empty stack (entry before any heading)
        more_places = stack.drop( 1 ).reverse.map { |entry| entry[1] }
        ## skip blank parts so that no dangling ", " gets added
        ##   if there are no headings below the page title
        place = [place, *more_places].reject { |part| part.to_s.empty? }.join( ', ' )   ## was: join(' › ')
        logger.debug " place: #{place}"

        title, link = find_title_and_link( last_link_entry )

        events << Event.new( title, link, place, start_date, end_date )
      else
        logger.debug " *** skip list item line: #{line}"
      end
    else
      ## keep the node dump on the logger too (was: unconditional pp to stdout)
      logger.debug " *** skip node: #{node.inspect}"
    end
  end

  events
end