class Wikiscript::Parser
Constants
- TEMPLATE_BEGIN_RE
Whereas MediaWiki variable names are all uppercase, template names have the same basic features and limitations as all page names: they are case-sensitive (except for the first character); underscores are parsed as spaces; and they cannot contain any of these characters: # < > [ ] | { }. This is because those are reserved for wiki markup and HTML.
- TEMPLATE_END_RE
- TEMPLATE_NAME_RE
todo/fix: check how to add # too!!!
todo: check which chars need escaping in the character class; change to something like [^|<>\[\]{}]+
Public Class Methods
new( text )
click to toggle source
# File lib/wikiscript-parser/parser.rb, line 12
# Creates a parser for the given wiki markup.
# Nothing is parsed here; the text is only remembered in @text
# (it is read later by #parse).
#
# text - the raw markup to parse
def initialize( text )
  @text = text
end
parse( text )
click to toggle source
convenience all-in-one parse helper
# File lib/wikiscript-parser/parser.rb, line 7
# Convenience all-in-one parse helper:
# builds a parser for +text+ and returns whatever its #parse call returns.
def self.parse( text )
  parser = new( text )
  parser.parse
end
Public Instance Methods
parse()
click to toggle source
# File lib/wikiscript-parser/parser.rb, line 16
# Parses the text stored in @text by handing it to #parse_lines
# and returns that method's result.
def parse
  parse_lines( @text )
end
parse_lines( text )
click to toggle source
# File lib/wikiscript-parser/parser.rb, line 183
# Parses +text+ into an array of top-level nodes.
#
# HTML comments are stripped first; the remaining text is then scanned
# node by node (see #parse_node) with whitespace skipped between nodes.
#
# text - the markup to parse
#
# Returns an array with one entry per #parse_node call (empty if the
# text holds nothing but whitespace and comments).
def parse_lines( text )
  ## note: remove all html comments for now - why? why not?
  ##   e.g. <!-- Area rank should match .. -->
  ## note: use .*? (non-greedy match that may be empty);
  ##   with .+? an empty comment <!----> could only match by running on
  ##   to the --> of a LATER comment, deleting all content in between
  text = text.gsub( /<!--.*?-->/m ) do |m|
    puts " removing comment >#{m}<"
    ''
  end

  input = StringScanner.new( text )

  nodes = []
  loop do
    skip_whitespaces( input )
    break if input.eos?

    nodes << parse_node( input )
  end
  nodes
end
parse_link( input )
click to toggle source
# File lib/wikiscript-parser/parser.rb, line 140
# Parses a page link of the form [[Name]] or [[Name|Display text]]
# starting at the current scan position and skips trailing whitespace.
#
# input - the StringScanner, positioned at the opening [[
#
# Returns a Wikitree::Page built from the stripped page name and the
# stripped alternate/display name (nil when the link has no | part,
# "" when the | is present but nothing follows it).
def parse_link( input )
  ## todo/fix: change to parse_page - why? why not?
  input.scan( /\[\[/ )

  ## page name
  ##  note: scan returns nil if nothing matches (e.g. [[]] or [[|x]]);
  ##   to_s turns that into an empty name instead of failing on nil.strip
  name = input.scan( /[^|\]]+/ ).to_s.strip

  alt_name = if input.check( /\|/ )   ## optional alternate/display name
               input.scan( /\|/ )     ## eat up |
               ## note: for [[Name|]] there is nothing between | and ]]
               ##   and scan returns nil - keep an empty display name
               input.scan( /[^\]]+/ ).to_s.strip
             else
               nil
             end

  input.scan( /\]\]/ )   ## eatup ]]
  skip_whitespaces( input )

  if alt_name
    puts " @page<#{name} | #{alt_name}>"
  else
    puts " @page<#{name}>"
  end

  Wikitree::Page.new( name, alt_name )
end
parse_node( input )
click to toggle source
# File lib/wikiscript-parser/parser.rb, line 165
# Parses exactly one node at the current scan position.
#
# Dispatch order:
#   1. template opener (TEMPLATE_BEGIN_RE)  -> #parse_template
#   2. [[                                   -> #parse_link
#   3. run of chars other than | { } [ ]    -> Wikitree::Text (stripped)
# Anything else is reported as a syntax error and the process exits
# with status 1.
#
# input - the StringScanner to read from
def parse_node( input )
  ## puts " [debug] parse >#{input.peek(10)}...<"
  return parse_template( input ) if input.check( TEMPLATE_BEGIN_RE )
  return parse_link( input )     if input.check( /\[\[/ )

  ## check for rawtext run for now
  run = input.scan( /[^|{}\[\]]+/ )
  if run
    # puts " text run=>#{run.strip}<"
    return Wikitree::Text.new( run.strip )
  end

  puts " !! SYNTAX ERROR: unknown content type:"
  puts input.peek( 100 )
  exit 1
end
parse_param( input )
click to toggle source
# File lib/wikiscript-parser/parser.rb, line 78
# Parses one template parameter, starting at its leading |.
#
# A parameter is treated as named when a run of letters, digits,
# spaces, _ or - is directly followed by = (e.g. hello=); otherwise it
# is treated as unnamed/positional content. An empty value (next char
# is | or }) only produces a warning.
#
# input - the StringScanner, positioned at the |
#
# Returns [name, value] - name is the stripped name or nil,
# value is an array of ast nodes (empty for an empty value).
def parse_param( input )
  input.scan( /\|/ )
  skip_whitespaces( input )

  ## check for named param e.g. hello=
  ##   otherwise assume content
  name = nil
  if input.check( /[a-z0-9 _-]+(?==)/i )   ## note: use positive lookahead (=)
    name = input.scan( /[a-z0-9 _-]+/i ).strip   ## strip trailing spaces
    puts " param name >#{name}<"
    input.scan( /=/ )
    skip_whitespaces( input )
  end

  ## allow empty value!!! (add/allow }} too? - why? why not?)
  if input.check( /\|/ ) || input.check( /\}/ )
    if name
      puts "!! WARN: empty value for param >#{name}<"
    else
      puts "!! WARN: empty value for (unnamed/positioned) param"
    end
    return [name, []]
  end

  value = parse_param_value( input )   # note: value is an array of ast nodes!!!
  puts " param value >#{value}<"
  [name, value]
end
parse_param_value( input )
click to toggle source
# File lib/wikiscript-parser/parser.rb, line 116
# Collects the nodes that make up one parameter value.
#
# Nodes are read with #parse_node (at least one is always read) until
# the next thing in the input, after skipping whitespace, is | or }}.
# Hitting the end of the input first is reported as a syntax error and
# the process exits with status 1.
#
# input - the StringScanner, positioned at the start of the value
#
# Returns the array of parsed nodes.
def parse_param_value( input )
  ## todo: change to parse_param_value_nodes or such - why? why not??
  # puts " [debug] parse_param_value >#{input.peek(10)}...<"
  nodes = []
  loop do
    nodes << parse_node( input )
    skip_whitespaces( input )
    ## puts " [debug] peek >#{input.peek(10)}...<"

    at_delimiter = input.check( /\|/ ) || input.check( /\}\}/ )
    break if at_delimiter

    next unless input.eos?

    puts "!! SYNTAX ERROR: unexpected end of string in param value; expected ending w/ | or }}"
    exit 1
  end
  nodes
end
parse_template( input )
click to toggle source
# File lib/wikiscript-parser/parser.rb, line 47
# Parses one template: opener, name, zero or more | params, closer.
#
# input - the StringScanner, positioned at the template opener
#
# Returns a Wikitree::Template built from the stripped name and an
# array of [param_name, param_value] pairs (see #parse_param).
# On a syntax error (no template name, or neither | nor the closer
# where one is expected) a message is printed and the process exits
# with status 1.
def parse_template( input )
  input.scan( TEMPLATE_BEGIN_RE )   ## e.g. {{
  skip_whitespaces( input )

  ## note: scan returns nil if no name follows (e.g. {{}} or {{|x}});
  ##   report it like the other syntax errors instead of failing with
  ##   a NoMethodError on nil.strip
  name = input.scan( TEMPLATE_NAME_RE )
  if name.nil?
    puts "!! SYNTAX ERROR: expected template name after {{:"
    puts input.peek( 100 )
    exit 1
  end
  name = name.strip   ## strip trailing spaces
  puts "==> (begin) template >#{name}<"
  skip_whitespaces( input )

  params = []
  loop do
    if input.check( TEMPLATE_END_RE )   ## e.g. }}
      input.scan( TEMPLATE_END_RE )
      puts "<== (end) template >#{name}<"
      ## puts " params:"
      ## pp params
      return Wikitree::Template.new( name, params )
    elsif input.check( /\|/ )           ## e.g. |
      puts " param #{params.size+1} (#{name}):"
      param_name, param_value = parse_param( input )
      params << [param_name, param_value]
    else
      puts "!! SYNTAX ERROR: expected closing }} or para | in template:"
      puts input.peek( 100 )
      exit 1
    end
  end
end
skip_whitespaces( input )
click to toggle source
# File lib/wikiscript-parser/parser.rb, line 23
# Advances +input+ past any run of spaces, tabs, carriage returns and
# newlines (incl. multiple newlines).
#
# input - the StringScanner to advance
#
# Returns 0 when the scanner is already at the end of its string,
# otherwise the skipped whitespace as a string (possibly empty).
def skip_whitespaces( input )
  if input.eos?
    0
  else
    input.scan( /[ \t\r\n]*/ )
  end
end