class Wikiscript::Parser

Constants

TEMPLATE_BEGIN_RE

Whereas MediaWiki variable names are all uppercase, template names have the same basic features and limitations as all page names: they are case-sensitive (except for the first character); underscores are parsed as spaces; and they cannot contain any of these characters: # < > [ ] | { }. This is because those are reserved for wiki markup and HTML.

TEMPLATE_END_RE
TEMPLATE_NAME_RE

todo/fix: check how to add # too!!!

todo: check what chars to escape in character class
change to something like [^|<>\[\]{}]+

Public Class Methods

new( text ) click to toggle source
# File lib/wikiscript-parser/parser.rb, line 12
# Creates a parser for the given text.
#
# The text is only stored here (in @text); nothing is parsed at
# construction time.
#
# text - the source text to be parsed later
def initialize( text )
  @text = text
end
parse( text ) click to toggle source

convenience all-in-one parse helper

# File lib/wikiscript-parser/parser.rb, line 7
# Convenience all-in-one helper: builds a parser for +text+ and
# immediately runs it.
#
# text    - the source text handed to the constructor
# returns - whatever the instance-level parse returns
def self.parse( text )
  parser = new( text )
  parser.parse
end

Public Instance Methods

parse() click to toggle source
# File lib/wikiscript-parser/parser.rb, line 16
# Parses the text stored in @text by handing it to parse_lines and
# returns parse_lines' result.
def parse
  parse_lines( @text )
end
parse_lines( text ) click to toggle source
# File lib/wikiscript-parser/parser.rb, line 183
# Parses +text+ into a flat list of top-level nodes.
#
# HTML comments (<!-- ... -->) are stripped first (each removal is logged
# to stdout); then nodes are read one after another with parse_node until
# the input is exhausted, skipping whitespace before every node.
#
# text    - the source text (String)
# returns - an Array with the results of parse_node, in input order
#           (empty if the text holds nothing but whitespace/comments)
def parse_lines( text )
  ## note: remove all html comments for now - why? why not?
  ## <!-- Area rank should match .. -->
  ## note: use .*? (non-greedy AND possibly empty) - with .+? an empty
  ##   comment <!----> was not matched on its own; the match ran on to the
  ##   NEXT --> and swallowed all the regular text in between
  text = text.gsub( /<!--.*?-->/m ) do |m|
    puts " removing comment >#{m}<"
    ''
  end

  input = StringScanner.new( text )

  nodes = []
  loop do
    skip_whitespaces( input )
    break if input.eos?

    nodes << parse_node( input )
  end
  nodes
end
parse_node( input ) click to toggle source
# File lib/wikiscript-parser/parser.rb, line 165
# Parses exactly one node at the current scan position.
#
# Dispatch order:
#   1. template opener (TEMPLATE_BEGIN_RE) => parse_template
#   2. [[                                  => parse_link
#   3. run of chars other than | { } [ ]   => Wikitree::Text (stripped)
# Anything else is reported on stdout as a syntax error and the process
# exits with status 1.
#
# input   - the scanner positioned at the start of the node
# returns - the result of parse_template / parse_link, or a Wikitree::Text
def parse_node( input )
  ## puts "  [debug] parse >#{input.peek(10)}...<"
  return parse_template( input )  if input.check( TEMPLATE_BEGIN_RE )
  return parse_link( input )      if input.check( /\[\[/ )

  ## check for rawtext run for now
  ##   (scan returns nil if there is no such run at this position)
  run = input.scan( /[^|{}\[\]]+/ )
  # puts "   text run=>#{run}<"
  return Wikitree::Text.new( run.strip )  if run

  puts " !! SYNTAX ERROR: unknown content type:"
  puts input.peek( 100 )
  exit 1
end
parse_param( input ) click to toggle source
# File lib/wikiscript-parser/parser.rb, line 78
# Parses one template parameter starting at its leading | .
#
# A parameter is either named (name=value) or unnamed/positional (value).
# An empty value (the next char is | or } ) is allowed and only triggers a
# warning on stdout.
#
# input   - the scanner positioned at the | that starts the parameter
# returns - [name, value] where name is the stripped param name (or nil
#           for an unnamed param) and value is the array returned by
#           parse_param_value (or [] for an empty value)
def parse_param( input )
  input.scan( /\|/ )
  skip_whitespaces( input )

  ## check for named param e.g. hello=
  ##  otherwise assume content
  name = nil
  if input.check( /[a-z0-9 _-]+(?==)/i )  ## note: use positive lookhead (=)
    name = input.scan( /[a-z0-9 _-]+/i ).strip   ## strip trailing spaces
    puts "        param name >#{name}<"
    input.scan( /=/ )
    skip_whitespaces( input )
  end

  ## note: value is an array of ast nodes!!!
  if input.check( /\|/ ) || input.check( /\}/ )
    ## allow empty value (for named and unnamed params alike)
    if name
      puts "!! WARN: empty value for param >#{name}<"
    else
      puts "!! WARN: empty value for (unnamed/positioned) param"
    end
    value = []
  else
    value = parse_param_value( input )
    puts "        param value >#{value}<"
  end

  [name, value]
end
parse_param_value( input ) click to toggle source
# File lib/wikiscript-parser/parser.rb, line 116
# Collects the nodes that make up one parameter value.
#
# Nodes are read with parse_node (skipping whitespace after each one)
# until the scanner sits in front of | or }} . Running out of input before
# that is reported on stdout as a syntax error and the process exits with
# status 1.
#
# input   - the scanner positioned at the first node of the value
# returns - an Array with the results of parse_node (at least one entry)
def parse_param_value( input ) ## todo: change to parse_param_value_nodes or such - why? why not??
  # puts "     [debug] parse_param_value >#{input.peek(10)}...<"

  nodes = []
  loop do
    nodes << parse_node( input )
    skip_whitespaces( input )

    ## puts "      [debug] peek >#{input.peek(10)}...<"
    ## value ends in front of the next param (|) or the template end (}})
    break if input.check( /\|/ ) || input.check( /\}\}/ )

    ## more input left? keep on collecting nodes
    next  unless input.eos?

    puts "!! SYNTAX ERROR: unexpected end of string in param value; expected ending w/ | or }}"
    exit 1
  end

  nodes
end
parse_template( input ) click to toggle source
# File lib/wikiscript-parser/parser.rb, line 47
# Parses one template, from its opener (TEMPLATE_BEGIN_RE) through its
# closer (TEMPLATE_END_RE), including all | separated parameters.
#
# Progress is logged to stdout. Syntax errors (missing template name, or
# anything other than | or the closer after the name / a parameter) are
# reported on stdout and the process exits with status 1.
#
# input   - the scanner positioned at the template opener
# returns - Wikitree::Template.new( name, params ) where params is an
#           array of [param_name, param_value] pairs as returned by
#           parse_param
def parse_template( input )
  input.scan( TEMPLATE_BEGIN_RE ) ## e.g.{{
  skip_whitespaces( input )

  name = input.scan( TEMPLATE_NAME_RE )
  ## note: scan returns nil if there is no name at all e.g. {{}} or {{|x}}
  ##   report a syntax error (like everywhere else) instead of crashing
  ##   with a NoMethodError on nil.strip
  if name.nil?
    puts "!! SYNTAX ERROR: expected template name after opening {{:"
    puts input.peek( 100 )
    exit 1
  end
  name = name.strip  ## strip trailing spaces?
  puts "==> (begin) template >#{name}<"
  skip_whitespaces( input )

  params = []
  loop do
    if input.check( TEMPLATE_END_RE ) ## e.g. }}
      input.scan( TEMPLATE_END_RE )
      puts "<== (end) template >#{name}<"
      ## puts "  params:"
      ## pp params
      return Wikitree::Template.new( name, params )
    elsif input.check( /\|/ )  ## e.g. |
      puts "      param #{params.size+1} (#{name}):"
      param_name, param_value = parse_param( input )
      params << [param_name, param_value]
    else
      puts "!! SYNTAX ERROR: expected closing }} or para | in template:"
      puts input.peek( 100 )
      exit 1
    end
  end
end
skip_whitespaces( input ) click to toggle source
# File lib/wikiscript-parser/parser.rb, line 23
# Moves the scanner past any run of spaces, tabs, carriage returns and
# newlines at the current position.
#
# input   - the scanner to advance
# returns - 0 if the scanner is already at the end of input; otherwise the
#           whitespace that was consumed (a String, empty if there was
#           none)
def skip_whitespaces( input )  ## incl. multiple newlines
  input.eos? ? 0 : input.scan( /[ \t\r\n]*/ )
end