class Bayeux
Attributes
block_forest[R]
Accessors
Public Class Methods
json_create(json_hash)
click to toggle source
# File lib/bayeux/bayeux.rb, line 322
#
# Rebuild an instance from the hash form written by #to_json.
#
# json_hash:: a Hash whose "block_forest" entry is handed to +new+.
#
# Returns whatever +new+ returns. If the JSON library cannot be loaded a
# warning is printed and nil is returned (the value of +warn+).
def self.json_create(json_hash)
  require 'json'
  new(json_hash["block_forest"])
rescue LoadError
  # This method restores an object; it does not generate JSON, so say so.
  warn "The JSON gem couldn't be loaded, and so the object could not be restored from its JSON representation"
end
Public Instance Methods
content_string(first, last)
click to toggle source
Helper functions
# File lib/bayeux/bayeux.rb, line 344
#
# Helper: return the part of @content that starts at index +first+ and stops
# just before index +last+.
#
# first:: start index into @content, or nil.
# last::  end index (exclusive), or nil.
#
# Returns the selected String. Returns "" when either index is nil, and also
# when the requested range lies outside @content or +last+ precedes +first+
# (String#[] answers nil in those cases, which would otherwise leak out).
def content_string(first, last)
  return "" if first.nil? || last.nil?

  @content[first, last - first] || ""
end
parse(content)
click to toggle source
Start parsing
# File lib/bayeux/bayeux.rb, line 11
#
# Start parsing: walk +content+ from start to end, cut it into
# paragraph-level blocks, and append one tree per block to @block_forest.
#
# content:: the text to parse; used here through #length, #index and #[].
#
# Side effects visible in this method: (re)creates @parse_log, stores
# +content+ in @content, resets @block_forest to an empty Array and
# @block_index to 0.
#
# Returns nil on every path visible here (the value of the +while+ loop, a
# bare +break+, or an explicit <tt>return nil</tt> after a logged error).
# Results are read from @block_forest.
def parse(content)

  # Set up the logger
  @parse_log = Logger.new('bayex_parser')
  @parse_log.outputters = Outputter.stdout

  # Save the content for later (content_string slices it)
  @content = content

  # Final string to return to the caller
  # NOTE(review): never read or returned anywhere in this method.
  pandoc_str = String.new

  # Forest containing the paragraph-level block trees
  @block_forest = Array.new

  # Index of the paragraph blocks; used as the node name of each tree root
  @block_index = 0

  # Indices into the content, denoting the current limits of
  # parsing
  from_index = 0
  to_index = content.length

  ## Procedure for adding a node to the tree. Bumps @block_index by 100 and
  ## wraps a fresh ParaBlock in a Tree::TreeNode named after the new index.
  new_node = ->(node_type = :none, node_contents = ""){
    @block_index += 100
    @parse_log.info {"creating node #{@block_index}"}
    return Tree::TreeNode.new(@block_index, ParaBlock.new(node_type, node_contents))
  }

  # Chunk the content into paragraph blocks
  while from_index < to_index do

    # Look for the next space, the next ']' and the next '[' character,
    # which should tell us what type of paragraph we are dealing with.
    # NOTE(review): despite its name, left_sb_index holds the position of
    # the next ']' (the closing bracket).
    space_index = content.index(' ', from_index)
    left_sb_index = content.index(']', from_index)
    block_start = content.index('[', from_index)

    if block_start.nil? then
      # No '[' ahead: it's a paragraph
      unless space_index.nil? then
        block = ParaBlock.new(:paragraph, "")
        block_start = from_index
      else
        # No more blocks
        break
      end

    # NOTE(review): String#index returns nil when nothing is found, so if a
    # '[' exists but no ']' or no ' ' follows from_index, the comparisons in
    # the branches below raise NoMethodError on nil -- confirm inputs always
    # contain both.
    elsif (left_sb_index < space_index) and (left_sb_index > block_start) then
      # ']' comes before the next space and after '[':
      # this should be a block start marker
      orig_type_length = (left_sb_index - 1) - block_start
      block = ParaBlock.from_s(content[block_start + 1, orig_type_length])

    elsif space_index > block_start then
      # This should be an annotated block (type name followed by a space)
      orig_type_length = space_index - (block_start + 1)
      block = ParaBlock.from_s(content[block_start + 1, orig_type_length])

    elsif space_index < block_start then
      # A space precedes the next '[': it's a paragraph
      # NOTE(review): space_index cannot be nil here (the comparison above
      # would already have raised), so the else branch looks unreachable.
      unless space_index.nil? then
        block = ParaBlock.new(:paragraph, "")
        block_start = from_index
      else
        @parse_log.error {"Cannot find a valid block (syntax error in the start of block characters)"}
        return nil
      end
    end

    # Now we know the type of block, and where it starts, work out
    # where the end of the block is
    case block.type
      when :h1,:h2,:h3,:h4,:h5,:h6,:bib
        # Terminated by the ']' found above
        block_end = left_sb_index

        # Add the node, and paragraph sub_tree, to the block forest
        # NOTE(review): the +4 presumably skips an opener such as "[h1 ";
        # "[bib " is longer -- TODO confirm.
        block.content = content_string(block_start + 4, block_end)
        para_node = Tree::TreeNode.new(@block_index, block)
        para_subtree(para_node)
        @block_forest << para_node

      when :paragraph
        # Look for a single new line, containing only whitespace.
        # The regex matches the first line (at or after block_start) that
        # begins with a whitespace character; nil if there is none.
        block_end = content.index(/^\s/, block_start)

        # Add a node to the block forest
        block.content = content_string(block_start, block_end)
        para_node = Tree::TreeNode.new(@block_index, block)
        para_subtree(para_node)
        @block_forest << para_node

      when :medskip
        # Self-terminating (9 is presumably the length of "[medskip]")
        block_end = block_start + 9

        # Add a node to the block forest
        para_node = Tree::TreeNode.new(@block_index, block)
        @block_forest << para_node

      when :command, :output, :file
        # Look for a terminating '[end]'
        block_end = content.index('[end]', block_start)
        content_index = block_start + (block.orig_type_length + 2)

        # Add a node to the block forest, and do no further processing
        block.content = content_string(content_index, block_end)
        para_node = Tree::TreeNode.new(@block_index, block)
        @block_forest << para_node

        # Skip the 'end' ('[end]' is 5 characters)
        # NOTE(review): if no '[end]' was found block_end is nil and this
        # raises; the same applies to every `block_end += 5` below.
        block_end += 5

      when :code
        # Look for a terminating '[end]'
        block_end = content.index('[end]', block_start)
        content_index = block_start + (block.orig_type_length + 2)

        # Look for arguments to the block
        # NOTE(review): these lookups are relative to from_index, not
        # block_start, and the +6 presumably skips "[code " -- TODO confirm.
        unless content[from_index + 1] == ']' then
          # Find the end of the arguments
          arg_end = content.index(/]\s/, from_index)
          arguments = content[from_index + 6, (arg_end - (from_index + 6))]

          # Create the parent node for this content
          code_start = content.index(/^/, arg_end)
          block.content = content_string(code_start, block_end)
          para_node = Tree::TreeNode.new(@block_index, block)

          # Split the arguments on '|'
          items = arguments.split('|')

          # First argument: the code language (empty when absent)
          unless items[0].nil? then
            para_node << new_node.call(:code_language, items[0].strip)
          else
            para_node << new_node.call(:code_language, "")
          end

          # Second argument: the starting line number ("0" when absent)
          unless items[1].nil? then
            para_node << new_node.call(:code_start_number, items[1].strip)
          else
            para_node << new_node.call(:code_start_number, "0")
          end
        else
          # No arguments: create the parent node with default children
          block.content = content_string(content_index, block_end)
          para_node = Tree::TreeNode.new(@block_index, block)
          para_node << new_node.call(:code_language, "")
          para_node << new_node.call(:code_start_number, "0")
        end

        # Add a node to the block forest, and do no further processing
        @block_forest << para_node

        # Skip the 'end'
        block_end += 5

      when :block_quote, :figure, :note, :quote
        # Look for a terminating '[end]'
        block_end = content.index('[end]', block_start)
        content_index = block_start + (block.orig_type_length + 2)

        # Add a node to the block forest
        block.content = content_string(content_index, block_end)
        para_node = Tree::TreeNode.new(@block_index, block)
        para_subtree(para_node)
        @block_forest << para_node

        # Skip the 'end'
        block_end += 5

      when :dl
        # Look for a terminating '[end]'
        block_end = content.index('[end]', block_start)

        ## Description List blocks are really two blocks, so should be
        ## parsed as such. First we look for (and parse) the description
        ## header, then we look for (and parse) the description body. We
        ## keep doing this until we run out of things to do.

        # Create the master node for the list
        dl_node = new_node.call(:dl)

        # Get the list contents
        content_index = block_start + (block.orig_type_length + 2)
        list_content = content_string(content_index, block_end)

        # First break the list into components (split on empty lines)
        list_items = list_content.split(/^\n/)

        # Process each list item
        list_items.each{|list_item|
          # Break the list item into a description header and
          # description body: the header runs from after '[item' to the
          # first line-ending ':', the body from there to the last ']'.
          # NOTE(review): any of these lookups may be nil for a malformed
          # item, in which case the arithmetic below raises.
          break_at = list_item.index(/:$/)
          item_index = list_item.index('[item')
          item_end = list_item.rindex(']')

          description_header = list_item[item_index + 5, break_at - 5]
          description_body = list_item[break_at + 1, (item_end - break_at - 1)]

          # Parse the description header
          header_node = new_node.call(:dl_header, description_header)
          para_subtree(header_node)
          dl_node << header_node

          # Parse the description body
          body_node = new_node.call(:dl_text, description_body)
          para_subtree(body_node)
          dl_node << body_node
        }

        # Add the node to the block forest
        @block_forest << dl_node

        # Skip the 'end'
        block_end += 5

      when :ol, :ul
        # Lists are special because they can be nested. Hence we
        # have to be a bit careful about the end marker: only an '[end]'
        # followed by a blank line or the end of the content closes the list.
        block_end = content.index(/\[end\]((\n\n)|($\z))/, block_start)

        if block_end.nil? then
          @parse_log.error {"Error: Invalid end to a list"}
          @parse_log.error {"Backtrace: #{to_s}"}
          return nil
        end

        # Add a node to the block forest
        block.content = content_string(block_start + 4, block_end)
        para_node = Tree::TreeNode.new(@block_index, block)
        para_subtree(para_node)
        @block_forest << para_node

        # Skip the 'end'
        block_end += 5

      else
        @parse_log.error {"Error: Unknown block type '#{block.type}'"}
        @parse_log.error {"Backtrace: #{to_s}"}
        return nil
    end

    # Leave a gap of 100 between paragraph-level node names
    @block_index += 100

    # Move past the block, then skip any newline characters that follow
    # (the regex matches "\n" only, not other whitespace)
    unless block_end.nil? then
      from_index = block_end + 1
      until not (content[from_index] =~ /\n/) do
        from_index += 1
      end
    else
      # No end could be determined for the block: give up
      return nil
    end
  end
end
to_json(*a)
click to toggle source
# File lib/bayeux/bayeux.rb, line 306
#
# Serialise this object as JSON: a hash holding @block_forest under
# "block_forest" and the class name under JSON.create_id.
#
# a:: arguments supplied by the JSON generator (typically the generator
#     state). They are forwarded to Hash#to_json so that formatting and
#     nesting options, e.g. those of JSON.pretty_generate, are honoured
#     rather than silently dropped.
#
# Returns the JSON String. If the JSON library cannot be loaded a warning
# is printed and nil is returned (the value of +warn+).
def to_json(*a)
  require 'json'

  payload = {
    "block_forest" => @block_forest,
    JSON.create_id => self.class.name
  }
  payload.to_json(*a)
rescue LoadError
  warn "The JSON gem couldn't be loaded, and so the JSON representation could not be generated"
end
to_s()
click to toggle source
Generators: Convenience functions taking an AST (Abstract Syntax Tree),
and returning a document of the specified type
# File lib/bayeux/bayeux.rb, line 288
#
# Build a plain-text dump of @block_forest: one "Paragraph Type" heading per
# tree, followed by the name, type, depth and wrapped contents of every node
# that the tree's #each yields.
#
# Returns the assembled String.
def to_s
  @block_forest.each_with_object(String.new) do |tree, report|
    report << "Paragraph Type: #{tree.content.type}\n"
    report << "\n"

    tree.each do |node|
      report << " Node Name: #{node.name}\n"
      report << " Node Type: #{node.content.type}\n"
      report << " Node Depth: #{node.node_depth}\n"

      # Wrap and indent the inspected contents, then drop the first 17
      # characters so the first line sits right after the label.
      wrapped = node.content.content.inspect.line_wrap(70,2).indent(17)
      report << " Node Contents: |#{wrapped[17, wrapped.length]}|\n\n"
    end
  end
end
Private Instance Methods
para_subtree(tree_root)
click to toggle source
Return the sub-tree of paragraph blocks, given a suitably initialised root of a tree
# File lib/bayeux/bayeux.rb, line 354
#
# Build the sub-tree of inline blocks underneath +tree_root+ by scanning the
# root's content one codepoint at a time, then prune empty nodes, patch up
# node types, and finally clear the root's own content.
#
# tree_root:: a tree node whose #name converts with #to_i and whose #content
#             responds to #to_s and #clear (a Tree::TreeNode wrapping a
#             ParaBlock, judging by how #parse calls this -- TODO confirm).
#
# The tree is modified in place. The method's value is whatever
# <tt>tree_root.content.clear</tt> returns; the calls visible in #parse
# ignore it.
def para_subtree(tree_root)

  # Codepoints that interrupt plain accumulation and trigger an action:
  # 10 LF, 12 FF, 13 CR, 32 space, 34 '"', 45 '-', 46 '.', 91 '[', 93 ']'
  stop_at = Set.new [10,12,13,32,34,45,46,91,93]

  # Value of the current block; new node names count up from here
  block_index = tree_root.name.to_i

  # Flag indicating the start of a new block
  block_start = false

  # String holding the list of characters to parse
  content_string = tree_root.content.to_s

  # Massage the content string to replace pseudo-blocks: whitespace followed
  # by '...' becomes " [single_quote ...]".
  # NOTE(review): gsub! mutates the object returned by #to_s; whether that is
  # the block's own string depends on ParaBlock#to_s -- verify.
  content_string.gsub!(/\s'([^']+)'/, ' [single_quote \1]')

  # Index into the content string (position of the current codepoint)
  string_index = 0

  ## Procedure for adding a node to the tree. Bumps block_index by 1 and
  ## wraps a fresh ParaBlock in a Tree::TreeNode named after the new index.
  new_node = ->(node_type = :none, node_contents = ""){
    block_index += 1
    @parse_log.debug {"creating node #{block_index}"}
    return Tree::TreeNode.new(block_index, ParaBlock.new(node_type, node_contents))
  }

  # Register holding the current paragraph string being built
  register = new_node.call

  # Add new nodes to here
  add_to = tree_root

  ## Procedure for attaching the tree node held in the register to the current attachment
  ## point of the growing tree. On failure the tree is printed and the error
  ## re-raised.
  attach_register = -> {
    begin
      @parse_log.debug {" > attaching node #{register.name} as a child of #{add_to.name} (#{register.content.to_debug_s})"}
      add_to << register
    rescue
      tree_root.print_tree
      raise
    end
  }

  ## Procedure for adding the register to the tree, at the current level. This also
  ## creates a new node at the same level, and sets the register to point to that
  ## node
  add_current_node = -> {
    # Save a pointer to the current node
    attach_register.call

    # Add a new node at the same level as the current_node
    register = new_node.call
  }

  ## Procedure for moving down the tree. This involves saving the current contents of the
  ## register as a new node at the current level, then creating a new node as a child
  move_down_tree = -> {
    @parse_log.debug {"-> going down #{register.name} (#{register.content.to_debug_s})"}

    # Save the current node, pointed to by the register
    attach_register.call

    # Add future nodes as the child of this node
    add_to = register

    # Create a new node as the first child
    register = new_node.call

    # Flag the start of a new block
    block_start = true
  }

  ## Procedure for moving up the tree, from the current node
  ## (identified by the register), to the next level up
  move_up_tree = -> {
    @parse_log.debug {"<- going up #{register.name} (#{register.content.to_debug_s})"}

    # Save the current node, pointed to by the register
    attach_register.call

    # Move up the tree
    # NOTE(review): an unmatched ']' at root level would make add_to the
    # root's parent (presumably nil) -- verify how that is handled.
    add_to = add_to.parent
    register = new_node.call

    # Remove any 'empty' children (sub-tree nodes that we might
    # have just added, but which in fact could be trimmed)
    #clean_children.call
  }

  ###
  ### Parse the content
  ###

  content_string.each_codepoint{|codepoint|
    # Check if we need to stop, in order to process the next block
    if stop_at.include?(codepoint) then

      # If a stop has been requested, undertake the action
      # indicated by the current codepoint.
      # NOTE(review): at string_index 0 the `string_index - 1` look-behinds
      # below index from the end of the string (Ruby negative index).
      case codepoint

        # Check if this is a new block (we may be dealing with a block
        # terminated by a new-line, instead of a space)
        when 10,12,13
          if block_start then
            # Record the block type, and re-set the register content
            register.content.content_to_type!

            # Unset the start of a new block flag
            block_start = false
          elsif not ((register.content.empty?) or (content_string[string_index - 1] =~ /^\s/)) then
            # Otherwise treat the new-line as a space, but suppress multiple spaces
            register.content << " "
          end

        # On a space character, first check the flag indicating the start
        # of a block. If set, this should indicate that this space follows the
        # name of the block
        when 32
          if block_start then
            # Record the block type if not already set
            if register.content.type == :none then
              register.content.content_to_type!
            else
              register.content << " "
            end

            # Unset the start of a new block flag
            block_start = false
          else
            # If this is not the start of a block, pass on the space (but suppress multiple spaces)
            unless content_string[string_index - 1] =~ /^\s/ then
              register.content << " "
            end
          end

        # On a double quote, we need to check if this opens or closes a
        # quote block (typed :double_quote below), or is something else
        when 34
          # If the previous character was whitespace or a non-word
          # character, this should be the start of a quote block
          if content_string[string_index - 1] =~ /\s|\W/ then
            # Create a new sub-block of the current block
            move_down_tree.call

            # Set the block type
            register.content.type = :double_quote

          # If the next character is not alphanumeric, then this should be the
          # end of a quote block
          elsif not content_string[string_index + 1] =~ /\w/ then
            # Move back to the parent of the quote block
            move_up_tree.call

          # No significance, so pass it on
          # NOTE(review): a single quote is appended although the codepoint
          # handled here is the double quote -- looks like a bug; confirm.
          else
            register.content << "'"
          end

        # Count the minus signs: this could be a hyphen, an em-dash or an en-dash.
        # Nothing is emitted unless the next character is whitespace or a
        # word character, so a run of '-' is resolved at its last '-'.
        when 45
          if content_string[string_index + 1] =~ /\s|\w/ and not content_string[string_index + 1] == '-' then
            if ((content_string[string_index - 1] == '-') and (content_string[string_index - 2] == '-')) then
              # Three in a row.
              # Add the current content, and create a new node at the same level
              add_current_node.call

              # Change the type of the register, and add it to the tree (which also
              # creates a new node at the same level for the following content)
              register.content.type = :em_dash
              add_current_node.call
            elsif content_string[string_index - 1] == '-'
              # Two in a row.
              # Add the current content, and create a new node at the same level
              add_current_node.call

              # Change the type of the register, and add it to the tree (which also
              # creates a new node at the same level for the following content)
              register.content.type = :en_dash
              add_current_node.call
            else
              # A lone hyphen is kept as text
              register.content << "-"
            end
          end

        # Count the dots: this could be an ellipsis
        when 46
          register.content << "."

          if content_string[string_index + 1] =~ /\s|\w|$/ then
            # Work out how many dots there are: split[2] is everything after
            # the last whitespace/word character in the register
            split = register.content.content.rpartition(/\s|\w/)

            unless split[0].empty? then
              case split[2].length
                when 3
                  # "..." : emit the preceding text, then an :elipses node
                  register.content.content = split[0] + split[1]
                  add_current_node.call
                  register.content.type = :elipses
                  add_current_node.call
                when 4
                  # "...."
                  # NOTE(review): unlike the 3-dot case the register is
                  # attached *before* its content is trimmed -- confirm the
                  # ordering is intended.
                  add_current_node.call
                  register.content.content = split[0] + split[1]
                  register.content.type = :elipses_stop
                  add_current_node.call
              end
            end
          end

        # On the start of a new block, first record the old block
        # then initialize the new
        when 91
          # Move down the tree
          move_down_tree.call

          # Flag the start of a new block (move_down_tree already sets this)
          block_start = true

        # At the termination of a block, transform the block text to the
        # literal equivalent
        when 93
          ## Do any special processing
          ##

          # Suppress block characters from lists that are
          # dealt with elsewhere
          if register.content.type == :none then
            case register.content.to_s
              when "end", "ol", "ul"
                register.content.clear
            end
          end

          # Split items in links ("text|target")
          if register.content.type == :link then
            # Split the item
            items = register.content.to_s.split('|')

            # Create a node for the link title
            unless items[0].nil? then
              title = new_node.call(:link_text, items[0].strip)
              register << title
            end

            # Create a node for the link target
            unless items[1].nil? then
              target = new_node.call(:link_target, items[1].strip)
              register << target
            end
          end

          # Add the register to the tree, and move back up to
          # the previous level
          move_up_tree.call
      end
    else
      # If we are not stopping, just add the codepoint to the register
      register.content << codepoint
    end

    # Increment the string index
    string_index += 1
  }

  # Add the last node to the tree
  attach_register.call

  ## Prune any null nodes: this makes the tree smaller, and means we can identify
  ## places where the branches need to be re-ordered
  # NOTE(review): nodes are removed while tree_root.each is iterating --
  # verify the traversal tolerates that.
  tree_root.each{|node|
    if node.content.type == :none and node.content.empty? then
      @parse_log.debug {"--- pruning node #{node.name}"}
      node.remove_from_parent!
    end
  }

  ## Now we need to walk the AST, patching up the types so that enclosing
  ## types work properly. This requires finding all the :none nodes, and
  ## working out the correct type from the found siblings. To make sure
  ## we infer the correct type, we need to do a breadth first walk
  tree_root.breadth_each{|node|
    # First look 'across' the tree, looking for cases where enclosing nodes
    # have not been properly identified
    if node.is_first_sibling? then
      # Check the type of the last sibling: if this is :none, then
      # we have something like a list, where the paragraph text needs to
      # be separated from the element type
      if node.first_sibling.content.type != :none and node.last_sibling.content.type == :none then
        @parse_log.info {"examining node #{node.name}"}

        # Patch the type hierarchy. The easiest way to do this is to
        # look for a parent whose type is also :none: if found we can
        # 'borrow' the node as the item type
        if node.parent.content.type == :none and node.parent.content.empty? then
          @parse_log.info {"changing parent type"}
          node.parent.content.type = node.content.type
          node.content.type = :none

        # Otherwise we have to work a bit harder, separating us and
        # our siblings, creating a new node of the correct type, and
        # binding everything back together
        else
          @parse_log.info {"re-ordering siblings"}
          siblings = node.siblings

          new_root = new_node.call(node.content.type)
          node.parent << new_root

          node.remove_from_parent!
          new_root << node

          # The block parameter reuses the name `node`, shadowing the outer one
          siblings.each{|node|
            @parse_log.info {"removing node #{node.name}"}
            @parse_log.info {"attaching to #{new_root.name}"}
            node.remove_from_parent!
            new_root << node
          }
        end
      end
    end
  }

  # We need to preserve the children, so delete the content from
  # the parent to tell users of the AST to walk our children
  tree_root.content.clear
end