class BayeuxHTMLGen

Public Class Methods

header_id(header_text) click to toggle source

Return the header id, given a string of text

# File lib/bayeux/html_gen.rb, line 463
# Build the id used for a header from its text: every space becomes an
# underscore and the result is lower-cased.
def self.header_id(header_text)
  header_text.tr(" ", "_").downcase
end
new(syntax_tree) click to toggle source

Default Constructor

# File lib/bayeux/html_gen.rb, line 10
# Remember the syntax tree to render and start with an empty output buffer.
def initialize(syntax_tree)
  @syntax_tree, @html_string = syntax_tree, String.new
end

Public Instance Methods

doc_toc() click to toggle source

Return the table of contents, extracted from the AST

# File lib/bayeux/html_gen.rb, line 60
# Return the table of contents, extracted from the AST, as a string of
# HTML list markup.
#
# Only :h2 to :h6 blocks in @syntax_tree.block_forest contribute. Level 1
# headers are skipped, and no enclosing list is emitted for level 2 items:
# the very top level is provided by the enclosing template.
#
# Each header becomes one <li> linking to "#<header id>". A header deeper
# than the list it falls under opens exactly one nested <ul>; a shallower
# header closes every nested <ul> that is deeper than it. Every <ul> opened
# here is therefore closed here, whatever order the header levels come in.
def doc_toc

  # Heading level of each block type that appears in the ToC
  # (level 1 tags are deliberately absent)
  toc_levels = { :h2 => 2, :h3 => 3, :h4 => 4, :h5 => 5, :h6 => 6 }

  # ToC to return to the caller
  toc = String.new

  # Levels of the nested lists opened so far and not yet closed,
  # innermost last
  open_levels = []

  # Walk the forest
  @syntax_tree.block_forest.each do |tree|
    heading = tree.content
    level = toc_levels[heading.type]

    # Ignore if not a header
    next unless level

    # Moving up: close off every nested list deeper than this header
    while !open_levels.empty? && open_levels.last > level
      toc << "#{indent(open_levels.pop)}</ul>\n"
    end

    # Moving down: add a single new list as a sub-list of the current
    # list (level 2 is the list supplied by the template)
    parent_level = open_levels.last || 2
    if level > parent_level
      toc << "#{indent(parent_level)}<ul>\n"
      open_levels.push(level)
    end

    # Record the current item. header_id is a class-level method, so it
    # has to be reached through the class.
    toc << "#{indent(level)}<li><a href=\"##{self.class.header_id(heading.content)}\">#{heading.content}</a></li>\n"
  end

  # Close off the lists that are still open, innermost first
  until open_levels.empty?
    toc << "#{indent(open_levels.pop)}</ul>\n"
  end

  toc
end
generate() click to toggle source

Return the AST as an HTML string (stored in @html_string)

# File lib/bayeux/html_gen.rb, line 16
# Render the AST held in @syntax_tree to HTML. The markup is accumulated in
# @html_string, passed through Tidy, and the cleaned result is returned.
def generate

  # Logger for this generator, writing to standard output
  @html_gen_log = Logger.new('bayex_html_gen')
  @html_gen_log.outputters = Outputter.stdout

  # The walker calls back into this object for every node it visits
  open_node = method(:generate_node_start)
  close_node = method(:generate_node_end)
  whole_node = method(:generate_full_node)

  tree_walker = TreeWalker.new
  tree_walker.on_before_down = open_node
  tree_walker.on_after_up = close_node
  tree_walker.on_no_siblings = whole_node
  tree_walker.on_no_children = whole_node

  # Start from an empty buffer so repeated calls do not accumulate output
  @html_string.clear

  # Render each tree of the forest in turn
  @syntax_tree.block_forest.each do |tree|
    tree_walker.walk_tree(tree)
  end

  #@html_gen_log.debug {@syntax_tree.to_s}
  #@html_gen_log.info {@html_string}

  # Drop the whitespace between an entity and the tag that follows it
  @html_string.gsub!(/;[\s]+</,';<')

  # Let Tidy clean up the markup before handing it back
  tidy = TidyFFI::Tidy.new(@html_string)

  tidy.options.fix_uri = 1
  tidy.options.indent = 1
  tidy.options.join_classes = 1
  tidy.options.markup = 1
  tidy.options.output_html = 1
  tidy.options.punctuation_wrap = 1
  tidy.options.show_body_only = 1

  tidy.clean
end
generate_full_node(block) click to toggle source

Output the full contents of the node, properly bracketed as an HTML expression. This is only called if we have no sub-nodes to deal with

# File lib/bayeux/html_gen.rb, line 150
# Output the full contents of the node, properly bracketed as an HTML
# expression, by appending it to @html_string. This is only called if we
# have no sub-nodes to deal with.
#
# block must respond to #type (a symbol naming the kind of node) and
# #content (the node's text). Node types not listed below add nothing
# beyond the optional separating space.
def generate_full_node(block)

  # Check if we need to add a space: one is inserted unless the node's
  # text starts with whitespace or punctuation
  unless block.content[0] =~ /\s|[:;.,?!]/
    @html_string << " "
  end

  case block.type

    # Headers: the tag name is the node type itself (h1 to h6).
    # header_id is a class-level method, so it is reached through the class.
    when :h1, :h2, :h3, :h4, :h5, :h6
      @html_string << "<#{block.type} id=\"#{self.class.header_id(block.content)}\">#{block.content}</#{block.type}>"

    # Ordinary paragraphs
    when :paragraph
      @html_string << "<p>#{typeset(block.content)}</p>"

    when :none
      @html_string << "#{typeset(block.content)}"

    # Special paragraphs
    when :block_quote
      @html_string << "<blockquote>#{typeset(block.content)}</blockquote>"
    when :single_quote
      @html_string << "&lsquo;&#8202;#{typeset(block.content)}&#8202;&rsquo;"
    when :double_quote
      @html_string << "&ldquo;&#8202;#{typeset(block.content)}&#8202;&rdquo;"

    when :note
      # The only <span> wraps the "Note:" label, so only <p> is left to close
      @html_string << "<p class=\"note\"><span class=\"note_header\">Note:</span>#{typeset(block.content)}</p>"

    when :command
      @html_string << "<p class=\"command\">#{block.content}</p>"

    # Code settings: nothing is output, the values are only remembered
    when :code_language
      @code_language = block.content
    when :code_start_number
      @code_start_number = block.content

    when :file
      @html_string << "<pre class=\"file\">#{block.content}</pre>"
    when :output
      @html_string << "<pre class=\"output\">#{block.content}</pre>"

    # Special Characters
    when :em_dash
      @html_string << "&mdash;"
    when :en_dash
      @html_string << "&ndash;"
    when :elipses
      @html_string << "&#8202;.&thinsp;&#8202;.&thinsp;&#8202;.&thinsp;"
    when :elipses_stop
      @html_string << "&#8202;.&thinsp;&#8202;.&thinsp;&#8202;.&thinsp;."

    # Links: nothing is output, the parts are only remembered
    when :link_target
      @link_target = block.content
    when :link_text
      @link_text = block.content

    # Lists
    when :item
      @html_string << "<li>#{typeset(block.content)}</li>"

    when :dl_header
      @html_string << "<dt>#{block.content}</dt>"
    when :dl_text
      @html_string << "<dd>#{block.content}</dd>"

    # Tags
    when :ac
      # Do we know anything about this acronym?
      if $reference.acronym_list.include?(block.content) then

        # Assemble from the reference list, preferring the HTML form of
        # the text when the entry has one
        entry = $reference.acronym_list[block.content]
        acronym = entry.include?('text_html') ? entry['text_html'] : entry['text']
        definition = entry['def']

        @html_string << "<acronym title=\"#{definition}\">#{acronym}</acronym>"
      else
        # Do what we can
        @html_string << "<acronym>#{block.content}</acronym>"
      end

    when :emph
      @html_string << "<em>#{block.content}</em>"

    when :tt
      @html_string << "<tt>#{block.content}</tt>"

  end
end
generate_node_end(block) click to toggle source
# File lib/bayeux/html_gen.rb, line 369
# Output only the end of a node: append to @html_string whatever closes the
# given block. Node types not listed below add nothing.
def generate_node_end(block)
  kind = block.type

  case kind

    # Headers and list containers close with a tag named after the node type
    when :h1, :h2, :h3, :h4, :h5, :h6, :ol, :ul, :dl
      @html_string << "</#{kind}>"

    # Everything that was opened as a <p>
    when :paragraph, :note, :command
      @html_string << "</p>"

    # Quotations
    when :block_quote then @html_string << "</blockquote>"
    when :single_quote then @html_string << "&#8202;&rsquo;"
    when :double_quote then @html_string << "&#8202;&rdquo;"

    # Code: highlight the collected source, and retry as plain text if
    # highlighting with the requested language raises
    when :code
      begin
        @html_string << Uv.parse(block.content, "xhtml", @code_language, @code_start_number.to_i, "dawn")
      rescue
        @html_string << Uv.parse(block.content, "xhtml", "plain_text", @code_start_number.to_i, "dawn")
      end

    # Special characters
    when :em_dash then @html_string << "&mdash;"
    when :en_dash then @html_string << "&ndash;"
    when :elipses then @html_string << "&#8202;.&thinsp;&#8202;.&thinsp;&#8202;.&thinsp;"
    when :elipses_stop then @html_string << "&#8202;.&thinsp;&#8202;.&thinsp;&#8202;.&thinsp;."

    # Links: the anchor is built from the target and text remembered earlier
    when :link
      @html_string << "<a href=\"#{@link_target}\">#{@link_text}</a>"
    when :link_target then @link_target = block.content
    when :link_text then @link_text = block.content

    # List and definition-list entries
    when :item then @html_string << "</li>"
    when :dl_header then @html_string << "</dt>"
    when :dl_text then @html_string << "</dd>"

    # Inline tags
    when :ac then @html_string << "</acronym>"
    when :emph then @html_string << "</em>"
    when :tt then @html_string << "</tt>"
  end
end
generate_node_start(block) click to toggle source

Output only the start of a node

# File lib/bayeux/html_gen.rb, line 260
# Output only the start of a node: append to @html_string the opening
# markup for the given block, followed by the block's own text where the
# node type carries any.
#
# block must respond to #type (a symbol naming the kind of node) and
# #content (the node's text). Node types not listed below add nothing
# beyond the optional separating space.
def generate_node_start(block)

  # Check if we need to add a space: one is inserted unless the node's
  # text starts with whitespace or punctuation
  unless block.content[0] =~ /\s|[:;.,?!]/
    @html_string << " "
  end

  case block.type

    # Headers: the tag name is the node type itself (h1 to h6).
    # header_id is a class-level method, so it is reached through the class.
    when :h1, :h2, :h3, :h4, :h5, :h6
      @html_string << "<#{block.type} id=\"#{self.class.header_id(block.content)}\">"

    # Ordinary paragraphs
    when :paragraph
      @html_string << "<p>#{typeset(block.content)}"

    when :none
      @html_string << "#{block.content}"

    # Special paragraphs
    when :block_quote
      @html_string << "<blockquote>#{typeset(block.content)}"
    when :single_quote
      @html_string << "&lsquo;&#8202;#{typeset(block.content)}"
    when :double_quote
      @html_string << "&ldquo;&#8202;#{typeset(block.content)}"

    when :note
      @html_string << "<p class=\"note\"><span class=\"note_header\">Note:</span>#{typeset(block.content)}"

    when :command
      @html_string << "<p class=\"command\">#{typeset(block.content)}"

    # Code settings: nothing is output, the values are only remembered
    when :code_language
      @code_language = block.content
    when :code_start_number
      @code_start_number = block.content

    # Special Characters
    when :em_dash
      @html_string << "&mdash;"
    when :en_dash
      @html_string << "&ndash;"
    when :elipses
      @html_string << "&#8202;.&thinsp;&#8202;.&thinsp;&#8202;.&thinsp;"
    when :elipses_stop
      @html_string << "&#8202;.&thinsp;&#8202;.&thinsp;&#8202;.&thinsp;."

    # Links: nothing is output, the parts are only remembered
    when :link_target
      @link_target = block.content
    when :link_text
      @link_text = block.content

    # Lists
    when :ol
      @html_string << "<ol>"
    when :ul
      @html_string << "<ul>"
    when :item
      @html_string << "<li>#{block.content}"

    when :dl
      @html_string << "<dl>"
    when :dl_header
      @html_string << "<dt>#{block.content}"
    when :dl_text
      @html_string << "<dd>#{block.content}"

    # Tags
    when :ac
      # Do we know anything about this acronym?
      if $reference.acronym_list.include?(block.content) then

        # Assemble from the reference list, preferring the HTML form of
        # the text when the entry has one
        entry = $reference.acronym_list[block.content]
        acronym = entry.include?('text_html') ? entry['text_html'] : entry['text']
        definition = entry['def']

        @html_string << "<acronym title=\"#{definition}\">#{acronym}"
      else
        # Do what we can
        @html_string << "<acronym>#{block.content}"
      end

    when :emph
      @html_string << "<em>#{block.content}"

    when :tt
      @html_string << "<tt>#{block.content}"

  end
end
indent(level) click to toggle source

Return the correct indent for the given level

# File lib/bayeux/html_gen.rb, line 487
# Return the correct indent for the given level: two spaces for every level
# below the first, for levels 1 to 6. Anything else gets an empty string.
def indent(level)
  # Compare with === so that matching follows the same rules as a
  # case/when over the literals 1..6
  depth = (1..6).find { |candidate| candidate === level }
  depth ? "  " * (depth - 1) : ""
end
typeset(string) click to toggle source

Fix Typography, according to HTML standards

# File lib/bayeux/html_gen.rb, line 468
# Fix typography, according to HTML standards.
#
# Returns a new string in which:
# * ';' and ':' are preceded by a hair space (&#8202;)
# * "'" becomes a right single quote (&rsquo;)
# * '.', '?' and '!' are preceded by a hair space and followed by a thin
#   space (&thinsp;)
# * the abbreviations "e.g." and "i.e." are then restored to their plain
#   form, followed by a space
#
# The argument itself is left untouched, so the text held by the caller is
# not re-typeset on a later call and frozen strings are accepted.
def typeset(string)
  string.
    # Semicolons go first: the entities inserted below contain ';'
    # themselves and must not be spaced out again
    gsub(';', '&#8202;;').
    gsub(':', '&#8202;:').
    gsub("'", "&rsquo;").
    gsub('.', '&#8202;.&thinsp;').
    gsub('?', '&#8202;?&thinsp;').
    gsub('!', '&#8202;!&thinsp;').
    # Undo the spacing inside the common abbreviations
    gsub('e&#8202;.&thinsp;g&#8202;.&thinsp;', 'e.g. ').
    gsub('i&#8202;.&thinsp;e&#8202;.&thinsp;', 'i.e. ')
end