class XmlCanonicalizer

Constants

AFTER_DOC_ELEMENT
BEFORE_DOC_ELEMENT
INSIDE_DOC_ELEMENT
NODE_TYPE_ATTRIBUTE
NODE_TYPE_COMMENT
NODE_TYPE_PI
NODE_TYPE_TEXT
NODE_TYPE_WHITESPACE

Attributes

logger[RW]
prefix_list[RW]

Public Class Methods

new(with_comments, excl_c14n) click to toggle source
# File lib/xmlcanonicalizer.rb, line 90
# Sets up a canonicalizer with empty output and empty namespace bookkeeping.
#
# with_comments:: stored in @with_comments.
# excl_c14n::     stored in @exclusive; write_attribute_axis consults it
#                 before inheriting xml:* attributes from ancestors.
def initialize(with_comments, excl_c14n)
  @with_comments, @exclusive = with_comments, excl_c14n

  # Output buffer and document position.
  @res = ""
  @state = BEFORE_DOC_ELEMENT

  # Node-set restriction; is_node_visible treats an empty list as
  # "every node is visible".
  @xnl = []

  # Namespace bookkeeping.
  @prevVisibleNamespacesStart = 0
  @prevVisibleNamespacesEnd = 0
  @visibleNamespaces = []
  @inclusive_namespaces = []
  @prefix_list = nil
  @rendered_prefixes = []

  # Logging is off by default; assign a logger (for example
  # Logger.new("xmlcanonicalizer.log") at level Logger::DEBUG) to enable it.
  @logger = nil
end

Public Instance Methods

add_inclusive_namespaces(prefix_list, element, visible_namespaces) click to toggle source
# File lib/xmlcanonicalizer.rb, line 106
# Collects the namespace declarations named in +prefix_list+ from +element+
# and each of its ancestors, nearest first.
#
# For every attribute whose prefix is "xmlns" and whose local name appears in
# +prefix_list+, a NamespaceNode ("xmlns:<local name>", value) is appended to
# +visible_namespaces+.
#
# Returns +visible_namespaces+ (the same object that was passed in).
def add_inclusive_namespaces(prefix_list, element, visible_namespaces)
  current = element
  loop do
    current.attributes.each_attribute do |declaration|
      next unless declaration.prefix == "xmlns"
      next unless prefix_list.include?(declaration.local_name)

      qualified = "xmlns:" + declaration.local_name
      visible_namespaces.push(NamespaceNode.new(qualified, declaration.value))
    end
    current = current.parent
    break unless current
  end
  visible_namespaces
end
canonicalize(document) click to toggle source
# File lib/xmlcanonicalizer.rb, line 120
# Canonicalizes +document+ by handing it to write_document_node, logs the
# accumulated output at debug level when a logger is set, and returns that
# output (@res).
def canonicalize(document)
  write_document_node(document)
  if @logger
    @logger.debug("\nCanonicalized result\n" + @res.to_s)
  end
  @res
end
canonicalize_element(element, logging = true) click to toggle source
# File lib/xmlcanonicalizer.rb, line 126
# Canonicalizes a single element and returns the accumulated output (@res).
#
# element:: the element to canonicalize. NOTE: the caller's tree is modified;
#           the element is removed from its parent and replaced by a
#           whitespace-stripped, reparsed copy.
# logging:: accepted but never read in this method.
#
# Raises NoMethodError if +element+ has no parent, because delete_element and
# add_element are called on element.parent().
def canonicalize_element(element, logging = true)
  @logger.debug("Canonicalize element:\n" + element.to_s()) if @logger
  # Only when a prefix list was supplied: gather the matching xmlns
  # declarations from the element and its ancestors.
  @inclusive_namespaces = add_inclusive_namespaces(@prefix_list, element, @inclusive_namespaces) if (@prefix_list)
  @preserve_document = element.document()
  tmp_parent = element.parent()
  # Serialize, drop every newline/tab/carriage return, then let
  # remove_whitespace strip the spaces that follow a '>' before reparsing.
  body_string = remove_whitespace(element.to_s().gsub("\n","").gsub("\t","").gsub("\r",""))
  # NOTE(review): Document is referenced unqualified; presumably
  # REXML::Document is in scope via an include elsewhere. Confirm.
  document = Document.new(body_string)
  # Swap the original element for the reparsed one inside the original tree.
  tmp_parent.delete_element(element)
  element = tmp_parent.add_element(document.root())
  # write_namespace_axis falls back to this element when a prefix cannot be
  # resolved on the node being written.
  @preserve_element = element
  # Build a standalone document from the re-attached element and copy the
  # namespace bound to the element's own prefix onto its root.
  document = Document.new(element.to_s())
  ns = element.namespace(element.prefix())
  document.root().add_namespace(element.prefix(), ns)
  write_document_node(document)
  @logger.debug("Canonicalized result:\n" + @res.to_s()) if @logger
  @res
end
is_namespace_decl(attribute) click to toggle source
# File lib/xmlcanonicalizer.rb, line 364
# True when the attribute's prefix begins with "xmlns", i.e. the attribute is
# a prefixed namespace declaration; false otherwise.
def is_namespace_decl(attribute)
  attribute.prefix.start_with?("xmlns")
end
is_namespace_node(namespace_uri) click to toggle source
# File lib/xmlcanonicalizer.rb, line 291
# True when +namespace_uri+ equals "http://www.w3.org/2000/xmlns/", the URI
# this method compares against to recognise namespace declarations.
def is_namespace_node(namespace_uri)
  namespace_uri == "http://www.w3.org/2000/xmlns/"
end
is_namespace_rendered(prefix, uri) click to toggle source
# File lib/xmlcanonicalizer.rb, line 295
# Reports whether the namespace binding (+prefix+, +uri+) is already among the
# currently visible namespaces.
#
# Every entry of @visibleNamespaces is examined, not only the ones added since
# @prevVisibleNamespacesStart. An entry matches when its prefix is
# "xmlns:<prefix>" and its uri equals +uri+.
#
# prefix:: namespace prefix, or nil.
# uri::    namespace URI, or nil.
#
# Returns true on a match. Without a match, returns true only for the empty
# namespace (+prefix+ and +uri+ both nil), false otherwise.
def is_namespace_rendered(prefix, uri)
  # Built once; nil.to_s is "", so a nil prefix looks up "xmlns:".
  qualified = "xmlns:" + prefix.to_s
  return true if @visibleNamespaces.any? { |ns| ns.prefix == qualified && ns.uri == uri }

  prefix.nil? && uri.nil?
end
is_node_visible(node) click to toggle source
# File lib/xmlcanonicalizer.rb, line 317
# Decides whether +node+ belongs to the output.
#
# When @xnl is empty every node is visible; otherwise a node is visible only
# if some entry of @xnl is == to it.
def is_node_visible(node)
  @xnl.empty? || @xnl.any? { |candidate| candidate == node }
end
is_text_node(type) click to toggle source
# File lib/xmlcanonicalizer.rb, line 370
# True when +type+ is one of the text-like node type constants:
# NODE_TYPE_TEXT, NODE_TYPE_WHITESPACE or, when it exists, NODE_TYPE_CDATA.
#
# NODE_TYPE_CDATA is not among the constants documented for this class, so an
# unguarded reference raised NameError for every type other than
# NODE_TYPE_TEXT. It is now compared only when the constant is defined.
def is_text_node(type)
  return true if type == NODE_TYPE_TEXT || type == NODE_TYPE_WHITESPACE
  return true if defined?(NODE_TYPE_CDATA) && type == NODE_TYPE_CDATA

  false
end
normalize_string(input, type) click to toggle source
# File lib/xmlcanonicalizer.rb, line 325
# Returns +input+ unchanged (the very same object).
#
# No character escaping is performed and +type+ is ignored; the parameter is
# kept so existing callers continue to work.
def normalize_string(input, type)
  input
end
remove_whitespace(string) click to toggle source
# File lib/xmlcanonicalizer.rb, line 375
# Returns a copy of +string+ in which every space byte (0x20) located between
# a '>' and the next '<' has been removed. Spaces before the first '>' and
# spaces inside a tag (between '<' and '>') are kept.
#
# The scan is byte-wise, so bytes that are not valid in the string's encoding
# pass through untouched. The result carries the encoding of +string+;
# previously non-ASCII input came back tagged as ASCII-8BIT.
def remove_whitespace(string)
  kept = String.new # binary buffer; Integer bytes are appended verbatim
  after_tag = false
  string.each_byte do |byte|
    # The current byte is judged against the state left by the previous one.
    kept << byte unless after_tag && byte == 32
    case byte
    when 62 then after_tag = true  # '>'
    when 60 then after_tag = false # '<'
    end
  end
  kept.force_encoding(string.encoding)
end
white_text?(text) click to toggle source
# File lib/xmlcanonicalizer.rb, line 359
# True when +text+ is empty or consists only of whitespace that String#strip
# removes; false otherwise.
#
# String#strip never returns nil, so the former nil comparison could not match
# and has been dropped along with the second strip call.
def white_text?(text)
  text.strip.empty?
end
write_attribute_axis(node) click to toggle source
# File lib/xmlcanonicalizer.rb, line 253
# Appends the attributes of +node+ to @res, each preceded by one space and
# with single quotes replaced by double quotes.
#
# 1. The node's own attributes are taken in sorted key order, skipping
#    namespace declarations.
# 2. For non-exclusive canonicalization of a document subset (the node's
#    grandparent is not visible), the nearest xml:* attributes of the
#    ancestors are inherited, unless the list already holds an xml:* attribute
#    with the same name. When the grandparent is visible nothing is inherited,
#    as Canonical XML only inherits from ancestors omitted from the node-set.
# 3. Every collected attribute except one named "xmlns" is written.
#
# Fixes relative to the previous version: the ancestor cursor now advances
# (the loop could not terminate), the duplicate flag no longer starts out
# true, the misspelled variable is gone, duplicates are detected by name
# instead of by value, and ancestors are read through attributes.each_attribute
# exactly as add_inclusive_namespaces does.
#
# NOTE(review): inherited attributes are appended after the sorted own
# attributes and are not re-sorted. Confirm whether a merged sort is wanted.
#
# Returns the list of attributes that was considered for output.
def write_attribute_axis(node)
  list = []
  node.attributes.sort.each do |key, value|
    attr = REXML::Attribute.new(key, value)
    list.push(attr) if !is_namespace_node(attr.value) && !is_namespace_decl(attr)
  end

  grandparent = node.parent && node.parent.parent
  if !@exclusive && grandparent && !is_node_visible(grandparent)
    cur = node.parent
    while cur
      cur.attributes.each_attribute do |attribute|
        next if attribute.prefix != "xml"
        # Kept from the original: skip when the attribute value equals the
        # namespace URI the node resolves for that prefix.
        next if node.namespace(attribute.prefix) == attribute.value

        # Nearest occurrence wins, and the node's own xml:* attributes take
        # precedence over inherited ones.
        already_listed = list.any? { |n| n.prefix == "xml" && n.name == attribute.name }
        list.push(attribute) unless already_listed
      end
      cur = cur.parent
    end
  end

  list.each do |attribute|
    next if attribute.nil? || attribute.name == "xmlns"

    @res = @res + " " + normalize_string(attribute.to_string, NODE_TYPE_ATTRIBUTE).gsub("'", '"')
  end
end
write_document_node(document) click to toggle source
# File lib/xmlcanonicalizer.rb, line 144
# Resets the state to BEFORE_DOC_ELEMENT and writes +document+.
#
# An object whose class name is exactly "REXML::Element" is written as a
# single node; anything else is treated as a container and each of its
# children is written in order.
#
# Returns the accumulated output (@res).
def write_document_node(document)
  @state = BEFORE_DOC_ELEMENT
  if document.class.to_s != "REXML::Element"
    document.each_child { |child| write_node(child) }
    return @res
  end
  write_node(document)
  @res
end
write_element_node(node, visible) click to toggle source
# File lib/xmlcanonicalizer.rb, line 181
# Writes the element +node+ and its children.
#
# When +visible+ is true the start tag, namespace axis, attribute axis and end
# tag are appended to @res. Children are visited either way. The namespace
# bookkeeping (@prevVisibleNamespacesStart, @prevVisibleNamespacesEnd and the
# tail of @visibleNamespaces) is restored afterwards, so declarations made by
# this element do not leak to its siblings.
#
# State handling: the first visible element reached while @state is
# BEFORE_DOC_ELEMENT is treated as the document element. @state becomes
# INSIDE_DOC_ELEMENT while it is written and AFTER_DOC_ELEMENT once it is
# closed. Previously both transitions were applied to a local copy, so @state
# never changed.
#
# NOTE(review): write_attribute_axis is called even when +visible+ is false,
# as before. Confirm whether invisible elements should contribute attributes.
def write_element_node(node, visible)
  saved_start = @prevVisibleNamespacesStart
  saved_end = @prevVisibleNamespacesEnd
  saved_size = @visibleNamespaces.size

  is_doc_element = visible && @state == BEFORE_DOC_ELEMENT
  @state = INSIDE_DOC_ELEMENT if is_doc_element

  @res = @res + "<" + node.expanded_name if visible
  write_namespace_axis(node, visible)
  write_attribute_axis(node)
  @res = @res + ">" if visible
  node.each_child { |child| write_node(child) }
  @res = @res + "</" + node.expanded_name + ">" if visible

  @state = AFTER_DOC_ELEMENT if is_doc_element

  @prevVisibleNamespacesStart = saved_start
  @prevVisibleNamespacesEnd = saved_end
  # Drop the namespaces that were pushed while this element was open.
  @visibleNamespaces.slice!(saved_size, @visibleNamespaces.size - saved_size) if @visibleNamespaces.size > saved_size
end
write_namespace_axis(node, visible) click to toggle source
# File lib/xmlcanonicalizer.rb, line 201
# Appends the namespace declarations that +node+ needs to @res and records
# the node's bindings in @visibleNamespaces.
#
# node::    the element being written.
# visible:: when false nothing is pushed to @visibleNamespaces and the
#           prev-visible counters are left alone; the prefix list built
#           below is still written.
#
# NOTE(review): node_namespaces is not defined in this file view; presumably
# an extension of the REXML element that yields the prefixes declared on the
# node. Confirm.
def write_namespace_axis(node, visible)
  # doc is only referenced by the commented-out ancestor loop below.
  doc = node.document()
  has_empty_namespace = false
  list = Array.new()
  # cur never moves away from node now that the ancestor walk is disabled.
  cur = node
  #while ((cur != nil) && (cur != doc) && (cur.node_type() != :document))
  namespaces = cur.node_namespaces()
  namespaces.each{|prefix|
    # Skip a default-namespace entry that resolves to the empty string.
    next if ((prefix == "xmlns") && (node.namespace(prefix) == ""))
    namespace = cur.namespace(prefix)
    # Skip bindings to the xmlns namespace URI itself.
    next if (is_namespace_node(namespace))
    # With cur == node both sides are the same lookup; the check mattered
    # only for the disabled ancestor walk.
    next if (node.namespace(prefix) != cur.namespace(prefix))
    # The built-in xml prefix is never declared.
    next if (prefix == "xml" && namespace == "http://www.w3.org/XML/1998/namespace")
    next if (!is_node_visible(cur))
    # Ask before pushing, so the binding being added is not counted as
    # already rendered.
    rendered = is_namespace_rendered(prefix, namespace)
    @visibleNamespaces.push(NamespaceNode.new("xmlns:"+prefix,namespace)) if (visible)
    if ((!rendered) && !list.include?(prefix))
      list.push(prefix)
    end
    has_empty_namespace = true if (prefix == nil)
  }
  # Undeclare the default namespace when the node did not carry one and the
  # empty namespace is not reported as rendered.
  # NOTE(review): as is_namespace_rendered is written in this file, a
  # (nil, nil) query always yields true, so this branch looks unreachable.
  if (visible && !has_empty_namespace && !is_namespace_rendered(nil, nil))
    @res = @res + ' xmlns=""'
  end
  #TODO: ns of inclusive_list
  #=begin
  # Inclusive prefix list: taken when a prefix list is set and the node
  # serializes to the same text as its parent (presumably the root of the
  # reparsed document; confirm). Clearing @prefix_list afterwards makes
  # this run at most once.
  if ((@prefix_list) && (node.to_s() == node.parent().to_s()))
    #list.push(node.prefix())
    @inclusive_namespaces.each{|ns|
      # Stored as "xmlns:<prefix>"; keep only the part after the colon.
      prefix = ns.prefix().split(":")[1]
      list.push(prefix) if (!list.include?(prefix) && (!node.attributes.prefixes.include?(prefix)))
    }
    @prefix_list = nil
  end
  #=end
  # Sorted output, with the default namespace declaration moved to the front.
  list.sort!()
  list.insert(0, "xmlns") unless list.delete("xmlns").nil?
  list.each{|prefix|
    next if (prefix == "")
    # This method only ever appends to @rendered_prefixes, so a prefix
    # written once is not written again by later calls.
    next if (@rendered_prefixes.include?(prefix))
    @rendered_prefixes.push(prefix)
    ns = node.namespace(prefix)
    # Fall back to the element remembered by canonicalize_element; it is nil
    # unless canonicalize_element has run.
    ns = @preserve_element.namespace(prefix) if (ns == nil)
    # Default namespace is written as xmlns="uri", any other prefix as
    # xmlns:prefix="uri".
    @res = @res + normalize_string(" " + prefix + '="' + ns + '"', NODE_TYPE_TEXT) if (prefix == "xmlns")
    @res = @res + normalize_string(" xmlns:" + prefix + '="' + ns + '"', NODE_TYPE_TEXT) if (prefix != nil && prefix != "xmlns")
  }
  # Shift the window: the previous end becomes the start, and the current
  # size of @visibleNamespaces becomes the new end.
  if (visible)
    @prevVisibleNamespacesStart = @prevVisibleNamespacesEnd
    @prevVisibleNamespacesEnd = @visibleNamespaces.size()
  end
end
write_node(node) click to toggle source
# File lib/xmlcanonicalizer.rb, line 156
# Writes a single node according to its node_type.
#
# :text::    whitespace-only text is appended to @res directly, whether or
#            not the node is visible, after turning "\r\n" into "\n" and
#            removing any remaining "\r". Other text goes through
#            write_text_node.
# :element:: written through write_element_node unless the node is already
#            marked as rendered, then marked as rendered.
# :processing_instruction, :comment:: nothing is written.
#
# The line-ending cleanup used to call the non-mutating String#gsub and
# String#delete and discard their results, so carriage returns were never
# removed; the cleaned string is now what gets appended.
#
# Always returns nil.
def write_node(node)
  visible = is_node_visible(node)
  case node.node_type
  when :text
    if white_text?(node.value)
      @res = @res + node.value.gsub("\r\n", "\n").delete("\r")
    else
      write_text_node(node, visible)
    end
  when :element
    write_element_node(node, visible) unless node.rendered?
    node.rendered = true
  when :processing_instruction, :comment
    # Intentionally produces no output.
  end
  nil
end
write_text_node(node, visible) click to toggle source

input.each_byte{|b|

if (b ==60 && (type == NODE_TYPE_ATTRIBUTE || is_text_node(type)))
        sb = sb + "&lt;"
elsif (b == 62 && is_text_node(type))
        sb = sb + "&gt;"
elsif (b == 38 && (is_text_node(type) || is_text_node(type))) #Ampersand
        sb = sb + "&amp;"
elsif (b == 34 && is_text_node(type)) #Quote
        sb = sb + "&quot;"
elsif (b == 9 && is_text_node(type)) #Tabulator
        sb = sb + "&#x9;"
elsif (b == 11 && is_text_node(type)) #CR
        sb = sb + "&#xA;"
elsif (b == 13 && (type == NODE_TYPE_ATTRIBUTE || (is_text_node(type) && type != NODE_TYPE_WHITESPACE) || type == NODE_TYPE_COMMENT || type == NODE_TYPE_PI))
        sb = sb + "&#xD;"
elsif (b == 13)
        next
else
        sb = sb.concat(b)
end

} sb end

# File lib/xmlcanonicalizer.rb, line 353
# Appends the value of a text node to @res after passing it through
# normalize_string (with the node's node_type as the type argument).
# Does nothing and returns nil when +visible+ is false.
def write_text_node(node, visible)
  return unless visible

  @res = @res + normalize_string(node.value, node.node_type)
end