module Asciidoctor::DocTest::HTML::Normalizer

Module to be included into Nokogiri::HTML::Document or DocumentFragment to add {#normalize!} feature.

@example

Nokogiri::HTML.parse(str).normalize!
Nokogiri::HTML.fragment(str).normalize!

Constants

HTML_INLINE_ELEMENTS

Public Instance Methods

normalize!() click to toggle source

Normalizes the HTML document or fragment so it can be easily compared with another HTML.

What does it actually do?

  • sorts element attributes by name

  • sorts inline CSS declarations inside a style attribute by name

  • removes all blank text nodes (i.e. nodes that contain only whitespace)

  • strips nonsignificant leading and trailing whitespaces around text

  • strips nonsignificant repeated whitespaces

@return [Object] self

# File lib/asciidoctor/doctest/html/normalizer.rb, line 31
# Normalizes this HTML document/fragment in place so that it can be compared
# with another HTML tree.
#
# Element nodes get their attributes and inline style declarations sorted;
# blank text nodes are removed; remaining text nodes outside of a +<pre>+
# block get their whitespace collapsed and trimmed.
#
# @return [Object] self
def normalize!
  traverse do |node|
    kind = node.type

    if kind == Nokogiri::XML::Node::ELEMENT_NODE
      sort_element_attrs!(node)
      sort_element_style_attr!(node)
    elsif kind == Nokogiri::XML::Node::TEXT_NODE
      # A text node holding nothing but whitespace is dropped entirely.
      next node.remove if node.blank?
      # Whitespace inside preformatted blocks is left untouched.
      next if preformatted_block?(node)

      strip_redundant_spaces!(node)
      strip_spaces_around_text!(node)
    end
  end

  self
end

Private Instance Methods

inline_element?(node) click to toggle source

@return [Boolean] true if the node represents an inline HTML element.

# File lib/asciidoctor/doctest/html/normalizer.rb, line 106
# Tells whether the node is an element whose name is listed in
# HTML_INLINE_ELEMENTS.
#
# @param node the node to examine; must respond to +element?+ and +name+.
# @return [Boolean] true if the node represents an inline HTML element.
def inline_element?(node)
  element = node.element?
  # Non-element nodes (text, comments, ...) are never inline elements.
  return element unless element

  HTML_INLINE_ELEMENTS.include?(node.name)
end
preformatted_block?(node) click to toggle source

@return [Boolean] true if the node is a descendant of a +<pre>+ node.

# File lib/asciidoctor/doctest/html/normalizer.rb, line 111
# Tells whether the node lives inside a +<pre>+ element, judging by the
# node's path.
#
# The path step for +<pre>+ may carry a positional predicate (e.g.
# +/pre[2]/+) when the element has same-named siblings, so the predicate is
# accepted as an optional part of the step.
#
# @param node the node to examine; must respond to +path+.
# @return [Boolean] true if the node is a descendant of a +<pre>+ node.
def preformatted_block?(node)
  !(node.path =~ %r{/pre(?:\[\d+\])?/}).nil?
end
sort_element_attrs!(node) click to toggle source

Sorts attributes of the element node by name.

# File lib/asciidoctor/doctest/html/normalizer.rb, line 56
# Reorders the attributes of the element node alphabetically by name.
#
# Each attribute is deleted and assigned again in sorted order, so the
# node ends up holding its attributes in that order.
#
# @param node the element node whose attributes should be sorted.
def sort_element_attrs!(node)
  ordered = node.attributes.sort_by { |attr_name, _attr| attr_name }

  ordered.each do |attr_name, attr|
    node.delete(attr_name)
    node[attr_name] = attr
  end
end
sort_element_style_attr!(node) click to toggle source

Sorts CSS declarations in style attribute of the element node by name.

# File lib/asciidoctor/doctest/html/normalizer.rb, line 64
# Sorts CSS declarations in the style attribute of the element node by
# property name and rewrites the attribute as "name: value;" pairs joined
# by a single space.
#
# Values are stripped of surrounding whitespace so that declarations
# differing only in spacing around ";" normalize to the same text.
# Declarations sharing a property name keep their original relative order.
#
# @param node the element node to process; nothing happens when it has no
#   style attribute.
def sort_element_style_attr!(node)
  return unless node.has_attribute? 'style'

  decls = node['style'].scan(/([\w-]+):\s*([^;]+);?/)
  # sort_by is not guaranteed to be stable; the index works as a tie-breaker
  # so repeated properties (e.g. fallback values) are not reordered.
  sorted = decls.each_with_index.sort_by { |(name, _val), idx| [name, idx] }.map(&:first)

  node['style'] = sorted.map { |name, val| "#{name}: #{val.strip};" }.join(' ')
end
strip_redundant_spaces!(node) click to toggle source

Strips repeated whitespaces in the text node.

# File lib/asciidoctor/doctest/html/normalizer.rb, line 73
# Collapses repeated whitespace in the text node.
#
# Newlines are first turned into spaces; then every run of whitespace
# characters is replaced by the last character of that run.
#
# @param node the text node to process; must respond to +content+ and
#   +content=+.
def strip_redundant_spaces!(node)
  single_line = node.content.tr("\n", ' ')
  node.content = single_line.gsub(/\s+/) { |run| run[-1] }
end
strip_spaces_around_text!(node) click to toggle source

Strips nonsignificant leading and trailing whitespaces in the text node.

# File lib/asciidoctor/doctest/html/normalizer.rb, line 78
# Removes nonsignificant whitespace at the edges of the text node.
#
# Leading whitespace is stripped when the node sits on the left boundary of
# its text block, trailing whitespace when it sits on the right boundary
# (as decided by +text_block_boundary?+).
#
# @param node the text node to process; must respond to +content+ and
#   +content=+.
def strip_spaces_around_text!(node)
  node.content = node.content.lstrip if text_block_boundary?(node, :left)
  return unless text_block_boundary?(node, :right)

  node.content = node.content.rstrip
end
text_block_boundary?(node, side) click to toggle source

Returns true if the text node is the first (:left), or the last (:right) inline element of the nearest block element ancestor, or a direct sibling of a +<br>+ element.

@return [Boolean]

# File lib/asciidoctor/doctest/html/normalizer.rb, line 90
# Decides whether the text node sits on the given edge of its text block.
#
# The node is on the boundary when its neighbour on that side is a +<br>+,
# or when, walking up through inline ancestors, no text node or inline
# element is found next to it on that side before a non-inline ancestor is
# reached.
#
# @param node the text node to examine.
# @param side [Symbol] +:left+ to look at previous siblings, +:right+ to
#   look at next siblings.
# @return [Boolean]
def text_block_boundary?(node, side)
  accessor = case side
             when :left then :previous_sibling
             when :right then :next_sibling
             end

  adjacent = node.send(accessor)
  return true if adjacent.try(:name) == 'br'

  current = node
  loop do
    neighbour = current.send(accessor)
    # Text or an inline element right next to us means we are mid-line.
    return false if neighbour && (neighbour.text? || inline_element?(neighbour))

    # Nothing on this level; keep climbing while still inside inline markup.
    current = current.parent
    return true unless inline_element?(current)
  end
end