class Squoosh::Squoosher

Minify HTML, JavaScript, and CSS using a single set of options. Minified versions of the JavaScript and CSS encountered are cached to speed up minification when the same scripts or inline style sheets appear multiple times.

Constants

DEFAULT_OPTIONS

Default options for minifying.

  • remove_comments Remove all comments that aren't “loud”

  • omit_tags Omit unnecessary start and end tags

  • loud_comments Keep all comments matching this regex

  • minify_javascript Minify JavaScript <script> and inline JavaScript

  • minify_css Minify CSS <style> and inline CSS

  • uglifier_options Options to pass to {www.rubydoc.info/gems/uglifier Uglifier}

  • sass_options Options to pass to {github.com/sass/sassc-ruby#readme Sassc}

ESCAPABLE_RAW_TEXT_ELEMENTS
EVENT_HANDLERS_XPATH

rubocop:disable Style/StringConcatenation

FOREIGN_ELEMENTS
HTML_WHITESPACE
INLINE_SCRIPT_OPTIONS
PHRASING_CONTENT
RAW_TEXT_ELEMENTS
VOID_ELEMENTS

Element kinds

Public Class Methods

new(options = {}) click to toggle source

Create a new instance of Squoosher.

@param options [Hash] options to override the default options

# File lib/squoosh.rb, line 69
def initialize(options = {})
  options.each do |key, _val|
    unless DEFAULT_OPTIONS.include?(key)
      raise ArgumentError, "Invalid option `#{key}'"
    end
  end
  @options = DEFAULT_OPTIONS.merge(options)
  @js_cache = {}
  @inline_script_cache = {}
  @css_cache = {}
end

Public Instance Methods

minify_css(content) click to toggle source

Minify CSS using Sassc.

@param content [String] the CSS to minify @return [String] the minified CSS

# File lib/squoosh.rb, line 103
def minify_css(content)
  @css_cache[content] ||= begin
    root = SassC::Engine.new(content, @options[:sass_options])
    root.render.rstrip
  end
end
minify_html(content) click to toggle source

Minify HTML and inline JavaScript and CSS.

If the content does not start with an HTML document type <!DOCTYPE html>, then content is returned unchanged.

@param content [String] the HTML to minify @return [String] the minified HTML

# File lib/squoosh.rb, line 88
def minify_html(content)
  doc = Nokogiri.HTML5(content)
  return content unless doc&.internal_subset&.html5_dtd?

  remove_comments(doc) if @options[:remove_comments]
  compress_javascript(doc) if @options[:minify_javascript]
  compress_css(doc) if @options[:minify_css]
  doc.children.each { |c| compress_spaces(c) } if @options[:compress_spaces]
  doc.children.map { |node| stringify_node(node) }.join
end
minify_js(content) click to toggle source

Minify JavaScript using Uglify.

@param content [String] the JavaScript to minify @return [String] the minified JavaScript

# File lib/squoosh.rb, line 114
def minify_js(content)
  @js_cache[content] ||= uglify(content, @options[:uglifier_options])
end

Private Instance Methods

compress_css(doc) click to toggle source
# File lib/squoosh.rb, line 267
def compress_css(doc)
  # Compress style elements.
  doc.xpath('//style[not(ancestor::math or ancestor::svg)]').each do |node|
    type = node['type']&.downcase
    next unless type.nil? || type == 'text/css'

    node.content = minify_css node.content
  end
  # Compress style attributes
  doc.xpath('//@style[not(ancestor::math or ancestor::svg)]').each do |node|
    elm_type = node.parent.name
    css = "#{elm_type}{#{node.content}}"
    node.content = minify_css(css)[elm_type.length + 1..-2]
  end
  nil
end
compress_javascript(doc) click to toggle source
# File lib/squoosh.rb, line 253
def compress_javascript(doc)
  # Compress script elements.
  doc.xpath('//script[not(ancestor::math or ancestor::svg)]').each do |node|
    type = node['type']&.downcase
    next unless type.nil? || type == 'text/javascript'

    node.content = compress_script(node.content)
  end
  # Compress event handlers.
  doc.xpath(EVENT_HANDLERS_XPATH).each do |attr|
    attr.content = minify_js(attr.content)
  end
end
compress_script(content) click to toggle source
# File lib/squoosh.rb, line 246
def compress_script(content)
  @inline_script_cache[content] ||= begin
    options = @options[:uglifier_options].merge(INLINE_SCRIPT_OPTIONS)
    uglify(content, options)
  end
end
compress_spaces(node) click to toggle source
# File lib/squoosh.rb, line 284
def compress_spaces(node)
  if node.text?
    if text_node_removable? node
      node.unlink
    else
      content = node.content
      content.gsub!(/[ \t\n\r\f]+/, ' ')
      content.lstrip! if trim_left? node
      content.rstrip! if trim_right? node
      node.content = content
    end
  elsif node.element? &&
        (node.name == 'pre' || node.name == 'textarea')
    # Leave the contents of these nodes alone.
  elsif normal_element?(node) || node.name == 'title'
    # Compress spaces in normal elements and title.
    node.children.each { |c| compress_spaces c }
  end
  nil
end
content_node?(node) click to toggle source
# File lib/squoosh.rb, line 418
def content_node?(node)
  # Inter-element whitespace, comment nodes, and processing instruction
  # nodes must be ignored when establishing whether an element's contents
  # match the element's content model or not, and must be ignored when
  # following algorithms that define document and element semantics.
  !(node.comment? ||
    node.processing_instruction? ||
    inter_element_whitespace?(node))
end
escapable_raw_text_element?(node) click to toggle source
# File lib/squoosh.rb, line 144
def escapable_raw_text_element?(node)
  ESCAPABLE_RAW_TEXT_ELEMENTS.include? node.name
end
first_child_content_node(node) click to toggle source
# File lib/squoosh.rb, line 442
def first_child_content_node(node)
  return nil if node.children.empty?

  node = node.children[0]
  return node if content_node?(node)

  next_sibling_content_node(node)
end
foreign_element?(node) click to toggle source
# File lib/squoosh.rb, line 148
def foreign_element?(node)
  FOREIGN_ELEMENTS.include? node.name
end
inter_element_whitespace?(node) click to toggle source
# File lib/squoosh.rb, line 162
def inter_element_whitespace?(node)
  return false unless node.text?

  node.content.each_char.all? { |c| HTML_WHITESPACE.include? c }
end
next_sibling_content_node(node) click to toggle source
# File lib/squoosh.rb, line 435
def next_sibling_content_node(node)
  while (node = node.next_sibling)
    return node if content_node?(node)
  end
  nil
end
next_sibling_is_nil_or_one_of?(node, elements) click to toggle source
# File lib/squoosh.rb, line 451
def next_sibling_is_nil_or_one_of?(node, elements)
  node = next_sibling_content_node(node)
  node.nil? || (node.element? && elements.include?(node.name))
end
next_sibling_is_one_of?(node, elements) click to toggle source
# File lib/squoosh.rb, line 456
def next_sibling_is_one_of?(node, elements)
  node = next_sibling_content_node(node)
  node&.element? && elements.include?(node.name)
end
normal_element?(node) click to toggle source
# File lib/squoosh.rb, line 152
def normal_element?(node)
  !void_element?(node) &&
    !raw_text_element?(node) &&
    !escapable_raw_text_element?(node) &&
    !foreign_element?(node)
end
omit_end_tag?(node) click to toggle source
# File lib/squoosh.rb, line 523
def omit_end_tag?(node)
  return true if void_element?(node) || self_closing?(node)
  return false unless @options[:omit_tags]
  return false if node.parent.name == 'noscript'

  next_node = node.next_sibling
  case node.name
  when 'html'
    # An html element's end tag may be omitted if the html element is not
    # immediately followed by a comment.
    return next_node.nil? || !next_node.comment?

  when 'head'
    # A head element's end tag may be omitted if the head element is not
    # immediately followed by a space character or a comment.
    return next_node.nil? ||
           (next_node.text? && !next_node.content.start_with?(' ')) ||
           !next_node.comment?

  when 'body'
    # A body element's end tag may be omitted if the body element is not
    # immediately followed by a comment.
    return next_node.nil? || !next_node.comment?

  when 'li'
    # An li element's end tag may be omitted if the li element is
    # immediately followed by another li element or if there is no more
    # content in the parent element.
    return next_sibling_is_nil_or_one_of?(node, ['li'])

  when 'dt'
    # A dt element's end tag may be omitted if the dt element is immediately
    # followed by another dt element or a dd element.
    return next_sibling_is_one_of?(node, %w[dt dd])

  when 'dd'
    # A dd element's end tag may be omitted if the dd element is immediately
    # followed by another dd element or a dt element, or if there is no more
    # content in the parent element.
    return next_sibling_is_nil_or_one_of?(node, %w[dt dd])

  when 'p'
    # A p element's end tag can be omitted if the p element is immediately
    # followed by an address, article, aside, blockquote, details, div,
    # dl, fieldset, figcaption, figure, footer, form, h1, h2, h3, h4, h5,
    # h6, header, hgroup, hr, main, menu, nav, ol, p, pre, section, table,
    # or ul element, or if there is no more content in the parent element
    # and the parent element is an HTML element that is not an a, audio,
    # del, ins, map, noscript, or video element, or an autonomous custom
    # element.
    return true if next_sibling_is_one_of?(
      node,
      %w[
        address article aside blockquote details div dl fieldset figcaption
        figure footer form h1 h2 h3 h4 h5 h6 header hgroup hr main menu nav
        ol p pre section table ul
      ]
    )
    return false if foreign_element?(node.parent)

    return !parent_contains_more_content?(node) &&
           !%(a audio del ins map noscript video).include?(node.parent.name)

  when 'rb', 'rt', 'rp'
    # An rb element's end tag may be omitted if the rb element is
    # immediately followed by an rb, rt, rtc or rp element, or if there is
    # no more content in the parent element.
    #
    # An rt element's end tag may be omitted if the rt element is
    # immediately followed by an rb, rt, rtc, or rp element, or if there
    # is no more content in the parent element.
    #
    # An rp element's end tag may be omitted if the rp element is
    # immediately followed by an rb, rt, rtc or rp element, or if there is
    # no more content in the parent element.
    return next_sibling_is_nil_or_one_of?(node, %w[rb rt rtc rp])

  when 'rtc'
    # An rtc element's end tag may be omitted if the rtc element is
    # immediately followed by an rb, rtc or rp element, or if there is no
    # more content in the parent element.
    return next_sibling_is_nil_or_one_of?(node, %w[rb rtc rp])

  when 'optgroup'
    # An optgroup element's end tag may be omitted if the optgroup element
    # is immediately followed by another optgroup element, or if there is
    # no more content in the parent element.
    return next_sibling_is_nil_or_one_of?(node, ['optgroup'])

  when 'option'
    # An option element's end tag may be omitted if the option element is
    # immediately followed by another option element, or if it is
    # immediately followed by an optgroup element, or if there is no more
    # content in the parent element.
    return next_sibling_is_nil_or_one_of?(node, %w[option optgroup])

  when 'colgroup'
    # A colgroup element's end tag may be omitted if the colgroup element is
    # not immediately followed by a space character or a comment.
    return true if next_node.nil?
    return !next_node.content.start_with?(' ') if next_node.text?

    return !next_node.comment?

  when 'thead'
    # A thead element's end tag may be omitted if the thead element is
    # immediately followed by a tbody or tfoot element.
    return next_sibling_is_one_of?(node, %w[tbody tfoot])

  when 'tbody'
    # A tbody element's end tag may be omitted if the tbody element is
    # immediately followed by a tbody or tfoot element, or if there is no
    # more content in the parent element.
    return next_sibling_is_nil_or_one_of?(node, %w[tbody tfoot])

  when 'tfoot'
    # A tfoot element's end tag can be omitted if there is no more content
    # in the parent element.
    return !parent_contains_more_content?(node)

  when 'tr'
    # A tr element's end tag may be omitted if the tr element is immediately
    # followed by another tr element, or if there is no more content in the
    # parent element.
    return next_sibling_is_nil_or_one_of?(node, ['tr'])

  when 'td', 'th'
    # A td element's end tag may be omitted if the td element is immediately
    # followed by a td or th element, or if there is no more content in the
    # parent element.
    #
    # A th element's end tag may be omitted if the th element is immediately
    # followed by a td or th element, or if there is no more content in the
    # parent element.
    return next_sibling_is_nil_or_one_of?(node, %w[td th])
  end
  false
end
omit_start_tag?(node) click to toggle source
# File lib/squoosh.rb, line 465
def omit_start_tag?(node)
  return false unless @options[:omit_tags]
  return false unless node.attributes.empty?

  case node.name
  when 'html'
    # An html element's start tag may be omitted if the first thing inside
    # the html element is not a comment.
    return node.children.empty? || !node.children[0].comment?

  when 'head'
    # A head element's start tag may be omitted if the element is empty,
    # or if the first thing inside the head element is an element.
    return node.children.empty? || node.children[0].element?

  when 'body'
    # A body element's start tag may be omitted if the element is empty,
    # or if the first thing inside the body element is not a space
    # character or a comment, except if the first thing inside the body
    # element is a meta, link, script, style, or template element.
    return true if node.children.empty?

    c = node.children[0]
    return !c.content.start_with?(' ') if c.text?
    return false if c.comment?

    return !c.element? ||
           !%w[meta link script style template].include?(c.name)

  when 'colgroup'
    # A colgroup element's start tag may be omitted if the first thing
    # inside the colgroup element is a col element, and if the element is
    # not immediately preceded by another colgroup element whose end tag
    # has been omitted. (It can't be omitted if the element is empty.)
    child = first_child_content_node(node)
    return false if child.nil? || !child.element? || child.name != 'col'

    prev_node = previous_sibling_content_node(node)
    return !(prev_node&.element? &&
             prev_node.name == 'colgroup' &&
             omit_end_tag?(prev_node))

  when 'tbody'
    # A tbody element's start tag may be omitted if the first thing inside
    # the tbody element is a tr element, and if the element is not
    # immediately preceded by a tbody, thead, or tfoot element whose end
    # tag has been omitted. (It can't be omitted if the element is empty.)
    child = first_child_content_node(node)
    return false if child.nil? || !child.element? || child.name != 'tr'

    prev_node = previous_sibling_content_node(node)
    return !(prev_node&.element? &&
             %w[tbody thead tfoot].include?(prev_node.name) &&
             omit_end_tag?(prev_node))
  end
  false
end
parent_contains_more_content?(node) click to toggle source
# File lib/squoosh.rb, line 461
def parent_contains_more_content?(node)
  !next_sibling_content_node(node).nil?
end
phrasing_content?(node) click to toggle source
# File lib/squoosh.rb, line 178
def phrasing_content?(node)
  name = node.name
  PHRASING_CONTENT.include?(name)
end
preserve_comment?(node) click to toggle source
# File lib/squoosh.rb, line 198
def preserve_comment?(node)
  content = node.content
  return true if content.start_with? '[if '
  return true if /\A\s*!/ =~ content

  # Support other retained comments?
  false
end
previous_sibling_content_node(node) click to toggle source
# File lib/squoosh.rb, line 428
def previous_sibling_content_node(node)
  while (node = node.previous_sibling)
    return node if content_node?(node)
  end
  nil
end
qualified_attribute_name(attr) click to toggle source
# File lib/squoosh.rb, line 390
def qualified_attribute_name(attr)
  ns = attr.namespace
  return attr.name if ns.nil?

  uri = ns.href
  if uri == Nokogiri::HTML5::XML_NAMESPACE
    "xml:#{attr.name}"
  elsif uri == Nokogiri::HTML5::XMLNS_NAMESPACE && attr.name == 'xmlns'
    'xmlns'
  elsif uri == Nokogiri::HTML5::XMLNS_NAMESPACE
    "xmlns:#{attr.name}"
  elsif uri == Nokogiri::HTML5::XLINK_NAMESPACE
    "xlink:#{attr.name}"
  else
    # :nocov:
    raise 'Unreachable!'
    # :nocov:
  end
end
raw_text_element?(node) click to toggle source
# File lib/squoosh.rb, line 140
def raw_text_element?(node)
  RAW_TEXT_ELEMENTS.include? node.name
end
remove_comments(doc) click to toggle source
# File lib/squoosh.rb, line 183
def remove_comments(doc)
  doc.xpath('//comment()').each do |node|
    next if preserve_comment?(node)

    prev_node = node.previous_sibling
    next_node = node.next_sibling
    node.unlink
    if prev_node&.text? && next_node&.text?
      prev_node.content += next_node.content
      next_node.unlink
    end
  end
  nil
end
self_closing?(node) click to toggle source
# File lib/squoosh.rb, line 410
def self_closing?(node)
  # If we're not omitting end tags, then don't mark foreign elements as
  # self closing.
  return false unless @options[:omit_tags]

  foreign_element?(node) && node.children.empty?
end
stringify_node(node) click to toggle source
# File lib/squoosh.rb, line 330
def stringify_node(node)
  return node.to_html(encoding: 'UTF-8') unless node.element?

  output = StringIO.new
  # Add start tag. 8.1.2.1
  unless omit_start_tag? node
    output << "<#{node.name}"

    # Add attributes. 8.1.2.3
    last_attr_unquoted = false
    node.attribute_nodes.each do |attr|
      name = qualified_attribute_name(attr)
      last_attr_unquoted = false
      # Make sure there are no character references.
      # XXX: We should be able to compress a bit more by leaving bare & in
      # some cases.
      # value = (attr.value || '')
      # value.gsub!(/&([a-zA-Z0-9]+;|#[0-9]+|#[xX][a-fA-F0-9]+)/, '&amp;\1')
      value = (attr.value || '').gsub('&', '&amp;')
      if value.empty?
        output << " #{name}"
      elsif /[\t\n\f\r "'`=<>]/ !~ value
        last_attr_unquoted = true
        output << " #{name}=#{value}"
      elsif !value.include?('"')
        output << " #{name}=\"#{value}\""
      elsif !value.include?("'")
        output << " #{name}='#{value}'"
      else
        # Contains both ' and ".
        output << " #{name}=\"#{value.gsub('"', '&#34;')}\""
      end
    end

    # Close start tag.
    if self_closing? node
      output << ' ' if last_attr_unquoted
      output << '/'
    end
    output << '>'
  end

  # If pre or textarea start with a newline, double it because the HTML
  # parser strips leading newlines.
  if (node.name == 'pre' || node.name == 'textarea') &&
     !node.children.empty?
    first_child = node.children[0]
    if first_child.text? && first_child.content.start_with?("\n")
      output << "\n"
    end
  end

  # Add content.
  output << node.children.map { |c| stringify_node c }.join

  # Add end tag. 8.1.2.2
  output << "</#{node.name}>" unless omit_end_tag? node
  output.string
end
text_node_removable?(node) click to toggle source

Be conservative. If an element can be phrasing content, assume it is.

# File lib/squoosh.rb, line 306
def text_node_removable?(node)
  return false unless inter_element_whitespace?(node)
  return false if phrasing_content?(node.parent)

  prev_elm = node.previous_element
  next_elm = node.next_element
  prev_elm.nil? || !phrasing_content?(prev_elm) ||
    next_elm.nil? || !phrasing_content?(next_elm)
end
trim_left?(node) click to toggle source
# File lib/squoosh.rb, line 316
def trim_left?(node)
  prev_elm = node.previous_element
  return !phrasing_content?(node.parent) if prev_elm.nil?

  prev_elm.name == 'br'
end
trim_right?(node) click to toggle source
# File lib/squoosh.rb, line 323
def trim_right?(node)
  next_elm = node.next_element
  return !phrasing_content?(node.parent) if next_elm.nil?

  next_elm.name == 'br'
end
uglify(content, options) click to toggle source

rubocop:enable Style/StringConcatenation

# File lib/squoosh.rb, line 240
def uglify(content, options)
  js = Uglifier.compile(content, options)
  js.chomp!(';')
  js
end
void_element?(node) click to toggle source
# File lib/squoosh.rb, line 136
def void_element?(node)
  VOID_ELEMENTS.include? node.name
end