module Oga::XML::Entities

Module for encoding/decoding XML and HTML entities. The mapping of HTML entities can be found in {Oga::HTML::Entities::DECODE_MAPPING}.

Constants

AMPERSAND

@return [String]

DECODE_MAPPING

Hash containing XML entities and the corresponding characters.

The `&amp;` mapping must come last to ensure proper conversion of non-encoded to encoded forms (see {Oga::XML::Text#to_xml}).

@return [Hash]

ENCODE_ATTRIBUTE_MAPPING

Hash containing characters and the corresponding XML entities to use when encoding XML/HTML attribute values.

@return [Hash]

ENCODE_ATTRIBUTE_REGEXP

@return [Regexp]

ENCODE_MAPPING

Hash containing characters and the corresponding XML entities.

@return [Hash]

ENCODE_REGEXP

@return [Regexp]

HEX_CODE_POINT_ENTITY

Regexp for matching XML/HTML hex entities such as `&#x3C;`.

@return [Regexp]

NUMERIC_CODE_POINT_ENTITY

Regexp for matching XML/HTML numeric entities such as `&#38;`.

@return [Regexp]

REGULAR_ENTITY

Regexp for matching XML/HTML entities such as `&nbsp;`.

@return [Regexp]

Public Class Methods

decode(input, mapping = DECODE_MAPPING) click to toggle source

Decodes XML entities.

@param [String] input @param [Hash] mapping @return [String]

# File lib/oga/xml/entities.rb, line 71
# Decodes XML entities found in `input`.
#
# Named entities are replaced first using `mapping`; decimal and then
# hexadecimal code point entities are converted afterwards. A code point
# entity for which `pack_string` returns nil is left in the output as-is.
#
# @param [String] input
# @param [Hash] mapping
# @return [String]
def self.decode(input, mapping = DECODE_MAPPING)
  # Nothing to decode: hand back the very same object.
  return input unless input.include?(AMPERSAND)

  decoded = input.gsub(REGULAR_ENTITY, mapping)

  # Decimal entities are handled before hexadecimal ones. Each pass is only
  # run when an ampersand is still present in the intermediate result.
  code_point_passes = [
    [NUMERIC_CODE_POINT_ENTITY, 10],
    [HEX_CODE_POINT_ENTITY, 16]
  ]

  code_point_passes.each do |pattern, base|
    break unless decoded.include?(AMPERSAND)

    decoded = decoded.gsub(pattern) do |entity|
      pack_string(Regexp.last_match(1), base) || entity
    end
  end

  decoded
end
encode(input, mapping = ENCODE_MAPPING) click to toggle source

Encodes special characters as XML entities.

@param [String] input @param [Hash] mapping @return [String]

# File lib/oga/xml/entities.rb, line 96
# Encodes special characters as XML entities.
#
# Every match of ENCODE_REGEXP in `input` is replaced with the value stored
# in `mapping` under the matched text.
#
# @param [String] input
# @param [Hash] mapping
# @return [String]
def self.encode(input, mapping = ENCODE_MAPPING)
  encoded = input.gsub(ENCODE_REGEXP, mapping)

  encoded
end
encode_attribute(input) click to toggle source

Encodes special characters in an XML attribute value.

@param [String] input @return [String]

# File lib/oga/xml/entities.rb, line 104
# Encodes special characters in an XML attribute value.
#
# Every match of ENCODE_ATTRIBUTE_REGEXP in `input` is replaced with the
# value stored in ENCODE_ATTRIBUTE_MAPPING under the matched text.
#
# @param [String] input
# @return [String]
def self.encode_attribute(input)
  encoded = input.gsub(ENCODE_ATTRIBUTE_REGEXP, ENCODE_ATTRIBUTE_MAPPING)

  encoded
end

Private Class Methods

pack_string(input, base) click to toggle source

@param [String] input @param [Fixnum] base @return [String, nil] the packed character, or nil when the result is not a validly encoded string

# File lib/oga/xml/entities.rb, line 113
# Converts the textual representation of a code point into the matching
# UTF-8 character.
#
# @param [String] input The code point, written in the given base.
# @param [Fixnum] base The radix used to parse `input`.
# @return [String, nil] The packed character, or nil when the code point
#  cannot be packed or does not produce a validly encoded string.
def self.pack_string(input, base)
  packed = [Integer(input, base)].pack('U*')

  packed.valid_encoding? ? packed : nil
rescue RangeError
  # Array#pack raises RangeError for code points it cannot represent (for
  # example the digits of "&#99999999999;"). Report these the same way as
  # any other invalid code point instead of letting the error escape.
  nil
end