module EBNF::Unescape

Unescape strings

Constants

ECHAR
ESCAPE_CHAR4
ESCAPE_CHAR8
ESCAPE_CHARS
UCHAR

Public Class Methods

unescape(string) click to toggle source

Perform string and codepoint unescaping if defined for this terminal @param [String] string @return [String]

# File lib/ebnf/unescape.rb, line 58
# Fully unescape a terminal's text: codepoint escapes are decoded first,
# then the string escapes are resolved on that result.
#
# @param [String] string
# @return [String]
def unescape(string)
  codepoints_decoded = unescape_codepoints(string)
  unescape_string(codepoints_decoded)
end
unescape_codepoints(string) click to toggle source

Returns a copy of the given `string` with all `\uXXXX` and `\UXXXXXXXX` Unicode codepoint escape sequences replaced with their unescaped UTF-8 character counterparts.

@param [String] string @return [String] @see www.w3.org/TR/rdf-sparql-query/#codepointEscape

# File lib/ebnf/unescape.rb, line 27
# Replace every match of UCHAR in a copy of +string+ with the UTF-8 encoding
# of the codepoint named by its hexadecimal digits. The argument itself is
# left untouched.
#
# @param [String] string
# @return [String] the decoded copy, tagged as UTF-8
def unescape_codepoints(string)
  # Substitution is done on a binary-tagged copy and the result is re-tagged
  # as UTF-8 afterwards -- presumably so raw bytes can be spliced in without
  # encoding-compatibility errors.
  binary = string.dup
  binary.force_encoding(Encoding::ASCII_8BIT) if binary.respond_to?(:force_encoding)

  decoded = binary.gsub(UCHAR) do |escape|
    # Skip the two-character prefix; what remains is the hex codepoint.
    hex_digits = escape[2..-1]
    utf8_bytes = [hex_digits.hex].pack('U*')
    utf8_bytes.force_encoding(Encoding::ASCII_8BIT) if utf8_bytes.respond_to?(:force_encoding)
    utf8_bytes
  end

  decoded.force_encoding(Encoding::UTF_8) if decoded.respond_to?(:force_encoding)
  decoded
end
unescape_string(input) click to toggle source

Returns a copy of the given `input` string with all string escape sequences (e.g. `\n` and `\t`) replaced with their unescaped UTF-8 character counterparts.

@param [String] input @return [String] @see www.w3.org/TR/rdf-sparql-query/#grammarEscapes

# File lib/ebnf/unescape.rb, line 50
# Replace every match of ECHAR in +input+ with its entry in ESCAPE_CHARS.
# A match with no entry is replaced by itself minus its first character.
#
# @param [String] input
# @return [String] a new string; +input+ is not modified
def unescape_string(input)
  input.gsub(ECHAR) do |sequence|
    known = ESCAPE_CHARS[sequence]
    known || sequence[1..-1]
  end
end

Private Instance Methods

unescape(string) click to toggle source

Perform string and codepoint unescaping if defined for this terminal @param [String] string @return [String]

# File lib/ebnf/unescape.rb, line 58
# Fully unescape a terminal's text: codepoint escapes are decoded first,
# then the string escapes are resolved on that result.
#
# @param [String] string
# @return [String]
def unescape(string)
  codepoints_decoded = unescape_codepoints(string)
  unescape_string(codepoints_decoded)
end
unescape_codepoints(string) click to toggle source

Returns a copy of the given `string` with all `\uXXXX` and `\UXXXXXXXX` Unicode codepoint escape sequences replaced with their unescaped UTF-8 character counterparts.

@param [String] string @return [String] @see www.w3.org/TR/rdf-sparql-query/#codepointEscape

# File lib/ebnf/unescape.rb, line 27
# Replace every match of UCHAR in a copy of +string+ with the UTF-8 encoding
# of the codepoint named by its hexadecimal digits. The argument itself is
# left untouched.
#
# @param [String] string
# @return [String] the decoded copy, tagged as UTF-8
def unescape_codepoints(string)
  # Substitution is done on a binary-tagged copy and the result is re-tagged
  # as UTF-8 afterwards -- presumably so raw bytes can be spliced in without
  # encoding-compatibility errors.
  binary = string.dup
  binary.force_encoding(Encoding::ASCII_8BIT) if binary.respond_to?(:force_encoding)

  decoded = binary.gsub(UCHAR) do |escape|
    # Skip the two-character prefix; what remains is the hex codepoint.
    hex_digits = escape[2..-1]
    utf8_bytes = [hex_digits.hex].pack('U*')
    utf8_bytes.force_encoding(Encoding::ASCII_8BIT) if utf8_bytes.respond_to?(:force_encoding)
    utf8_bytes
  end

  decoded.force_encoding(Encoding::UTF_8) if decoded.respond_to?(:force_encoding)
  decoded
end
unescape_string(input) click to toggle source

Returns a copy of the given `input` string with all string escape sequences (e.g. `\n` and `\t`) replaced with their unescaped UTF-8 character counterparts.

@param [String] input @return [String] @see www.w3.org/TR/rdf-sparql-query/#grammarEscapes

# File lib/ebnf/unescape.rb, line 50
# Replace every match of ECHAR in +input+ with its entry in ESCAPE_CHARS.
# A match with no entry is replaced by itself minus its first character.
#
# @param [String] input
# @return [String] a new string; +input+ is not modified
def unescape_string(input)
  input.gsub(ECHAR) do |sequence|
    known = ESCAPE_CHARS[sequence]
    known || sequence[1..-1]
  end
end