module RipperRubyParser::Unescape

Implements string unescaping

@api private

Constants

DELIMITER_PAIRS
ESCAPE_SEQUENCE_REGEXP
SINGLE_LETTER_ESCAPES
SINGLE_LETTER_ESCAPES_REGEXP

Public Instance Methods

fix_encoding(string) click to toggle source
# File lib/ripper_ruby_parser/unescape.rb, line 94
# Reinterprets a non-UTF-8 string as UTF-8 when its bytes allow it.
#
# The argument itself is never modified; the reinterpretation is tried on a
# duplicate.
#
# @param string [String] the string to check
# @return [String] a UTF-8 tagged duplicate when +string+ is not tagged as
#   UTF-8 and its bytes form valid UTF-8; otherwise +string+ itself
def fix_encoding(string)
  return string if string.encoding == Encoding::UTF_8

  reinterpreted = string.dup.force_encoding(Encoding::UTF_8)
  reinterpreted.valid_encoding? ? reinterpreted : string
end
simple_unescape(string, delimiter) click to toggle source
# File lib/ripper_ruby_parser/unescape.rb, line 47
# Drops the backslash in front of escaped delimiters and escaped backslashes.
#
# Only a backslash followed by one of the delimiter characters, or by another
# backslash, is rewritten; any other backslash is left in place.
#
# @param string [String] the text to process
# @param delimiter [String] the delimiter; handed to delimiter_regexp_pattern
#   to obtain the regexp alternatives for the delimiter characters
# @return [String] a new string with the matched backslashes removed
def simple_unescape(string, delimiter)
  delimiter_alternatives = delimiter_regexp_pattern(delimiter)
  escaped_char = /
              \\ # a backslash
              (  # followed by a
                #{delimiter_alternatives} | # delimiter or
                \\              # backslash
              )/x
  # Each match is replaced by its captured character, i.e. the escape minus
  # its leading backslash.
  string.gsub(escaped_char) { Regexp.last_match(1) }
end
simple_unescape_wordlist_word(string, delimiter) click to toggle source
# File lib/ripper_ruby_parser/unescape.rb, line 59
# Drops the backslash in front of escaped delimiters, backslashes, spaces and
# newlines.
#
# Any backslash followed by something else is left in place.
#
# @param string [String] the text to process
# @param delimiter [String] the delimiter; handed to delimiter_regexp_pattern
#   to obtain the regexp alternatives for the delimiter characters
# @return [String] a new string with the matched backslashes removed
def simple_unescape_wordlist_word(string, delimiter)
  delimiter_alternatives = delimiter_regexp_pattern(delimiter)
  escaped_char = /
              \\ # a backslash
              (  # followed by a
                #{delimiter_alternatives} | # delimiter or
                \\            | # backslash or
                [ ]           | # space or
                \n              # newline
              )
              /x
  # Each match is replaced by its captured character, i.e. the escape minus
  # its leading backslash.
  string.gsub(escaped_char) { Regexp.last_match(1) }
end
unescape(string) click to toggle source
# File lib/ripper_ruby_parser/unescape.rb, line 74
# Replaces every match of ESCAPE_SEQUENCE_REGEXP in +string+ with its
# unescaped value, working on the string as raw bytes.
#
# The first capture group of each match is inspected: when it is a newline
# the whole match is removed, otherwise it is replaced by the result of
# unescaped_value, tagged as ASCII-8BIT.
#
# The argument is never modified. Previously an unfrozen argument had its
# encoding switched to ASCII-8BIT in place while a frozen one was duplicated
# first; now a binary copy is used in both cases.
#
# @param string [String] the text containing escape sequences
# @return [String] a new ASCII-8BIT string with the escapes replaced
def unescape(string)
  # String#b returns an ASCII-8BIT copy, so frozen and unfrozen arguments are
  # handled the same way and the caller's string keeps its encoding.
  string.b.gsub(ESCAPE_SEQUENCE_REGEXP) do
    bare = Regexp.last_match(1)
    if bare == "\n"
      ""
    else
      unescaped_value(bare).force_encoding("ASCII-8BIT")
    end
  end
end
unescape_regexp(string) click to toggle source
# File lib/ripper_ruby_parser/unescape.rb, line 102
# Removes backslash-newline pairs from +string+.
#
# Escaped backslashes (two backslashes in a row) are matched as well and put
# back unchanged; consuming them as a pair keeps the second backslash from
# being paired with a newline that follows it.
#
# @param string [String] the text to process
# @return [String] a new string without backslash-newline pairs
def unescape_regexp(string)
  string.gsub(/\\(\n|\\)/) do
    Regexp.last_match(1) == "\n" ? "" : "\\\\"
  end
end
unescape_wordlist_word(string) click to toggle source
# File lib/ripper_ruby_parser/unescape.rb, line 87
# Replaces every match of ESCAPE_SEQUENCE_REGEXP in +string+ with the result
# of unescaped_value for the match's first capture group.
#
# @param string [String] the text containing escape sequences
# @return [String] a new string with the escapes replaced
def unescape_wordlist_word(string)
  string.gsub(ESCAPE_SEQUENCE_REGEXP) { unescaped_value(Regexp.last_match(1)) }
end

Private Instance Methods

control(val) click to toggle source
# File lib/ripper_ruby_parser/unescape.rb, line 171
# Applies the control-character mask to a byte value.
#
# The mask 0x9F (binary 1001_1111) clears bits 5 and 6 and keeps the rest.
#
# @param val [Integer] the byte value to mask
# @return [Integer] +val+ with bits 5 and 6 cleared
def control(val)
  control_mask = 0x9F
  val & control_mask
end
delimiter_regexp_pattern(delimiter) click to toggle source
# File lib/ripper_ruby_parser/unescape.rb, line 179
# Builds the regexp alternatives that match the characters of a delimiter.
#
# Only the last character of +delimiter+ is considered. It is looked up in
# DELIMITER_PAIRS; when there is no entry the character itself is used. Every
# character of the result is regexp-escaped and the pieces are joined with
# " | ", so the pattern is meant for regexps in extended (/x) mode, where the
# surrounding spaces are ignored.
#
# @param delimiter [String] the delimiter text; its last character is used
# @return [String] regexp source with one alternative per delimiter character
def delimiter_regexp_pattern(delimiter)
  final_char = delimiter[-1]
  delimiter_chars = DELIMITER_PAIRS.fetch(final_char, final_char)
  delimiter_chars.each_char.map { |char| Regexp.escape(char) }.join(" | ")
end
hex_to_char(str) click to toggle source
# File lib/ripper_ruby_parser/unescape.rb, line 167
# Converts a string of hexadecimal digits into the character with that value.
#
# @param str [String] hexadecimal digits, parsed with String#to_i(16)
# @return [String] the one-character string returned by Integer#chr
def hex_to_char(str)
  byte_value = str.to_i(16)
  byte_value.chr
end
hex_to_unicode_char(str) click to toggle source
# File lib/ripper_ruby_parser/unescape.rb, line 163
# Converts a string of hexadecimal digits into the UTF-8 character for that
# code point.
#
# @param str [String] hexadecimal digits, parsed with String#to_i(16)
# @return [String] a UTF-8 encoded one-character string
def hex_to_unicode_char(str)
  code_point = str.to_i(16)
  code_point.chr(Encoding::UTF_8)
end
meta(val) click to toggle source
# File lib/ripper_ruby_parser/unescape.rb, line 175
# Applies the meta modifier to a byte value by setting bit 7 (0x80).
#
# @param val [Integer] the byte value to modify
# @return [Integer] +val+ with bit 7 set
def meta(val)
  meta_bit = 0x80
  val | meta_bit
end
unescape_hex_char(bare) click to toggle source
# File lib/ripper_ruby_parser/unescape.rb, line 133
# Converts a hex escape body into its character.
#
# The first character of +bare+ is skipped (the dispatcher in this module
# routes strings starting with "x" here) and the remainder is handed to
# hex_to_char.
#
# @param bare [String] the escape body, e.g. "x41"
# @return [String] the character returned by hex_to_char
def unescape_hex_char(bare)
  hex_digits = bare[1..-1]
  hex_to_char(hex_digits)
end
unescape_meta_control(bare) click to toggle source
# File lib/ripper_ruby_parser/unescape.rb, line 146
# Converts a control and/or meta escape body into its character.
#
# The last character of +bare+ is the base character; the prefix decides
# which modifiers are applied to its byte value:
#
# * "c" or "C-"                              -> control
# * "M-"                                     -> meta
# * "M-\C-", "C-\M-", "M-\c" or "c\M-"       -> control, then meta
#
# Ruby defines "\c?" and "\C-?" as DEL (0x7F) rather than the masked value of
# "?", so a control prefix directly in front of "?" is special-cased, with
# the meta bit added when "M-\" precedes it.
# NOTE(review): "c\M-?" and "C-\M-?" deliberately keep the plain
# control-then-meta result (0x9F); this is believed to match MRI, where the
# DEL rule only applies when "?" directly follows the control prefix —
# confirm.
#
# @param bare [String] the escape body without its leading backslash,
#   e.g. "C-a" or "M-\\C-a"
# @return [String] the one-character string for the computed byte value
def unescape_meta_control(bare)
  del = 0x7F
  base_value = bare[-1].ord
  value = case bare
          when /^(c|C-)\?$/
            del
          when /^M-\\(c|C-)\?$/
            meta(del)
          when /^(c|C-).$/
            control(base_value)
          when /^M-.$/
            meta(base_value)
          when /^(M-\\C-|C-\\M-|M-\\c|c\\M-).$/
            meta(control(base_value))
          end
  value.chr
end
unescape_octal(bare) click to toggle source
# File lib/ripper_ruby_parser/unescape.rb, line 159
# Converts a string of octal digits into the byte with that value.
#
# Three octal digits can encode values up to 0o777 (511), which Integer#chr
# rejects with a RangeError. Only the low eight bits are kept, so such input
# yields a byte instead of raising.
# NOTE(review): wrapping to the low byte is believed to match how Ruby's own
# string literals treat e.g. "\400" — confirm.
#
# @param bare [String] octal digits, parsed with String#to_i(8)
# @return [String] the one-character string for the resulting byte value
def unescape_octal(bare)
  (bare.to_i(8) & 0xFF).chr
end
unescape_unicode_char(bare) click to toggle source
# File lib/ripper_ruby_parser/unescape.rb, line 137
# Converts a unicode escape body into its UTF-8 character.
#
# For the braced form ("u{...}") the text between the braces is used; for
# any other form the four characters after the first one are used. The
# selected digits are handed to hex_to_unicode_char.
#
# @param bare [String] the escape body, e.g. "u00e9" or "u{1F600}"
# @return [String] the character returned by hex_to_unicode_char
def unescape_unicode_char(bare)
  digits = bare.start_with?("u{") ? bare[2..-2] : bare[1..4]
  hex_to_unicode_char(digits)
end
unescaped_value(bare) click to toggle source
# File lib/ripper_ruby_parser/unescape.rb, line 116
# Dispatches an escape body to the matching conversion and returns the
# unescaped text.
#
# The rules are tried top to bottom and the first match wins:
#
# 1. a match of SINGLE_LETTER_ESCAPES_REGEXP -> a copy of the
#    SINGLE_LETTER_ESCAPES entry for +bare+
# 2. starts with "x"                         -> unescape_hex_char
# 3. starts with "u"                         -> unescape_unicode_char
# 4. control/meta prefix plus one character  -> unescape_meta_control
# 5. starts with octal digits                -> unescape_octal
# 6. anything else                           -> +bare+ itself, unchanged
#
# @param bare [String] the escape body without its leading backslash
# @return [String] the unescaped text
def unescaped_value(bare)
  case bare
  when SINGLE_LETTER_ESCAPES_REGEXP then SINGLE_LETTER_ESCAPES[bare].dup
  when /^x/ then unescape_hex_char(bare)
  when /^u/ then unescape_unicode_char(bare)
  when /^(c|C-|M-|M-\\C-|C-\\M-|M-\\c|c\\M-).$/ then unescape_meta_control(bare)
  when /^[0-7]+/ then unescape_octal(bare)
  else bare
  end
end