module RIMS::RFC822::CharsetText

Public Class Methods

decode_mime_encoded_words(encoded_string, decode_charset=nil, charset_aliases: DEFAULT_CHARSET_ALIASES, charset_convert_options: {}) click to toggle source
# File lib/rims/rfc822.rb, line 404
# Decode the MIME encoded-words (`=?charset?B-or-Q?text?=') found in
# `encoded_string' and return the whole text as one frozen string.
#
# Text outside the encoded-words is copied through.  Whitespace that only
# separates adjacent encoded-words is dropped.
#
# encoded_string::          text that may contain encoded-words.
# decode_charset::          optional charset (an Encoding or a charset name)
#                           that every decoded encoded-word and every piece
#                           of text between encoded-words is converted to.
#                           When omitted the result starts out in the
#                           encoding of `encoded_string'.
# charset_aliases::         table consulted by name before `Encoding.find'.
# charset_convert_options:: options passed to String#encode / String#encode!
#                           (nil is accepted and treated as `{}').
#
# Raises `ArgumentError' when `decode_charset' is an unknown charset name.
# Conversion errors raised by String#encode are not rescued here.
def decode_mime_encoded_words(encoded_string, decode_charset=nil, charset_aliases: DEFAULT_CHARSET_ALIASES, charset_convert_options: {})
  src = encoded_string
  dst = ''.dup
  charset_convert_options ||= {} # for backward compatibility

  if (decode_charset) then
    if (decode_charset.is_a? Encoding) then
      decode_charset_encoding = decode_charset
    else
      decode_charset_encoding = charset_aliases[decode_charset] ||
                                Encoding.find(decode_charset) # raise `ArgumentError' when wrong charset due to library user
    end
    dst.force_encoding(decode_charset_encoding)
  else
    dst.force_encoding(encoded_string.encoding)
  end

  # each match is one encoded-word or a run of encoded-words separated by whitespace only.
  while (src =~ %r{
           =\? [^\s?]+ \? [BQ] \? [^\s?]+ \?=
           (?:
             \s+
             =\? [^\s?]+ \? [BQ] \? [^\s?]+ \?=
           )*
         }ix)

    src = $'
    foreword = $`
    encoded_word_list = $&.split(/\s+/, -1)

    unless (foreword.empty?) then
      # always bring the plain text into the encoding of `dst'.  converting
      # only when the two strings were already compatible left incompatible
      # text untouched, and the append below then failed with
      # `Encoding::CompatibilityError'.
      dst << foreword.encode(dst.encoding, **charset_convert_options)
    end

    encoded_word_list.each do |encoded_word|
      # "=?charset?encoding?text?=" -> ["=", charset, encoding, text, "="]
      _, charset, encoding, encoded_text, _ = encoded_word.split('?', 5)
      encoding.upcase!
      encoded_text.tr!('_', ' ') if (encoding == 'Q') # `_' stands for a space in `Q' encoded text
      transfer_encoding = ENCODED_WORD_TRANSFER_ENCODING_TABLE[encoding] or raise "internal error - unknown encoding: #{encoding}"
      decoded_text = get_mime_charset_text(encoded_text, charset, transfer_encoding, charset_aliases: charset_aliases)

      if (decode_charset_encoding) then
        if (decoded_text.encoding != decode_charset_encoding) then
          # `decoded_text' is frozen
          decoded_text = decoded_text.encode(decode_charset_encoding, **charset_convert_options)
        end
      end

      unless (Encoding.compatible? dst, decoded_text) then
        if (dst.ascii_only?) then
          # nothing but ASCII collected so far: switch `dst' over to the encoding of the decoded text.
          dst.encode!(decoded_text.encoding, **charset_convert_options)
        else
          # `decoded_text' is frozen
          decoded_text = decoded_text.encode(dst.encoding, **charset_convert_options)
        end
      end

      dst << decoded_text
    end
  end

  # text remaining after the last encoded-word (or the whole input when none was found).
  unless (src.empty?) then
    unless (Encoding.compatible? dst, src) then
      src = src.encode(dst.encoding, **charset_convert_options) # `src' may be frozen
    end
    dst << src
  end

  dst.freeze
end
find_string_encoding(name) click to toggle source
# File lib/rims/rfc822.rb, line 366
# Look up the Encoding registered under `name'.
#
# An unknown name is reported as `EncodingError' (carrying the message of
# the `ArgumentError' raised by `Encoding.find') instead of `ArgumentError'.
def self.find_string_encoding(name)
  Encoding.find(name)
rescue ArgumentError => lookup_error
  raise EncodingError, lookup_error.to_s
end
get_mime_charset_text(binary_string, charset, transfer_encoding=nil, charset_aliases: DEFAULT_CHARSET_ALIASES) click to toggle source
# File lib/rims/rfc822.rb, line 374
# Undo the transfer encoding of `binary_string' and label the result with
# `charset'.  The returned string is always a frozen copy.
#
# binary_string::     raw text as found in the message.
# charset::           an Encoding, a charset name, or nil to leave the
#                     encoding of the decoded text untouched.
# transfer_encoding:: `BASE64' or `QUOTED-PRINTABLE' (any letter case);
#                     anything else, including nil, means "copy as is".
# charset_aliases::   table consulted by name before the regular lookup.
#
# Raises `EncodingError' when the bytes are not valid in the chosen encoding.
def get_mime_charset_text(binary_string, charset, transfer_encoding=nil, charset_aliases: DEFAULT_CHARSET_ALIASES)
  decoded =
    case (transfer_encoding&.upcase)
    when 'BASE64'           then binary_string.unpack1('m')
    when 'QUOTED-PRINTABLE' then binary_string.unpack1('M')
    else                         binary_string.dup
    end

  return decoded.freeze unless charset

  target_encoding =
    if (charset.is_a? Encoding) then
      charset
    else
      # raise `EncodingError' when wrong charset due to document
      charset_aliases[charset] || CharsetText.find_string_encoding(charset)
    end

  decoded.force_encoding(target_encoding)
  unless (decoded.valid_encoding?) then
    raise EncodingError, "invalid encoding - #{target_encoding}"
  end

  decoded.freeze
end

Private Instance Methods

decode_mime_encoded_words(encoded_string, decode_charset=nil, charset_aliases: DEFAULT_CHARSET_ALIASES, charset_convert_options: {}) click to toggle source
# File lib/rims/rfc822.rb, line 404
# Decode the MIME encoded-words (`=?charset?B-or-Q?text?=') found in
# `encoded_string' and return the whole text as one frozen string.
#
# Text outside the encoded-words is copied through.  Whitespace that only
# separates adjacent encoded-words is dropped.
#
# encoded_string::          text that may contain encoded-words.
# decode_charset::          optional charset (an Encoding or a charset name)
#                           that every decoded encoded-word and every piece
#                           of text between encoded-words is converted to.
#                           When omitted the result starts out in the
#                           encoding of `encoded_string'.
# charset_aliases::         table consulted by name before `Encoding.find'.
# charset_convert_options:: options passed to String#encode / String#encode!
#                           (nil is accepted and treated as `{}').
#
# Raises `ArgumentError' when `decode_charset' is an unknown charset name.
# Conversion errors raised by String#encode are not rescued here.
def decode_mime_encoded_words(encoded_string, decode_charset=nil, charset_aliases: DEFAULT_CHARSET_ALIASES, charset_convert_options: {})
  src = encoded_string
  dst = ''.dup
  charset_convert_options ||= {} # for backward compatibility

  if (decode_charset) then
    if (decode_charset.is_a? Encoding) then
      decode_charset_encoding = decode_charset
    else
      decode_charset_encoding = charset_aliases[decode_charset] ||
                                Encoding.find(decode_charset) # raise `ArgumentError' when wrong charset due to library user
    end
    dst.force_encoding(decode_charset_encoding)
  else
    dst.force_encoding(encoded_string.encoding)
  end

  # each match is one encoded-word or a run of encoded-words separated by whitespace only.
  while (src =~ %r{
           =\? [^\s?]+ \? [BQ] \? [^\s?]+ \?=
           (?:
             \s+
             =\? [^\s?]+ \? [BQ] \? [^\s?]+ \?=
           )*
         }ix)

    src = $'
    foreword = $`
    encoded_word_list = $&.split(/\s+/, -1)

    unless (foreword.empty?) then
      # always bring the plain text into the encoding of `dst'.  converting
      # only when the two strings were already compatible left incompatible
      # text untouched, and the append below then failed with
      # `Encoding::CompatibilityError'.
      dst << foreword.encode(dst.encoding, **charset_convert_options)
    end

    encoded_word_list.each do |encoded_word|
      # "=?charset?encoding?text?=" -> ["=", charset, encoding, text, "="]
      _, charset, encoding, encoded_text, _ = encoded_word.split('?', 5)
      encoding.upcase!
      encoded_text.tr!('_', ' ') if (encoding == 'Q') # `_' stands for a space in `Q' encoded text
      transfer_encoding = ENCODED_WORD_TRANSFER_ENCODING_TABLE[encoding] or raise "internal error - unknown encoding: #{encoding}"
      decoded_text = get_mime_charset_text(encoded_text, charset, transfer_encoding, charset_aliases: charset_aliases)

      if (decode_charset_encoding) then
        if (decoded_text.encoding != decode_charset_encoding) then
          # `decoded_text' is frozen
          decoded_text = decoded_text.encode(decode_charset_encoding, **charset_convert_options)
        end
      end

      unless (Encoding.compatible? dst, decoded_text) then
        if (dst.ascii_only?) then
          # nothing but ASCII collected so far: switch `dst' over to the encoding of the decoded text.
          dst.encode!(decoded_text.encoding, **charset_convert_options)
        else
          # `decoded_text' is frozen
          decoded_text = decoded_text.encode(dst.encoding, **charset_convert_options)
        end
      end

      dst << decoded_text
    end
  end

  # text remaining after the last encoded-word (or the whole input when none was found).
  unless (src.empty?) then
    unless (Encoding.compatible? dst, src) then
      src = src.encode(dst.encoding, **charset_convert_options) # `src' may be frozen
    end
    dst << src
  end

  dst.freeze
end
get_mime_charset_text(binary_string, charset, transfer_encoding=nil, charset_aliases: DEFAULT_CHARSET_ALIASES) click to toggle source
# File lib/rims/rfc822.rb, line 374
# Undo the transfer encoding of `binary_string' and label the result with
# `charset'.  The returned string is always a frozen copy.
#
# binary_string::     raw text as found in the message.
# charset::           an Encoding, a charset name, or nil to leave the
#                     encoding of the decoded text untouched.
# transfer_encoding:: `BASE64' or `QUOTED-PRINTABLE' (any letter case);
#                     anything else, including nil, means "copy as is".
# charset_aliases::   table consulted by name before the regular lookup.
#
# Raises `EncodingError' when the bytes are not valid in the chosen encoding.
def get_mime_charset_text(binary_string, charset, transfer_encoding=nil, charset_aliases: DEFAULT_CHARSET_ALIASES)
  decoded =
    case (transfer_encoding&.upcase)
    when 'BASE64'           then binary_string.unpack1('m')
    when 'QUOTED-PRINTABLE' then binary_string.unpack1('M')
    else                         binary_string.dup
    end

  return decoded.freeze unless charset

  target_encoding =
    if (charset.is_a? Encoding) then
      charset
    else
      # raise `EncodingError' when wrong charset due to document
      charset_aliases[charset] || CharsetText.find_string_encoding(charset)
    end

  decoded.force_encoding(target_encoding)
  unless (decoded.valid_encoding?) then
    raise EncodingError, "invalid encoding - #{target_encoding}"
  end

  decoded.freeze
end