module RIMS::RFC822::CharsetText
Public Class Methods
decode_mime_encoded_words(encoded_string, decode_charset=nil, charset_aliases: DEFAULT_CHARSET_ALIASES, charset_convert_options: {})
click to toggle source
# File lib/rims/rfc822.rb, line 404
#
# Decodes every MIME encoded-word (<tt>=?charset?B-or-Q?text?=</tt>) found in
# +encoded_string+ and returns the whole text as a frozen string.  Text
# outside the encoded-words is copied through.
#
# encoded_string::          text that may contain encoded-words.
# decode_charset::          optional target charset: an Encoding, or a name
#                           looked up in +charset_aliases+ and then with
#                           <tt>Encoding.find</tt>.  When given, every decoded
#                           word is transcoded to it.
# charset_aliases::         lookup consulted before the built-in encoding names.
# charset_convert_options:: options passed to each <tt>String#encode</tt> call;
#                           +nil+ is treated as an empty hash.
#
# Raises ArgumentError (from <tt>Encoding.find</tt>) when +decode_charset+ is an
# unknown name.  Errors raised by +get_mime_charset_text+ or by the
# transcoding calls are not rescued here.
def decode_mime_encoded_words(encoded_string, decode_charset=nil, charset_aliases: DEFAULT_CHARSET_ALIASES, charset_convert_options: {})
  src = encoded_string
  dst = ''.dup
  charset_convert_options ||= {} # for backward compatibility

  # Stays nil when no target charset was requested.
  decode_charset_encoding =
    if decode_charset.is_a?(Encoding)
      decode_charset
    elsif decode_charset
      # raise `ArgumentError' when wrong charset due to library user
      charset_aliases[decode_charset] || Encoding.find(decode_charset)
    end
  dst.force_encoding(decode_charset_encoding || encoded_string.encoding)

  # One match is a run of encoded-words separated only by whitespace, so the
  # whitespace between adjacent encoded-words is dropped from the output.
  encoded_words = %r{ =\? [^\s?]+ \? [BQ] \? [^\s?]+ \?= (?: \s+ =\? [^\s?]+ \? [BQ] \? [^\s?]+ \?= )* }ix

  while (found = encoded_words.match(src))
    src = found.post_match
    foreword = found.pre_match

    unless foreword.empty?
      # Always bring the in-between text to the encoding of `dst'.  Appending
      # it untouched raises Encoding::CompatibilityError as soon as the two
      # strings are incompatible (e.g. non-ASCII text in different charsets).
      dst << foreword.encode(dst.encoding, **charset_convert_options)
    end

    found[0].split(/\s+/, -1).each do |encoded_word|
      _prefix, charset, encoding, encoded_text, _suffix = encoded_word.split('?', 5)
      encoding.upcase!
      encoded_text.tr!('_', ' ') if encoding == 'Q' # in Q encoding `_' stands for a space
      transfer_encoding = ENCODED_WORD_TRANSFER_ENCODING_TABLE[encoding] or raise "internal error - unknown encoding: #{encoding}"
      decoded_text = get_mime_charset_text(encoded_text, charset, transfer_encoding, charset_aliases: charset_aliases)

      if decode_charset_encoding && decoded_text.encoding != decode_charset_encoding
        # `decoded_text' is frozen, so build a transcoded copy
        decoded_text = decoded_text.encode(decode_charset_encoding, **charset_convert_options)
      end

      unless Encoding.compatible?(dst, decoded_text)
        if dst.ascii_only?
          dst.encode!(decoded_text.encoding, **charset_convert_options)
        else
          # `decoded_text' is frozen, so build a transcoded copy
          decoded_text = decoded_text.encode(dst.encoding, **charset_convert_options)
        end
      end

      dst << decoded_text
    end
  end

  unless src.empty?
    unless Encoding.compatible?(dst, src)
      src = src.encode(dst.encoding, **charset_convert_options) # `src' may be frozen
    end
    dst << src
  end

  dst.freeze
end
find_string_encoding(name)
click to toggle source
# File lib/rims/rfc822.rb, line 366
#
# Returns the Encoding that <tt>Encoding.find</tt> reports for +name+.
#
# Raises EncodingError, carrying the original message, when
# <tt>Encoding.find</tt> rejects the name with ArgumentError.
def self.find_string_encoding(name)
  Encoding.find(name)
rescue ArgumentError => e
  raise EncodingError, e.message
end
get_mime_charset_text(binary_string, charset, transfer_encoding=nil, charset_aliases: DEFAULT_CHARSET_ALIASES)
click to toggle source
# File lib/rims/rfc822.rb, line 374
#
# Undoes the transfer encoding of +binary_string+, tags the result with the
# given charset and returns it frozen.
#
# binary_string::     raw (possibly transfer-encoded) text.
# charset::           an Encoding, a charset name, or +nil+/+false+ to leave
#                     the encoding of the decoded text untouched.
# transfer_encoding:: 'BASE64' or 'QUOTED-PRINTABLE' (any letter case); any
#                     other value copies the input unchanged.
# charset_aliases::   lookup consulted before +find_string_encoding+.
#
# Raises EncodingError when the charset name is unknown or when the decoded
# bytes are not valid in the selected encoding.
def get_mime_charset_text(binary_string, charset, transfer_encoding=nil, charset_aliases: DEFAULT_CHARSET_ALIASES)
  decoded =
    case transfer_encoding&.upcase
    when 'BASE64'           then binary_string.unpack1('m')
    when 'QUOTED-PRINTABLE' then binary_string.unpack1('M')
    else                         binary_string.dup
    end

  return decoded.freeze unless charset

  target =
    if charset.is_a?(Encoding)
      charset
    else
      # raise `EncodingError' when wrong charset due to document
      charset_aliases[charset] || CharsetText.find_string_encoding(charset)
    end

  decoded.force_encoding(target)
  raise EncodingError, "invalid encoding - #{target}" unless decoded.valid_encoding?

  decoded.freeze
end
Private Instance Methods
decode_mime_encoded_words(encoded_string, decode_charset=nil, charset_aliases: DEFAULT_CHARSET_ALIASES, charset_convert_options: {})
click to toggle source
# File lib/rims/rfc822.rb, line 404
#
# Decodes every MIME encoded-word (<tt>=?charset?B-or-Q?text?=</tt>) found in
# +encoded_string+ and returns the whole text as a frozen string.  Text
# outside the encoded-words is copied through.
#
# encoded_string::          text that may contain encoded-words.
# decode_charset::          optional target charset: an Encoding, or a name
#                           looked up in +charset_aliases+ and then with
#                           <tt>Encoding.find</tt>.  When given, every decoded
#                           word is transcoded to it.
# charset_aliases::         lookup consulted before the built-in encoding names.
# charset_convert_options:: options passed to each <tt>String#encode</tt> call;
#                           +nil+ is treated as an empty hash.
#
# Raises ArgumentError (from <tt>Encoding.find</tt>) when +decode_charset+ is an
# unknown name.  Errors raised by +get_mime_charset_text+ or by the
# transcoding calls are not rescued here.
def decode_mime_encoded_words(encoded_string, decode_charset=nil, charset_aliases: DEFAULT_CHARSET_ALIASES, charset_convert_options: {})
  src = encoded_string
  dst = ''.dup
  charset_convert_options ||= {} # for backward compatibility

  # Stays nil when no target charset was requested.
  decode_charset_encoding =
    if decode_charset.is_a?(Encoding)
      decode_charset
    elsif decode_charset
      # raise `ArgumentError' when wrong charset due to library user
      charset_aliases[decode_charset] || Encoding.find(decode_charset)
    end
  dst.force_encoding(decode_charset_encoding || encoded_string.encoding)

  # One match is a run of encoded-words separated only by whitespace, so the
  # whitespace between adjacent encoded-words is dropped from the output.
  encoded_words = %r{ =\? [^\s?]+ \? [BQ] \? [^\s?]+ \?= (?: \s+ =\? [^\s?]+ \? [BQ] \? [^\s?]+ \?= )* }ix

  while (found = encoded_words.match(src))
    src = found.post_match
    foreword = found.pre_match

    unless foreword.empty?
      # Always bring the in-between text to the encoding of `dst'.  Appending
      # it untouched raises Encoding::CompatibilityError as soon as the two
      # strings are incompatible (e.g. non-ASCII text in different charsets).
      dst << foreword.encode(dst.encoding, **charset_convert_options)
    end

    found[0].split(/\s+/, -1).each do |encoded_word|
      _prefix, charset, encoding, encoded_text, _suffix = encoded_word.split('?', 5)
      encoding.upcase!
      encoded_text.tr!('_', ' ') if encoding == 'Q' # in Q encoding `_' stands for a space
      transfer_encoding = ENCODED_WORD_TRANSFER_ENCODING_TABLE[encoding] or raise "internal error - unknown encoding: #{encoding}"
      decoded_text = get_mime_charset_text(encoded_text, charset, transfer_encoding, charset_aliases: charset_aliases)

      if decode_charset_encoding && decoded_text.encoding != decode_charset_encoding
        # `decoded_text' is frozen, so build a transcoded copy
        decoded_text = decoded_text.encode(decode_charset_encoding, **charset_convert_options)
      end

      unless Encoding.compatible?(dst, decoded_text)
        if dst.ascii_only?
          dst.encode!(decoded_text.encoding, **charset_convert_options)
        else
          # `decoded_text' is frozen, so build a transcoded copy
          decoded_text = decoded_text.encode(dst.encoding, **charset_convert_options)
        end
      end

      dst << decoded_text
    end
  end

  unless src.empty?
    unless Encoding.compatible?(dst, src)
      src = src.encode(dst.encoding, **charset_convert_options) # `src' may be frozen
    end
    dst << src
  end

  dst.freeze
end
get_mime_charset_text(binary_string, charset, transfer_encoding=nil, charset_aliases: DEFAULT_CHARSET_ALIASES)
click to toggle source
# File lib/rims/rfc822.rb, line 374
#
# Undoes the transfer encoding of +binary_string+, tags the result with the
# given charset and returns it frozen.
#
# binary_string::     raw (possibly transfer-encoded) text.
# charset::           an Encoding, a charset name, or +nil+/+false+ to leave
#                     the encoding of the decoded text untouched.
# transfer_encoding:: 'BASE64' or 'QUOTED-PRINTABLE' (any letter case); any
#                     other value copies the input unchanged.
# charset_aliases::   lookup consulted before +find_string_encoding+.
#
# Raises EncodingError when the charset name is unknown or when the decoded
# bytes are not valid in the selected encoding.
def get_mime_charset_text(binary_string, charset, transfer_encoding=nil, charset_aliases: DEFAULT_CHARSET_ALIASES)
  decoded =
    case transfer_encoding&.upcase
    when 'BASE64'           then binary_string.unpack1('m')
    when 'QUOTED-PRINTABLE' then binary_string.unpack1('M')
    else                         binary_string.dup
    end

  return decoded.freeze unless charset

  target =
    if charset.is_a?(Encoding)
      charset
    else
      # raise `EncodingError' when wrong charset due to document
      charset_aliases[charset] || CharsetText.find_string_encoding(charset)
    end

  decoded.force_encoding(target)
  raise EncodingError, "invalid encoding - #{target}" unless decoded.valid_encoding?

  decoded.freeze
end