module Rfc2047
Copyright © 2020 Jian Weihang <tonytonyjan@gmail.com>
Constants
- ENCODED_TEXT
- ENCODED_WORD
- ENCODED_WORD_SEQUENCE
- TOKEN
Public Class Methods
decode(input)
click to toggle source
example
Rfc2047.decode '=?UTF-8?B?5Yu/5Lul5oOh5bCP6ICM54K65LmL77yM5Yu/5Lul5ZaE5bCP6ICM5LiN54K6?= =?UTF-8?B?44CC?=' # => "勿以惡小而為之,勿以善小而不為。"
# File lib/rfc_2047.rb, line 36
#
# Replaces every run of encoded-words in +input+ with its decoded UTF-8 text.
#
# Returns +input+ itself, untouched, when it contains no match for
# ENCODED_WORD. Otherwise returns a new String in which each match of
# ENCODED_WORD_SEQUENCE has been substituted; text outside the matches is
# left as it is.
def decode(input)
  return input unless input.match?(ENCODED_WORD)

  input.gsub(ENCODED_WORD_SEQUENCE) do |sequence|
    # Collect the decoded bytes of every word in the run first and transcode
    # afterwards, so the conversion sees the run as one string.
    buffer = +''
    sequence.scan(ENCODED_WORD) { buffer << decode_word(Regexp.last_match(0)) }

    if buffer.encoding == Encoding::UTF_7
      # UTF-7 is handled by decode_utf7 on the raw bytes instead of by
      # String#encode!; its result is then tagged as UTF-8.
      raw = buffer.force_encoding(Encoding::BINARY)
      buffer.replace(decode_utf7(raw)).force_encoding(Encoding::UTF_8)
    else
      buffer.encode!(Encoding::UTF_8)
    end

    buffer
  end
end
encode(input, encoding: :B)
click to toggle source
example:
Rfc2047.encode('己所不欲,勿施於人。') # => "=?UTF-8?B?5bex5omA5LiN5qyy77yM5Yu/5pa95pa85Lq644CC?="
# File lib/rfc_2047.rb, line 15
#
# Encodes +input+ as RFC 2047 encoded-words joined by single spaces.
#
# +input+ is returned unchanged when it is ASCII-only. Otherwise the charset
# label of every encoded-word is taken from +input.encoding+.
#
# +encoding+ selects the transfer encoding:
# * +:B+ - Base64; at most 45 bytes of text per encoded-word.
# * +:Q+ - quoted-printable style; encoded-words are sized so that each one
#   stays within 75 characters where possible.
#
# A character is never split across two encoded-words: RFC 2047 requires
# each encoded-word to represent a whole number of characters.
#
# Raises RuntimeError when +encoding+ is neither +:B+ nor +:Q+ (only reached
# for non-ASCII input, because of the early return).
def encode(input, encoding: :B)
  return input if input.ascii_only?

  charset = input.encoding
  case encoding
  when :B
    # 45 bytes become 60 Base64 characters, which keeps a UTF-8 encoded-word
    # at 72 characters.
    limit = 45
    chunks = []
    offset = 0
    length = 0
    input.each_char do |char|
      # Close the current chunk before a character that would overflow it,
      # so chunk borders always coincide with character borders.
      if length.positive? && length + char.bytesize > limit
        chunks << input.byteslice(offset, length)
        offset += length
        length = 0
      end
      length += char.bytesize
    end
    # The input is non-ASCII here, hence non-empty: a final chunk always exists.
    chunks << input.byteslice(offset, length)
    chunks.map { "=?#{charset}?B?#{[_1].pack('m0')}?=" }.join(' ')
  when :Q
    # Room left for the encoded text once the "=?charset?Q?" prefix and the
    # "?=" suffix are accounted for.
    limit = 75 - "=?#{charset}?Q??=".length
    words = []
    current = +''
    input.each_char do |char|
      token = char.each_byte.map do |byte|
        case byte
        when 0x20 then '_' # space has a dedicated shorthand
        when 0x3D, 0x3F, 0x5F then format('=%02X', byte) # "=", "?", "_" are syntax
        when 0x21..0x7E then byte.chr # remaining printable ASCII stays literal
        else format('=%02X', byte)
        end
      end.join
      if !current.empty? && current.length + token.length > limit
        words << current
        current = +''
      end
      current << token
    end
    words << current
    words.map { "=?#{charset}?Q?#{_1}?=" }.join(' ')
  else
    raise ":encoding should be either :B or :Q, got #{encoding}"
  end
end
Private Class Methods
decode_utf7(s)
click to toggle source
from Net::IMAP
# File lib/rfc_2047.rb, line 87
#
# Expands every "&...-" shift sequence in +s+ and returns the resulting
# String.
#
# * "&-" becomes a literal "&".
# * "&<payload>-" has its payload read as Base64 (with "," standing in for
#   "/") and the decoded bytes converted from UTF-16BE to UTF-8.
#
# NOTE(review): "&" is the shift character of the IMAP mailbox-name variant
# of UTF-7; plain UTF-7 (RFC 2152) shifts with "+". Confirm which form the
# callers actually pass in.
def decode_utf7(s)
  s.gsub(/&([^-]+)?-/n) do
    payload = Regexp.last_match(1)
    next '&' unless payload

    # The payload carries no Base64 padding; the surplus "===" lets the
    # decoder accept any payload length.
    utf16_bytes = (payload.tr(',', '/') + '===').unpack1('m')
    utf16_bytes.encode(Encoding::UTF_8, Encoding::UTF_16BE)
  end
end
decode_word(input)
click to toggle source
# File lib/rfc_2047.rb, line 55
#
# Decodes a single encoded-word and returns its bytes tagged with the
# encoding named by the word's charset. No transcoding happens here.
#
# +input+ must match ENCODED_WORD, whose three captures are read as charset,
# transfer encoding ("Q"/"q" or "B"/"b") and encoded text.
#
# Raises ArgumentError when +input+ does not match ENCODED_WORD.
def decode_word(input)
  parts = ENCODED_WORD.match(input)
  raise ArgumentError unless parts

  charset, encoding, encoded_text = parts.captures
  charset = 'CP950' if charset == 'MS950'

  decoded =
    if %w[Q q].include?(encoding)
      # "_" stands for a space in Q-encoding; spell it out as "=20" so the
      # quoted-printable decoder handles it.
      encoded_text.gsub('_', '=20').unpack1('M')
    elsif %w[B b].include?(encoding)
      encoded_text.unpack1('m')
    end

  target = find_encoding(charset)
  # Bytes labelled as raw 8-bit data are tagged as UTF-8 instead.
  target = Encoding::UTF_8 if target == Encoding::ASCII_8BIT
  decoded.force_encoding(target)
end
find_encoding(charset)
click to toggle source
# File lib/rfc_2047.rb, line 72
#
# Maps a charset label to an Encoding.
#
# The labels in the table below are matched case-insensitively and take
# precedence; every other label is handed to Encoding.find as given, which
# raises ArgumentError for names it does not know.
def find_encoding(charset)
  overrides = {
    'utf-16' => Encoding::UTF_16BE,
    'utf-32' => Encoding::UTF_32BE,
    'ks_c_5601-1987' => Encoding::CP949,
    'shift-jis' => Encoding::Shift_JIS,
    'gb2312' => Encoding::GB18030,
    'ms950' => Encoding::CP950,
    '8bit' => Encoding::ASCII_8BIT,
    'latin2' => Encoding::ISO_8859_2
  }

  # The block form keeps Encoding.find from running unless the table misses.
  overrides.fetch(charset.downcase) { Encoding.find(charset) }
end