class Hermeneutics::HeaderExt
Header field contents (RFC 2047) encoding
Examples¶ ↑
HeaderExt.encode "Jörg Müller" #=> "=?UTF-8?Q?J=C3=B6rg_M=C3=BCller?="
HeaderExt.decode "=?UTF-8?Q?J=C3=B6rg_M=C3=BCller?=" #=> "Jörg Müller"
Public Class Methods
decode( str) → str
click to toggle source
Remove header field style escapes.
HeaderExt.decode "=?UTF-8?Q?J=C3=B6rg_M=C3=BCller?=" #=> "Jörg Müller"
# File lib/hermeneutics/escape.rb, line 731
# Remove header field style escapes from +str+.
#
# The string is run through +lexer+, which yields +:plain+, +:space+ and
# +:decoded+ pieces. Pieces are collected and joined, with these whitespace
# adjustments:
#
# * whitespace between two decoded words, or leading/trailing whitespace
#   next to a decoded word, is dropped;
# * a SPACE is inserted where a plain piece directly touches a decoded one.
#
# If joining fails with an EncodingError, all pieces are converted to the
# encodings seen in the decoded words, one after another, until joining
# succeeds; when none is left the error is re-raised.
def decode(str)
  pieces = []
  charsets = []
  before_last = last = nil
  lexer str do |kind, text|
    if kind == :decoded
      charsets << text.encoding
      if last == :space
        # whitespace separating encoded words (or leading them) is not content
        pieces.pop if before_last == :decoded || !before_last
      elsif last == :plain
        pieces << SPACE
      end
    elsif kind == :plain
      pieces << SPACE if last == :decoded
    end
    pieces << text
    before_last, last = last, kind
  end
  # trailing whitespace after a decoded word is dropped as well
  pieces.pop if last == :space && before_last == :decoded
  charsets.uniq!
  begin
    pieces.join
  rescue EncodingError
    # bounded retry: each attempt consumes one candidate encoding
    raise if charsets.empty?
    target = charsets.shift
    pieces.each { |piece| piece.encode! target }
    retry
  end
end
encode( str) → str
click to toggle source
Use the standard content encoding.
# File lib/hermeneutics/escape.rb, line 710
# Encode +str+ with the converter returned by +std+.
def encode(str)
  std.encode(str)
end
encode_whole( str) → str
click to toggle source
Use the standard content encoding.
# File lib/hermeneutics/escape.rb, line 719
# Encode +str+ as one piece with the converter returned by +std+.
def encode_whole(str)
  std.encode_whole(str)
end
lexer(str) { |type, piece| ... }
click to toggle source
# File lib/hermeneutics/escape.rb, line 767
# Split +str+ into tokens and yield each one as <tt>type, piece</tt>:
#
# +:space+::   a run of whitespace
# +:decoded+:: an encoded word (<tt>=?charset?Q|B?text?=</tt>), already
#              passed through +unmask+
# +:plain+::   anything else
#
# Scanning proceeds left to right; after every match the remainder of the
# string becomes the new subject until nothing is left.
def lexer(str)
  token = /(\s+)|\B=\?(\S*?)\?([QB])\?(\S*?)\?=\B/i
  while str
    hit = token.match str
    if hit.nil?
      # nothing special left: the rest is one plain piece
      yield :plain, str
      rest = nil
    else
      lead = hit.pre_match
      yield :plain, lead unless lead.empty?
      if hit[1]
        yield :space, hit[0]
      else
        yield :decoded, unmask(hit[2], hit[3], hit[4])
      end
      rest = hit.post_match
    end
    str = rest.notempty?
  end
end
needs? str → true or false
click to toggle source
Check whether a string needs encoding, using the standard content encoding.
# File lib/hermeneutics/escape.rb, line 701
# Ask the converter returned by +std+ whether +str+ needs encoding.
def needs?(str)
  std.needs?(str)
end
new( [ parameters] ) → con
click to toggle source
Creates a HeaderExt
converter.
See the encode
method for an explanation of the parameters.
Examples¶ ↑
con = HeaderExt.new con = HeaderExt.new base64: true, limit: 32, lower: true con = HeaderExt.new mask: /["'()]/
# File lib/hermeneutics/escape.rb, line 541
# Set up a converter.
#
# +params+ is an optional Hash; the recognized keys are +:base64+, +:limit+,
# +:lower+ and +:mask+. Any other key raises an ArgumentError naming the
# first offending key.
#
# The hash is copied before the known keys are removed, so the caller's
# hash is left untouched and frozen hashes are accepted.
def initialize params = nil
  return unless params
  rest = params.dup
  @base64 = rest.delete :base64
  @limit  = rest.delete :limit
  @lower  = rest.delete :lower
  @mask   = rest.delete :mask
  rest.empty? or raise ArgumentError, "invalid parameter: #{rest.keys.first}."
end
std()
click to toggle source
The standard header content encoding has a word break limit of 64.
# File lib/hermeneutics/escape.rb, line 692
# The standard converter: built once with a word break limit of 64 and
# cached in +@std+ for all later calls.
def std
  @std || (@std = new(limit: 64))
end
Private Class Methods
unmask(cs, tp, txt)
click to toggle source
# File lib/hermeneutics/escape.rb, line 786
# Decode the payload of one encoded word.
#
# cs::  charset label; a trailing <tt>*language</tt> suffix (RFC 2231,
#       section 5) is cut off in place
# tp::  transfer type, "B" (base64) or "Q" (quoted-printable), any case
# txt:: the encoded text; modified in place for type "Q"
#
# Returns the decoded bytes tagged with the encoding named by +cs+.
# Charsets starting with "unknown" are tagged US-ASCII. UTF-7 is converted
# to UTF-8 by hand.
def unmask cs, tp, txt
  case tp.upcase
  when "B" then txt, = txt.unpack "m*"
  when "Q" then txt.tr! "_", " " ; txt, = txt.unpack "M*"
  end
  cs.slice!(/\*\w+\z/)     # language as in rfc2231, 5.
  case cs
  when /\Autf-?7\z/i
    # Ruby doesn't seem to do that.
    txt.force_encoding Encoding::US_ASCII
    txt.gsub!(/\+([0-9a-zA-Z+\/]*)-?/) do
      if $1.empty?
        "+"
      else
        # The shifted section is base64 over UTF-16BE code units. Converting
        # it as UTF-16BE (instead of mapping every 16-bit unit to a
        # character) keeps surrogate pairs, i.e. characters above U+FFFF,
        # intact.
        utf16, = "#$1==".unpack "m*"
        # a dangling odd byte cannot form a code unit; drop it
        utf16 = utf16.byteslice 0, utf16.bytesize & ~1
        utf16.force_encoding Encoding::UTF_16BE
        utf16.encode Encoding::UTF_8, invalid: :replace, undef: :replace
      end
    end
    txt.force_encoding Encoding::UTF_8
  when /\Aunknown/i
    txt.force_encoding Encoding::US_ASCII
  else
    txt.force_encoding cs
  end
  txt
end
Public Instance Methods
decode(str)
click to toggle source
# File lib/hermeneutics/escape.rb, line 684
# Instance-level shortcut: hands +str+ to the class method +decode+.
def decode(str)
  self.class.decode(str)
end
encode( str) → str
click to toggle source
Create a header field style encoded string. The following parameters will be evaluated:
:base64 # build ?B? instead of ?Q? :limit # break words longer than this :lower # build lower case ?b? and ?q? :mask # a regular expression detecting characters to mask
The result will not contain any 8-bit characters. The encoding will be kept although it won't have a meaning.
The parameter :mask
will have no influence on the masking itself but will guarantee characters to be masked.
Examples¶ ↑
yodel = "Holleri du dödl di, diri diri dudl dö." con = HeaderExt.new con.encode yodel #=> "Holleri du =?UTF-8?Q?d=C3=B6dl?= di, diri diri dudl =?UTF-8?Q?d=C3=B6=2E?=" yodel.encode! "iso8859-1" con.encode yodel #=> "Holleri du =?ISO8859-1?Q?d=F6dl?= di, diri diri dudl =?ISO8859-1?Q?d=F6=2E?=" e = "€" e.encode! "utf-8" ; con.encode e #=> "=?UTF-8?Q?=E2=82=AC?=" e.encode! "iso8859-15" ; con.encode e #=> "=?ISO8859-15?Q?=A4?=" e.encode! "ms-ansi" ; con.encode e #=> "=?MS-ANSI?Q?=80?=" con = HeaderExt.new mask: /["'()]/ con.encode "'Stop!' said Fred." #=> "=?UTF-8?Q?=27Stop=21=27?= said Fred."
# File lib/hermeneutics/escape.rb, line 599
# Create a header field style encoded string from +str+.
#
# The string is walked word by word (runs of non-whitespace). Consecutive
# words for which +needs?+ is true are collected, together with the
# whitespace between them, and passed to +mask+ as one unit. All other
# words and their leading whitespace are copied verbatim. Whitespace
# trailing a final word that needs encoding goes into the masked unit.
def encode(str)
  do_encoding str do
    out = ""
    pending = ""
    while str =~ /\S+/
      hit = $~
      gap, word = hit.pre_match, hit[0]
      if needs? word
        # whitespace joins the pending run if there is one, else the output
        (pending.notempty? || out) << gap
        pending << word
      else
        unless pending.empty?
          out << mask(pending)
          pending.clear
        end
        out << gap << word
      end
      str = hit.post_match
    end
    if pending.empty?
      out << str
    else
      pending << str
      out << mask(pending)
    end
    out
  end
end
encode_whole( str) → str
click to toggle source
Unlike encode,
the whole string will be encoded as one piece.
yodel = "Holleri du dödl di, diri diri dudl dö."
HeaderExt.encode_whole yodel #=> "=?UTF-8?Q?Holleri_du_d=C3=B6dl_di=2C_diri_diri_dudl_d=C3=B6=2E?="
# File lib/hermeneutics/escape.rb, line 635
# Encode +str+ as a whole: the complete string is handed to +mask+ inside
# a +do_encoding+ block, without word-by-word inspection.
def encode_whole(str)
  do_encoding(str) { mask(str) }
end
needs? str → true or false
click to toggle source
Check whether a string needs encoding.
# File lib/hermeneutics/escape.rb, line 557
# Check whether a string needs encoding: true if +str+ contains non-ASCII
# characters or matches the +@mask+ pattern, false otherwise.
def needs?(str)
  return true unless str.ascii_only?
  (str =~ @mask) ? true : false
end
Private Instance Methods
base64(c)
click to toggle source
# File lib/hermeneutics/escape.rb, line 669
# Base64-encode +c+ and return the result as a single line without any
# whitespace.
def base64(c)
  # a count of 0 makes pack emit no line feeds at all
  [c].pack("m0")
end
do_encoding(str) { || ... }
click to toggle source
# File lib/hermeneutics/escape.rb, line 643
# Run the given block with the per-call encoding state set up.
#
# For the duration of the block
# * +@charset+ is the name of +str+'s encoding,
# * +@type+ is "B" or "Q" and +@encoder+ is +:base64+ or +:quopri+,
#   depending on +@base64+,
# * charset and type are lower-cased when +@lower+ is set.
#
# The block's result is tagged with +str+'s encoding and returned. The
# three state variables are reset to nil afterwards, even if the block
# raises.
def do_encoding str
  # Use the encoding's name: an Encoding object cannot be lower-cased.
  @charset = str.encoding.name
  @type, @encoder = @base64 ? ["B", :base64] : ["Q", :quopri]
  if @lower
    # non-mutating, as the name and the literals may be frozen
    @charset = @charset.downcase
    @type = @type.downcase
  end
  yield.force_encoding str.encoding
ensure
  @charset = @type = @encoder = nil
end
mask(str)
click to toggle source
# File lib/hermeneutics/escape.rb, line 659
# Turn +str+ into one or more encoded words.
#
# The string is cut into chunks of +@limit+ characters (one chunk holding
# everything if no limit is set). Each chunk is run through the method
# named by +@encoder+ and wrapped as <tt>=?charset?type?...?=</tt>; the
# words are joined with SPACE.
def mask(str)
  total = str.length
  step = @limit || total
  words = []
  offset = 0
  while offset < total
    payload = send @encoder, str[offset, step]
    words << "=?#{@charset}?#{@type}?#{payload}?="
    offset += step
  end
  words.join SPACE
end
quopri(c)
click to toggle source
# File lib/hermeneutics/escape.rb, line 675
# Q-encode +c+ in place and return it.
#
# The string is switched to binary, every byte other than a space, an
# ASCII letter or a digit is replaced by <tt>=XX</tt> (upper-case hex),
# and spaces are turned into underscores.
def quopri(c)
  c.force_encoding Encoding::ASCII_8BIT
  c.gsub!(/([^ a-zA-Z0-9])/) { |byte| format "=%02X", byte.ord }
  c.tr! " ", "_"
  c
end