class Hermeneutics::URLText

URL-able representation

What's actually happening

URLs may not contain spaces and several other characters such as slashes, ampersands etc. These characters will be masked by a percent sign and two hex digits representing the ASCII code. Eight-bit characters should be masked the same way.

A URL line does not store encoding information by itself. A locator may say either of these:

http://www.example.com/subdir/index.html?umlfield=%C3%BCber+alles
http://www.example.com/subdir/index.html?umlfield=%FCber+alles

The reading CGI has to decide by itself how to treat it.

Examples

URLText.encode "'Stop!' said Fred."     #=> "%27Stop%21%27+said+Fred."
URLText.decode "%27Stop%21%27+said+Fred%2e"
                                        #=> "'Stop!' said Fred."

Attributes

keep_8bit[RW]
keep_space[RW]
mask_space[RW]

Public Class Methods

decode( str) → str click to toggle source
decode( str, encoding) → str

Decode the contained string.

utx = URLText.new
utx.decode "%27Stop%21%27+said+Fred%2e"       #=> "'Stop!' said Fred."

The encoding will be kept. That means that an invalidly encoded string could be produced.

a = "bl%F6d"
a.encode! "utf-8"
d = utx.decode a
d =~ /./        #=> "invalid byte sequence in UTF-8 (ArgumentError)"
# File lib/hermeneutics/escape.rb, line 461
# Decode a %XX-masked URL string.
#
# Every "+" is turned into a space and every "%XX" escape (two hex digits,
# either case) is replaced by the single byte it denotes. The result carries
# the same encoding as +str+; no validity check is made, so the returned
# string may be invalidly encoded.
#
# str:: the masked string; it is not modified.
#
# Returns a new string.
def decode str
  r = str.new_string
  # Substitute on raw bytes. Otherwise a decoded high byte (a binary string
  # from Integer#chr) cannot be joined with non-ASCII characters already
  # present in +str+ and gsub! raises Encoding::CompatibilityError.
  r.force_encoding Encoding::ASCII_8BIT
  r.tr! "+", " "
  r.gsub!(/%([0-9A-F]{2})/i) { $1.hex.chr }
  r.force_encoding str.encoding
  r
end
decode_hash( str) → hash click to toggle source
decode_hash( str) { |key,val| ... } → nil or int

Decode a URL-style encoded string to a Hash. In case a block is given, the number of key-value pairs is returned.

str = "a=%3B%3B%3B&x=%26auml%3B%26ouml%3B%26uuml%3B"
URLText.decode_hash str do |k,v|
  puts "#{k} = #{v}"
end

Output:

a = ;;;
x = &auml;&ouml;&uuml;
# File lib/hermeneutics/escape.rb, line 486
# Decode a URL-style query string into key/value pairs.
#
# With a block, every decoded pair is yielded and the number of pairs is
# returned, or +nil+ when there were none. Without a block, each pair is
# passed to +parse+ on the object that Dict.create yields, and whatever
# Dict.create returns is the result.
def decode_hash qstr
  unless block_given?
    return Dict.create { |dict|
      each_pair(qstr) { |key, val| dict.parse key, val }
    }
  end
  count = 0
  each_pair(qstr) do |key, val|
    yield key, val
    count += 1
  end
  # nonzero? maps a count of 0 to nil.
  count.nonzero?
end
encode(str) click to toggle source
# File lib/hermeneutics/escape.rb, line 432
# Encode +str+ with the shared converter returned by +std+.
def encode(str)
  converter = std
  converter.encode(str)
end
encode_hash(hash) click to toggle source
# File lib/hermeneutics/escape.rb, line 436
# Encode +hash+ with the shared converter returned by +std+.
def encode_hash(hash)
  converter = std
  converter.encode_hash(hash)
end
mkurl(path, hash, anchor = nil) click to toggle source
# File lib/hermeneutics/escape.rb, line 440
# Build a URL with the shared converter returned by +std+.
#
# +hash+ is optional here just as it is in the instance method, so a bare
# path (or a path plus anchor) can be passed without a parameter hash.
def mkurl path, hash = nil, anchor = nil
  std.mkurl path, hash, anchor
end
new( hash) → urltext click to toggle source

Creates a URLText converter.

The parameters are given as keyword arguments.

utx = URLText.new keep_8bit: true, keep_space: false

See the encode method for an explanation of these parameters.

# File lib/hermeneutics/escape.rb, line 270
# Store the three converter options; each one defaults to +nil+.
def initialize(keep_8bit: nil, keep_space: nil, mask_space: nil)
  @keep_8bit, @keep_space, @mask_space = keep_8bit, keep_space, mask_space
end
std() click to toggle source
# File lib/hermeneutics/escape.rb, line 428
# Return the memoized default converter, creating it with +new+ on first use.
def std
  return @std if @std
  @std = new
end

Private Class Methods

each_pair(qstr) { |*kv| ... } click to toggle source
# File lib/hermeneutics/escape.rb, line 503
# Split a query string into pairs and yield each one decoded.
#
# Returns +nil+ at once when +qstr+ is nil or false. Otherwise the string
# form of +qstr+ is split on PAIR_SEP, each piece is split on PAIR_SET into
# at most two parts, every part is passed through +decode+, and the parts are
# yielded as separate block arguments.
def each_pair qstr
  return unless qstr
  qstr.to_s.split(PAIR_SEP).each do |pair|
    parts = pair.split(PAIR_SET, 2).map { |part| part && decode(part) }
    yield(*parts)
  end
end

Public Instance Methods

decode(str) click to toggle source
# File lib/hermeneutics/escape.rb, line 418
# Decode +str+ by handing it to the class-level +decode+.
def decode(str)
  owner = self.class
  owner.decode(str)
end
decode_hash(qstr, &block) click to toggle source
# File lib/hermeneutics/escape.rb, line 422
# Decode +qstr+ by handing it, and any block, to the class-level +decode_hash+.
def decode_hash(qstr, &block)
  owner = self.class
  owner.decode_hash(qstr, &block)
end
encode( str) → str click to toggle source

Create a string that contains %XX-encoded bytes.

utx = URLText.new
utx.encode "'Stop!' said Fred."       #=> "%27Stop%21%27+said+Fred."

The result will not contain any 8-bit characters, except when keep_8bit is set. The result will be in the same encoding as the argument although this normally has no meaning.

utx = URLText.new keep_8bit: true
s = "< ä >".encode "UTF-8"
utx.encode s                    #=> "%3C+\u{e4}+%3E"  in UTF-8

s = "< ä >".encode "ISO-8859-1"
utx.encode s                    #=> "%3C+\xe4+%3E"      in ISO-8859-1

A space " " will not be replaced by a plus "+" if keep_space is set.

utx = URLText.new keep_space: true
s = "< x >"
utx.encode s                    #=> "%3C x %3E"

When mask_space is set, then a space will be represented as "%20".

# File lib/hermeneutics/escape.rb, line 305
# Mask every character outside a-z, A-Z, 0-9, "_", "." and "-".
#
# Unless @keep_8bit is set the copy is treated as raw bytes, so each byte of
# a multi-byte character is masked on its own. A space becomes "+", stays a
# space when @keep_space is set, and becomes "%20" when @mask_space is set.
# With @keep_8bit, non-ASCII characters are passed through unchanged.
#
# Returns a new string converted back to the encoding of +str+.
def encode str
  masked = str.new_string
  masked.force_encoding Encoding::ASCII_8BIT unless @keep_8bit
  masked.gsub! %r/([^a-zA-Z0-9_.-])/ do |ch|
    if ch == " " && !@mask_space
      @keep_space ? ch : "+"
    elsif @keep_8bit && !ch.ascii_only?
      ch
    else
      format "%%%02X", ch.ord
    end
  end
  masked.encode! str.encoding
end
encode_hash( hash) → str click to toggle source

Encode a Hash to a URL-style string.

utx = URLText.new

h = { name: "John Doe", age: 42 }
utx.encode_hash h
    #=> "name=John+Doe&age=42"

h = { a: ";;;", x: "äöü" }
utx.encode_hash h
    #=> "a=%3B%3B%3B&x=%C3%A4%C3%B6%C3%BC"
# File lib/hermeneutics/escape.rb, line 385
# Turn +hash+ into a URL-style parameter string.
#
# Keys and values are converted with +to_s+ and passed through +encode+, then
# joined by PAIR_SET; the pairs are joined by PAIR_SEP. A +nil+ value drops
# the pair, +true+ repeats the key as the value, +false+ gives an empty value.
def encode_hash hash
  pairs = []
  hash.each do |key, val|
    text = case val
           when nil   then next
           when true  then key
           when false then ""
           else            val
           end
    pairs << [encode(key.to_s), encode(text.to_s)].join(PAIR_SET)
  end
  pairs.join PAIR_SEP
end
mkurl( path, hash, anchor = nil) → str click to toggle source

Make a URL.

utx = URLText.new
h = { name: "John Doe", age: "42" }
utx.mkurl "myscript.rb", h, "chapter"
    #=> "myscript.rb?name=John+Doe&age=42#chapter"
# File lib/hermeneutics/escape.rb, line 406
# Build a URL from a path, an optional parameter hash and an optional anchor.
#
# path::   anything; its string form starts the URL.
# hash::   parameters, appended as "?..." via +encode_hash+ when present.
# anchor:: appended as "#..." when present.
#
# The hash and the anchor may be given in either order, and either may be
# omitted: <code>mkurl path, "anchor"</code> and
# <code>mkurl path, nil, "anchor"</code> both yield "path#anchor".
#
# Returns a new string.
def mkurl path, hash = nil, anchor = nil
  unless Hash === hash
    # Swap only when that can be right: the third argument is absent or is the
    # Hash. A blind swap would move an anchor given after an explicit nil into
    # the hash slot, and encode_hash would then be called on a String.
    hash, anchor = anchor, hash if anchor.nil? || Hash === anchor
  end
  r = "#{path}"
  r << "?#{encode_hash hash}" if hash
  r << "##{anchor}" if anchor
  r
end