class Escape_Escape_Escape

Constants

BLANK
CODER
CONFIG
ENCODING_OPTIONS_CLEAN_UTF8
HTML_TAB
INVALID_FILE_NAME_CHARS
Invalid
Invalid_HREF
Invalid_Relative_HREF
Invalid_Type
MULTI_CONTROL_AND_UNPRINTABLE

MULTI_CONTROL_CHARS: ==================================

Unicode whitespaces, like 160 codepoint, tabs, etc. Excludes newline.

Examples:

\r\n \r\n -> \n \n

NOTE: Don’t use “\x20” because that is the space character.

Whitespace regex ([:space:]) from:

http://www.rubyinside.com/the-split-is-not-enough-whitespace-shenigans-for-rubyists-5980.html
NL
REGEX_UNSUITABLE_CHARS

From sanitize gem:

https://raw.githubusercontent.com/rgrove/sanitize/master/lib/sanitize.rb
REPEATING_DOTS
SPACE
SPACES
TAB
TABS
TAG_PATTERN
TWO_SPACES
VALID_CSS_ATTR
VALID_CSS_CLASS_NAME
VALID_CSS_SELECTOR
VALID_CSS_VALUE
VALID_CSS_WIDTH
VALID_HTML_ID
VALID_HTML_TAG

Public Class Methods

clean_utf8(raw_s, *opts) click to toggle source
  • normalized to :KC

  • “\r\n” changed to “\n”

  • all control characters stripped except for “\n”

and end. Normalization, then strip:

http://msdn.microsoft.com/en-us/library/dd374126(v=vs.85).aspx
http://www.unicode.org/faq/normalization.html

Getting rid of non-ascii characters in ruby: stackoverflow.com/questions/1268289/how-to-get-rid-of-non-ascii-characters-in-ruby

Test: [160, 160, 64, 116, 119, 101, 108, 108, 121, 109, 101, 160, 102, 105, 108, 109].inject('', :<<)

Options:

:tabs
:spaces
# File lib/escape_escape_escape.rb, line 148
# Returns a cleaned copy of +raw_s+; the argument itself is left untouched
# because all in-place edits are applied to a duplicate.
#
# raw_s - must be a String, otherwise a RuntimeError is raised.
# opts  - any of :spaces (skip the final strip) and :tabs (keep tab
#         characters). Anything else raises ArgumentError.
#
# Order of operations matters here: tabs are swapped for HTML_TAB *before*
# the blanket TAB -> TWO_SPACES substitution so they can be restored at the end.
def clean_utf8(raw_s, *opts)
  raise "Not a string: #{raw_s.inspect}" unless raw_s.is_a?(String)

  # Reject unknown options up front.
  @plaintext_allowed_options ||= [ :spaces, :tabs ]
  unknown = opts - @plaintext_allowed_options
  raise ArgumentError, "INVALID OPTION: #{unknown.inspect}" unless unknown.empty?

  keep_tabs   = opts.include?(:tabs)
  keep_spaces = opts.include?(:spaces)

  work = raw_s.dup

  # Park requested tabs behind a placeholder so the clean-up below skips them.
  work.gsub!(TAB, HTML_TAB) if keep_tabs

  work.encode!(Encoding::UTF_8, ENCODING_OPTIONS_CLEAN_UTF8)
  work.scrub!
  work.gsub!(TAB, TWO_SPACES)
  work.gsub!(MULTI_CONTROL_AND_UNPRINTABLE, BLANK)
  work.gsub!(REGEX_UNSUITABLE_CHARS, ' ')

  normalized = work.to_nfkc

  normalized.strip! unless keep_spaces
  normalized.gsub!(HTML_TAB, TAB) if keep_tabs # restore parked tabs

  normalized
end
regexp(str) click to toggle source
# File lib/escape_escape_escape.rb, line 115
# Builds a Regexp from +str+ after passing it through clean_utf8.
# The resulting pattern is case-insensitive and uses fixed encoding;
# the option mask is computed once and cached in @regexp_opts.
def regexp(str)
  @regexp_opts ||= (Regexp::IGNORECASE | Regexp::FIXEDENCODING)
  pattern = clean_utf8(str)
  Regexp.new(pattern, @regexp_opts)
end
validate_timezone(timezone) click to toggle source

Raises: TZInfo::InvalidTimezoneIdentifier.

# File lib/escape_escape_escape.rb, line 123
# Looks up +timezone+ (stringified and stripped of surrounding whitespace)
# via TZInfo::Timezone.get and returns the identifier of the result.
# Any error raised by the TZInfo lookup propagates to the caller.
def validate_timezone(timezone)
  zone_name = timezone.to_s.strip
  TZInfo::Timezone.get(zone_name).identifier
end

Public Instance Methods

css(*args) click to toggle source
# File lib/escape_escape_escape.rb, line 243
# Escapes and validates a CSS value.
#
# Call as css(value) or css(property_name, value). With a single argument
# the property name defaults to :unknown.
#
# The value is run through +html+, then matched against VALID_CSS_WIDTH when
# the property name contains "width", or VALID_CSS_VALUE otherwise.
#
# Returns the escaped value.
# Raises ArgumentError on a wrong argument count or when the pattern does not match.
def css(*args)
  raw_name, raw =
    case args.size
    when 1 then [:unknown, args.first]
    when 2 then args
    else raise ArgumentError, "Unknown args: #{args.inspect}"
    end

  name  = raw_name.to_s.strip
  clean = html(raw)

  pattern = name.include?('width') ? VALID_CSS_WIDTH : VALID_CSS_VALUE
  raise ArgumentError, "contains invalid chars: #{raw.inspect}" unless clean[pattern]

  clean
end
css_class_name(val) click to toggle source
# File lib/escape_escape_escape.rb, line 268
# Returns +val+ unchanged when it is a String matching VALID_CSS_CLASS_NAME;
# otherwise raises Invalid.
def css_class_name(val)
  acceptable = val.is_a?(String) && val[VALID_CSS_CLASS_NAME]
  fail(Invalid, "CSS class name: #{val.inspect}") unless acceptable
  val
end
decode_html(raw) click to toggle source
# File lib/escape_escape_escape.rb, line 227
# Cleans +raw+ with clean_utf8 (using the :spaces option) and hands the
# result to CODER.decode.
# Raises RuntimeError when +raw+ is not a String.
def decode_html(raw)
  raise "Not a string: #{raw.inspect}" unless raw.is_a?(String)

  cleaned = clean_utf8(raw, :spaces)
  CODER.decode(cleaned)
end
escape(o, method_name = :html) click to toggle source
# File lib/escape_escape_escape.rb, line 308
# Recursively applies the escaping method named +method_name+ (default :html)
# to +o+.
#
# * Hash   - a new Hash with every key and value escaped.
# * Symbol - escaped as a String, then converted back to a Symbol.
# * Array  - a new Array with every element escaped.
# * String - escaped directly.
# * true, false, Numeric - converted with to_s, then escaped.
#
# Anything else raises Invalid.
def escape(o, method_name = :html)
  case o
  when Hash
    o.each_with_object({}) do |(key, value), escaped|
      escaped[escape(key, method_name)] = escape(value, method_name)
    end
  when Symbol
    send(method_name, o.to_s).to_sym
  when Array
    o.map { |item| escape(item, method_name) }
  when String
    send(method_name, o)
  else
    scalar = o == true || o == false || o.kind_of?(Numeric)
    fail Invalid, "Not a String, Number, Array, or Hash" unless scalar
    send(method_name, o.to_s)
  end
end
html(raw_text) click to toggle source

A better alternative than “Rack::Utils.escape_html”. Escapes various characters (including ‘&’, ‘<’, ‘>’, and both quotation mark types) to HTML decimal entities. Also escapes the characters from <HTML_ESCAPE_TABLE>.

Text has to be UTF-8 before encoding, according to HTMLEntities gem. Therefore, all text is run through <plaintext> before encoding.

# File lib/escape_escape_escape.rb, line 282
# Escapes +raw_text+ for HTML: the text is first passed through decode_html,
# and the decoded result is then escaped with EscapeUtils.escape_html.
def html(raw_text)
  decoded = decode_html(raw_text)
  EscapeUtils.escape_html(decoded)
end
html_id(raw_o) click to toggle source
# File lib/escape_escape_escape.rb, line 297
# Validates an HTML id.
#
# +raw_o+ must be a String or Symbol (TypeError otherwise). It is
# stringified and downcased, then matched against VALID_HTML_ID.
#
# Returns the downcased value as a Symbol; raises Invalid when it does not match.
def html_id(raw_o)
  unless raw_o.is_a?(String) || raw_o.is_a?(Symbol)
    fail TypeError, "Not a String or Symbol: #{raw_o.inspect}"
  end

  candidate = raw_o.to_s.downcase
  raise Invalid, "Invalid chars: #{raw_o.inspect}" unless candidate[VALID_HTML_ID]

  candidate.to_sym
end
html_tag(raw_o) click to toggle source
# File lib/escape_escape_escape.rb, line 286
# Validates an HTML tag name.
#
# +raw_o+ must be a String or Symbol (TypeError otherwise). It is
# stringified and downcased, then matched against VALID_HTML_TAG.
#
# Returns the downcased value as a Symbol; raises Invalid when it does not match.
def html_tag(raw_o)
  unless raw_o.is_a?(String) || raw_o.is_a?(Symbol)
    fail TypeError, "Not a String or Symbol: #{raw_o.inspect}"
  end

  candidate = raw_o.to_s.downcase
  raise Invalid, "Invalid chars: #{raw_o.inspect}" unless candidate[VALID_HTML_TAG]

  candidate.to_sym
end
json_decode(o) click to toggle source
# File lib/escape_escape_escape.rb, line 340
# Parses a JSON String: +o+ is cleaned with clean_utf8 and then loaded
# with Oj.strict_load.
# Raises Invalid when +o+ is not a String.
def json_decode(o)
  fail Invalid, "Not a String: #{o.inspect}" unless o.is_a?(String)

  Oj.strict_load(clean_utf8(o))
end
json_encode(o) click to toggle source
# File lib/escape_escape_escape.rb, line 331
# Serializes a Hash to JSON: +o+ is normalized with clean_for_json and
# dumped with Oj in :strict mode.
# Raises Invalid when +o+ is not a Hash.
def json_encode(o)
  fail Invalid, "Not an object/hash: #{o.inspect}" unless o.is_a?(Hash)

  Oj.dump(clean_for_json(o), mode: :strict)
end
num(v) click to toggle source
# File lib/escape_escape_escape.rb, line 326
# Returns +v+ unchanged when it is a Numeric; raises ArgumentError otherwise.
def num(v)
  return v if v.is_a?(Numeric)

  raise ArgumentError, "Not a Numeric: #{v.inspect}"
end
relative_href(raw_str) click to toggle source
# File lib/escape_escape_escape.rb, line 210
# Validates that +raw_str+ is a relative link.
#
# The value is passed through +href+, HTML-decoded, and parsed with
# URI.parse. If the parsed URI has a scheme, Invalid_Relative_HREF is raised.
# Otherwise the decoded string is returned HTML-escaped.
def relative_href(raw_str)
  decoded = Escape_Escape_Escape.decode_html(href(raw_str))

  if URI.parse(decoded).scheme
    fail(Invalid_Relative_HREF, "Is not relative: #{decoded}")
  end

  Escape_Escape_Escape.html(decoded)
end
tag( raw_tag ) click to toggle source

HTML

# File lib/escape_escape_escape.rb, line 222
# Returns +raw_tag+ when it matches TAG_PATTERN, nil otherwise.
def tag(raw_tag)
  raw_tag[TAG_PATTERN] ? raw_tag : nil
end

Private Instance Methods

clean_for_json(o) click to toggle source
# File lib/escape_escape_escape.rb, line 350
# Recursively converts +o+ into a structure made only of JSON-friendly values.
#
# * String, Numeric, nil, true, false - returned as-is.
# * Symbol - converted to a String.
# * Array  - a new Array with each element converted.
# * Hash   - a new Hash with keys stringified and values converted.
#
# Raises ArgumentError for any other class.
def clean_for_json(o)
  case o
  when String, Numeric, NilClass, TrueClass, FalseClass
    o
  when Symbol
    o.to_s
  when Array
    o.map { |item| clean_for_json(item) }
  when Hash
    o.each_with_object({}) do |(key, value), cleaned|
      cleaned[key.to_s] = clean_for_json(value)
    end
  else
    fail ArgumentError, "Unknown Class for json: #{o.inspect}"
  end
end