class Escape_Escape_Escape
Constants
- BLANK
- CODER
¶ ↑
- CONFIG
- ENCODING_OPTIONS_CLEAN_UTF8
¶ ↑
- HTML_TAB
- INVALID_FILE_NAME_CHARS
- Invalid
- Invalid_HREF
- Invalid_Relative_HREF
- Invalid_Type
- MULTI_CONTROL_AND_UNPRINTABLE
MULTI_CONTROL_CHARS: ==================================¶ ↑
Unicode whitespaces, like 160 codepoint, tabs, etc. Excludes newline.
Examples:
\r\n \r\n -> \n \n
NOTE: Don’t use “\x20” because that is the space character.
Whitespace regex ([:space:]) from:
http://www.rubyinside.com/the-split-is-not-enough-whitespace-shenigans-for-rubyists-5980.html
¶ ↑
- NL
- REGEX_UNSUITABLE_CHARS
From sanitize gem:¶ ↑
https://raw.githubusercontent.com/rgrove/sanitize/master/lib/sanitize.rb
- REPEATING_DOTS
- SPACE
- SPACES
- TAB
- TABS
- TAG_PATTERN
- TWO_SPACES
- VALID_CSS_ATTR
- VALID_CSS_CLASS_NAME
- VALID_CSS_SELECTOR
- VALID_CSS_VALUE
- VALID_CSS_WIDTH
- VALID_HTML_ID
- VALID_HTML_TAG
Public Class Methods
¶ ↑
-
normalized to :KC
-
“\r\n” changed to “\n”
-
all control characters stripped except for “\n”; unless :spaces is given, whitespace is stripped from the beginning
and end. Normalization, then strip:
http://msdn.microsoft.com/en-us/library/dd374126(v=vs.85).aspx http://www.unicode.org/faq/normalization.html
Getting rid of non-ascii characters in ruby: stackoverflow.com/questions/1268289/how-to-get-rid-of-non-ascii-characters-in-ruby
Test: [160, 160, 64, 116, 119, 101, 108, 108, 121, 109, 101, 160, 102, 105, 108, 109].inject('', :<<)
Options:
:tabs :spaces
# File lib/escape_escape_escape.rb, line 148
#
# Returns a cleaned copy of +raw_s+; the argument itself is never mutated
# because all work happens on a dup.
#
# Steps, in order:
#   1. (only with :tabs) TAB is swapped for the HTML_TAB placeholder so it
#      survives the cleaning below.
#   2. The copy is re-encoded to UTF-8 with ENCODING_OPTIONS_CLEAN_UTF8 and
#      scrubbed.
#   3. Remaining TABs become TWO_SPACES, MULTI_CONTROL_AND_UNPRINTABLE matches
#      are removed and REGEX_UNSUITABLE_CHARS matches become a single space.
#   4. The result is normalized with String#to_nfkc (not defined in this block).
#   5. Unless :spaces was given, leading/trailing whitespace is stripped.
#   6. (only with :tabs) the HTML_TAB placeholder is turned back into TAB.
#
# Options (passed as extra positional symbols): :tabs, :spaces
#
# Raises RuntimeError if +raw_s+ is not a String, and ArgumentError if any
# option other than :tabs / :spaces is supplied.
def clean_utf8 raw_s, *opts
  fail("Not a string: #{raw_s.inspect}") unless raw_s.is_a?(String)

  # Reject anything that is not a known option.
  @plaintext_allowed_options ||= [ :spaces, :tabs ]
  unknown = opts - @plaintext_allowed_options
  unless unknown.empty?
    fail(ArgumentError, "INVALID OPTION: #{unknown.inspect}" )
  end

  keep_tabs   = opts.include?(:tabs)
  keep_spaces = opts.include?(:spaces)

  work = raw_s.dup

  # Hide tabs behind a placeholder before they get flattened to spaces.
  work.gsub!(TAB, HTML_TAB) if keep_tabs

  work.encode!(Encoding.find('utf-8'), ENCODING_OPTIONS_CLEAN_UTF8)
  work.scrub!
  work.gsub!(TAB, TWO_SPACES)
  work.gsub!(MULTI_CONTROL_AND_UNPRINTABLE, BLANK)
  work.gsub!(REGEX_UNSUITABLE_CHARS, ' ')

  clean = work.to_nfkc
  clean.strip! unless keep_spaces

  # Restore the tabs that were hidden above.
  clean.gsub!(HTML_TAB, TAB) if keep_tabs

  clean
end
# File lib/escape_escape_escape.rb, line 115
#
# Builds a Regexp from +str+ after passing it through clean_utf8.
# The Regexp is always created with FIXEDENCODING and IGNORECASE; the
# combined option value is memoized in @regexp_opts.
def regexp str
  @regexp_opts ||= Regexp::FIXEDENCODING | Regexp::IGNORECASE
  pattern = clean_utf8(str)
  Regexp.new(pattern, @regexp_opts)
end
Public Instance Methods
# File lib/escape_escape_escape.rb, line 243
#
# Escapes and validates a CSS value.
#
# Call as css(value) or css(property_name, value). With one argument the
# property name defaults to :unknown.
#
# The value is run through +html+; the escaped result must then match
# VALID_CSS_WIDTH when the property name contains "width", otherwise
# VALID_CSS_VALUE.
#
# Returns the escaped value.
# Raises ArgumentError for a wrong number of arguments or when the escaped
# value fails validation.
def css *args
  raw_name, raw =
    case args.size
    when 1 then [:unknown, args.first]
    when 2 then args
    else fail ArgumentError, "Unknown args: #{args.inspect}"
    end

  name  = raw_name.to_s.strip
  clean = html(raw)

  # Width-like properties are checked against their own pattern.
  pattern = name['width'.freeze] ? VALID_CSS_WIDTH : VALID_CSS_VALUE

  fail ArgumentError, "contains invalid chars: #{raw.inspect}" unless clean[pattern]
  clean
end
# File lib/escape_escape_escape.rb, line 268
#
# Returns +val+ unchanged when it is a String matching VALID_CSS_CLASS_NAME.
# Raises Invalid for anything else (non-Strings included).
def css_class_name val
  unless val.is_a?(String) && val[VALID_CSS_CLASS_NAME]
    fail(Invalid, "CSS class name: #{val.inspect}")
  end

  val
end
# File lib/escape_escape_escape.rb, line 227
#
# Cleans +raw+ with clean_utf8 (keeping surrounding whitespace via :spaces)
# and then decodes it with CODER.
#
# Raises RuntimeError if +raw+ is not a String.
def decode_html raw
  if !raw.is_a?(String)
    fail("Not a string: #{raw.inspect}")
  end

  CODER.decode(clean_utf8(raw, :spaces))
end
# File lib/escape_escape_escape.rb, line 308
#
# Recursively escapes +o+ by sending each leaf value to +method_name+
# (default :html).
#
#   Hash    -> new Hash with both keys and values escaped
#   Array   -> new Array with every element escaped
#   Symbol  -> escaped as a String, then converted back to a Symbol
#   String  -> passed to +method_name+ as is
#   true / false / Numeric -> converted with to_s, then passed to +method_name+
#
# Raises Invalid for any other kind of object (nil included).
def escape o, method_name = :html
  case o
  when Hash
    o.each_with_object({}) do |(key, val), escaped|
      escaped[escape(key, method_name)] = escape(val, method_name)
    end
  when Symbol
    send(method_name, o.to_s).to_sym
  when Array
    o.map { |item| escape(item, method_name) }
  when String
    send(method_name, o)
  else
    unless o == true || o == false || o.kind_of?(Numeric)
      fail Invalid, "Not a String, Number, Array, or Hash"
    end
    send(method_name, o.to_s)
  end
end
¶ ↑
A better alternative than “Rack::Utils.escape_html”. Escapes various characters (including ‘&’, ‘<’, ‘>’, and both quotation mark types) to HTML decimal entities. Also escapes the characters from <HTML_ESCAPE_TABLE>.
Text has to be UTF-8 before encoding, according to HTMLEntities gem. Therefore, all text is run through clean_utf8 (via decode_html) before encoding.
¶ ↑
# File lib/escape_escape_escape.rb, line 282
#
# HTML-escapes +raw_text+. The text is first passed through decode_html, and
# the decoded result is then escaped with EscapeUtils.escape_html.
def html raw_text
  decoded = decode_html(raw_text)
  EscapeUtils.escape_html(decoded)
end
# File lib/escape_escape_escape.rb, line 297
#
# Validates an HTML id. +raw_o+ must be a String or Symbol; it is downcased
# and must match VALID_HTML_ID.
#
# Returns the downcased id as a Symbol.
# Raises TypeError for any other type and Invalid when the pattern does not
# match.
def html_id raw_o
  unless raw_o.is_a?(String) || raw_o.is_a?(Symbol)
    fail TypeError, "Not a String or Symbol: #{raw_o.inspect}"
  end

  id = raw_o.to_s.downcase
  raise Invalid, "Invalid chars: #{raw_o.inspect}" unless id[VALID_HTML_ID]

  id.to_sym
end
# File lib/escape_escape_escape.rb, line 286
#
# Validates an HTML tag name. +raw_o+ must be a String or Symbol; it is
# downcased and must match VALID_HTML_TAG.
#
# Returns the downcased tag name as a Symbol.
# Raises TypeError for any other type and Invalid when the pattern does not
# match.
def html_tag raw_o
  unless raw_o.is_a?(String) || raw_o.is_a?(Symbol)
    fail TypeError, "Not a String or Symbol: #{raw_o.inspect}"
  end

  tag = raw_o.to_s.downcase
  raise Invalid, "Invalid chars: #{raw_o.inspect}" unless tag[VALID_HTML_TAG]

  tag.to_sym
end
# File lib/escape_escape_escape.rb, line 340
#
# Parses a JSON String. The text is cleaned with clean_utf8 first and then
# handed to Oj.strict_load.
#
# Raises Invalid if +o+ is not a String.
def json_decode o
  fail Invalid, "Not a String: #{o.inspect}" unless o.is_a?(String)

  cleaned = clean_utf8(o)
  Oj.strict_load(cleaned)
end
# File lib/escape_escape_escape.rb, line 331
#
# Serializes a Hash to JSON. The Hash is converted with clean_for_json and
# dumped by Oj in :strict mode.
#
# Raises Invalid if +o+ is not a Hash.
def json_encode o
  fail Invalid, "Not an object/hash: #{o.inspect}" unless o.is_a?(Hash)

  Oj.dump(clean_for_json(o), mode: :strict)
end
# File lib/escape_escape_escape.rb, line 326
#
# Passes +v+ through untouched when it is a Numeric.
# Raises ArgumentError for anything else.
def num v
  return v if v.is_a?(Numeric)

  fail ArgumentError, "Not a Numeric: #{v.inspect}"
end
# File lib/escape_escape_escape.rb, line 210
#
# Validates that +raw_str+ is a relative link and returns it HTML-escaped.
#
# The value is first passed through +href+ (defined outside this block),
# then decoded with Escape_Escape_Escape.decode_html and parsed with
# URI.parse.
#
# A link is rejected when the parsed URI has a scheme ("http://x/y") OR a
# host. The host check matters for network-path references such as
# "//evil.example/path": they carry no scheme, so a scheme-only check lets
# them through, yet browsers resolve them against another host.
# NOTE(review): +href+ may already reject such input - confirm; the check
# here does not depend on it.
#
# Returns the escaped String from Escape_Escape_Escape.html.
# Raises Invalid_Relative_HREF when the link is not relative. Errors raised
# by +href+, decode_html or URI.parse are not rescued here.
def relative_href raw_str
  str = Escape_Escape_Escape.decode_html href(raw_str)
  uri = URI.parse(str)

  if uri.scheme || uri.host
    fail( Invalid_Relative_HREF, "Is not relative: #{str}" )
  end

  Escape_Escape_Escape.html str
end
Private Instance Methods
# File lib/escape_escape_escape.rb, line 350
#
# Recursively converts +o+ into a structure built only from Hashes with
# String keys, Arrays, Strings, Numerics, nil, true and false.
#
#   Hash   -> new Hash; keys converted with to_s, values converted recursively
#   Array  -> new Array with every element converted recursively
#   Symbol -> its String form
#   String, Numeric, nil, true, false -> returned unchanged
#
# Raises ArgumentError for any other class.
def clean_for_json o
  case o
  when String, Numeric, NilClass, TrueClass, FalseClass
    o
  when Symbol
    o.to_s
  when Array
    o.map { |item| clean_for_json(item) }
  when Hash
    o.each_with_object({}) do |(key, val), out|
      out[key.to_s] = clean_for_json(val)
    end
  else
    fail ArgumentError, "Unknown Class for json: #{o.inspect}"
  end
end