module TypoHero
Constants
- AMP_RE
- BDQUO
- CAPS_INNER_RE
- CAPS_RE
- DASH_RE
- ELLIPSIS
- ESCAPE
- ESCAPE_RE
- EXCLUDED_TAGS
- EXCLUDED_TAGS_RE
- INITIAL_QUOTES
- INITIAL_QUOTE_RE
- INLINE_RE
- LATEX
- LATEX_RE
- LDQUO
- LEFT_PAREN_RE
- LEFT_QUOTES
- LEFT_QUOTE_RE
- LSQUO
- MDASH
- MDASH_SPACE
- MDASH_SPACE_RE
- NBSP
- NBSP_THIN
- NDASH
- NDASH_SPACE
- NDASH_SPACE_RE
- ORDINAL_RE
- PARAGRAPH_RE
- PRIMES
- PRIME_RE
- RDQUO
- REPLACE_AMP_RE
- RIGHT_QUOTES
- RIGHT_QUOTE_RE
- RSQUO
- SPECIAL
- SPECIAL_RE
- TOKENIZER_RE
- TWO_QUOTES
- UNESCAPE
- UNESCAPE_RE
- UNITS
- UNITS_RE
- VERSION
- WIDONT_INLINE_RE
- WIDONT_NBSP_RE
- WIDONT_PARAGRAPH_RE
Public Instance Methods
amp(s)
click to toggle source
# File lib/typohero.rb, line 344
# Wraps every match of REPLACE_AMP_RE in a <span class="amp"> element,
# modifying +s+ in place.
#
# Returns +s+, or nil when nothing matched (String#gsub! semantics).
#
# NOTE(review): the replacement contains a bare "&"; this may be an HTML
# entity that was lost when the documentation page was rendered -- confirm
# against lib/typohero.rb.
def amp(s)
  wrapped_amp = '<span class="amp">&</span>'
  s.gsub!(REPLACE_AMP_RE, wrapped_amp)
end
caps(s)
click to toggle source
# File lib/typohero.rb, line 348
# Wraps the first capture group of every CAPS_RE match in a
# <span class="caps"> element, modifying +s+ in place.
#
# Returns +s+, or nil when nothing matched (String#gsub! semantics).
def caps(s)
  wrapped_caps = '<span class="caps">\1</span>'
  s.gsub!(CAPS_RE, wrapped_caps)
end
dash_spaces(s)
click to toggle source
# File lib/typohero.rb, line 339
# Applies the em-dash spacing rule and then the en-dash spacing rule to +s+,
# modifying it in place.
#
# The return value is that of the en-dash pass only: +s+ if that pass changed
# anything, nil otherwise (String#gsub! semantics).
def dash_spaces(s)
  s.gsub!(MDASH_SPACE_RE, MDASH_SPACE)
  ndash_result = s.gsub!(NDASH_SPACE_RE, NDASH_SPACE)
  ndash_result
end
decode(s)
click to toggle source
# File lib/typohero.rb, line 316
# Replaces numeric character references (decimal "&#65;" and hexadecimal
# "&#x41;", case-insensitive) in +s+ with the character they denote,
# modifying +s+ in place.
#
# References that do not denote a valid UTF-8 code point (for example
# "&#x110000;", a surrogate such as "&#xD800;", or an absurdly large number)
# are left untouched instead of raising RangeError.
#
# Returns +s+, or nil when +s+ contains no numeric reference
# (String#gsub! semantics).
def decode(s)
  s.gsub!(/&#x([0-9A-F]+);|&#([0-9]+);/i) do |entity|
    hex, dec = $1, $2
    codepoint = hex ? hex.to_i(16) : dec.to_i(10)
    begin
      # NOTE(review): 38.chr('UTF-8') is already "&", so this branch is
      # redundant as written; the literal may originally have been an HTML
      # entity -- confirm against lib/typohero.rb.
      codepoint == 38 ? '&' : codepoint.chr('UTF-8')
    rescue RangeError
      # Integer#chr rejects code points outside the Unicode range and
      # surrogates; keep the original reference text in that case.
      entity
    end
  end
end
enhance(input)
click to toggle source
# File lib/typohero.rb, line 252
# Runs the full typographic pipeline over +input+ and returns the result.
#
# The input is split by #tokenize. Only tokens of type :text are rewritten;
# every token (rewritten or not) is kept, in order, for the final join.
#
# Pass 1 (per text token, during tokenization): decode, escape, units,
# primes, special, latex, quotes, dash_spaces. #quotes also receives the
# last character of the previous text token as it was before rewriting.
# Then #widont runs over the complete token list.
# Pass 2 (per text token): caps, initial_quotes, amp, ordinals, nobr,
# unescape.
#
# The joined tokens are passed through #html_safe together with +input+.
def enhance(input)
  all_tokens = []
  text_tokens = []
  previous_tail = nil

  tokenize(input) do |token, kind|
    # The same String object is rewritten in place below, so it can be
    # recorded before the text-only processing.
    all_tokens << token
    next unless kind == :text

    # Remember the final character before the token is rewritten.
    tail = token[-1]

    decode(token)
    escape(token)
    units(token)
    primes(token)
    special(token)
    latex(token)
    quotes(token, previous_tail)
    dash_spaces(token)

    previous_tail = tail
    text_tokens << token
  end

  widont(all_tokens)

  text_tokens.each do |token|
    caps(token)
    initial_quotes(token)
    amp(token)
    ordinals(token)
    nobr(token)
    unescape(token)
  end

  html_safe(input, all_tokens.join)
end
escape(s)
click to toggle source
# File lib/typohero.rb, line 323
# Substitutes every match of ESCAPE_RE in +s+ using ESCAPE, modifying +s+
# in place. ESCAPE is defined elsewhere in the file (presumably a lookup
# table keyed by the matched text -- confirm).
#
# Returns +s+, or nil when nothing matched (String#gsub! semantics).
def escape(s)
  pattern = ESCAPE_RE
  replacement = ESCAPE
  s.gsub!(pattern, replacement)
end
html_safe(src, dst)
click to toggle source
# File lib/typohero.rb, line 312
# Propagates the "HTML safe" marker from +src+ to +dst+.
#
# If +src+ responds to +html_safe?+ and answers true, returns
# +dst.html_safe+; otherwise returns +dst+ unchanged.
def html_safe(src, dst)
  return dst unless src.respond_to?(:html_safe?) && src.html_safe?

  dst.html_safe
end
initial_quotes(s)
click to toggle source
# File lib/typohero.rb, line 352
# Substitutes every match of INITIAL_QUOTE_RE in +s+ using INITIAL_QUOTES,
# modifying +s+ in place. Both constants are defined elsewhere in the file.
#
# Returns +s+, or nil when nothing matched (String#gsub! semantics).
def initial_quotes(s)
  pattern = INITIAL_QUOTE_RE
  replacement = INITIAL_QUOTES
  s.gsub!(pattern, replacement)
end
latex(s)
click to toggle source
# File lib/typohero.rb, line 335
# Substitutes every match of LATEX_RE in +s+ using LATEX, modifying +s+ in
# place. Both constants are defined elsewhere in the file.
#
# Returns +s+, or nil when nothing matched (String#gsub! semantics).
def latex(s)
  pattern = LATEX_RE
  replacement = LATEX
  s.gsub!(pattern, replacement)
end
nobr(s)
click to toggle source
# File lib/typohero.rb, line 356
# Wraps hyphenated compounds (runs of word characters joined by one or more
# single hyphens, e.g. "state-of-the-art") in a <span class="nobr"> element,
# modifying +s+ in place.
#
# Returns +s+, or nil when nothing matched (String#gsub! semantics).
def nobr(s)
  hyphenated_word = /[\p{Word}]+(-[\p{Word}]+)+/
  s.gsub!(hyphenated_word, '<span class="nobr">\0</span>')
end
ordinals(s)
click to toggle source
# File lib/typohero.rb, line 365
# Wraps the first capture group of every ORDINAL_RE match in a
# <span class="ord"> element, modifying +s+ in place.
#
# Returns +s+, or nil when nothing matched (String#gsub! semantics).
def ordinals(s)
  wrapped_ordinal = '<span class="ord">\1</span>'
  s.gsub!(ORDINAL_RE, wrapped_ordinal)
end
primes(s)
click to toggle source
# File lib/typohero.rb, line 360
# Substitutes every match of PRIME_RE in +s+ using PRIMES, modifying +s+ in
# place. Both constants are defined elsewhere in the file.
#
# Returns +s+, or nil when nothing matched (String#gsub! semantics).
def primes(s)
  # Special case for inches and minutes, seconds
  pattern = PRIME_RE
  replacement = PRIMES
  s.gsub!(pattern, replacement)
end
quotes(s, prev_last_char)
click to toggle source
# File lib/typohero.rb, line 369
# Converts straight quotes in +s+ to typographic quotes, modifying +s+ in
# place.
#
# s              - the text token to rewrite.
# prev_last_char - last character of the preceding text token, or nil when
#                  there is none.
#
# When the token consists of a single quote character (optionally followed
# by one trailing newline), the quote is looked up in RIGHT_QUOTES if
# +prev_last_char+ is a non-space character and in LEFT_QUOTES otherwise,
# and the method returns nil.
#
# Otherwise three substitutions run in order: TWO_QUOTES for an adjacent
# double+single pair before a word character, RIGHT_QUOTES for
# RIGHT_QUOTE_RE matches, and LEFT_QUOTES for every remaining quote. The
# result of the last substitution is returned (+s+ or nil).
def quotes(s, prev_last_char)
  # The previous pattern /\A['"]\Z/ also accepted a trailing newline and then
  # used the whole token (quote + "\n") as the lookup key. Capture the quote
  # and the newline separately so the lookup uses the quote character alone.
  # NOTE(review): assumes LEFT_QUOTES / RIGHT_QUOTES are keyed by the bare
  # quote characters -- confirm against their definitions.
  if s =~ /\A(['"])(\n?)\z/
    quote, newline = $1, $2
    table = prev_last_char =~ /\P{Space}/ ? RIGHT_QUOTES : LEFT_QUOTES
    s.replace(table[quote] + newline)
    return
  end

  # Special case for double sets of quotes, e.g.
  #   <p>He said, "'Quoted' words in a larger quote."</p>
  s.gsub!(/(?:"'|'")(?=\p{Word})/, TWO_QUOTES)
  s.gsub!(RIGHT_QUOTE_RE, RIGHT_QUOTES)
  s.gsub!(/['"]/, LEFT_QUOTES)
end
special(s)
click to toggle source
# File lib/typohero.rb, line 331
# Substitutes every match of SPECIAL_RE in +s+ using SPECIAL, modifying +s+
# in place. Both constants are defined elsewhere in the file.
#
# Returns +s+, or nil when nothing matched (String#gsub! semantics).
def special(s)
  pattern = SPECIAL_RE
  replacement = SPECIAL
  s.gsub!(pattern, replacement)
end
tokenize(input) { |s, type| ... }
click to toggle source
# File lib/typohero.rb, line 137
# Scans +input+ with TOKENIZER_RE and yields each token together with a
# type symbol: :comment, :cdata, :tag, :excluded, :latex or :text.
#
# Three counters carry state between tokens:
# * excluded_depth - nesting depth of tags matching EXCLUDED_TAGS_RE
#   (decremented when that regexp's first group matched, incremented
#   otherwise, never below zero);
# * latex_depth    - nesting depth of \( \[ ... \) \] delimiters;
# * dollar_count   - number of "$$" delimiters seen; an odd count means the
#   scanner is inside a $$ ... $$ section.
#
# Returns the value of String#scan called with a block.
def tokenize(input)
  excluded_depth = 0
  latex_depth = 0
  dollar_count = 0

  input.scan(TOKENIZER_RE) do |token|
    kind =
      if token.start_with?('<!--')
        :comment
      elsif token.start_with?('<![')
        :cdata
      end

    # Tags are only recognised outside of LaTeX sections.
    if kind.nil? && latex_depth.zero? && dollar_count.even? && token.start_with?('<')
      if token =~ EXCLUDED_TAGS_RE
        excluded_depth += $1 ? -1 : 1
        excluded_depth = 0 if excluded_depth < 0
        kind = :excluded
      else
        kind = excluded_depth.zero? ? :tag : :excluded
      end
    end

    # LaTeX delimiters are only recognised outside of excluded tags.
    if kind.nil? && excluded_depth.zero?
      case token
      when /\A\\[\(\[]\Z/
        latex_depth += 1
        kind = :latex
      when /\A\\[\)\]]\Z/
        latex_depth -= 1 if latex_depth > 0
        kind = :latex
      when '$$'
        dollar_count += 1
        kind = :latex
      end
    end

    # Anything still unclassified inherits the surrounding context.
    if kind.nil?
      kind =
        if excluded_depth != 0
          :excluded
        elsif latex_depth != 0 || dollar_count.odd?
          :latex
        else
          :text
        end
    end

    yield(token, kind)
  end
end
truncate(input, *max_words_or_separator)
click to toggle source
# File lib/typohero.rb, line 200
# Truncates +input+ after a number of words and/or at a separator and
# appends ELLIPSIS when something was cut off.
#
# input                  - the markup to truncate.
# max_words_or_separator - any mix of arguments; the first Integer is the
#                          word limit, the first non-Integer is the
#                          separator (a Regexp, or anything accepted by
#                          Regexp.union). A separator that does not occur in
#                          +input+ is ignored.
#
# Tokens come from #tokenize_with_tags, which yields the token, its type and
# a +tags+ array. NOTE(review): +tags+ is treated here as the stack of
# currently open tag names (each entry is emitted as "</name>" on
# truncation) -- confirm against tokenize_with_tags.
#
# Returns the truncated markup passed through #html_safe together with
# +input+.
def truncate(input, *max_words_or_separator)
  # Integer replaces the Fixnum checks: Fixnum is deprecated since Ruby 2.4
  # and no longer defined on Ruby 3.2+, where referencing it raises NameError.
  max_words = max_words_or_separator.find { |arg| arg.is_a?(Integer) }
  separator = max_words_or_separator.find { |arg| !arg.is_a?(Integer) }
  if separator
    separator = Regexp.union(separator) unless separator.is_a?(Regexp)
    separator = nil unless input =~ separator
  end

  out, tail, truncated = '', '', false
  tokenize_with_tags(input) do |s, type, tags|
    if separator && (type == :comment || type == :text || type == :latex || type == :tag) && separator === s
      # Regexp#=== set the match data; keep the text preceding the separator.
      out << $` if type == :text
      if type == :tag
        if s =~ /\A<\//
          # A closing tag acting as separator is still emitted after the cut.
          tail << s
        else
          # Drop the last entry of +tags+ so this tag does not get a closer.
          tags.pop
        end
      end
      truncated = tags
      break
    elsif max_words == 0
      # Word budget exhausted: stop at the next text, but keep emitting
      # non-text tokens after the cut until then.
      if type == :text
        truncated = tags
        break
      end
      tail << s
    else
      if max_words && type == :text
        s =~ /\A(\p{Space}*)(.*)\Z/m
        ws, w = $1, $2.split(/\p{Space}+/)
        if w.size > max_words
          out << ws << w[0...max_words].join(' ')
          truncated = tags
          break
        end
        max_words -= w.size
      end
      out << s
    end
  end

  if truncated
    # Replace trailing whitespace/punctuation with the ellipsis, then close
    # every tag still recorded in +truncated+.
    out.sub!(/[\p{Space}\p{Punct}]*\Z/, ELLIPSIS)
    tail << "</#{truncated.pop}>" until truncated.empty?
  end

  html_safe(input, out << tail)
end
unescape(s)
click to toggle source
# File lib/typohero.rb, line 327
# Substitutes every match of UNESCAPE_RE in +s+ using UNESCAPE, modifying
# +s+ in place. Both constants are defined elsewhere in the file.
#
# Returns +s+, or nil when nothing matched (String#gsub! semantics).
def unescape(s)
  pattern = UNESCAPE_RE
  replacement = UNESCAPE
  s.gsub!(pattern, replacement)
end
units(s)
click to toggle source
# File lib/typohero.rb, line 382
# Substitutes every match of UNITS_RE in +s+ using UNITS, modifying +s+ in
# place. Both constants are defined elsewhere in the file.
#
# Returns +s+, or nil when nothing matched (String#gsub! semantics).
def units(s)
  pattern = UNITS_RE
  replacement = UNITS
  s.gsub!(pattern, replacement)
end
widont(tokens)
click to toggle source
# File lib/typohero.rb, line 282
# Walks +tokens+ from last to first and joins the final two words of a run
# of text with NBSP, rewriting the affected token strings in place.
#
# States used while walking backwards:
#   0 - nothing to do until the next WIDONT_PARAGRAPH_RE token;
#   1 - just passed a paragraph boundary, looking for the last word;
#   2 - last word found with whitespace before it at the start of a token
#       (remembered in +widow+); waiting for an earlier token ending in a word;
#   3 - part of the last word found, but no whitespace yet.
#
# Tokens matching WIDONT_INLINE_RE are skipped; a token matching
# WIDONT_NBSP_RE resets the state to 0.
#
# Returns nil.
def widont(tokens)
  state = 1
  widow = nil

  tokens.reverse_each do |token|
    if token =~ WIDONT_PARAGRAPH_RE
      state = 1
      next
    end
    next if token =~ WIDONT_INLINE_RE

    if token =~ WIDONT_NBSP_RE
      state = 0
    elsif state == 1 || state == 3
      tail_pattern =
        if state == 1
          /(\P{Space}+)?(\p{Space}+)?(\P{Space}+\p{Space}*)\Z/m
        else
          /(\P{Space}+)?(\p{Space}+)(\P{Space}*)\Z/m
        end

      if token =~ tail_pattern
        if $1 && $2
          # Both words are inside this token: swap the gap for NBSP.
          token.replace "#{$`}#{$1}#{NBSP}#{$3}"
          state = 0
        elsif $2
          # Whitespace but no preceding word here: remember the token.
          state = 2
          widow = token
        else
          state = 3
        end
      end
    elsif state == 2 && token =~ /(\P{Space}+\p{Space}*)\Z/m
      # The preceding word ends this token: bind it to the remembered widow.
      widow.sub!(/\A\p{Space}*/, NBSP)
      state = 0
    end
  end

  nil
end