module Card::Content::Clean

Tools for cleaning content, especially for stripping unwanted HTML tags and attributes.

Constants

ALLOWED_TAGS
ATTR_VALUE_RE

Public Instance Methods

clean!(string, tags=ALLOWED_TAGS) click to toggle source
# File lib/card/content/clean.rb, line 35
# Cleans +string+ by passing it through clean_tags with the given tag list,
# then, when Cardio.config.space_last_in_multispace is set, through
# clean_spaces as well.
#
# @param string the content to clean
# @param tags the allowed-tag lookup handed to clean_tags
#   (defaults to ALLOWED_TAGS)
# @return the cleaned content
def clean! string, tags=ALLOWED_TAGS
  result = clean_tags string, tags
  return result unless Cardio.config.space_last_in_multispace

  clean_spaces result
end

Private Instance Methods

clean_spaces(string) click to toggle source
# File lib/card/content/clean.rb, line 52
# Rewrites every match of "a space followed by a captured run" (anchored at a
# line start or word boundary) as the captured run followed by a single space.
#
# NOTE(review): as rendered here, the captured run looks like plain spaces,
# which would make this substitution a no-op. The run was presumably
# non-breaking spaces (or "&nbsp;") in the original file -- confirm.
#
# @param string the content to process
# @return a new string with the substitution applied
def clean_spaces string
  multispace = /(?:^|\b) ((?: )+)/
  string.gsub multispace, '\1 '
end
clean_tag(match, ok_tags) click to toggle source
# File lib/card/content/clean.rb, line 56
# Builds the replacement text for one matched tag.
#
# @param match indexable match data: [1] holds any leading slashes, [2] the
#   tag name, [3] the raw attribute text
# @param ok_tags lookup from lower-cased tag name to its allowed attributes
# @return a single space when the tag has no entry in +ok_tags+; otherwise the
#   tag rebuilt from the lower-cased name and the output of html_attribs
def clean_tag match, ok_tags
  tag_name = match[2].downcase
  allowed_attrs = ok_tags[tag_name]
  # tags without an entry in the lookup are replaced by a space
  return " " unless allowed_attrs

  slashes = match[1]
  attribs = html_attribs tag_name, match[3], allowed_attrs
  "<#{slashes}#{attribs}>"
end
clean_tags(string, ok_tags) click to toggle source

Method that cleans the String of HTML tags and attributes outside of the allowed list.

# File lib/card/content/clean.rb, line 45
# Cleans +string+ of HTML tags and attributes outside of the allowed list,
# then removes HTML comments.
#
# Every tag-like match is replaced by the result of clean_tag. Comments are
# stripped afterwards; the comment pattern uses multiline mode so that
# comments spanning several lines are removed too.
#
# @param string the content to clean (converted with #to_str first)
# @param ok_tags the allowed-tag lookup handed to clean_tag
# @return [String] the cleaned content
def clean_tags string, ok_tags
  # to_str is required: the match data is nil inside the block if string is
  # a SafeBuffer
  without_bad_tags = string.to_str.gsub(%r{<(/*)(\w+)([^>]*)>}) do
    clean_tag Regexp.last_match, ok_tags
  end
  # /m lets "." cross newlines so multi-line comments are stripped as well
  without_bad_tags.gsub(/<!--.*?-->/m, "")
end
html_attribs(tag, raw_attr, ok_attrs) click to toggle source
# File lib/card/content/clean.rb, line 63
# Assembles the inside of a cleaned tag: the tag name followed by each allowed
# attribute for which process_attribute yields a non-blank value.
#
# @param tag the tag name to emit first
# @param raw_attr the raw attribute text taken from the original tag
# @param ok_attrs the attribute names permitted on this tag
# @return [String] the tag name and kept attributes, joined by single spaces
def html_attribs tag, raw_attr, ok_attrs
  pieces = [tag]
  ok_attrs.each do |ok_attr|
    quote, value = process_attribute ok_attr, raw_attr
    next if value.blank?

    pieces << "#{ok_attr}=#{quote}#{value}#{quote}"
  end
  pieces.join " "
end
process_attribute(attrib, all_attributes) click to toggle source
# File lib/card/content/clean.rb, line 70
# Looks for +attrib+ in the raw attribute text and extracts its value.
#
# The character right after "attrib =" selects both the quote to emit and the
# ATTR_VALUE_RE entry used for extraction: index 0 for a single quote, 1 for a
# double quote, 2 for anything else (emitted with a double quote).
#
# @param attrib the attribute name to search for (matched case-insensitively)
# @param all_attributes the raw attribute text of a tag
# @return [Array] a two-element array of quote character and value; the value
#   is nil when the attribute is not found
def process_attribute attrib, all_attributes
  finder = /\b#{attrib}\s*=\s*(?=(.))/i
  return ['"', nil] unless all_attributes =~ finder

  found = Regexp.last_match
  first_char = found[1]
  quote_index = %w[' "].index first_char
  # keep the author's quote character when there is one, else use a double quote
  quote = quote_index ? first_char : '"'
  value_re = ATTR_VALUE_RE[quote_index || 2]
  [quote, process_attribute_match(found.post_match, value_re, attrib)]
end
process_attribute_match(rest_value, reg_exp, attrib) click to toggle source

NOTE: for the “class” attribute, only classes beginning with “w-” (case-insensitive) are kept; all other classes are dropped (deprecated).

# File lib/card/content/clean.rb, line 84
# Narrows the text following an attribute's "=" down to the attribute value.
#
# For the "class" attribute only the whitespace-separated entries that begin
# with "w-" (case-insensitive) are kept.
#
# NOTE(review): when +reg_exp+ does not match, +rest_value+ is returned
# unchanged and the "class" filter is skipped -- confirm this is intended.
#
# @param rest_value the text following the attribute's "="
# @param reg_exp the pattern whose first match is taken as the value
# @param attrib the attribute name
# @return [String] the extracted (and, for "class", filtered) value
def process_attribute_match rest_value, reg_exp, attrib
  found = rest_value.match reg_exp
  return rest_value unless found

  value = found[0]
  return value unless attrib == "class"

  value.split(/\s+/).grep(/^w-/i).join(" ")
end