class HtmlProcessor
This utility class works on HTML text.
You can initialize it with either HTML or Markdown text.
Constants
- DESCRIPTION_PROCESSING_ORDER
- SANITIZER_OPTS
Default options for Sanitize
- YOUTUBE_TRANSFORMER
Define Youtube transformer for Sanitize
Attributes
html[R]
original[R]
Public Class Methods
new(text, options = { })
click to toggle source
¶ ↑
Methods
¶ ↑
# File lib/html_processor.rb, line 60
# Build a processor around the given text.
#
# text    - source text; always stored untouched in @original.
# options - hash of settings. Only :format is read here: when its string
#           form is 'markdown' (and text is truthy) the text is rendered to
#           HTML with Redcarpet and the result is passed through
#           Sanitize.fragment using SANITIZER_OPTS. In every other case the
#           text is stored in @html as-is, without sanitizing.
def initialize(text, options = { })
  @original = text

  @html =
    if options[:format].to_s == 'markdown' && text
      # Flags handed to the Redcarpet HTML renderer.
      render_flags = {
        :safe_links_only => true,
        :hard_wrap => true,
        :filter_html => false
      }
      # Flags handed to the Redcarpet Markdown parser.
      markdown_extensions = {
        :autolink => true,
        :no_intraemphasis => true,
        :fenced_code_blocks => true,
        :superscript => true
      }
      markdown = Redcarpet::Markdown.new(Redcarpet::Render::HTML.new(render_flags), markdown_extensions)
      Sanitize.fragment(markdown.render(text), SANITIZER_OPTS)
    else
      text
    end
end
Public Instance Methods
description()
click to toggle source
Returns a description of the document: up to the first two sentences of the first non-blank element matched by a DESCRIPTION_PROCESSING_ORDER selector (an empty string if none is found).
# File lib/html_processor.rb, line 85
# Short description extracted from the document.
#
# Walks the selectors of DESCRIPTION_PROCESSING_ORDER in order, takes the
# first matched element with non-blank content, and keeps up to two leading
# "sentences" from it (runs of characters without . ! ? each optionally
# followed by one of those terminators).
#
# Returns the description String; '' when no selector yields any text.
def description
  # Memoized result. An empty string is truthy, so a previous miss is
  # served from the cache as well.
  return @description if @description

  @description = ''
  DESCRIPTION_PROCESSING_ORDER.each do |selector|
    node = document.css(selector).detect { |candidate| candidate && !candidate.content.blank? }
    next if node.blank? # skip if nil or empty

    # Up to the first two sentences of the element's text.
    sentences = node.content.match(/([^.!?]+[.!?]?)([^.!?]+[.!?]?)?/)
    @description = sentences.captures.compact.join('') if sentences && sentences.captures.any?

    break unless @description.empty?
  end

  @description
end
document()
click to toggle source
Returns a Nokogiri document parsed from the processor's HTML; the parsed document is cached after the first call.
# File lib/html_processor.rb, line 78
# Nokogiri document built from @html; parsed on first use and reused after.
def document
  @document = Nokogiri::HTML(@html) unless @document
  @document
end