class Onebox::Engine::AllowlistedGenericOnebox
Public Class Methods
===(other)
click to toggle source
Calls superclass method
# File lib/onebox/engine/allowlisted_generic_onebox.rb, line 179 def self.===(other) other.kind_of?(URI) ? host_matches(other, allowed_domains) || probable_wordpress(other) || probable_discourse(other) : super end
allowed_domains()
click to toggle source
# File lib/onebox/engine/allowlisted_generic_onebox.rb, line 20 def self.allowed_domains @allowed_domains ||= default_allowed_domains.dup end
allowed_domains=(list)
click to toggle source
# File lib/onebox/engine/allowlisted_generic_onebox.rb, line 16 def self.allowed_domains=(list) @allowed_domains = list end
allowed_twitter_labels()
click to toggle source
# File lib/onebox/engine/allowlisted_generic_onebox.rb, line 175 def self.allowed_twitter_labels ['brand', 'price', 'usd', 'cad', 'reading time', 'likes'] end
default_allowed_domains()
click to toggle source
# File lib/onebox/engine/allowlisted_generic_onebox.rb, line 24 def self.default_allowed_domains %w( 23hq.com 500px.com 8tracks.com abc.net.au answers.com arstechnica.com ask.com battle.net bbc.co.uk bbs.boingboing.net bestbuy.ca bestbuy.com bloomberg.com businessinsider.com change.org cnet.com cnn.com codepen.io collegehumor.com consider.it coursera.org cracked.com dailymail.co.uk dailymotion.com deadline.com dell.com deviantart.com digg.com dotsub.com ebay.ca ebay.co.uk ebay.com ehow.com espn.go.com etsy.com facebook.com findery.com folksy.com forbes.com foxnews.com funnyordie.com gifs.com groupon.com howtogeek.com huffingtonpost.ca huffingtonpost.com hulu.com ign.com ikea.com imdb.com indiatimes.com itunes.apple.com khanacademy.org kickstarter.com kinomap.com lessonplanet.com linkedin.com liveleak.com livestream.com mashable.com medium.com meetup.com mixcloud.com mlb.com myspace.com nba.com npr.org nytimes.com photobucket.com pinterest.com reference.com rottentomatoes.com samsung.com scribd.com slideshare.net sourceforge.net speakerdeck.com spotify.com streamable.com techcrunch.com ted.com thefreedictionary.com theglobeandmail.com thenextweb.com theonion.com thestar.com thesun.co.uk thinkgeek.com tmz.com torontosun.com tumblr.com twitpic.com usatoday.com viddler.com vine.co walmart.com washingtonpost.com wi.st wikia.com wikihow.com wired.com wistia.com wonderhowto.com wsj.com zappos.com zillow.com ) end
default_html_providers()
click to toggle source
Often using the `html` attribute is not what we want, like for some blogs that include the entire page HTML
. However for some providers like Flickr it allows us to return gifv and galleries.
# File lib/onebox/engine/allowlisted_generic_onebox.rb, line 138 def self.default_html_providers ['Flickr', 'Meetup'] end
host_matches(uri, list)
click to toggle source
# File lib/onebox/engine/allowlisted_generic_onebox.rb, line 163 def self.host_matches(uri, list) !!list.find { |h| %r((^|\.)#{Regexp.escape(h)}$).match(uri.host) } end
html_providers()
click to toggle source
# File lib/onebox/engine/allowlisted_generic_onebox.rb, line 142 def self.html_providers @html_providers ||= default_html_providers.dup end
html_providers=(new_provs)
click to toggle source
# File lib/onebox/engine/allowlisted_generic_onebox.rb, line 146 def self.html_providers=(new_provs) @html_providers = new_provs end
https_hosts()
click to toggle source
# File lib/onebox/engine/allowlisted_generic_onebox.rb, line 159 def self.https_hosts %w(slideshare.net dailymotion.com livestream.com imgur.com flickr.com) end
priority()
click to toggle source
# File lib/onebox/engine/allowlisted_generic_onebox.rb, line 12 def self.priority 200 end
probable_discourse(uri)
click to toggle source
# File lib/onebox/engine/allowlisted_generic_onebox.rb, line 167 def self.probable_discourse(uri) !!(uri.path =~ /\/t\/[^\/]+\/\d+(\/\d+)?(\?.*)?$/) end
probable_wordpress(uri)
click to toggle source
# File lib/onebox/engine/allowlisted_generic_onebox.rb, line 171 def self.probable_wordpress(uri) !!(uri.path =~ /\d{4}\/\d{2}\//) end
rewrites()
click to toggle source
A re-written URL converts http:// -> https://
# File lib/onebox/engine/allowlisted_generic_onebox.rb, line 151 def self.rewrites @rewrites ||= https_hosts.dup end
rewrites=(new_list)
click to toggle source
# File lib/onebox/engine/allowlisted_generic_onebox.rb, line 155 def self.rewrites=(new_list) @rewrites = new_list end
Public Instance Methods
data()
click to toggle source
# File lib/onebox/engine/allowlisted_generic_onebox.rb, line 197 def data @data ||= begin html_entities = HTMLEntities.new d = { link: link }.merge(raw) if !Onebox::Helpers.blank?(d[:title]) d[:title] = html_entities.decode(Onebox::Helpers.truncate(d[:title], 80)) end d[:description] ||= d[:summary] if !Onebox::Helpers.blank?(d[:description]) d[:description] = html_entities.decode(Onebox::Helpers.truncate(d[:description], 250)) end if !Onebox::Helpers.blank?(d[:site_name]) d[:domain] = html_entities.decode(Onebox::Helpers.truncate(d[:site_name], 80)) elsif !Onebox::Helpers.blank?(d[:domain]) d[:domain] = "http://#{d[:domain]}" unless d[:domain] =~ /^https?:\/\// d[:domain] = URI(d[:domain]).host.to_s.sub(/^www\./, '') rescue nil end # prefer secure URLs d[:image] = d[:image_secure_url] || d[:image_url] || d[:thumbnail_url] || d[:image] d[:image] = Onebox::Helpers::get_absolute_image_url(d[:image], @url) d[:image] = Onebox::Helpers::normalize_url_for_output(html_entities.decode(d[:image])) d[:image] = nil if Onebox::Helpers.blank?(d[:image]) d[:video] = d[:video_secure_url] || d[:video_url] || d[:video] d[:video] = nil if Onebox::Helpers.blank?(d[:video]) d[:published_time] = d[:article_published_time] unless Onebox::Helpers.blank?(d[:article_published_time]) if !Onebox::Helpers.blank?(d[:published_time]) d[:article_published_time] = Time.parse(d[:published_time]).strftime("%-d %b %y") d[:article_published_time_title] = Time.parse(d[:published_time]).strftime("%I:%M%p - %d %B %Y") end # Twitter labels if !Onebox::Helpers.blank?(d[:label1]) && !Onebox::Helpers.blank?(d[:data1]) && !!AllowlistedGenericOnebox.allowed_twitter_labels.find { |l| d[:label1] =~ /#{l}/i } d[:label_1] = Onebox::Helpers.truncate(d[:label1]) d[:data_1] = Onebox::Helpers.truncate(d[:data1]) end if !Onebox::Helpers.blank?(d[:label2]) && !Onebox::Helpers.blank?(d[:data2]) && !!AllowlistedGenericOnebox.allowed_twitter_labels.find { |l| d[:label2] =~ /#{l}/i } unless Onebox::Helpers.blank?(d[:label_1]) d[:label_2] = Onebox::Helpers.truncate(d[:label2]) d[:data_2] = Onebox::Helpers.truncate(d[:data2]) else d[:label_1] = Onebox::Helpers.truncate(d[:label2]) d[:data_1] = Onebox::Helpers.truncate(d[:data2]) end end if Onebox::Helpers.blank?(d[:label_1]) && !Onebox::Helpers.blank?(d[:price_amount]) && !Onebox::Helpers.blank?(d[:price_currency]) d[:label_1] = "Price" d[:data_1] = Onebox::Helpers.truncate("#{d[:price_currency].strip} #{d[:price_amount].strip}") end skip_missing_tags = [:video] d.each do |k, v| next if skip_missing_tags.include?(k) if v == nil || v == '' errors[k] ||= [] errors[k] << 'is blank' end end d end end
placeholder_html()
click to toggle source
# File lib/onebox/engine/allowlisted_generic_onebox.rb, line 189 def placeholder_html return article_html if is_article? return image_html if is_image? return Onebox::Helpers.video_placeholder_html if is_video? || is_card? return Onebox::Helpers.generic_placeholder_html if is_embedded? to_html end
to_html()
click to toggle source
# File lib/onebox/engine/allowlisted_generic_onebox.rb, line 185 def to_html rewrite_https(generic_html) end
Private Instance Methods
article_html()
click to toggle source
# File lib/onebox/engine/allowlisted_generic_onebox.rb, line 347 def article_html layout.to_html end
card_html()
click to toggle source
# File lib/onebox/engine/allowlisted_generic_onebox.rb, line 334 def card_html escaped_url = ::Onebox::Helpers.normalize_url_for_output(data[:player]) <<~RAW <iframe src="#{escaped_url}" width="#{data[:player_width] || "100%"}" height="#{data[:player_height]}" scrolling="no" frameborder="0"> </iframe> RAW end
embedded_html()
click to toggle source
# File lib/onebox/engine/allowlisted_generic_onebox.rb, line 381 def embedded_html fragment = Nokogiri::HTML5::fragment(data[:html]) fragment.css("img").each { |img| img["class"] = "thumbnail" } if iframe = fragment.at_css("iframe") iframe.remove_attribute("style") iframe["width"] = data[:width] || "100%" iframe["height"] = data[:height] iframe["scrolling"] = "no" iframe["frameborder"] = "0" end fragment.to_html end
generic_html()
click to toggle source
# File lib/onebox/engine/allowlisted_generic_onebox.rb, line 276 def generic_html return article_html if is_article? return video_html if is_video? return image_html if is_image? return embedded_html if is_embedded? return card_html if is_card? return article_html if (has_text? || is_image_article?) end
has_image?()
click to toggle source
# File lib/onebox/engine/allowlisted_generic_onebox.rb, line 314 def has_image? !Onebox::Helpers.blank?(data[:image]) end
has_text?()
click to toggle source
# File lib/onebox/engine/allowlisted_generic_onebox.rb, line 296 def has_text? has_title? && !Onebox::Helpers.blank?(data[:description]) end
has_title?()
click to toggle source
# File lib/onebox/engine/allowlisted_generic_onebox.rb, line 300 def has_title? !Onebox::Helpers.blank?(data[:title]) end
image_html()
click to toggle source
# File lib/onebox/engine/allowlisted_generic_onebox.rb, line 351 def image_html return if Onebox::Helpers.blank?(data[:image]) escaped_src = ::Onebox::Helpers.normalize_url_for_output(data[:image]) alt = data[:description] || data[:title] width = data[:image_width] || data[:thumbnail_width] || data[:width] height = data[:image_height] || data[:thumbnail_height] || data[:height] "<img src='#{escaped_src}' alt='#{alt}' width='#{width}' height='#{height}' class='onebox'>" end
is_article?()
click to toggle source
# File lib/onebox/engine/allowlisted_generic_onebox.rb, line 291 def is_article? (data[:type] =~ /article/ || data[:asset_type] =~ /article/) && has_text? end
is_card?()
click to toggle source
# File lib/onebox/engine/allowlisted_generic_onebox.rb, line 285 def is_card? data[:card] == 'player' && data[:player] =~ URI::regexp && options[:allowed_iframe_regexes]&.any? { |r| data[:player] =~ r } end
is_embedded?()
click to toggle source
# File lib/onebox/engine/allowlisted_generic_onebox.rb, line 324 def is_embedded? return false unless data[:html] && data[:height] return true if AllowlistedGenericOnebox.html_providers.include?(data[:provider_name]) return false unless data[:html]["iframe"] fragment = Nokogiri::HTML5::fragment(data[:html]) src = fragment.at_css('iframe')&.[]("src") options[:allowed_iframe_regexes]&.any? { |r| src =~ r } end
is_image?()
click to toggle source
# File lib/onebox/engine/allowlisted_generic_onebox.rb, line 308 def is_image? data[:type] =~ /photo|image/ && data[:type] !~ /photostream/ && has_image? end
is_image_article?()
click to toggle source
# File lib/onebox/engine/allowlisted_generic_onebox.rb, line 304 def is_image_article? has_title? && has_image? end
is_video?()
click to toggle source
# File lib/onebox/engine/allowlisted_generic_onebox.rb, line 318 def is_video? data[:type] =~ /^video[\/\.]/ && data[:video_type] == "video/mp4" && # Many sites include 'videos' with text/html types (i.e. iframes) !Onebox::Helpers.blank?(data[:video]) end
rewrite_https(html)
click to toggle source
# File lib/onebox/engine/allowlisted_generic_onebox.rb, line 268 def rewrite_https(html) return unless html if AllowlistedGenericOnebox.host_matches(uri, AllowlistedGenericOnebox.rewrites) html = html.gsub("http://", "https://") end html end
video_html()
click to toggle source
# File lib/onebox/engine/allowlisted_generic_onebox.rb, line 363 def video_html escaped_video_src = ::Onebox::Helpers.normalize_url_for_output(data[:video]) escaped_image_src = ::Onebox::Helpers.normalize_url_for_output(data[:image]) <<-HTML <video title='#{data[:title]}' width='#{data[:video_width]}' height='#{data[:video_height]}' style='max-width:100%' poster='#{escaped_image_src}' controls='' > <source src='#{escaped_video_src}'> </video> HTML end