class Onebox::Engine::AllowlistedGenericOnebox

Public Class Methods

===(other) click to toggle source
Calls superclass method
# File lib/onebox/engine/allowlisted_generic_onebox.rb, line 179
# Decides whether this engine should handle +other+.
#
# A URI is accepted when its host is on the allowlist, or when its path looks
# like a WordPress or a Discourse permalink. Anything that is not a URI is
# handed to the inherited +===+.
#
# @param other [Object] candidate value, typically a URI
# @return [Boolean] for URIs; otherwise whatever the superclass method returns
def self.===(other)
  return super unless other.kind_of?(URI)

  host_matches(other, allowed_domains) ||
    probable_wordpress(other) ||
    probable_discourse(other)
end
allowed_domains() click to toggle source
# File lib/onebox/engine/allowlisted_generic_onebox.rb, line 20
# Current allowlist of domains.
#
# While no list has been assigned, the list is seeded with a copy of
# +default_allowed_domains+ and cached in +@allowed_domains+, so later calls
# return the same (mutable) object.
#
# @return [Array<String>] the cached list
def self.allowed_domains
  return @allowed_domains if @allowed_domains

  @allowed_domains = default_allowed_domains.dup
end
allowed_domains=(list) click to toggle source
# File lib/onebox/engine/allowlisted_generic_onebox.rb, line 16
# Replaces the list returned by +allowed_domains+.
#
# The given object is stored as-is (no copy is made). Assigning nil or false
# makes the next +allowed_domains+ call fall back to a fresh copy of
# +default_allowed_domains+.
#
# @param list [Array<String>, nil] the new allowlist
def self.allowed_domains=(list)
  @allowed_domains = list
end
allowed_twitter_labels() click to toggle source
# File lib/onebox/engine/allowlisted_generic_onebox.rb, line 175
# Keywords that +data+ looks for (as case-insensitive patterns) inside the
# +label1+ / +label2+ values before exposing a label/data pair.
#
# @return [Array<String>] a new array on every call
def self.allowed_twitter_labels
  [
    'brand',
    'price',
    'usd',
    'cad',
    'reading time',
    'likes'
  ]
end
default_allowed_domains() click to toggle source
# File lib/onebox/engine/allowlisted_generic_onebox.rb, line 24
# Built-in allowlist used to seed +allowed_domains+.
#
# Entries are kept in alphabetical order, one row per initial character.
#
# @return [Array<String>] a new array of domain names on every call
def self.default_allowed_domains
  %w[
    23hq.com 500px.com 8tracks.com
    abc.net.au answers.com arstechnica.com ask.com
    battle.net bbc.co.uk bbs.boingboing.net bestbuy.ca bestbuy.com bloomberg.com businessinsider.com
    change.org cnet.com cnn.com codepen.io collegehumor.com consider.it coursera.org cracked.com
    dailymail.co.uk dailymotion.com deadline.com dell.com deviantart.com digg.com dotsub.com
    ebay.ca ebay.co.uk ebay.com ehow.com espn.go.com etsy.com
    facebook.com findery.com folksy.com forbes.com foxnews.com funnyordie.com
    gifs.com groupon.com
    howtogeek.com huffingtonpost.ca huffingtonpost.com hulu.com
    ign.com ikea.com imdb.com indiatimes.com itunes.apple.com
    khanacademy.org kickstarter.com kinomap.com
    lessonplanet.com linkedin.com liveleak.com livestream.com
    mashable.com medium.com meetup.com mixcloud.com mlb.com myspace.com
    nba.com npr.org nytimes.com
    photobucket.com pinterest.com
    reference.com rottentomatoes.com
    samsung.com scribd.com slideshare.net sourceforge.net speakerdeck.com spotify.com streamable.com
    techcrunch.com ted.com thefreedictionary.com theglobeandmail.com thenextweb.com theonion.com
    thestar.com thesun.co.uk thinkgeek.com tmz.com torontosun.com tumblr.com twitpic.com
    usatoday.com
    viddler.com vine.co
    walmart.com washingtonpost.com wi.st wikia.com wikihow.com wired.com wistia.com wonderhowto.com wsj.com
    zappos.com zillow.com
  ]
end
default_html_providers() click to toggle source

Using the `html` attribute is often not what we want: some blogs, for example, put the entire page HTML in it. However, for some providers, such as Flickr, it allows us to return gifv and galleries.

# File lib/onebox/engine/allowlisted_generic_onebox.rb, line 138
# Provider names used to seed +html_providers+.
#
# @return [Array<String>] a new array on every call
def self.default_html_providers
  %w[Flickr Meetup]
end
host_matches(uri, list) click to toggle source
# File lib/onebox/engine/allowlisted_generic_onebox.rb, line 163
# Checks whether the host of +uri+ is one of the domains in +list+, or a
# subdomain of one of them.
#
# Each entry is regex-escaped and has to match the complete tail of the host,
# preceded either by the start of the host or by a dot. "example.com"
# therefore matches "example.com" and "www.example.com" but not
# "notexample.com" or "example.com.evil.net".
#
# The pattern is anchored with \A and \z instead of ^ and $: the latter match
# at line boundaries, which would let a host value containing a line break
# pass the check with only one of its lines.
#
# @param uri [#host] object whose +host+ is tested; a nil host never matches
# @param list [Enumerable<String>] domain names to test against
# @return [Boolean]
def self.host_matches(uri, list)
  host = uri.host
  return false if host.nil?

  list.any? { |domain| /(?:\A|\.)#{Regexp.escape(domain)}\z/.match?(host) }
end
html_providers() click to toggle source
# File lib/onebox/engine/allowlisted_generic_onebox.rb, line 142
# Provider names whose +html+ payload is accepted by +is_embedded?+ without
# an iframe check.
#
# While no list has been assigned, the list is seeded with a copy of
# +default_html_providers+ and cached in +@html_providers+.
#
# @return [Array<String>] the cached list
def self.html_providers
  return @html_providers if @html_providers

  @html_providers = default_html_providers.dup
end
html_providers=(new_provs) click to toggle source
# File lib/onebox/engine/allowlisted_generic_onebox.rb, line 146
# Replaces the list returned by +html_providers+.
#
# The given object is stored as-is (no copy is made). Assigning nil or false
# makes the next +html_providers+ call fall back to a fresh copy of
# +default_html_providers+.
#
# @param new_provs [Array<String>, nil] the new provider list
def self.html_providers=(new_provs)
  @html_providers = new_provs
end
https_hosts() click to toggle source
# File lib/onebox/engine/allowlisted_generic_onebox.rb, line 159
# Hosts used to seed +rewrites+, i.e. the hosts whose generated HTML gets its
# http:// links switched to https://.
#
# @return [Array<String>] a new array on every call
def self.https_hosts
  %w[
    slideshare.net
    dailymotion.com
    livestream.com
    imgur.com
    flickr.com
  ]
end
priority() click to toggle source
# File lib/onebox/engine/allowlisted_generic_onebox.rb, line 12
# Priority value of this engine.
#
# NOTE(review): how the number is interpreted (for example, how engines are
# ordered by it) is decided outside the code shown here; confirm at the call site.
#
# @return [Integer] always 200
def self.priority
  200
end
probable_discourse(uri) click to toggle source
# File lib/onebox/engine/allowlisted_generic_onebox.rb, line 167
# Heuristic: does the path of +uri+ look like a Discourse topic link?
#
# The path has to contain "/t/<slug>/<digits>", optionally followed by
# "/<digits>", at the end of a line.
#
# @param uri [#path] object whose +path+ is inspected
# @return [Boolean]
def self.probable_discourse(uri)
  topic_path = /\/t\/[^\/]+\/\d+(\/\d+)?(\?.*)?$/
  topic_path.match?(uri.path)
end
probable_wordpress(uri) click to toggle source
# File lib/onebox/engine/allowlisted_generic_onebox.rb, line 171
# Heuristic: does the path of +uri+ look like a WordPress dated permalink?
#
# True when the path contains four digits, a slash, two digits and another
# slash anywhere in it (e.g. ".../2020/01/...").
#
# @param uri [#path] object whose +path+ is inspected
# @return [Boolean]
def self.probable_wordpress(uri)
  dated_path = /\d{4}\/\d{2}\//
  dated_path.match?(uri.path)
end
rewrites() click to toggle source

A rewritten URL has http:// converted to https://

# File lib/onebox/engine/allowlisted_generic_onebox.rb, line 151
# Hosts whose generated HTML has http:// replaced by https://.
#
# While no list has been assigned, the list is seeded with a copy of
# +https_hosts+ and cached in +@rewrites+.
#
# @return [Array<String>] the cached list
def self.rewrites
  return @rewrites if @rewrites

  @rewrites = https_hosts.dup
end
rewrites=(new_list) click to toggle source
# File lib/onebox/engine/allowlisted_generic_onebox.rb, line 155
# Replaces the list returned by +rewrites+.
#
# The given object is stored as-is (no copy is made). Assigning nil or false
# makes the next +rewrites+ call fall back to a fresh copy of +https_hosts+.
#
# @param new_list [Array<String>, nil] the new list of hosts
def self.rewrites=(new_list)
  @rewrites = new_list
end

Public Instance Methods

data() click to toggle source
# File lib/onebox/engine/allowlisted_generic_onebox.rb, line 197
# Builds (once) and returns the hash of values used to render this onebox.
#
# The hash starts as +{ link: link }+ merged with +raw+ and is then normalised
# in place: text fields are truncated and entity-decoded, the image and video
# URLs are chosen, the publication time is reformatted, and up to two
# label/data pairs are derived. Finally every key whose value is nil or ''
# (except +:video+) gets an 'is blank' entry in +errors+.
#
# The result is memoized in +@data+, so the work (including the writes to
# +errors+) happens only on the first call.
#
# NOTE(review): +Time.parse+ below is not rescued, so an unparseable
# published time raises out of this method — confirm that is intended.
#
# @return [Hash] the normalised data hash
def data
  @data ||= begin
    html_entities = HTMLEntities.new
    d = { link: link }.merge(raw)

    # Title: truncate to 80, then decode HTML entities.
    if !Onebox::Helpers.blank?(d[:title])
      d[:title] = html_entities.decode(Onebox::Helpers.truncate(d[:title], 80))
    end

    # Description falls back to :summary; truncate to 250, then decode.
    d[:description] ||= d[:summary]
    if !Onebox::Helpers.blank?(d[:description])
      d[:description] = html_entities.decode(Onebox::Helpers.truncate(d[:description], 250))
    end

    # :domain is the site name when there is one; otherwise the host of the
    # existing :domain value with a leading "www." removed.
    if !Onebox::Helpers.blank?(d[:site_name])
      d[:domain] = html_entities.decode(Onebox::Helpers.truncate(d[:site_name], 80))
    elsif !Onebox::Helpers.blank?(d[:domain])
      # Add a scheme so that URI() can extract a host.
      d[:domain] = "http://#{d[:domain]}" unless d[:domain] =~ /^https?:\/\//
      # NOTE(review): the inline rescue turns any StandardError into a nil domain.
      d[:domain] = URI(d[:domain]).host.to_s.sub(/^www\./, '') rescue nil
    end

    # prefer secure URLs
    d[:image] = d[:image_secure_url] || d[:image_url] || d[:thumbnail_url] || d[:image]
    d[:image] = Onebox::Helpers::get_absolute_image_url(d[:image], @url)
    d[:image] = Onebox::Helpers::normalize_url_for_output(html_entities.decode(d[:image]))
    d[:image] = nil if Onebox::Helpers.blank?(d[:image])

    # Same preference order for the video URL; a blank value becomes nil.
    d[:video] = d[:video_secure_url] || d[:video_url] || d[:video]
    d[:video] = nil if Onebox::Helpers.blank?(d[:video])

    # A non-blank :article_published_time overrides :published_time; the
    # parsed time is then written back in a short form and a long (title) form.
    d[:published_time] = d[:article_published_time] unless Onebox::Helpers.blank?(d[:article_published_time])
    if !Onebox::Helpers.blank?(d[:published_time])
      d[:article_published_time] = Time.parse(d[:published_time]).strftime("%-d %b %y")
      d[:article_published_time_title] = Time.parse(d[:published_time]).strftime("%I:%M%p - %d %B %Y")
    end

    # Twitter labels
    # A label/data pair is kept only when both parts are present and the label
    # matches one of allowed_twitter_labels (case-insensitive).
    if !Onebox::Helpers.blank?(d[:label1]) && !Onebox::Helpers.blank?(d[:data1]) && !!AllowlistedGenericOnebox.allowed_twitter_labels.find { |l| d[:label1] =~ /#{l}/i }
      d[:label_1] = Onebox::Helpers.truncate(d[:label1])
      d[:data_1]  = Onebox::Helpers.truncate(d[:data1])
    end
    if !Onebox::Helpers.blank?(d[:label2]) && !Onebox::Helpers.blank?(d[:data2]) && !!AllowlistedGenericOnebox.allowed_twitter_labels.find { |l| d[:label2] =~ /#{l}/i }
      # The second pair moves up into slot 1 when the first pair was not kept.
      unless Onebox::Helpers.blank?(d[:label_1])
        d[:label_2] = Onebox::Helpers.truncate(d[:label2])
        d[:data_2]  = Onebox::Helpers.truncate(d[:data2])
      else
        d[:label_1] = Onebox::Helpers.truncate(d[:label2])
        d[:data_1]  = Onebox::Helpers.truncate(d[:data2])
      end
    end

    # With slot 1 still empty, fall back to a "Price" pair built from the
    # price currency and amount.
    if Onebox::Helpers.blank?(d[:label_1]) && !Onebox::Helpers.blank?(d[:price_amount]) && !Onebox::Helpers.blank?(d[:price_currency])
      d[:label_1] = "Price"
      d[:data_1] = Onebox::Helpers.truncate("#{d[:price_currency].strip} #{d[:price_amount].strip}")
    end

    # Record every nil/'' value as an error, except for the keys listed here.
    skip_missing_tags = [:video]
    d.each do |k, v|
      next if skip_missing_tags.include?(k)
      if v == nil || v == ''
        errors[k] ||= []
        errors[k] << 'is blank'
      end
    end

    d
  end
end
placeholder_html() click to toggle source
# File lib/onebox/engine/allowlisted_generic_onebox.rb, line 189
# HTML used as the placeholder for this onebox.
#
# Articles and images use their regular HTML. Videos and player cards use
# +Onebox::Helpers.video_placeholder_html+, other embedded content uses
# +Onebox::Helpers.generic_placeholder_html+, and everything else falls back
# to +to_html+. The checks run in that order and the first match wins.
#
# @return [String, nil]
def placeholder_html
  if is_article?
    article_html
  elsif is_image?
    image_html
  elsif is_video? || is_card?
    Onebox::Helpers.video_placeholder_html
  elsif is_embedded?
    Onebox::Helpers.generic_placeholder_html
  else
    to_html
  end
end
to_html() click to toggle source
# File lib/onebox/engine/allowlisted_generic_onebox.rb, line 185
# Full HTML for this onebox: the output of +generic_html+ passed through
# +rewrite_https+.
#
# @return [String, nil]
def to_html
  html = generic_html
  rewrite_https(html)
end

Private Instance Methods

article_html() click to toggle source
# File lib/onebox/engine/allowlisted_generic_onebox.rb, line 347
# HTML for the article presentation: delegates to +layout.to_html+.
#
# NOTE(review): +layout+ is not defined in the code shown here; presumably it
# is inherited or mixed in — confirm.
def article_html
  layout.to_html
end
card_html() click to toggle source
# File lib/onebox/engine/allowlisted_generic_onebox.rb, line 334
      # Builds the iframe markup for a player card.
      #
      # The iframe source is +data[:player]+ passed through
      # +Onebox::Helpers.normalize_url_for_output+. The width falls back to
      # "100%" when +data[:player_width]+ is not set.
      #
      # @return [String] the iframe markup, ending with a newline
      def card_html
        player_src = ::Onebox::Helpers.normalize_url_for_output(data[:player])
        width = data[:player_width] || "100%"
        height = data[:player_height]

        <<~RAW
        <iframe src="#{player_src}"
                width="#{width}"
                height="#{height}"
                scrolling="no"
                frameborder="0">
        </iframe>
        RAW
      end
embedded_html() click to toggle source
# File lib/onebox/engine/allowlisted_generic_onebox.rb, line 381
# HTML for provider-supplied embed markup (+data[:html]+).
#
# Every <img> in the markup gets the class "thumbnail". The first <iframe>,
# if there is one, loses its inline style and has its width (default "100%"),
# height, scrolling and frameborder attributes overwritten.
#
# @return [String] the serialized fragment
def embedded_html
  doc = Nokogiri::HTML5::fragment(data[:html])

  doc.css("img").each do |image|
    image["class"] = "thumbnail"
  end

  frame = doc.at_css("iframe")
  if frame
    frame.remove_attribute("style")
    {
      "width" => data[:width] || "100%",
      "height" => data[:height],
      "scrolling" => "no",
      "frameborder" => "0"
    }.each { |attribute, value| frame[attribute] = value }
  end

  doc.to_html
end
generic_html() click to toggle source
# File lib/onebox/engine/allowlisted_generic_onebox.rb, line 276
# Picks the presentation for this onebox and returns its HTML.
#
# The checks run in a fixed order (article, video, image, embedded, card) and
# the first match wins. Content that matches none of them but has text, or a
# title plus an image, is rendered as an article.
#
# @return [String, nil] nil when no presentation applies
def generic_html
  if is_article?
    article_html
  elsif is_video?
    video_html
  elsif is_image?
    image_html
  elsif is_embedded?
    embedded_html
  elsif is_card?
    card_html
  elsif has_text? || is_image_article?
    article_html
  end
end
has_image?() click to toggle source
# File lib/onebox/engine/allowlisted_generic_onebox.rb, line 314
# @return [Boolean] true when +data[:image]+ is not blank
def has_image?
  image = data[:image]
  !Onebox::Helpers.blank?(image)
end
has_text?() click to toggle source
# File lib/onebox/engine/allowlisted_generic_onebox.rb, line 296
# @return [Boolean] true when there is a title and +data[:description]+ is
#   not blank
def has_text?
  return false unless has_title?

  !Onebox::Helpers.blank?(data[:description])
end
has_title?() click to toggle source
# File lib/onebox/engine/allowlisted_generic_onebox.rb, line 300
# @return [Boolean] true when +data[:title]+ is not blank
def has_title?
  title = data[:title]
  !Onebox::Helpers.blank?(title)
end
image_html() click to toggle source
# File lib/onebox/engine/allowlisted_generic_onebox.rb, line 351
# Builds the <img> tag for an image onebox.
#
# The source is +data[:image]+ passed through
# +Onebox::Helpers.normalize_url_for_output+. The alt text is the description
# or, failing that, the title. Width and height come from the first available
# of the image, thumbnail and generic size values.
#
# The alt text and both sizes are HTML-escaped before being interpolated.
# +data+ entity-decodes title and description, so without escaping an
# apostrophe would end the single-quoted attribute early, and markup in the
# text would be emitted verbatim.
#
# @return [String, nil] the tag, or nil when there is no image
def image_html
  return if Onebox::Helpers.blank?(data[:image])

  require 'erb' # for ERB::Util.html_escape; a no-op once loaded

  escaped_src = ::Onebox::Helpers.normalize_url_for_output(data[:image])

  # html_escape calls to_s, so nil becomes '' and integers become digits.
  alt    = ERB::Util.html_escape(data[:description]  || data[:title])
  width  = ERB::Util.html_escape(data[:image_width]  || data[:thumbnail_width]  || data[:width])
  height = ERB::Util.html_escape(data[:image_height] || data[:thumbnail_height] || data[:height])

  "<img src='#{escaped_src}' alt='#{alt}' width='#{width}' height='#{height}' class='onebox'>"
end
is_article?() click to toggle source
# File lib/onebox/engine/allowlisted_generic_onebox.rb, line 291
# Is this content declared as an article, and does it have text to show?
#
# The declaration is taken from +data[:type]+ or, failing that,
# +data[:asset_type]+: either has to contain "article".
#
# @return [Boolean, nil] nil when neither type mentions "article"; otherwise
#   the result of +has_text?+
def is_article?
  type_hit = data[:type] =~ /article/
  type_hit ||= data[:asset_type] =~ /article/
  type_hit && has_text?
end
is_card?() click to toggle source
# File lib/onebox/engine/allowlisted_generic_onebox.rb, line 285
# Is this a player card whose player URL may be embedded in an iframe?
#
# Requires +data[:card]+ to be 'player', +data[:player]+ to contain a URI,
# and the player URL to match at least one of
# +options[:allowed_iframe_regexes]+.
#
# @return [Boolean, nil] nil when the player is not a URI or no regexes are
#   configured
def is_card?
  return false unless data[:card] == 'player'

  player_url = data[:player]
  return unless player_url =~ URI::regexp

  allowed = options[:allowed_iframe_regexes]
  allowed&.any? { |pattern| player_url =~ pattern }
end
is_embedded?() click to toggle source
# File lib/onebox/engine/allowlisted_generic_onebox.rb, line 324
# Can the provider-supplied markup in +data[:html]+ be embedded?
#
# Both +data[:html]+ and +data[:height]+ are required. Markup from a provider
# listed in +html_providers+ is accepted as-is. Otherwise the markup has to
# contain an iframe whose +src+ matches one of
# +options[:allowed_iframe_regexes]+.
#
# @return [Boolean, nil] nil when no regexes are configured
def is_embedded?
  html = data[:html]
  return false unless html && data[:height]
  return true if AllowlistedGenericOnebox.html_providers.include?(data[:provider_name])
  # Cheap substring test before parsing the markup.
  return false unless html["iframe"]

  frame = Nokogiri::HTML5::fragment(html).at_css('iframe')
  src = frame && frame["src"]

  allowed = options[:allowed_iframe_regexes]
  allowed&.any? { |pattern| src =~ pattern }
end
is_image?() click to toggle source
# File lib/onebox/engine/allowlisted_generic_onebox.rb, line 308
# Is this content a single photo/image that has an image to show?
#
# +data[:type]+ has to mention "photo" or "image" but not "photostream".
#
# @return [Boolean, nil] nil when the type does not mention photo/image
def is_image?
  kind = data[:type]
  image_typed = kind =~ /photo|image/ && kind !~ /photostream/
  image_typed && has_image?
end
is_image_article?() click to toggle source
# File lib/onebox/engine/allowlisted_generic_onebox.rb, line 304
# @return [Boolean] true when there is both a title and an image
def is_image_article?
  return false unless has_title?

  has_image?
end
is_video?() click to toggle source
# File lib/onebox/engine/allowlisted_generic_onebox.rb, line 318
# Is this content a directly playable MP4 video?
#
# +data[:type]+ has to start with "video/" or "video.", the declared video
# type has to be "video/mp4", and a video URL has to be present.
#
# @return [Boolean, nil] nil when the type is not a video type
def is_video?
  playable = data[:type] =~ /^video[\/\.]/
  # Many sites include 'videos' with text/html types (i.e. iframes)
  playable &&= data[:video_type] == "video/mp4"
  playable && !Onebox::Helpers.blank?(data[:video])
end
rewrite_https(html) click to toggle source
# File lib/onebox/engine/allowlisted_generic_onebox.rb, line 268
# Switches every "http://" in +html+ to "https://" when this onebox's host is
# on the +rewrites+ list; otherwise returns +html+ unchanged.
#
# @param html [String, nil] generated markup
# @return [String, nil] nil when +html+ is nil or false
def rewrite_https(html)
  return unless html

  rewrite_hosts = AllowlistedGenericOnebox.rewrites
  return html unless AllowlistedGenericOnebox.host_matches(uri, rewrite_hosts)

  html.gsub("http://", "https://")
end
video_html() click to toggle source
# File lib/onebox/engine/allowlisted_generic_onebox.rb, line 363
      def video_html
        escaped_video_src = ::Onebox::Helpers.normalize_url_for_output(data[:video])
        escaped_image_src = ::Onebox::Helpers.normalize_url_for_output(data[:image])

        <<-HTML
          <video
            title='#{data[:title]}'
            width='#{data[:video_width]}'
            height='#{data[:video_height]}'
            style='max-width:100%'
            poster='#{escaped_image_src}'
            controls=''
          >
            <source src='#{escaped_video_src}'>
          </video>
        HTML
      end