class StringTools::HTML::LinksRemoveScrubber

Public Class Methods

new(options) click to toggle source
# File lib/string_tools/html.rb, line 58
# Builds a scrubber from an options hash.
#
# options - Hash with:
#           :whitelist           - required; stored as-is and later queried
#                                  with `include?` by #whitelisted?.
#           :remove_without_host - optional, defaults to true; consulted by
#                                  #call for hrefs whose URI has no host.
#
# Raises KeyError when :whitelist is missing.
def initialize(options)
  # Fetch the mandatory key first so a missing whitelist fails before any
  # state is assigned.
  @whitelist = options.fetch(:whitelist)
  @remove_without_host = options.fetch(:remove_without_host) { true }
  # Flipped to true by #replace_with_content once a node has been unwrapped.
  @is_have_done_changes = false
end

Public Instance Methods

call(node) click to toggle source
# File lib/string_tools/html.rb, line 68
# Inspects a node's `href` and unwraps the node (see #replace_with_content)
# when the link should not be kept.
#
# The node is unwrapped when:
# * the parsed URI has no host and @remove_without_host is truthy;
# * the URI has a host whose unicode form is not accepted by #whitelisted?;
# * Addressable raises InvalidURIError for the href.
#
# Nodes whose href is blank are left untouched.
#
# node - object answering `[]` with 'href' (presumably a Nokogiri node -
#        confirm against the caller).
def call(node)
  link = node['href']
  return if link.blank?

  host = Addressable::URI.parse(link).normalize.host
  if host
    # Compare the unicode (IDN-decoded) form of the host against the whitelist.
    replace_with_content(node) unless whitelisted?(SimpleIDN.to_unicode(host))
  elsif @remove_without_host
    replace_with_content(node)
  end
rescue Addressable::URI::InvalidURIError
  # An href that cannot be parsed is treated like a non-whitelisted link.
  replace_with_content(node)
end
done_changes?() click to toggle source
# File lib/string_tools/html.rb, line 64
# Reports whether this scrubber has unwrapped at least one node.
#
# Returns the current value of @is_have_done_changes: false after
# #initialize, true once #replace_with_content has run.
def done_changes?
  @is_have_done_changes
end
whitelisted?(domain) click to toggle source
# File lib/string_tools/html.rb, line 81
# Checks whether a domain or any of its parent domains is in @whitelist.
#
# The domain is split on '.', and every suffix made of at least two labels
# is tested, shortest first: for "a.b.com" the candidates are "b.com" and
# then "a.b.com". The bare last label ("com", "ru", ...) is never tested.
#
# domain - String host name.
#
# Returns true as soon as a candidate is found in @whitelist, false otherwise.
def whitelisted?(domain)
  labels = domain.split('.')
  2.upto(labels.length).any? do |label_count|
    @whitelist.include?(labels.last(label_count).join('.'))
  end
end

Private Instance Methods

replace_with_content(node) click to toggle source
# File lib/string_tools/html.rb, line 94
# Unwraps a node by swapping it for its own children and records that a
# change has been made.
#
# node - object answering `children` and `swap` (presumably a Nokogiri
#        node - confirm against the caller).
#
# Returns true.
def replace_with_content(node)
  inner_nodes = node.children
  node.swap(inner_nodes)
  # Remembered so #done_changes? can report that something was modified.
  @is_have_done_changes = true
end