class SiteInspector::Endpoint::Content

Public Instance Methods

body() click to toggle source
# File lib/site-inspector/checks/content.rb, line 23
# Returns the string form of `document`, tagged as UTF-8 with invalid byte
# sequences removed. The result is memoized in @body.
#
# The string is duplicated before `force_encoding` because `document` may be
# nil, and `nil.to_s` is a frozen string on Ruby 2.7+; mutating it in place
# would raise FrozenError.
def body
  @body ||= document.to_s.dup.force_encoding('UTF-8').scrub('')
end
doc()
Alias for: document
doctype() click to toggle source
# File lib/site-inspector/checks/content.rb, line 39
# Returns the `external_id` of the document's internal subset.
#
# Returns nil instead of raising NoMethodError when `document` is nil (it is
# only built when a response exists) or when it has no internal subset.
def doctype
  document&.internal_subset&.external_id
end
document() click to toggle source
# File lib/site-inspector/checks/content.rb, line 17
# Parses the body of `response` with Nokogiri::HTML and memoizes the parsed
# document in @doc. Returns nil when `response` is falsy.
def document
  require 'nokogiri' # loaded on first use rather than at file load
  return unless response

  @doc ||= Nokogiri::HTML(response.body)
end
Also aliased as: doc
generator() click to toggle source
# File lib/site-inspector/checks/content.rb, line 43
# Returns the `content` attribute of the document's
# <meta name="generator"> tag, memoized in @generator.
#
# Returns nil when there is no document (it is only built when a response
# exists) or when no such tag is found, instead of raising NoMethodError.
def generator
  @generator ||= begin
    tag = document&.at('meta[name="generator"]')
    tag['content'] if tag
  end
end
humans_txt?() click to toggle source
# File lib/site-inspector/checks/content.rb, line 35
# Reports whether `path_exists?('humans.txt')` holds.
#
# Returns nil when `proper_404s?` is falsy. Otherwise the probe result is
# memoized in @humans_txt, including a false result: `||=` would repeat the
# probe on every call for a site that has no humans.txt.
def humans_txt?
  return unless proper_404s?
  return @humans_txt if defined?(@humans_txt)

  @humans_txt = path_exists?('humans.txt')
end
path_exists?(path) click to toggle source

Given a path (e.g., “/data”), check whether that path exists on the canonical endpoint.

# File lib/site-inspector/checks/content.rb, line 7
# Checks whether `path` can be fetched successfully from the endpoint.
#
# When `endpoint.up?` is falsy that value is returned as-is and no request is
# made. Otherwise returns `success?` of a request for `path` issued with
# `followlocation: true`.
def path_exists?(path)
  reachable = endpoint.up?
  return reachable unless reachable

  endpoint.request(path: path, followlocation: true).success?
end
prefetch() click to toggle source
# File lib/site-inspector/checks/content.rb, line 50
# Queues requests for robots.txt, sitemap.xml, humans.txt and the random path
# on SiteInspector.hydra, then runs the hydra. Does nothing (returns nil) when
# the endpoint is not up.
def prefetch
  return unless endpoint.up?

  request_options = SiteInspector.typhoeus_defaults.merge(followlocation: true)
  paths = ['robots.txt', 'sitemap.xml', 'humans.txt', random_path]

  paths.each do |relative_path|
    url = URI.join(endpoint.uri, relative_path)
    SiteInspector.hydra.queue(Typhoeus::Request.new(url, request_options))
  end

  SiteInspector.hydra.run
end
proper_404s?() click to toggle source
# File lib/site-inspector/checks/content.rb, line 61
# Reports whether a request for the random path does NOT succeed, i.e. the
# negation of `path_exists?(random_path)`.
#
# The result is memoized in @proper_404s, including false: `||=` would
# repeat the probe on every call whenever the answer is false.
def proper_404s?
  return @proper_404s if defined?(@proper_404s)

  @proper_404s = !path_exists?(random_path)
end
response() click to toggle source

The default Check#response method comes from a HEAD request. The content check has a special response that includes the body from a GET request.

# File lib/site-inspector/checks/content.rb, line 13
# Returns the result of `endpoint.request(method: :get)`, memoized in
# @response once it is truthy.
def response
  return @response if @response

  @response = endpoint.request(method: :get)
end
robots_txt?() click to toggle source
# File lib/site-inspector/checks/content.rb, line 27
# Reports whether `path_exists?('robots.txt')` holds.
#
# Returns nil when `proper_404s?` is falsy. Otherwise the probe result is
# memoized in @robots_txt, including a false result: `||=` would repeat the
# probe on every call for a site that has no robots.txt.
def robots_txt?
  return unless proper_404s?
  return @robots_txt if defined?(@robots_txt)

  @robots_txt = path_exists?('robots.txt')
end
sitemap_xml?() click to toggle source
# File lib/site-inspector/checks/content.rb, line 31
# Reports whether `path_exists?('sitemap.xml')` holds.
#
# Returns nil when `proper_404s?` is falsy. Otherwise the probe result is
# memoized in @sitemap_xml, including a false result: `||=` would repeat the
# probe on every call for a site that has no sitemap.xml.
def sitemap_xml?
  return unless proper_404s?
  return @sitemap_xml if defined?(@sitemap_xml)

  @sitemap_xml = path_exists?('sitemap.xml')
end
to_h() click to toggle source
# File lib/site-inspector/checks/content.rb, line 65
# Builds a hash summarising the content check. `prefetch` is called first,
# then each value is read from the method of the corresponding name.
def to_h
  prefetch

  summary = {}
  summary[:doctype]     = doctype
  summary[:generator]   = generator
  summary[:sitemap_xml] = sitemap_xml?
  summary[:robots_txt]  = robots_txt?
  summary[:humans_txt]  = humans_txt?
  summary[:proper_404s] = proper_404s?
  summary
end

Private Instance Methods

random_path() click to toggle source
# File lib/site-inspector/checks/content.rb, line 79
# Returns a random hex string from SecureRandom.hex, generated once and then
# reused from @random_path on later calls.
def random_path
  require 'securerandom'
  return @random_path if @random_path

  @random_path = SecureRandom.hex
end