class SiteInspector::Endpoint::Content
Public Instance Methods
body()
click to toggle source
# File lib/site-inspector/checks/content.rb, line 23
# Returns the parsed document serialized to a String, tagged as UTF-8 and
# re-encoded with invalid byte sequences replaced by an empty string.
# The result is cached in @body after the first call.
def body
  return @body if @body

  markup = document.to_s.force_encoding('UTF-8')
  @body = markup.encode('UTF-8', invalid: :replace, replace: '')
end
doctype()
click to toggle source
# File lib/site-inspector/checks/content.rb, line 39
# Returns the external identifier of the document's internal DTD subset.
#
# Returns nil instead of raising NoMethodError when there is no parsed
# document (#document yields nil when there is no response) or when the
# document has no internal subset.
def doctype
  parsed = document
  subset = parsed && parsed.internal_subset
  subset && subset.external_id
end
document()
click to toggle source
# File lib/site-inspector/checks/content.rb, line 17
# Parses the response body with Nokogiri::HTML and caches the parsed
# document in @doc. Returns nil when #response is falsy.
def document
  # Nokogiri is required here, on each call, rather than at file load.
  require 'nokogiri'

  return unless response
  return @doc if @doc

  @doc = Nokogiri::HTML(response.body)
end
Also aliased as: doc
generator()
click to toggle source
# File lib/site-inspector/checks/content.rb, line 43
# Returns the "content" attribute of the page's <meta name="generator">
# tag, or nil when the tag is absent or there is no parsed document.
#
# The lookup result is cached in @generator, including a nil result, so a
# page without a generator tag is queried only once. Nothing is cached
# while #document is nil, so a later call can still succeed.
def generator
  return @generator if defined?(@generator)

  parsed = document
  return unless parsed

  tag = parsed.at('meta[name="generator"]')
  @generator = tag && tag['content']
end
humans_txt?()
click to toggle source
# File lib/site-inspector/checks/content.rb, line 35
# Reports whether 'humans.txt' exists, as determined by #path_exists?.
#
# Returns nil when #proper_404s? is false; presumably existence checks are
# not trusted on such a site (confirm intent with the maintainers).
# The result is cached in @humans_txt. The cache is checked with defined?
# because `||=` would repeat the request whenever the answer was false.
def humans_txt?
  return unless proper_404s?
  return @humans_txt if defined?(@humans_txt)

  @humans_txt = path_exists?('humans.txt')
end
path_exists?(path)
click to toggle source
Given a path (e.g., “/data”), check whether that path exists on the canonical endpoint.
# File lib/site-inspector/checks/content.rb, line 7
# Requests +path+ on the endpoint, following redirects, and returns whether
# that request reports success. When the endpoint is not up, no request is
# made and the falsy result of endpoint.up? is returned as-is.
def path_exists?(path)
  reachable = endpoint.up?
  return reachable unless reachable

  endpoint.request(path: path, followlocation: true).success?
end
prefetch()
click to toggle source
# File lib/site-inspector/checks/content.rb, line 50
# Queues a request for robots.txt, sitemap.xml, humans.txt and the random
# path on SiteInspector.hydra, then runs the hydra. Does nothing when the
# endpoint is not up.
def prefetch
  return unless endpoint.up?

  options = SiteInspector.typhoeus_defaults.merge(followlocation: true)
  paths = ['robots.txt', 'sitemap.xml', 'humans.txt', random_path]

  paths.each do |path|
    target = URI.join(endpoint.uri, path)
    fetch = Typhoeus::Request.new(target, options)
    SiteInspector.hydra.queue(fetch)
  end

  SiteInspector.hydra.run
end
proper_404s?()
click to toggle source
# File lib/site-inspector/checks/content.rb, line 61
# Reports whether the site fails to serve a random path: true when
# #path_exists? is falsy for #random_path, false otherwise.
#
# The result is cached in @proper_404s. The cache is checked with defined?
# because `||=` would probe the random path again on every call whenever
# the answer was false.
def proper_404s?
  return @proper_404s if defined?(@proper_404s)

  @proper_404s = !path_exists?(random_path)
end
response()
click to toggle source
The default Check#response method comes from a HEAD request. The content check has a special response that includes the body from a GET request.
# File lib/site-inspector/checks/content.rb, line 13
# Returns the endpoint's response to a GET request, cached in @response
# once a truthy response has been obtained.
def response
  return @response if @response

  @response = endpoint.request(method: :get)
end
robots_txt?()
click to toggle source
# File lib/site-inspector/checks/content.rb, line 27
# Reports whether 'robots.txt' exists, as determined by #path_exists?.
#
# Returns nil when #proper_404s? is false; presumably existence checks are
# not trusted on such a site (confirm intent with the maintainers).
# The result is cached in @robots_txt (previously misspelled @bodts_txt).
# The cache is checked with defined? because `||=` would repeat the request
# whenever the answer was false.
def robots_txt?
  return unless proper_404s?
  return @robots_txt if defined?(@robots_txt)

  @robots_txt = path_exists?('robots.txt')
end
sitemap_xml?()
click to toggle source
# File lib/site-inspector/checks/content.rb, line 31
# Reports whether 'sitemap.xml' exists, as determined by #path_exists?.
#
# Returns nil when #proper_404s? is false; presumably existence checks are
# not trusted on such a site (confirm intent with the maintainers).
# The result is cached in @sitemap_xml. The cache is checked with defined?
# because `||=` would repeat the request whenever the answer was false.
def sitemap_xml?
  return unless proper_404s?
  return @sitemap_xml if defined?(@sitemap_xml)

  @sitemap_xml = path_exists?('sitemap.xml')
end
to_h()
click to toggle source
# File lib/site-inspector/checks/content.rb, line 65
# Runs #prefetch, then returns a Hash of this check's results keyed by
# :doctype, :generator, :sitemap_xml, :robots_txt, :humans_txt and
# :proper_404s.
def to_h
  prefetch

  # Entries are filled in the same order the checks are evaluated.
  report = {}
  report[:doctype] = doctype
  report[:generator] = generator
  report[:sitemap_xml] = sitemap_xml?
  report[:robots_txt] = robots_txt?
  report[:humans_txt] = humans_txt?
  report[:proper_404s] = proper_404s?
  report
end
Private Instance Methods
random_path()
click to toggle source
# File lib/site-inspector/checks/content.rb, line 79
# Returns a random hexadecimal string from SecureRandom.hex, generated on
# first use and cached in @random_path so every later call returns the
# same value.
def random_path
  # securerandom is required here, on each call, rather than at file load.
  require 'securerandom'

  return @random_path if @random_path

  @random_path = SecureRandom.hex
end