class BadLinkFinder::Link
Attributes
error_message[R]
exception[R]
link[R]
url[R]
Public Class Methods
new(page_url, link, logger = BadLinkFinder::NullLogger.new)
click to toggle source
# File lib/bad_link_finder/link.rb, line 8
#
# Builds a Link by resolving +link+ against +page_url+ and immediately
# checking the result with an HTTP request. Ordinary failures are not raised
# to the caller; each one is handed to record_error together with a
# human-readable message.
#
# page_url:: URL of the page the link was found on; used as the base when
#            resolving +link+.
# link::     the link as written on that page.
# logger::   object responding to +info+; defaults to a new
#            BadLinkFinder::NullLogger.
def initialize(page_url, link, logger = BadLinkFinder::NullLogger.new)
  @logger = logger
  @page_url = page_url
  @link = link
  @url = get_url_from_link(link)

  verify_url(@url)
  validate_with_request
rescue URI::InvalidURIError => exception
  record_error("This link is in a bad format", exception)
rescue Mechanize::UnauthorizedError => exception
  # Must be rescued before Mechanize::ResponseCodeError: in current Mechanize
  # releases UnauthorizedError is a subclass of it, so the more general
  # clause would otherwise shadow this one.
  record_error("This link requires authorisation", exception)
rescue Mechanize::ResponseCodeError => exception
  # The first 405/500 flips @head_unsupported and re-runs the whole check;
  # validate_with_request then issues a GET instead of a HEAD. The flag
  # bounds the retry to a single extra attempt.
  if [405, 500].include?(exception.response_code.to_i) && !@head_unsupported
    @head_unsupported = true
    retry
  else
    record_error("This request returned a #{exception.response_code}", exception)
  end
rescue Mechanize::UnsupportedSchemeError => exception
  record_error("This link has a scheme we can't load (should be http or https)", exception)
rescue Mechanize::RedirectLimitReachedError => exception
  record_error("This link might be in a redirect loop", exception)
rescue Mechanize::RobotsDisallowedError => exception
  record_error("This link is blocked by robots.txt or nofollow attributes", exception)
rescue Mechanize::Error, Net::HTTP::Persistent::Error, Timeout::Error, Errno::EINVAL, Errno::ECONNRESET, Errno::ETIMEDOUT, EOFError, Net::HTTPBadResponse, Net::HTTPHeaderSyntaxError, Net::ProtocolError, OpenSSL::SSL::SSLError, SocketError => exception
  # Thanks Net::HTTP
  record_error("The server failed to serve this page properly", exception)
rescue StandardError => exception
  # StandardError rather than Exception, so Interrupt, SystemExit and similar
  # process-level signals propagate instead of being logged as a broken link.
  record_error("Some other exception happened", exception)
end
Public Instance Methods
valid?()
click to toggle source
# File lib/bad_link_finder/link.rb, line 42
#
# Returns true when no error message has been stored for this link, and
# false once record_error has set one.
def valid?
  @error_message.nil?
end
Protected Instance Methods
get_url_from_link(link)
click to toggle source
# File lib/bad_link_finder/link.rb, line 66
#
# Resolves +link+ against the page URL held in @page_url and returns the
# resulting URL as a String. Any error raised by URI.join is left to
# propagate to the caller.
def get_url_from_link(link)
  resolved = URI.join(@page_url, link)
  resolved.to_s
end
record_error(message, exception = nil)
click to toggle source
# File lib/bad_link_finder/link.rb, line 76
#
# Stores the failure on this link and writes it to the logger.
#
# message::   human-readable description, stored in @error_message.
# exception:: the exception behind the failure, if any; stored in
#             @exception and its message appended to the log line.
def record_error(message, exception = nil)
  @error_message = message
  @exception = exception

  # @url is still nil when resolving the link raised before it could be
  # assigned; fall back to the raw link so the log line still says which
  # link was broken.
  target = @url || @link
  detail = exception.message if exception
  @logger.info "---- found broken link #{target}: #{message}: #{detail}"
end
validate_with_request()
click to toggle source
# File lib/bad_link_finder/link.rb, line 48
#
# Requests @url with a freshly configured Mechanize agent and returns
# whatever that request returns. A HEAD request is used unless
# @head_unsupported is set, in which case a GET is issued instead. Errors
# raised by the request are not handled here.
def validate_with_request
  @logger.info "-- testing link #{@link} using #{@url}"
  sleep 0.1 # Recommended pause for gov.uk rate limiting

  browser = Mechanize.new.tap do |mech|
    mech.user_agent = 'GOV.UK link checker'
    mech.keep_alive = false
    mech.history.max_size = 0
    # Certificate verification is switched off for these requests.
    mech.agent.http.verify_mode = OpenSSL::SSL::VERIFY_NONE
    mech.agent.open_timeout = 15
  end

  http_verb = @head_unsupported ? :get : :head
  browser.public_send(http_verb, @url)
end
verify_url(url)
click to toggle source
# File lib/bad_link_finder/link.rb, line 70
#
# Rejects URLs that begin with "http" but do not begin with a proper
# "http://" or "https://" prefix. URLs that do not begin with "http" at all
# are not checked here.
#
# url:: the URL String to check.
#
# Returns nil when the URL is accepted.
# Raises URI::InvalidURIError when the URL is rejected.
def verify_url(url)
  return unless url.start_with?('http')

  # \A anchors at the start of the whole string; ^ would also match at the
  # start of any later line and let a multi-line value through.
  return if url =~ %r{\Ahttps?://}

  raise URI::InvalidURIError, "The URL #{url} should start with http:// or https://"
end