class Blinkr::Extensions::Links
Public Class Methods
new(config)
click to toggle source
# File lib/blinkr/extensions/links.rb, line 10 def initialize(config) @config = config @links = {} end
Public Instance Methods
analyze(context, browser)
click to toggle source
# File lib/blinkr/extensions/links.rb, line 30 def analyze(context, browser) puts '----------------------' puts " #{@links.length} links to check " puts '----------------------' start = DateTime.now processed = 0 # Find the internal links @links.select{|k| k.start_with? @config.base_url}.each do |url, locations| # TODO figure out what to do about relative links link = URI.parse(url) # fix up links so they're proper, also drop fragments and queries as they won't be in the sitemap that way link.fragment = nil link.query = nil link.path = link.path.gsub(/\/+/, '/') if link.path unless context.pages.keys.include?(link.to_s) || context.pages.keys.include?((link.to_s + '/')) locations.each do |location| location[:page].errors << Blinkr::Error.new({:severity => :warning, :category => 'Resource missing from sitemap', :type => '<a href=""> target missing from sitemap', :url => url, :title => "#{url} (line #{location[:line]})", :code => nil, :message => 'Missing from sitemap', :detail => 'Checked with Typheous', :snippet => location[:snippet], :icon => 'fa-bookmark-o' }) # It wasn't in the sitemap, so we'll add it to the "external_links" to still be checked end end end @links.each do |url, metadata| # if link start_with? @config.base_url check to see if it's in the sitemap.xml browser.process(url, @config.max_retrys, :method => :get, :followlocation => true, :timeout => 60, :cookiefile => '_tmp/cookies', :cookiejar => '_tmp/cookies', :connecttimeout => 30, :maxredirs => 3) do |resp| puts "Loaded #{url} via #{browser.name} #{'(cached)' if resp.cached?}" if @config.verbose resp_code = resp.code.to_i if ((resp_code > 300 && resp_code < 400) && @config.warning_on_300s) || resp_code > 400 response = resp detail = nil if response.status_message.nil? message = response.return_message else message = response.status_message detail = response.return_message unless resp.return_message == 'No error' end severity = :danger if response.code.to_i >= 300 && response.code.to_i < 400 severity = :warning end metadata.each do |src| src[:page].errors << Blinkr::Error.new({:severity => severity, :category => 'Resources missing', :type => '<a href=""> target cannot be loaded', :url => url, :title => "#{url} (line #{src[:line]})", :code => response.code.to_i, :message => message, :detail => detail, :snippet => src[:snippet], :icon => 'fa-bookmark-o'}) unless response.success? end end processed += 1 puts "Processed #{processed} of #{@links.size}" if @config.verbose end end browser.hydra.run if browser.is_a? Blinkr::TyphoeusWrapper puts "Total time in links: #{(DateTime.now.to_time - start.to_time).duration}" if @config.verbose end
collect(page)
click to toggle source
# File lib/blinkr/extensions/links.rb, line 15 def collect(page) page.body.css('a[href]').each do |a| attr = a.attribute('href') src = page.response.effective_url url = attr.value unless @config.skipped?(url) url = sanitize url, src unless url.nil? @links[url] ||= [] @links[url] << {:page => page, :line => attr.line, :snippet => attr.parent.to_s} end end end end