class BadLinkFinder::Page
Attributes
path[R]
Public Class Methods
new(mirror_dir, path)
click to toggle source
# File lib/bad_link_finder/page.rb, line 5
# Opens one page of the mirror and parses it as HTML.
#
# mirror_dir - location of the mirror; combined with +path+ using `+` to
#              locate the file on disk.
# path       - page path within the mirror. It is stored in @path with its
#              HTML ending stripped.
def initialize(mirror_dir, path)
  @path = strip_html_ending(path)
  # The block form closes the file; its return value is the parsed document.
  @doc = File.open(mirror_dir + path) { |page_file| Nokogiri::HTML(page_file.read) }
end
Public Instance Methods
id()
click to toggle source
# File lib/bad_link_finder/page.rb, line 21
# Returns the +id+ attribute of the first <article> element that is not nested
# inside another <article>, or nil when the document has no such element (or
# the element has no id).
#
# The result is cached behind a `defined?` guard rather than `||=`, so a nil
# result is remembered as well and the XPath query runs at most once.
def id
  return @id if defined?(@id)

  outer_article = @doc.xpath('(//article[not(ancestor::article)])').first
  @id = outer_article && outer_article['id']
end
links()
click to toggle source
# File lib/bad_link_finder/page.rb, line 15
# Lists the href of every <a> element in the document, leaving out those that
# ignore_link? rejects and passing the rest through strip_html_ending.
# The list is built on first use and cached in @links.
def links
  @links ||= begin
    hrefs = @doc.css('a').map { |anchor| anchor['href'] }
    hrefs.reject { |href| ignore_link?(href) }
         .map { |href| strip_html_ending(href) }
  end
end
Protected Instance Methods
ignore_link?(href)
click to toggle source
# File lib/bad_link_finder/page.rb, line 39
# Decides whether an href should be left out of the page's link list.
#
# href - raw href attribute value, or nil when the attribute is absent.
#
# Returns true for a missing href, an in-page fragment ("#..."), and for
# mailto:, tel: and javascript: URIs, none of which point at a page that can
# be checked. Returns false for everything else.
def ignore_link?(href)
  return true if href.nil?

  # URI schemes are case-insensitive, so compare against a lowercased copy.
  href.downcase.start_with?('#', 'mailto:', 'tel:', 'javascript:')
end
strip_html_ending(href)
click to toggle source
# File lib/bad_link_finder/page.rb, line 31
# Normalises a path or link target by removing its HTML file ending.
#
# href - String path or href (must not be nil).
#
# Absolute http:// and https:// URLs are returned untouched. For anything else
# the leftmost "index.html" that begins a path segment, or ".html", is removed
# and whatever followed it (query string, fragment, ...) is kept.
#
# Returns a String.
def strip_html_ending(href)
  # Require the full scheme separator so relative names that merely begin with
  # "http" (e.g. "httpd.html") are still normalised.
  return href if href.start_with?('http://', 'https://')

  # "index.html" is only dropped when it is a whole file name (at the start of
  # the string or right after a "/"); otherwise "myindex.html" would be cut
  # down to "my" instead of "myindex".
  href.sub(%r{(?<!\?)(?:(?:\A|(?<=/))index\.html|\.html)(.*)}, '\1')
end