class SiteChecker::LinkCollector
Attributes
ignore_list[RW]
max_recursion_depth[RW]
visit_references[RW]
Public Class Methods
new() { |self| ... }
click to toggle source
# File lib/site_checker/link_collector.rb, line 5
# Builds a collector. When a block is given, the new instance is yielded to
# it first so the caller can configure it; any setting still unset (nil or
# false) afterwards falls back to its default.
def initialize
  yield(self) if block_given?

  @ignore_list = [] unless @ignore_list
  @visit_references = false unless @visit_references
  @max_recursion_depth = -1 unless @max_recursion_depth
end
Public Instance Methods
check(url, root=nil)
click to toggle source
# File lib/site_checker/link_collector.rb, line 12
# Runs a check starting at +url+.
#
# Resets the collected links and the recursion depth, resolves the root
# (derived from +url+ when +root+ is not given), picks a content reader,
# then registers and processes the start page as a local page. Anchor
# references are evaluated last, once every link has been collected.
def check(url, root=nil)
  @links = {}
  @recursion_depth = 0

  # The content reader is chosen from @root, so @root must be set first.
  @root = figure_out_root(url, root)
  @content_reader = get_content_reader

  start_page = Link.create({:url => url, :kind => :page, :location => :local})
  register_visit(start_page)
  process_local_page(start_page)

  evaluate_anchors
end
local_images()
click to toggle source
# File lib/site_checker/link_collector.rb, line 33
# URLs of the collected links whose location is :local and kind is :image.
def local_images
  get_urls(:local, :image)
end
local_pages()
click to toggle source
# File lib/site_checker/link_collector.rb, line 25
# URLs of the collected links whose location is :local and kind is :page.
def local_pages
  get_urls(:local, :page)
end
problems()
click to toggle source
# File lib/site_checker/link_collector.rb, line 41
# Groups the problematic links by the page that references them.
#
# Returns a Hash mapping each parent URL to an Array of
# "<url> <problem>" strings, one per link reporting has_problem?.
# Returns an empty Hash when nothing has been collected yet (for example
# when called before #check), instead of failing on a nil link table.
def problems
  return {} unless @links

  @links.keys.each_with_object({}) do |link, report|
    next unless link.has_problem?

    (report[link.parent_url] ||= []) << "#{link.url} #{link.problem}"
  end
end
remote_images()
click to toggle source
# File lib/site_checker/link_collector.rb, line 37
# URLs of the collected links whose location is :remote and kind is :image.
def remote_images
  get_urls(:remote, :image)
end
remote_pages()
click to toggle source
# File lib/site_checker/link_collector.rb, line 29
# URLs of the collected links whose location is :remote and kind is :page.
def remote_pages
  get_urls(:remote, :page)
end
Private Instance Methods
collect_links(link)
click to toggle source
# File lib/site_checker/link_collector.rb, line 127
# Reads the content behind +link+ and hands it to the page parser together
# with the ignore list and the root. Returns whatever the parser returns.
# Note: open_reference yields nil when reading fails, so the parser may be
# given nil content.
def collect_links(link)
  page_content = open_reference(link)
  SiteChecker::Parse::Page.parse(page_content, @ignore_list, @root)
end
evaluate_anchors()
click to toggle source
# File lib/site_checker/link_collector.rb, line 142
# Flags every anchor reference that has no matching anchor among the
# collected links by setting its problem to "(404 Not Found)".
# Returns the Array of anchor references that were examined.
def evaluate_anchors
  collected = @links.keys
  anchors = collected.select(&:anchor?)
  references = collected.select(&:anchor_ref?)

  references.each do |reference|
    next unless find_matching_anchor(anchors, reference).empty?

    reference.problem = "(404 Not Found)"
  end
end
figure_out_root(url, root)
click to toggle source
# File lib/site_checker/link_collector.rb, line 53
# Determines the root for a check.
#
# url  - the start URL or path handed to #check.
# root - an explicit root; returned unchanged when given.
#
# Without an explicit root, an absolute URL yields "scheme://host", plus
# ":port" when the URL names a port other than the scheme's default
# (previously the port was dropped, so "http://localhost:3000/x" resolved
# to "http://localhost"). A non-absolute +url+ is used as the root as is.
def figure_out_root(url, root)
  return root if root

  uri = URI(url)
  return url unless uri.absolute?

  authority = uri.host.to_s
  # Keep only explicit, non-default ports so default-port URLs still
  # produce the same root as before.
  authority += ":#{uri.port}" if uri.port && uri.port != uri.default_port
  "#{uri.scheme}://#{authority}"
end
find_matching_anchor(anchors, anchor_ref)
click to toggle source
# File lib/site_checker/link_collector.rb, line 152
# Returns the anchors that +anchor_ref+ points to, as an Array (empty when
# none match).
#
# An anchor on the same parent page matches a reference of the form
# "#<anchor url>"; an anchor on a different parent page matches a
# reference of the form "<anchor parent url>#<anchor url>".
def find_matching_anchor(anchors, anchor_ref)
  anchors.select do |anchor|
    if anchor.parent_url == anchor_ref.parent_url
      anchor_ref.url == "##{anchor.url}"
    else
      anchor_ref.url == "#{anchor.parent_url}##{anchor.url}"
    end
  end
end
get_content_reader()
click to toggle source
# File lib/site_checker/link_collector.rb, line 65
# Picks the content reader for the current root: the web reader when @root
# parses as an absolute URI, the file-system reader otherwise. The reader
# is constructed with the visit_references setting and the root.
def get_content_reader
  reader_class =
    if URI(@root).absolute?
      SiteChecker::IO::ContentFromWeb
    else
      SiteChecker::IO::ContentFromFileSystem
    end

  reader_class.new(@visit_references, @root)
end
get_urls(location, kind)
click to toggle source
# File lib/site_checker/link_collector.rb, line 73
# Returns the URLs of all collected links with the given +location+ and
# +kind+, in collection order.
#
# Returns an empty Array when nothing has been collected yet (for example
# when called before #check), instead of failing on a nil link table.
def get_urls(location, kind)
  return [] unless @links

  @links.keys
        .select { |link| link.location == location && link.kind == kind }
        .map(&:url)
end
open_reference(link)
click to toggle source
# File lib/site_checker/link_collector.rb, line 117
# Fetches the content behind +link+ through the content reader.
#
# Returns the content on success. When the reader raises a StandardError,
# the stripped error message is stored as the link's problem and nil is
# returned, so one failing link does not abort the whole check.
def open_reference(link)
  @content_reader.get(link)
rescue StandardError => e
  link.problem = e.message.strip
  nil
end
process_local_page(parent)
click to toggle source
# File lib/site_checker/link_collector.rb, line 83
# Collects the links found on +parent+ and handles each one.
#
# Every collected link gets +parent+'s URL as its parent_url. Anchor-related
# links are only recorded in the link table; every other link is visited
# unless it has been visited already. Returns the collected links.
def process_local_page(parent)
  collect_links(parent).each do |child|
    child.parent_url = parent.url

    if child.anchor_related?
      # Recorded unconditionally; anchors are resolved later, not visited.
      @links[child] = nil
    elsif !visited?(child)
      visit(child)
    end
  end
end
register_visit(link)
click to toggle source
# File lib/site_checker/link_collector.rb, line 96
# Records +link+ in the link table unless it is already there; an existing
# entry is left untouched.
def register_visit(link)
  return if visited?(link)

  @links[link] = nil
end
stop_recursion?()
click to toggle source
# File lib/site_checker/link_collector.rb, line 132
# Tells whether the recursion limit has been reached.
#
# A max_recursion_depth of -1 never stops the recursion; otherwise this is
# true once the current depth is no longer below the maximum.
def stop_recursion?
  @max_recursion_depth != -1 && @max_recursion_depth <= @recursion_depth
end
visit(link)
click to toggle source
# File lib/site_checker/link_collector.rb, line 104
# Registers +link+ and follows it.
#
# Anything that is not a local page is merely opened (the result of
# open_reference is returned). A local page is processed recursively,
# one level deeper, unless the recursion limit has been reached.
def visit(link)
  register_visit(link)
  return open_reference(link) unless link.local_page?
  return if stop_recursion?

  @recursion_depth += 1
  process_local_page(link)
  @recursion_depth -= 1
end
visited?(link)
click to toggle source
# File lib/site_checker/link_collector.rb, line 100
# True when +link+ is already present in the link table.
def visited?(link)
  @links.key?(link)
end