class SiteChecker::LinkCollector

Attributes

ignore_list[RW]
max_recursion_depth[RW]
visit_references[RW]

Public Class Methods

new() { |self| ... }
# File lib/site_checker/link_collector.rb, line 5
# Builds a collector. When a block is given, the new instance is yielded to
# it first so the caller can configure attributes; afterwards every setting
# that is still unset (nil or false) receives its default:
# max_recursion_depth -1, visit_references false, ignore_list [].
def initialize
  yield(self) if block_given?

  @max_recursion_depth = -1 unless @max_recursion_depth
  @visit_references = false unless @visit_references
  @ignore_list = [] unless @ignore_list
end

Public Instance Methods

check(url, root=nil)
# File lib/site_checker/link_collector.rb, line 12
# Runs a fresh check starting at +url+.
#
# url  - address of the starting page; registered as a local page link.
# root - optional root passed to figure_out_root together with +url+.
#
# Resets the collected links and the recursion depth, picks the root and a
# content reader, processes the starting page and finally evaluates the
# anchor references. Returns whatever evaluate_anchors returns.
def check(url, root=nil)
  @recursion_depth = 0
  @links = {}

  # The content reader is chosen from @root, so the root must be set first.
  @root = figure_out_root(url, root)
  @content_reader = get_content_reader

  start_page = Link.create({ url: url, kind: :page, location: :local })
  register_visit(start_page)
  process_local_page(start_page)
  evaluate_anchors
end
local_images()
# File lib/site_checker/link_collector.rb, line 33
# Returns the result of get_urls for links whose location is :local and
# whose kind is :image.
def local_images
  location = :local
  kind = :image
  get_urls(location, kind)
end
local_pages()
# File lib/site_checker/link_collector.rb, line 25
# Returns the result of get_urls for links whose location is :local and
# whose kind is :page.
def local_pages
  location = :local
  kind = :page
  get_urls(location, kind)
end
problems()
# File lib/site_checker/link_collector.rb, line 41
# Groups the problems of the collected links by the page that referenced
# them.
#
# Returns a Hash mapping each link's +parent_url+ to an Array of
# "<url> <problem>" strings, one per link for which +has_problem?+ is true.
# Returns an empty Hash when no link has a problem or when no check has
# populated @links yet.
def problems
  # @links is only assigned by #check; report "no problems" instead of
  # failing with NoMethodError on nil when called beforehand.
  collected = @links || {}

  collected.keys.each_with_object({}) do |link, report|
    next unless link.has_problem?

    (report[link.parent_url] ||= []) << "#{link.url} #{link.problem}"
  end
end
remote_images()
# File lib/site_checker/link_collector.rb, line 37
# Returns the result of get_urls for links whose location is :remote and
# whose kind is :image.
def remote_images
  location = :remote
  kind = :image
  get_urls(location, kind)
end
remote_pages()
# File lib/site_checker/link_collector.rb, line 29
# Returns the result of get_urls for links whose location is :remote and
# whose kind is :page.
def remote_pages
  location = :remote
  kind = :page
  get_urls(location, kind)
end

Private Instance Methods

evaluate_anchors()
# File lib/site_checker/link_collector.rb, line 142
# Marks every collected anchor reference that has no matching anchor.
#
# Splits the collected links into anchors (+anchor?+) and anchor references
# (+anchor_ref?+); each reference for which find_matching_anchor returns
# nothing gets its +problem+ set to "(404 Not Found)".
#
# Returns the Array of anchor references.
def evaluate_anchors
  collected = @links.keys
  targets = collected.select { |candidate| candidate.anchor? }
  references = collected.select { |candidate| candidate.anchor_ref? }

  references.each do |reference|
    reference.problem = "(404 Not Found)" if find_matching_anchor(targets, reference).empty?
  end
end
figure_out_root(url, root)
# File lib/site_checker/link_collector.rb, line 53
# Determines the root against which a check is run.
#
# url  - the starting URL or path of the check.
# root - an explicitly supplied root, or nil/false to derive one from +url+.
#
# Returns +root+ unchanged when it is given. Otherwise, for an absolute URL
# returns "<scheme>://<host>", followed by ":<port>" when the URL uses a
# port other than the scheme's default; for a non-absolute +url+ returns
# +url+ itself.
def figure_out_root(url, root)
  return root if root

  url_uri = URI(url)
  return url unless url_uri.absolute?

  derived = "#{url_uri.scheme}://#{url_uri.host}"
  # Keep an explicit non-default port: without it a start URL such as
  # http://localhost:4000/index.html would yield a root on another port.
  port = url_uri.port
  derived += ":#{port}" if port && port != url_uri.default_port
  derived
end
find_matching_anchor(anchors, anchor_ref)
# File lib/site_checker/link_collector.rb, line 152
# Selects the anchors that an anchor reference points at.
#
# anchors    - collection of anchor links (each responds to +url+ and
#              +parent_url+).
# anchor_ref - the reference link being resolved.
#
# An anchor on the same parent page matches when the reference's url is
# "#<anchor url>"; an anchor on a different parent page matches when the
# reference's url is "<anchor parent_url>#<anchor url>".
#
# Returns an Array of the matching anchors (empty when none match).
def find_matching_anchor(anchors, anchor_ref)
  anchors.select do |anchor|
    expected_ref =
      if anchor.parent_url == anchor_ref.parent_url
        "##{anchor.url}"
      else
        "#{anchor.parent_url}##{anchor.url}"
      end

    anchor_ref.url == expected_ref
  end
end
get_content_reader()
# File lib/site_checker/link_collector.rb, line 65
# Builds the content reader for the current root: a ContentFromWeb when
# @root parses as an absolute URI, a ContentFromFileSystem otherwise. Either
# reader is constructed with @visit_references and @root.
def get_content_reader
  reader_class =
    if URI(@root).absolute?
      SiteChecker::IO::ContentFromWeb
    else
      SiteChecker::IO::ContentFromFileSystem
    end

  reader_class.new(@visit_references, @root)
end
get_urls(location, kind)
# File lib/site_checker/link_collector.rb, line 73
# Lists the URLs of all collected links with the given location and kind.
#
# location - value compared against each link's +location+ (callers in this
#            file pass :local or :remote).
# kind     - value compared against each link's +kind+ (callers in this
#            file pass :page or :image).
#
# Returns an Array of the matching links' +url+ values, in collection
# order; empty when nothing matches or when no check has populated @links
# yet.
def get_urls(location, kind)
  # @links is only assigned by #check; treat "not checked yet" as "no
  # links" instead of failing with NoMethodError on nil.
  collected = @links || {}

  matching = collected.keys.select do |link|
    link.location == location && link.kind == kind
  end
  matching.map { |link| link.url }
end
open_reference(link)
# File lib/site_checker/link_collector.rb, line 117
# Fetches the content behind +link+ through the current content reader.
#
# Returns whatever the reader's +get+ returns. When +get+ raises a
# StandardError, the stripped error message is stored in the link's
# +problem+ and nil is returned instead.
def open_reference(link)
  @content_reader.get(link)
rescue => error
  link.problem = "#{error.message.strip}"
  nil
end
process_local_page(parent)
# File lib/site_checker/link_collector.rb, line 83
# Handles every link that collect_links returns for the page +parent+.
#
# Each link first gets its +parent_url+ set to the parent's url.
# Anchor-related links are stored in @links directly; any other link is
# handed to visit unless it has already been visited.
#
# Returns the collection that collect_links returned.
def process_local_page(parent)
  collect_links(parent).each do |child|
    child.parent_url = parent.url

    if child.anchor_related?
      @links[child] = nil
    elsif !visited?(child)
      visit(child)
    end
  end
end
register_visit(link)
# File lib/site_checker/link_collector.rb, line 96
# Records +link+ in @links (with a nil value) unless it has already been
# visited. Returns nil in both cases.
def register_visit(link)
  return if visited?(link)

  @links[link] = nil
end
stop_recursion?()
# File lib/site_checker/link_collector.rb, line 132
# Tells whether the crawl must not descend any further.
#
# Returns false when @max_recursion_depth is -1 (no limit) or still greater
# than the current @recursion_depth; true otherwise.
def stop_recursion?
  return false if @max_recursion_depth == -1

  @max_recursion_depth <= @recursion_depth
end
visit(link)
# File lib/site_checker/link_collector.rb, line 104
# Registers +link+ as visited and follows it.
#
# A link that is not a local page is simply opened through open_reference,
# whose result is returned. A local page is processed recursively, with
# @recursion_depth raised for the duration of the nested processing, unless
# stop_recursion? says the depth limit is reached (then nil is returned).
def visit(link)
  register_visit(link)
  return open_reference(link) unless link.local_page?
  return if stop_recursion?

  @recursion_depth += 1
  process_local_page(link)
  @recursion_depth -= 1
end
visited?(link)
# File lib/site_checker/link_collector.rb, line 100
# Returns true when +link+ is already a key of @links, false otherwise.
def visited?(link)
  @links.key?(link)
end