class SiteChecker::Parse::Page

Public Class Methods

parse(content, ignore_list, root)
# File lib/site_checker/parse/page.rb, line 4
# Parses +content+ with Nokogiri and gathers every link found on the page.
#
# The result combines, in order: the output of get_links, the images from
# get_images, the anchors from get_anchors, and finally the pages returned by
# local_pages_which_has_anchor_references for the links collected so far.
#
# content     - document source handed to Nokogiri.
# ignore_list - passed through to get_links and get_images.
# root        - passed through to the helpers that classify link location.
#
# Returns a new Array of the collected links with duplicates removed.
def self.parse(content, ignore_list, root)
  document = Nokogiri(content)

  collected = get_links(document, ignore_list, root) +
              get_images(document, ignore_list, root) +
              get_anchors(document)

  # The anchor-reference pass needs the full list gathered above as its input.
  referenced_pages = local_pages_which_has_anchor_references(collected, root)

  (collected + referenced_pages).uniq
end

Private Class Methods

get_anchors(page)
# File lib/site_checker/parse/page.rb, line 62
# Builds an :anchor link for every <a> element that carries an +id+ attribute.
#
# page - object responding to +xpath+ (the parsed document).
#
# Returns an Array of links created via Link.create, one per matching element,
# whose url is the element's id.
def self.get_anchors(page)
  with_id = page.xpath("//a").select { |node| node['id'] }
  with_id.map do |node|
    Link.create({:url => node['id'], :kind => :anchor})
  end
end
get_images(page, ignore_list, root)
# File lib/site_checker/parse/page.rb, line 30
# Builds an :image link for every <img> element whose +src+ is not ignored.
#
# page        - object responding to +xpath+ (the parsed document).
# ignore_list - collection consulted by ignored? for each src value.
# root        - forwarded to set_location to classify each image link.
#
# Returns whatever set_location returns for the created links.
def self.get_images(page, ignore_list, root)
  kept = page.xpath("//img").reject { |node| ignored?(ignore_list, node['src']) }
  images = kept.map do |node|
    Link.create({:url => node['src'], :kind => :image})
  end
  set_location(images, root)
end
ignored?(ignore_list, link)
# File lib/site_checker/parse/page.rb, line 54
# Decides whether +link+ should be skipped.
#
# ignore_list - collection responding to +include?+.
# link        - the value to look up; may be nil.
#
# Returns true when +link+ is nil/false (nothing to check), otherwise the
# result of ignore_list.include?(link).
def self.ignored?(ignore_list, link)
  # A missing link is always treated as ignored.
  return true unless link

  ignore_list.include?(link)
end
local_pages_which_has_anchor_references(links, root)
# File lib/site_checker/parse/page.rb, line 70
# For every link that reports anchor_ref?, derives a :page link pointing at
# the part of its url that precedes the (last) '#'.
#
# links - collection of links responding to +anchor_ref?+ and +url+.
# root  - forwarded to set_location to classify the derived page links.
#
# Links whose url has nothing before the '#' produce no page link.
#
# Returns whatever set_location returns for the derived page links.
def self.local_pages_which_has_anchor_references(links, root)
  referencing = links.select { |candidate| candidate.anchor_ref? }

  pages = referencing.each_with_object([]) do |candidate, found|
    # The parsed value is not used, but the call is kept: URI() can raise on a
    # malformed url, and callers may depend on that.
    URI(candidate.url)

    matched = candidate.url.match(/(.+)#/)
    found << Link.create({:url => matched[1], :kind => :page}) if matched
  end

  set_location(pages, root)
end
set_location(links, root)
# File lib/site_checker/parse/page.rb, line 38
# Marks each link as :local or :remote, mutating the links in place.
#
# links - collection of objects responding to +url+, +problem=+ and
#         +location=+.
# root  - String prefix; a url starting with it is considered local.
#
# A url that starts with +root+ is local but also gets its problem set to
# "(absolute path)". Any other url is :remote when it parses as an absolute
# URI and :local otherwise.
#
# Returns +links+ (the same object that was passed in).
def self.set_location(links, root)
  links.each do |link|
    parsed = URI(link.url)
    under_root = parsed.to_s.start_with?(root)

    link.problem = "(absolute path)" if under_root
    # absolute? is only consulted for urls that are not under root.
    link.location = (!under_root && parsed.absolute?) ? :remote : :local
  end
end