class SiteChecker::Parse::Page
Public Class Methods
parse(content, ignore_list, root)
click to toggle source
# File lib/site_checker/parse/page.rb, line 4
#
# Parses +content+ with Nokogiri and gathers every link found on the page:
# hyperlinks, images, anchors, and the local pages that anchor references
# point at. Duplicates are removed from the returned array.
#
# content     - markup handed to Nokogiri().
# ignore_list - collection of URLs to skip (checked with include?).
# root        - root URL string, forwarded to the helpers that set locations.
def self.parse(content, ignore_list, root)
  page = Nokogiri(content)

  links = get_links(page, ignore_list, root) +
          get_images(page, ignore_list, root) +
          get_anchors(page)

  # Pages referenced through "page#anchor" links are derived from the links
  # collected so far, so this has to run after the three collectors above.
  (links + local_pages_which_has_anchor_references(links, root)).uniq
end
Private Class Methods
get_anchors(page)
click to toggle source
# File lib/site_checker/parse/page.rb, line 62
#
# Builds one :anchor link for every <a> element that carries an id
# attribute; the id value becomes the link's url.
#
# page - parsed document responding to xpath.
#
# Returns an array of the created links.
def self.get_anchors(page)
  with_id = page.xpath("//a").reject { |node| node['id'].nil? }
  with_id.map do |node|
    Link.create({:url => node['id'], :kind => :anchor})
  end
end
get_images(page, ignore_list, root)
click to toggle source
# File lib/site_checker/parse/page.rb, line 30
#
# Builds one :image link per <img> element whose src is present and not in
# +ignore_list+, then classifies each link's location relative to +root+.
#
# page        - parsed document responding to xpath.
# ignore_list - collection of URLs to skip.
# root        - root URL string passed on to set_location.
#
# Returns whatever set_location returns for the created links.
def self.get_images(page, ignore_list, root)
  wanted = page.xpath("//img").reject { |node| ignored?(ignore_list, node['src']) }
  image_links = wanted.map do |node|
    Link.create({:url => node['src'], :kind => :image})
  end
  set_location(image_links, root)
end
get_links(page, ignore_list, root)
click to toggle source
# File lib/site_checker/parse/page.rb, line 17
#
# Builds one link per <a> element whose href is present and not in
# +ignore_list+, then classifies each link's location relative to +root+.
#
# A link is of kind :anchor_ref when its href contains "#<something>" and
# the part before the last such "#" is not an absolute URI; every other
# link is of kind :page.
#
# page        - parsed document responding to xpath.
# ignore_list - collection of URLs to skip.
# root        - root URL string passed on to set_location.
#
# Returns whatever set_location returns for the created links.
def self.get_links(page, ignore_list, root)
  candidates = page.xpath("//a").reject { |node| ignored?(ignore_list, node['href']) }

  found = candidates.map do |node|
    href = node['href']
    # Capture group 1 is everything before the fragment separator.
    fragment = href.match(/(.*)#.+/)
    kind = (fragment && !URI(fragment[1]).absolute?) ? :anchor_ref : :page
    Link.create({:url => href, :kind => kind})
  end

  set_location(found, root)
end
ignored?(ignore_list, link)
click to toggle source
# File lib/site_checker/parse/page.rb, line 54
#
# Decides whether +link+ should be skipped.
#
# ignore_list - collection of URLs to skip (queried with include?).
# link        - URL string, or nil when the attribute was missing.
#
# Returns true when +link+ is nil/false, otherwise the result of
# ignore_list.include?(link).
def self.ignored?(ignore_list, link)
  # A missing URL can never be checked, so it is always treated as ignored.
  return true unless link

  ignore_list.include?(link)
end
local_pages_which_has_anchor_references(links, root)
click to toggle source
# File lib/site_checker/parse/page.rb, line 70
#
# For every link that reports anchor_ref?, creates a :page link for the part
# of its url that precedes the last "#", then classifies the new links'
# locations relative to +root+.
#
# A url consisting only of a fragment (e.g. "#top") yields no page link,
# because at least one character is required before the "#".
#
# links - links collected so far; each must respond to anchor_ref? and url.
# root  - root URL string passed on to set_location.
#
# Returns whatever set_location returns for the created links.
def self.local_pages_which_has_anchor_references(links, root)
  new_links = []
  links.select { |link| link.anchor_ref? }.each do |link|
    # Greedy match: everything up to (not including) the last "#".
    page_url = link.url[/(.+)#/, 1]
    new_links << Link.create({:url => page_url, :kind => :page}) if page_url
  end
  set_location(new_links, root)
end
set_location(links, root)
click to toggle source
# File lib/site_checker/parse/page.rb, line 38
#
# Assigns a location to every link, in place:
#
# * url starts with +root+  -> :local, and problem is set to
#   "(absolute path)"
# * any other absolute URI  -> :remote
# * everything else         -> :local
#
# links - objects responding to url, problem= and location=.
# root  - root URL string the links are compared against.
#
# Returns +links+ (the same collection that was passed in).
def self.set_location(links, root)
  links.each do |entry|
    parsed = URI(entry.url)
    under_root = parsed.to_s.start_with?(root)

    # A link spelled out with the site's own root is local, but flagged.
    entry.problem = "(absolute path)" if under_root
    entry.location = (under_root || !parsed.absolute?) ? :local : :remote
  end
end