class Killshot::Crawler
Attributes
root[R]
whitelist[R]
Public Class Methods
new(root, whitelist)
click to toggle source
# File lib/killshot.rb, line 12
# Builds a crawler for the given start URL and set of permitted image hosts.
#
# @param root [Object] starting point of the crawl; stored as-is and exposed
#   through the +root+ reader
# @param whitelist [Enumerable<String>, String, nil] host name(s) that should
#   not be reported; a single String is treated as a one-element list and
#   nil as an empty list
def initialize(root, whitelist)
  @root = root
  # Array() lets callers pass one bare host string, which Set.new alone
  # would reject with an ArgumentError because a String is not Enumerable.
  @whitelist = Set.new(Array(whitelist))
end
Public Instance Methods
crawl(&block)
click to toggle source
# File lib/killshot.rb, line 17
# Crawls from +root+ with Anemone and hands every hotlinked image found on
# each visited page to the caller's block.
#
# @yieldparam url [String] address of the page containing the image
# @yieldparam hotlink [String] the image's src value
def crawl(&block)
  Anemone.crawl(root) do |spider|
    # Forward the caller's block directly; find_hotlinks invokes it with
    # (url, hotlink) for each match.
    spider.on_every_page { |visited| find_hotlinks(visited, &block) }
  end
end
Private Instance Methods
find_hotlinks(page, &block)
click to toggle source
# File lib/killshot.rb, line 35
# Parses the page body as HTML and calls the block once for every <img>
# element whose src is judged a hotlink by hotlink?.
#
# @param page [#body, #url] crawled page; +body+ is parsed with Nokogiri and
#   +url+ is reported (as a String) alongside each match
# @yieldparam url [String] page.url.to_s
# @yieldparam hotlink [String] the src attribute of the offending image
def find_hotlinks(page, &block)
  doc = Nokogiri::HTML(page.body)
  doc.xpath("//img").each do |img|
    src = img['src']
    # An <img> without a src attribute yields nil here; there is no URL to
    # classify, and passing nil on would raise inside hotlink?.
    next if src.nil?

    block.call(page.url.to_s, src) if hotlink?(src)
  end
end
hotlink?(imgsrc)
click to toggle source
# File lib/killshot.rb, line 29
# Tells whether an image source points at a host that is not whitelisted.
#
# @param imgsrc [String, nil] raw value of an <img> src attribute
# @return [Boolean] true when +imgsrc+ is an absolute URI whose host is not
#   a member of +whitelist+; false for relative, blank, missing, or
#   unparseable sources
def hotlink?(imgsrc)
  # Nothing to classify when the attribute is missing or blank.
  return false if imgsrc.nil? || imgsrc.strip.empty?

  # URI.escape was removed in Ruby 3.0; it was a thin wrapper around the
  # default parser's #escape, so calling that keeps the same percent-encoding
  # (e.g. of spaces) before parsing.
  uri = URI(URI::DEFAULT_PARSER.escape(imgsrc.strip))

  # Check if absolute, ignore relative links
  uri.absolute? && !whitelist.member?(uri.host)
rescue URI::InvalidURIError
  # A src that still cannot be parsed cannot be attributed to a host, so it
  # is not reported instead of aborting the caller's loop.
  false
end