class Shelob::Extractor
Public Class Methods
new(fetched)
click to toggle source
# File lib/extractor.rb, line 6
# Stores the fetched page so that links can be pulled out of it later.
#
# fetched - the object kept in @fetched; the extract method of this class
#           calls `body` and `url` on it, so it must respond to both.
def initialize(fetched)
  @fetched = fetched
end
Public Instance Methods
extract()
click to toggle source
# File lib/extractor.rb, line 10
# Collects the href of every <a> element in the fetched page body.
#
# Anchors without an href are skipped. Links are rewritten as follows:
#   * "//host/path" (protocol-relative) gets the page's scheme prepended;
#   * "/path" (root-relative) is prefixed with the page's scheme, host and,
#     when it is not the scheme's default, port;
#   * anything else is returned exactly as it appears in the markup.
#
# Returns an Array with one entry per anchor that has an href, in document
# order.
def extract
  content = Nokogiri::HTML(@fetched.body)
  hrefs = content.css('a').map { |anchor| anchor['href'] }.compact

  # Parsed lazily: the page URL is only needed once a slash-prefixed link
  # shows up, and is then reused for every later link.
  base = nil

  hrefs.map do |link|
    next link unless link.start_with?('/')

    base ||= URI(@fetched.url)
    if link.start_with?('//')
      # Protocol-relative link: it already names its own host, so only the
      # scheme is inherited from the page.
      "#{base.scheme}:#{link}"
    else
      # Keep an explicit non-default port; dropping it would point the link
      # at a different server than the one the page came from.
      port = base.port && base.port != base.default_port ? ":#{base.port}" : ''
      "#{base.scheme}://#{base.host}#{port}#{link}"
    end
  end
end