class Shelob::Extractor

Public Class Methods

new(fetched)
# File lib/extractor.rb, line 6
# Builds an extractor around an already-fetched page.
#
# fetched - the fetched page object; it is stored unchanged and later read
#           by #extract through its +body+ and +url+ readers.
def initialize(fetched)
  @fetched = fetched
end

Public Instance Methods

extract()
# File lib/extractor.rb, line 10
# Collects the href of every <a> element found in the fetched page body.
#
# Anchors without an href attribute are skipped. Links are rewritten as
# follows, using the URL of the fetched page as the base:
#
# * "//host/path" (protocol-relative) gets the page's scheme prepended.
# * "/path" (root-relative) gets the page's scheme, host and, when it is not
#   the scheme's default, port prepended.
# * anything else (absolute URLs, fragment links, path-relative links, ...)
#   is returned unchanged.
#
# Returns an Array of Strings, in document order.
def extract
  hrefs = Nokogiri::HTML(@fetched.body).css('a').map { |anchor| anchor['href'] }.compact

  # Parsed lazily (and only once) so a page with no slash-prefixed links never
  # needs a parseable URL.
  page_uri = nil

  hrefs.map do |href|
    next href unless href.start_with?('/')

    page_uri ||= URI(@fetched.url)

    if href.start_with?('//')
      # Protocol-relative link: it already names its own host.
      "#{page_uri.scheme}:#{href}"
    else
      port = page_uri.port
      # Keep an explicit port only when it differs from the scheme default,
      # otherwise links on e.g. http://host:8080 would point at port 80.
      authority = port && port != page_uri.default_port ? "#{page_uri.host}:#{port}" : page_uri.host
      "#{page_uri.scheme}://#{authority}#{href}"
    end
  end
end