class Spieker::LinkScraper

Attributes

Public Class Methods

new(url, lang: 'en') click to toggle source
# File lib/spieker/link_scraper.rb, line 11
# Sets up a scraper for the site at +url+ and points Capybara at it.
#
# Side effects (all global Capybara state):
# * +Capybara.app_host+ is set to the value returned by #app_host
# * a Selenium/Firefox driver named +:tolq+ is registered
# * +Capybara.current_driver+ is switched to +:tolq+
#
# url  - String URL, parsed with URI.parse and stored in @url.
# lang - language code stored in @lang (defaults to 'en').
def initialize(url, lang: 'en')
  @lang = lang
  @url = URI.parse(url)
  Capybara.app_host = app_host

  Capybara.register_driver :tolq do |app|
    # Sorry internet, but we need the whitelisting: the browser announces
    # itself with a Googlebot-style user agent carrying the Spieker version.
    user_agent = "Mozilla/5.0 (compatible; Googlebot TolqSpieker/#{Spieker::VERSION}; +http://www.tolq.com)"

    firefox_profile = Selenium::WebDriver::Firefox::Profile.new.tap do |prefs|
      prefs['general.useragent.override'] = user_agent
    end

    Capybara::Selenium::Driver.new(app, profile: firefox_profile)
  end

  Capybara.current_driver = :tolq
end

Public Instance Methods

app_host() click to toggle source
# File lib/spieker/link_scraper.rb, line 31
# Returns the origin (scheme, host and, when needed, port) of @url as a
# String, e.g. "http://example.com" or "http://localhost:3000".
#
# The port is appended only when it differs from the scheme's default, so
# URLs on standard ports yield the same value as before, while URLs on a
# custom port no longer lose it.
#
# +host+ is used rather than +hostname+ so that IPv6 literals keep their
# square brackets and the result stays a valid URL prefix.
def app_host
  origin = "#{@url.scheme}://#{@url.host}"
  port = @url.port
  return origin if port.nil? || port == @url.default_port

  "#{origin}:#{port}"
end
result() click to toggle source
# File lib/spieker/link_scraper.rb, line 27
# Returns the outcome of the scrape: whatever #found_links yields, passed
# through #cleaned_up_links.
def result
  links = found_links
  cleaned_up_links(links)
end

Private Instance Methods

filter_hash(link) click to toggle source
# File lib/spieker/link_scraper.rb, line 76
# Strips the fragment from +link+: everything from the first '#' onwards
# is removed. A link without '#' is returned with its content unchanged.
#
# Cutting at the FIRST '#' matters: a link such as "page#a#b" must reduce
# to "page", not "page#a", otherwise part of the fragment survives.
#
# link - String link (absolute or relative).
#
# Returns a String without any fragment part.
def filter_hash(link)
  link.partition('#').first
end