class EmailCrawler::PageLinks

Constants

MAX_RETRIES
SLEEP_TIME

Public Class Methods

for(url, max_links: MAX_LINKS, logger: Logger.new("/dev/null"))
# File lib/email_crawler/page_links.rb, line 23
# Convenience entry point: builds an instance for +url+ and immediately
# asks it for its links.
#
# @param url the page URL, handed to +new+ unchanged
# @param max_links upper bound forwarded to +fetch_links+ (defaults to MAX_LINKS)
# @param logger logger handed to +new+; defaults to one writing to /dev/null
# @return whatever +fetch_links+ returns
def self.for(url, max_links: MAX_LINKS, logger: Logger.new("/dev/null"))
  page_links = new(url, logger)
  page_links.fetch_links(max_links)
end
new(url, logger = Logger.new("/dev/null"))
# File lib/email_crawler/page_links.rb, line 9
# Stores +url+ and +logger+ and precomputes @domain, a case-insensitive
# regexp that matches the literal text "<scheme>://<host>/" of +url+.
#
# The scheme and host are taken from URI(url) when it parses and has a host;
# otherwise they are extracted from the start of +url+ with an http(s)
# pattern.
#
# @param url the page URL; indexed with a regexp in the fallback path, so it
#   is expected to behave like a String
# @param logger logger kept in @logger; defaults to one writing to /dev/null
def initialize(url, logger = Logger.new("/dev/null"))
  @url = url
  @logger = logger

  # URI() raises on malformed input; treat that as "unparsable" and fall
  # through to the regexp-based extraction below.
  uri = begin
          URI(url)
        rescue StandardError
          nil
        end

  scheme_and_host = if uri && uri.host
                      "#{uri.scheme}://#{uri.host}"
                    else
                      url[%r(\A(https?://([^/]+))), 1]
                    end

  # Escape before interpolating: an unescaped host lets "." match any
  # character (so "a.com" would also match "axcom"), and a bracketed IPv6
  # host would be read as a character class.
  # NOTE(review): when no scheme/host can be extracted, scheme_and_host is nil
  # and the pattern degrades to a bare "/" (same as before) — confirm whether
  # callers rely on that.
  @domain = Regexp.new("#{Regexp.escape(scheme_and_host.to_s)}/", Regexp::IGNORECASE)
end

Public Instance Methods