class CobwebLinks
CobwebLinks
Processes links to determine whether they are internal or external.
Public Class Methods
new(options={})
click to toggle source
Initialises the internal and external patterns and compiles them into regular expressions
# File lib/cobweb_links.rb, line 6
# Validates the options and compiles the internal and external URL patterns
# into regular expressions.
#
# options - Hash of settings. The hash is stored by reference and the
#           defaults listed below are written into it.
#           :internal_urls - Array of URL patterns (required).
#           :external_urls - Array of URL patterns (set to [] when absent).
#           :debug         - set to false when the key is absent.
#
# Raises InternalUrlsMissingError when :internal_urls is absent.
# Raises InvalidUrlsError when :internal_urls, or a supplied :external_urls,
# is not an Array.
def initialize(options={})
  @options = options

  raise InternalUrlsMissingError, ":internal_urls is required" unless @options.has_key?(:internal_urls)
  raise InvalidUrlsError, ":internal_urls must be an array" unless @options[:internal_urls].kind_of?(Array)
  if @options.has_key?(:external_urls) && !@options[:external_urls].kind_of?(Array)
    raise InvalidUrlsError, ":external_urls must be an array"
  end

  @options[:external_urls] = [] unless @options.has_key?(:external_urls)
  @options[:debug] = false unless @options.has_key?(:debug)

  # Anchor with \A (start of string) rather than ^ (start of any line), so a
  # link with an embedded newline cannot match a pattern on a later line.
  compile = lambda do |pattern|
    Regexp.new("\\A#{Cobweb.escape_pattern_for_regex(pattern, @options)}")
  end

  @internal_patterns = @options[:internal_urls].map(&compile)
  @external_patterns = @options[:external_urls].map(&compile)
end
Public Instance Methods
allowed?(link)
click to toggle source
# File lib/cobweb_links.rb, line 20
# Reports whether the link may be crawled. When the :obey_robots option is
# not set, every link is allowed; otherwise the answer comes from a Robots
# instance built for the link and the configured :user_agent.
def allowed?(link)
  return true unless @options[:obey_robots]

  Robots.new(:url => link, :user_agent => @options[:user_agent]).allowed?(link)
end
external?(link)
click to toggle source
Returns true if the link is matched to an external_url or not matched to an internal_url
# File lib/cobweb_links.rb, line 35
# True when the link matches no internal pattern, or when it matches any
# external pattern.
def external?(link)
  matches_internal = @internal_patterns.any? { |regex| link.match(regex) }
  return true unless matches_internal

  @external_patterns.any? { |regex| link.match(regex) }
end
internal?(link)
click to toggle source
Returns true if the link is matched to an internal_url and not matched to an external_url
# File lib/cobweb_links.rb, line 30
# True only when the link matches at least one internal pattern and does not
# match any external pattern.
def internal?(link)
  return false unless @internal_patterns.any? { |regex| link.match(regex) }

  @external_patterns.none? { |regex| link.match(regex) }
end
matches_external?(link)
click to toggle source
# File lib/cobweb_links.rb, line 39
# True when the link matches at least one of the external patterns.
def matches_external?(link)
  @external_patterns.each do |regex|
    return true if link.match(regex)
  end
  false
end