class SiteMapper::Robots

Based on: rubygems.org/gems/robots, v0.10.1. Given the contents of a /robots.txt file, it checks whether a given URL is allowed to be crawled. @see rubygems.org/gems/robots

Public Class Methods

new(robots_txt, hostname, user_agent) click to toggle source

@param [String] robots_txt contents of /robots.txt @param [String] hostname for the passed robots_txt @param [String] user_agent to check

# File lib/site_mapper/robots.rb, line 124
# Stores the raw robots.txt body together with the hostname and user agent,
# and starts with an empty cache of parsed results.
#
# @param [String] robots_txt contents of /robots.txt
# @param [String] hostname for the passed robots_txt
# @param [String] user_agent to check
def initialize(robots_txt, hostname, user_agent)
  @robots_txt, @hostname, @user_agent = robots_txt, hostname, user_agent
  # Cache of ParsedRobots instances keyed by host; filled on first query.
  @parsed = {}
end

Public Instance Methods

allowed?(uri) click to toggle source

@param [String, URI] uri String or URI to check @return [Boolean] true if uri is allowed to be crawled @example Check if www.google.com/googlesites is allowed to be crawled

robots = Robots.new(robots_txt, 'google.com', 'SiteMapper')
robots.allowed?('http://www.google.com/googlesites') # => false (as of 2014-10-22)
# File lib/site_mapper/robots.rb, line 136
# Asks the parsed robots.txt rules whether the given location may be crawled.
# The parsed rules are memoized under the host of the URI being checked.
#
# @param [String, URI] uri String or URI to check
# @return [Boolean] true if uri is allowed to be crawled
def allowed?(uri)
  target = to_uri(uri)
  rules  = (@parsed[target.host] ||= ParsedRobots.new(@robots_txt, @user_agent))
  rules.allowed?(target, @user_agent)
end
other_values() click to toggle source

@return [Hash] key/value pairs from robots.txt @example Get other values for google.com

robots = Robots.new(robots_txt, 'google.com', 'SiteMapper')
robots.other_values
# File lib/site_mapper/robots.rb, line 158
# Returns the other_values of the parsed robots.txt, parsing it on first use
# and memoizing the parser under the configured hostname.
#
# @return [Hash] key/value pairs from robots.txt
def other_values
  parsed = (@parsed[@hostname] ||= ParsedRobots.new(@robots_txt, @user_agent))
  parsed.other_values
end
sitemaps() click to toggle source

@return [Array] array of sitemaps defined in robots.txt @example Get sitemap for google.com

robots = Robots.new(robots_txt, 'google.com', 'SiteMapper')
robots.sitemaps
# File lib/site_mapper/robots.rb, line 147
# Returns the sitemaps reported by the parsed robots.txt, parsing it on first
# use and memoizing the parser under the configured hostname.
#
# @return [Array] array of sitemaps defined in robots.txt
def sitemaps
  (@parsed[@hostname] ||= ParsedRobots.new(@robots_txt, @user_agent)).sitemaps
end

Private Instance Methods

to_uri(uri) click to toggle source
# File lib/site_mapper/robots.rb, line 166
# Coerces the argument into a URI. Values that already are a URI are returned
# untouched; anything else is converted with #to_s and parsed.
#
# @param [String, URI] uri value to coerce
# @return [URI] the given URI, or the result of parsing its string form
def to_uri(uri)
  return uri if uri.is_a?(URI)

  URI.parse(uri.to_s)
end