class WebRobots::RobotsTxt
Constants
- DISALLOW_ALL: a robots.txt source that disallows all access to the site; ::unfetchable parses it to build the object it returns.
Attributes
- error [RW]: the reason this robots.txt was unfetchable, if any (set by ::unfetchable, or via the :error option).
Public Class Methods
new(site, records, options = nil)
Creates a WebRobots::RobotsTxt object for site from parsed records. Records matching the optional :target user-agent are kept in order, and default (catch-all) records are moved to the end so they match only as a fallback.
Source
# File lib/webrobots/robotstxt.rb, line 533
def initialize(site, records, options = nil)
  @timestamp = Time.now
  @site = site
  @options = options || {}
  @last_checked_at = nil
  @error = @options[:error]
  @target = @options[:target]
  @sitemaps = @options[:sitemaps] || []
  @crawl_delay_handler = @options[:crawl_delay_handler]
  if records && !records.empty?
    @records, defaults = [], []
    records.each { |record|
      if record.default?
        defaults << record
      elsif !@target || record.match?(@target)
        @records << record
      end
    }
    @records.concat(defaults)
  else
    @records = []
  end
end
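Instances are normally produced by the bundled Parser (the same path ::unfetchable takes) rather than constructed directly. A minimal sketch, assuming a robots.txt body held in a string; the lambda only illustrates the call signature a :crawl_delay_handler must accept, since allow? invokes it as handler.call(delay, last_checked_at):

require "webrobots"
require "uri"

site = URI("http://example.com/")
body = "User-agent: *\nDisallow: /private/\nCrawl-delay: 2\n"

# Build an instance targeted at a single user-agent.
robots_txt = WebRobots::RobotsTxt::Parser.new("MyBot/1.0").parse(body, site)

# One plausible :crawl_delay_handler (supplied through the options hash
# seen in initialize above): sleep off whatever part of the delay has
# not yet elapsed since the previous check. last_checked_at is nil on
# the first call.
handler = lambda do |delay, last_checked_at|
  elapsed = last_checked_at && Time.now - last_checked_at
  sleep(delay - elapsed) if elapsed && elapsed < delay
end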
unfetchable(site, reason, target = nil)
Returns a RobotsTxt object for a site whose robots.txt could not be fetched: every request is disallowed, and reason is stored in error.
Source
# File lib/webrobots/robotstxt.rb, line 610
def self.unfetchable(site, reason, target = nil)
  Parser.new(target).parse(DISALLOW_ALL, site).tap { |robots_txt|
    robots_txt.error = reason
  }
end
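A sketch of typical use; the reason may be any object (it is stored through the error writer), and the optional target fixes the instance to one user-agent:

robots_txt = WebRobots::RobotsTxt.unfetchable(
  URI("http://example.com/"),
  "fetch failed: connection refused",
  "MyBot/1.0"
)
robots_txt.allow?(URI("http://example.com/index.html"))  # => false, for every URI
robots_txt.error                                         # => "fetch failed: connection refused"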
Public Instance Methods
allow?(request_uri, user_agent = nil)
Returns true if user_agent may fetch request_uri, and true when no record matches. If the matching record specifies a crawl delay and a :crawl_delay_handler was given, the handler is called with the delay and the time of the previous check before the result is returned.
Source
# File lib/webrobots/robotstxt.rb, line 585
def allow?(request_uri, user_agent = nil)
  record = find_record(user_agent) or return true
  allow = record.allow?(request_uri)
  if delay = record.delay and @crawl_delay_handler
    @crawl_delay_handler.call(delay, @last_checked_at)
  end
  @last_checked_at = Time.now
  return allow
end
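Continuing the sketch above (a targeted instance, so no user_agent is passed per call):

robots_txt.allow?(URI("http://example.com/index.html"))      # => true
robots_txt.allow?(URI("http://example.com/private/a.html"))  # => false, "/private/" is disallowed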
crawl_delay(user_agent = nil)
Returns the crawl delay specified for user_agent, or 0 if no record matches or the matching record sets no delay.
Source
# File lib/webrobots/robotstxt.rb, line 595
def crawl_delay(user_agent = nil)
  record = find_record(user_agent) or return 0
  record.delay or return 0
end
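Continuing the sketch, whose record carries "Crawl-delay: 2":

robots_txt.crawl_delay  # => 2; would be 0 with no matching record or no delay line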
options(user_agent = nil)
Returns the option values recorded for user_agent as a hash, or an empty hash if no record matches.
Source
# File lib/webrobots/robotstxt.rb, line 600
def options(user_agent = nil)
  record = find_record(user_agent) or return {}
  record.options
end
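The body in the earlier sketch carries no extension lines, so it yields an empty hash; the second line below assumes a record that contained a non-standard "Request-rate: 1/5" directive, and the key's casing is an assumption about the parser's bookkeeping:

robots_txt.options  # => {} for the body above
                    # => e.g. { "request-rate" => "1/5" } had the record
                    #    carried a "Request-rate: 1/5" line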
Private Instance Methods
find_record(user_agent = nil)
Returns the first record matching the effective user-agent (as resolved by target below), or nil.
Source
# File lib/webrobots/robotstxt.rb, line 577
def find_record(user_agent = nil)
  user_agent = target(user_agent)
  @records.find { |record| record.match?(user_agent) }
end
target(user_agent = nil)
Resolves the effective user-agent: a targeted instance must not be passed one, and an untargeted instance must be.
Source
# File lib/webrobots/robotstxt.rb, line 566
def target(user_agent = nil)
  if user_agent
    raise ArgumentError, "this instance is targeted for #{@target}" if @target
    user_agent
  else
    raise ArgumentError, "user_agent is mandatory for an untargeted instance" if !@target
    @target
  end
end
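The two raise branches correspond to the two construction styles. A sketch, with body and site as in the earlier example and assuming Parser.new also accepts no target (as ::unfetchable's default suggests):

uri = URI("http://example.com/index.html")

targeted = WebRobots::RobotsTxt::Parser.new("MyBot/1.0").parse(body, site)
targeted.allow?(uri)               # OK: the stored target is used
targeted.allow?(uri, "OtherBot")   # ArgumentError: this instance is targeted ...

untargeted = WebRobots::RobotsTxt::Parser.new.parse(body, site)
untargeted.allow?(uri, "MyBot/1.0")  # OK
untargeted.allow?(uri)               # ArgumentError: user_agent is mandatory ...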