module Spidr::Events

The {Events} module adds methods to {Agent} for registering callbacks which will receive URLs, links, headers and pages, when they are visited.

Public Instance Methods

all_headers() { |headers| ... } click to toggle source

Pass the headers from every response the agent receives to a given block.

@yield [headers]

The block will be passed the headers of every response.

@yieldparam [Hash] headers

The headers from a response.
# File lib/spidr/events.rb, line 73
# Pass the headers from every response the agent receives to a given block.
#
# @yield [headers]
#   The block will be passed the headers of every response.
#
# @yieldparam [Hash] headers
#   The headers from a response.
#
# @return
#   Whatever every_page returns.
def all_headers
  every_page do |page|
    # Without this guard a blockless call would register a callback that
    # raises LocalJumpError as soon as a page is visited.
    yield page.headers if block_given?
  end
end
every_atom_doc() { |doc| ... } click to toggle source

Pass every Atom document that the agent parses to a given block.

@yield [doc]

The block will be passed every Atom document parsed.

@yieldparam [Nokogiri::XML::Document] doc

A parsed XML document.

@see nokogiri.rubyforge.org/nokogiri/Nokogiri/XML/Document.html

# File lib/spidr/events.rb, line 392
# Hands the parsed document of each visited Atom page to the given block.
#
# @yield [doc]
#   Called with the document of every page where `atom?` is truthy and
#   `doc` is non-nil.
#
# @yieldparam doc
#   The value returned by `page.doc`.
#
# @return
#   Whatever every_page returns.
def every_atom_doc
  every_page do |visited|
    next unless block_given? && visited.atom?

    parsed = visited.doc
    yield parsed if parsed
  end
end
every_atom_page() { |page| ... } click to toggle source

Pass every Atom feed that the agent visits to a given block.

@yield [feed]

The block will be passed every Atom feed visited.

@yieldparam [Page] feed

A visited page.
# File lib/spidr/events.rb, line 456
# Hands every visited page that reports `atom?` to the given block.
#
# @yield [feed]
#   Called with each visited Atom feed.
#
# @yieldparam [Page] feed
#   A visited page.
#
# @return
#   Whatever every_page returns.
def every_atom_page
  every_page do |visited|
    next unless block_given? && visited.atom?

    yield visited
  end
end
every_bad_request_page() { |page| ... } click to toggle source

Pass every Bad Request page that the agent visits to a given block.

@yield [page]

The block will be passed every Bad Request page visited.

@yieldparam [Page] page

A visited page.
# File lib/spidr/events.rb, line 145
# Hands every visited page that reports `bad_request?` to the given block.
#
# @yield [page]
#   Called with each visited Bad Request page.
#
# @yieldparam [Page] page
#   A visited page.
#
# @return
#   Whatever every_page returns.
def every_bad_request_page
  every_page do |visited|
    next unless block_given? && visited.bad_request?

    yield visited
  end
end
every_css_page() { |page| ... } click to toggle source

Pass every CSS page that the agent visits to a given block.

@yield [page]

The block will be passed every CSS page visited.

@yieldparam [Page] page

A visited page.
# File lib/spidr/events.rb, line 426
# Hands every visited page that reports `css?` to the given block.
#
# @yield [page]
#   Called with each visited CSS page.
#
# @yieldparam [Page] page
#   A visited page.
#
# @return
#   Whatever every_page returns.
def every_css_page
  every_page do |visited|
    next unless block_given? && visited.css?

    yield visited
  end
end
every_doc() { |doc| ... } click to toggle source

Pass every HTML or XML document that the agent parses to a given block.

@yield [doc]

The block will be passed every HTML or XML document parsed.

@yieldparam [Nokogiri::HTML::Document, Nokogiri::XML::Document] doc

A parsed HTML or XML document.

@see nokogiri.rubyforge.org/nokogiri/Nokogiri/XML/Document.html

@see nokogiri.rubyforge.org/nokogiri/Nokogiri/HTML/Document.html

# File lib/spidr/events.rb, line 286
# Hands the parsed document of every visited page to the given block.
#
# @yield [doc]
#   Called with the document of each page whose `doc` is non-nil.
#
# @yieldparam doc
#   The value returned by `page.doc`.
#
# @return
#   Whatever every_page returns.
def every_doc
  every_page do |visited|
    next unless block_given?

    parsed = visited.doc
    yield parsed if parsed
  end
end
every_failed_url(&block) click to toggle source

Pass each URL that could not be requested to the given block.

@yield [url]

The block will be passed every URL that could not be requested.

@yieldparam [URI::HTTP] url

A failed URL.
# File lib/spidr/events.rb, line 31
# Pass each URL that could not be requested to the given block.
#
# @yield [url]
#   The block will be passed every URL that could not be requested.
#
# @yieldparam [URI::HTTP] url
#   A failed URL.
#
# @return
#   self, so registrations can be chained.
def every_failed_url(&block)
  # Skip a blockless call: storing nil would poison the callback list and
  # fail later when the stored callbacks are invoked.
  @every_failed_url_blocks << block if block
  self
end
every_forbidden_page() { |page| ... } click to toggle source

Pass every Forbidden page that the agent visits to a given block.

@yield [page]

The block will be passed every Forbidden page visited.

@yieldparam [Page] page

A visited page.
# File lib/spidr/events.rb, line 175
# Hands every visited page that reports `forbidden?` to the given block.
#
# @yield [page]
#   Called with each visited Forbidden page.
#
# @yieldparam [Page] page
#   A visited page.
#
# @return
#   Whatever every_page returns.
def every_forbidden_page
  every_page do |visited|
    next unless block_given? && visited.forbidden?

    yield visited
  end
end
every_html_doc() { |doc| ... } click to toggle source

Pass every HTML document that the agent parses to a given block.

@yield [doc]

The block will be passed every HTML document parsed.

@yieldparam [Nokogiri::HTML::Document] doc

A parsed HTML document.

@see nokogiri.rubyforge.org/nokogiri/Nokogiri/HTML/Document.html

# File lib/spidr/events.rb, line 307
# Hands the parsed document of each visited HTML page to the given block.
#
# @yield [doc]
#   Called with the document of every page where `html?` is truthy and
#   `doc` is non-nil.
#
# @yieldparam doc
#   The value returned by `page.doc`.
#
# @return
#   Whatever every_page returns.
def every_html_doc
  every_page do |visited|
    next unless block_given? && visited.html?

    parsed = visited.doc
    yield parsed if parsed
  end
end
every_html_page() { |page| ... } click to toggle source

Pass every HTML page that the agent visits to a given block.

@yield [page]

The block will be passed every HTML page visited.

@yieldparam [Page] page

A visited page.
# File lib/spidr/events.rb, line 236
# Hands every visited page that reports `html?` to the given block.
#
# @yield [page]
#   Called with each visited HTML page.
#
# @yieldparam [Page] page
#   A visited page.
#
# @return
#   Whatever every_page returns.
def every_html_page
  every_page do |visited|
    next unless block_given? && visited.html?

    yield visited
  end
end
every_internal_server_error_page() { |page| ... } click to toggle source

Pass every Internal Server Error page that the agent visits to a given block.

@yield [page]

The block will be passed every Internal Server Error page visited.

@yieldparam [Page] page

A visited page.
# File lib/spidr/events.rb, line 206
# Hands every visited page that reports `had_internal_server_error?` to the
# given block.
#
# @yield [page]
#   Called with each visited Internal Server Error page.
#
# @yieldparam [Page] page
#   A visited page.
#
# @return
#   Whatever every_page returns.
def every_internal_server_error_page
  every_page do |visited|
    next unless block_given? && visited.had_internal_server_error?

    yield visited
  end
end
every_javascript_page() { |page| ... } click to toggle source

Pass every JavaScript page that the agent visits to a given block.

@yield [page]

The block will be passed every JavaScript page visited.

@yieldparam [Page] page

A visited page.
# File lib/spidr/events.rb, line 411
# Hands every visited page that reports `javascript?` to the given block.
#
# @yield [page]
#   Called with each visited JavaScript page.
#
# @yieldparam [Page] page
#   A visited page.
#
# @return
#   Whatever every_page returns.
def every_javascript_page
  every_page do |visited|
    next unless block_given? && visited.javascript?

    yield visited
  end
end
every_missing_page() { |page| ... } click to toggle source

Pass every Missing page that the agent visits to a given block.

@yield [page]

The block will be passed every Missing page visited.

@yieldparam [Page] page

A visited page.
# File lib/spidr/events.rb, line 190
# Hands every visited page that reports `missing?` to the given block.
#
# @yield [page]
#   Called with each visited Missing page.
#
# @yieldparam [Page] page
#   A visited page.
#
# @return
#   Whatever every_page returns.
def every_missing_page
  every_page do |visited|
    next unless block_given? && visited.missing?

    yield visited
  end
end
every_ms_word_page() { |page| ... } click to toggle source

Pass every MS Word page that the agent visits to a given block.

@yield [page]

The block will be passed every MS Word page visited.

@yieldparam [Page] page

A visited page.
# File lib/spidr/events.rb, line 471
# Hands every visited page that reports `ms_word?` to the given block.
#
# @yield [page]
#   Called with each visited MS Word page.
#
# @yieldparam [Page] page
#   A visited page.
#
# @return
#   Whatever every_page returns.
def every_ms_word_page
  every_page do |visited|
    next unless block_given? && visited.ms_word?

    yield visited
  end
end
every_ok_page() { |page| ... } click to toggle source

Pass every OK page that the agent visits to a given block.

@yield [page]

The block will be passed every OK page visited.

@yieldparam [Page] page

A visited page.
# File lib/spidr/events.rb, line 100
# Hands every visited page that reports `ok?` to the given block.
#
# @yield [page]
#   Called with each visited OK page.
#
# @yieldparam [Page] page
#   A visited page.
#
# @return
#   Whatever every_page returns.
def every_ok_page
  every_page do |visited|
    next unless block_given? && visited.ok?

    yield visited
  end
end
every_page(&block) click to toggle source

Pass every page that the agent visits to a given block.

@yield [page]

The block will be passed every page visited.

@yieldparam [Page] page

A visited page.
# File lib/spidr/events.rb, line 86
# Pass every page that the agent visits to a given block.
#
# @yield [page]
#   The block will be passed every page visited.
#
# @yieldparam [Page] page
#   A visited page.
#
# @return
#   self, so registrations can be chained.
def every_page(&block)
  # Skip a blockless call: storing nil would poison the callback list and
  # fail later when the stored callbacks are invoked.
  @every_page_blocks << block if block
  self
end
every_pdf_page() { |page| ... } click to toggle source

Pass every PDF page that the agent visits to a given block.

@yield [page]

The block will be passed every PDF page visited.

@yieldparam [Page] page

A visited page.
# File lib/spidr/events.rb, line 486
# Hands every visited page that reports `pdf?` to the given block.
#
# @yield [page]
#   Called with each visited PDF page.
#
# @yieldparam [Page] page
#   A visited page.
#
# @return
#   Whatever every_page returns.
def every_pdf_page
  every_page do |visited|
    next unless block_given? && visited.pdf?

    yield visited
  end
end
every_redirect_page() { |page| ... } click to toggle source

Pass every Redirect page that the agent visits to a given block.

@yield [page]

The block will be passed every Redirect page visited.

@yieldparam [Page] page

A visited page.
# File lib/spidr/events.rb, line 115
# Hands every visited page that reports `redirect?` to the given block.
#
# @yield [page]
#   Called with each visited Redirect page.
#
# @yieldparam [Page] page
#   A visited page.
#
# @return
#   Whatever every_page returns.
def every_redirect_page
  every_page do |visited|
    next unless block_given? && visited.redirect?

    yield visited
  end
end
every_rss_doc() { |doc| ... } click to toggle source

Pass every RSS document that the agent parses to a given block.

@yield [doc]

The block will be passed every RSS document parsed.

@yieldparam [Nokogiri::XML::Document] doc

A parsed XML document.

@see nokogiri.rubyforge.org/nokogiri/Nokogiri/XML/Document.html

# File lib/spidr/events.rb, line 371
# Hands the parsed document of each visited RSS page to the given block.
#
# @yield [doc]
#   Called with the document of every page where `rss?` is truthy and
#   `doc` is non-nil.
#
# @yieldparam doc
#   The value returned by `page.doc`.
#
# @return
#   Whatever every_page returns.
def every_rss_doc
  every_page do |visited|
    next unless block_given? && visited.rss?

    parsed = visited.doc
    yield parsed if parsed
  end
end
every_rss_page() { |page| ... } click to toggle source

Pass every RSS feed that the agent visits to a given block.

@yield [feed]

The block will be passed every RSS feed visited.

@yieldparam [Page] feed

A visited page.
# File lib/spidr/events.rb, line 441
# Hands every visited page that reports `rss?` to the given block.
#
# @yield [feed]
#   Called with each visited RSS feed.
#
# @yieldparam [Page] feed
#   A visited page.
#
# @return
#   Whatever every_page returns.
def every_rss_page
  every_page do |visited|
    next unless block_given? && visited.rss?

    yield visited
  end
end
every_timedout_page() { |page| ... } click to toggle source

Pass every Timeout page that the agent visits to a given block.

@yield [page]

The block will be passed every Timeout page visited.

@yieldparam [Page] page

A visited page.
# File lib/spidr/events.rb, line 130
# Hands every visited page that reports `timedout?` to the given block.
#
# @yield [page]
#   Called with each visited Timeout page.
#
# @yieldparam [Page] page
#   A visited page.
#
# @return
#   Whatever every_page returns.
def every_timedout_page
  every_page do |visited|
    next unless block_given? && visited.timedout?

    yield visited
  end
end
every_txt_page() { |page| ... } click to toggle source

Pass every Plain Text page that the agent visits to a given block.

@yield [page]

The block will be passed every Plain Text page visited.

@yieldparam [Page] page

A visited page.
# File lib/spidr/events.rb, line 221
# Hands every visited page that reports `txt?` to the given block.
#
# @yield [page]
#   Called with each visited Plain Text page.
#
# @yieldparam [Page] page
#   A visited page.
#
# @return
#   Whatever every_page returns.
def every_txt_page
  every_page do |visited|
    next unless block_given? && visited.txt?

    yield visited
  end
end
every_unauthorized_page() { |page| ... } click to toggle source

Pass every Unauthorized page that the agent visits to a given block.

@yield [page]

The block will be passed every Unauthorized page visited.

@yieldparam [Page] page

A visited page.
# File lib/spidr/events.rb, line 160
# Hands every visited page that reports `unauthorized?` to the given block.
#
# @yield [page]
#   Called with each visited Unauthorized page.
#
# @yieldparam [Page] page
#   A visited page.
#
# @return
#   Whatever every_page returns.
def every_unauthorized_page
  every_page do |visited|
    next unless block_given? && visited.unauthorized?

    yield visited
  end
end
every_url(&block) click to toggle source

Pass each URL from each page visited to the given block.

@yield [url]

The block will be passed every URL from every page visited.

@yieldparam [URI::HTTP] url

Each URL from each page visited.
# File lib/spidr/events.rb, line 17
# Pass each URL from each page visited to the given block.
#
# @yield [url]
#   The block will be passed every URL from every page visited.
#
# @yieldparam [URI::HTTP] url
#   Each URL from each page visited.
#
# @return
#   self, so registrations can be chained.
def every_url(&block)
  # Skip a blockless call: storing nil would poison the callback list and
  # fail later when the stored callbacks are invoked.
  @every_url_blocks << block if block
  self
end
every_url_like(pattern,&block) click to toggle source

Pass every URL that the agent visits and that matches a given pattern to a given block.

@param [Regexp, String] pattern

The pattern to match URLs with.

@yield [url]

The block will be passed every URL that matches the given pattern.

@yieldparam [URI::HTTP] url

A matching URL.

@since 0.3.2

# File lib/spidr/events.rb, line 51
# Pass every URL that the agent visits and that matches a given pattern to
# a given block.
#
# @param [Regexp, String] pattern
#   The pattern to match URLs with.
#
# @yield [url]
#   The block will be passed every URL that matches the given pattern.
#
# @yieldparam [URI::HTTP] url
#   A matching URL.
#
# @return
#   self, so registrations can be chained.
#
# @since 0.3.2
def every_url_like(pattern,&block)
  # Skip a blockless call: storing nil under the pattern would fail later
  # when the stored callbacks are invoked.
  @every_url_like_blocks[pattern] << block if block
  self
end
every_xml_doc() { |doc| ... } click to toggle source

Pass every XML document that the agent parses to a given block.

@yield [doc]

The block will be passed every XML document parsed.

@yieldparam [Nokogiri::XML::Document] doc

A parsed XML document.

@see nokogiri.rubyforge.org/nokogiri/Nokogiri/XML/Document.html

# File lib/spidr/events.rb, line 328
# Hands the parsed document of each visited XML page to the given block.
#
# @yield [doc]
#   Called with the document of every page where `xml?` is truthy and
#   `doc` is non-nil.
#
# @yieldparam doc
#   The value returned by `page.doc`.
#
# @return
#   Whatever every_page returns.
def every_xml_doc
  every_page do |visited|
    next unless block_given? && visited.xml?

    parsed = visited.doc
    yield parsed if parsed
  end
end
every_xml_page() { |page| ... } click to toggle source

Pass every XML page that the agent visits to a given block.

@yield [page]

The block will be passed every XML page visited.

@yieldparam [Page] page

A visited page.
# File lib/spidr/events.rb, line 251
# Hands every visited page that reports `xml?` to the given block.
#
# @yield [page]
#   Called with each visited XML page.
#
# @yieldparam [Page] page
#   A visited page.
#
# @return
#   Whatever every_page returns.
def every_xml_page
  every_page do |visited|
    next unless block_given? && visited.xml?

    yield visited
  end
end
every_xsl_doc() { |doc| ... } click to toggle source

Pass every XML Stylesheet (XSL) that the agent parses to a given block.

@yield [doc]

The block will be passed every XML Stylesheet (XSL) parsed.

@yieldparam [Nokogiri::XML::Document] doc

A parsed XML document.

@see nokogiri.rubyforge.org/nokogiri/Nokogiri/XML/Document.html

# File lib/spidr/events.rb, line 350
# Hands the parsed document of each visited XSL page to the given block.
#
# @yield [doc]
#   Called with the document of every page where `xsl?` is truthy and
#   `doc` is non-nil.
#
# @yieldparam doc
#   The value returned by `page.doc`.
#
# @return
#   Whatever every_page returns.
def every_xsl_doc
  every_page do |visited|
    next unless block_given? && visited.xsl?

    parsed = visited.doc
    yield parsed if parsed
  end
end
every_xsl_page() { |page| ... } click to toggle source

Pass every XML Stylesheet (XSL) page that the agent visits to a given block.

@yield [page]

The block will be passed every XML Stylesheet (XSL) page visited.

@yieldparam [Page] page

A visited page.
# File lib/spidr/events.rb, line 267
# Hands every visited page that reports `xsl?` to the given block.
#
# @yield [page]
#   Called with each visited XML Stylesheet (XSL) page.
#
# @yieldparam [Page] page
#   A visited page.
#
# @return
#   Whatever every_page returns.
def every_xsl_page
  every_page do |visited|
    next unless block_given? && visited.xsl?

    yield visited
  end
end
every_zip_page() { |page| ... } click to toggle source

Pass every ZIP page that the agent visits to a given block.

@yield [page]

The block will be passed every ZIP page visited.

@yieldparam [Page] page

A visited page.
# File lib/spidr/events.rb, line 501
# Hands every visited page that reports `zip?` to the given block.
#
# @yield [page]
#   Called with each visited ZIP page.
#
# @yieldparam [Page] page
#   A visited page.
#
# @return
#   Whatever every_page returns.
def every_zip_page
  every_page do |visited|
    next unless block_given? && visited.zip?

    yield visited
  end
end
urls_like(pattern,&block) click to toggle source

@see every_url_like

# File lib/spidr/events.rb, line 59
# Alias-style entry point: forwards the pattern and block unchanged to
# every_url_like and returns its result.
#
# @param pattern
#   Passed through to every_url_like.
#
# @see every_url_like
def urls_like(pattern, &block)
  every_url_like(pattern, &block)
end

Protected Instance Methods

initialize_events(options={}) click to toggle source
# File lib/spidr/events.rb, line 528
# Sets every callback registry used by the event methods to an empty
# container.
#
# @param [Hash] options
#   Accepted but not read by this method.
#
# @return [Array]
#   The new, empty link callback list (the value of the last assignment).
def initialize_events(options={})
  # Per-pattern callback lists; a list is created the first time a pattern
  # is looked up.
  @every_url_like_blocks = Hash.new { |registry, pattern| registry[pattern] = [] }
  @every_failed_url_blocks = []
  @every_url_blocks = []

  @every_page_blocks = []
  @every_link_blocks = []
end