module Spidr::Body
Public Instance Methods
at(*arguments)
click to toggle source
Searches for the first occurrence of an XPath or CSS Path expression.
@return [Nokogiri::HTML::Node, Nokogiri::XML::Node, nil]
The first matched node. Returns `nil` if no nodes could be matched, or if the page is not an HTML or XML document.
@example
page.at('//title')
@see nokogiri.rubyforge.org/nokogiri/Nokogiri/XML/Node.html#M000251
# File lib/spidr/body.rb, line 75
#
# Looks up the first node matching the given expression(s) by forwarding
# all arguments to the parsed document's `at` method.
#
# @param [Array] arguments
#   Arguments passed through unchanged to `doc.at`.
#
# @return [Object, nil]
#   Whatever `doc.at` returns, or `nil` when `doc` yields no document.
def at(*arguments)
  # Without a parsed document there is nothing to query.
  return unless doc

  doc.at(*arguments)
end
Also aliased as: %
body()
click to toggle source
The body of the response.
@return [String]
The body of the response.
# File lib/spidr/body.rb, line 11
#
# The body of the response, never `nil`.
#
# @return [String]
#   `response.body`, or an empty String when that value is `nil`/`false`.
def body
  content = response.body

  if content
    content
  else
    ''
  end
end
Also aliased as: to_s
doc()
click to toggle source
Returns a parsed document object for HTML, XML, XSL, RSS and Atom pages.
@return [Nokogiri::HTML::Document, Nokogiri::XML::Document, nil]
The document that represents HTML or XML pages. Returns `nil` if the body is empty, if the page is not HTML, XML, XSL, RSS or Atom, or if the page could not be parsed properly.
@see nokogiri.rubyforge.org/nokogiri/Nokogiri/XML/Document.html
@see nokogiri.rubyforge.org/nokogiri/Nokogiri/HTML/Document.html
# File lib/spidr/body.rb, line 26
#
# Lazily parses the response body with Nokogiri and caches the result
# in `@doc`.
#
# The HTML parser is used when `html?` is true; the XML parser is used
# when any of `rss?`, `atom?`, `xml?` or `xsl?` is true. Both parsers are
# given the body, `@url.to_s` and `content_charset`.
#
# @return [Object, nil]
#   The cached or freshly parsed document. `nil` when the body is empty,
#   when none of the content-type predicates match, or when an error is
#   raised while checking the type or parsing.
def doc
  return if body.empty?

  begin
    if html?
      @doc ||= Nokogiri::HTML(body, @url.to_s, content_charset)
    elsif rss? || atom? || xml? || xsl?
      @doc ||= Nokogiri::XML(body, @url.to_s, content_charset)
    end
  rescue StandardError
    # Best-effort parsing: any failure is reported as "no document".
    nil
  end
end
search(*paths)
click to toggle source
Searches the document for XPath or CSS Path paths.
@param [Array<String>] paths
CSS or XPath expressions to search the document with.
@return [Array]
The matched nodes from the document. Returns an empty Array if no nodes were matched, or if the page is not an HTML or XML document.
@example
page.search('//a[@href]')
@see nokogiri.rubyforge.org/nokogiri/Nokogiri/XML/Node.html#M000239
# File lib/spidr/body.rb, line 55
#
# Runs the given XPath or CSS path expressions against the parsed
# document by forwarding them to `doc.search`.
#
# @param [Array<String>] paths
#   Expressions passed through unchanged to `doc.search`.
#
# @return [Object, Array]
#   Whatever `doc.search` returns, or an empty Array when `doc` yields
#   no document.
def search(*paths)
  doc ? doc.search(*paths) : []
end
Also aliased as: /
title()
click to toggle source
The title of the HTML page.
@return [String, nil]
The inner-text of the title element of the page. Returns `nil` if no title element could be found.
# File lib/spidr/body.rb, line 90
#
# The title of the page, taken from the first node matching `//title`.
#
# @return [Object, nil]
#   The matched node's `inner_text`, or `nil` when `at('//title')` finds
#   nothing.
def title
  title_node = at('//title')

  title_node.inner_text if title_node
end