module Spidr::Body

Public Instance Methods

%(*arguments)
Alias for: at
/(*paths)
Alias for: search
at(*arguments) click to toggle source

Searches for the first occurrence of an XPath or CSS Path expression.

@return [Nokogiri::HTML::Node, Nokogiri::XML::Node, nil]

The first matched node. Returns `nil` if no nodes could be matched,
or if the page is not a HTML or XML document.

@example

page.at('//title')

@see nokogiri.rubyforge.org/nokogiri/Nokogiri/XML/Node.html#M000251

# File lib/spidr/body.rb, line 75
# Looks up the first node matching the given expression(s) by forwarding
# all arguments to the `at` method of the object returned by `doc`.
#
# @param arguments the arguments passed through unchanged to `doc.at`
# @return the result of `doc.at(*arguments)`, or `nil` when `doc`
#   returns a falsy value
def at(*arguments)
  # No document available: nothing to search.
  return unless doc

  doc.at(*arguments)
end
Also aliased as: %
body() click to toggle source

The body of the response.

@return [String]

The body of the response.
# File lib/spidr/body.rb, line 11
# Returns the body of the response.
#
# @return the value of `response.body`, or an empty String when that
#   value is `nil` (or otherwise falsy)
def body
  raw = response.body
  raw || ''
end
Also aliased as: to_s
doc() click to toggle source

Returns a parsed document object for HTML, XML, RSS and Atom pages.

@return [Nokogiri::HTML::Document, Nokogiri::XML::Document, nil]

The document that represents HTML or XML pages.
Returns `nil` if the page is not HTML, XML, RSS, or Atom, or if
the page could not be parsed properly.

@see nokogiri.rubyforge.org/nokogiri/Nokogiri/XML/Document.html

@see nokogiri.rubyforge.org/nokogiri/Nokogiri/HTML/Document.html

# File lib/spidr/body.rb, line 26
# Returns the parsed document for the page, parsing it on first use and
# caching the result in `@doc`.
#
# The body is handed to `Nokogiri::HTML` when `html?` is true, and to
# `Nokogiri::XML` when any of `rss?`, `atom?`, `xml?` or `xsl?` is true.
#
# @return the cached or freshly parsed document, or `nil` when the body
#   is empty, when none of the type predicates match, or when an error
#   is raised while checking the type or parsing
def doc
  # An empty body cannot be parsed.
  return if body.empty?

  begin
    if html?
      @doc ||= Nokogiri::HTML(body, @url.to_s, content_charset)
    elsif rss? || atom? || xml? || xsl?
      @doc ||= Nokogiri::XML(body, @url.to_s, content_charset)
    end
  rescue StandardError
    # Deliberate best-effort: a failure here is reported as "no document".
    nil
  end
end
title() click to toggle source

The title of the HTML page.

@return [String]

The inner-text of the title element of the page.
# File lib/spidr/body.rb, line 90
# Returns the title of the page.
#
# @return the `inner_text` of the first node matched by the XPath
#   `//title`, or `nil` when `at` finds no such node
def title
  title_node = at('//title')
  title_node.inner_text if title_node
end
to_s()
Alias for: body