class Rackful::Parser::XHTML
Parses XHTML
as generated by {Serializer::XHTML}.
Public Instance Methods
parse(response, resource)
click to toggle source
@see Parser#parse
# File lib/rackful/parser.rb, line 108
# Parses the XHTML request body and stores the result on +resource+.
#
# The payload must live in exactly one <div id="rackful-content"/>. As a side
# effect, @base_url is set: it is taken from the first <base> element in the
# document head that carries an +href+ attribute (resolved against the
# request's canonical URI when relative), or else it is a copy of the
# request's canonical URI.
#
# @param response not referenced by this method
# @param resource receives the parsed value through +to_rackful=+
# @return the parsed value that was assigned to +resource.to_rackful+
# @raise [HTTP400BadRequest] if the content div is missing or not unique
# @see Parser#parse
def parse response, resource
  xhtml = { 'html' => 'http://www.w3.org/1999/xhtml' }
  root = document.root

  # Try to find the actual content. There must be exactly one element
  # <div id="rackful-content"/> in the document:
  content = root.xpath('//html:div[@id="rackful-content"]', xhtml)
  if content.empty?
    raise HTTP400BadRequest, 'Couldn’t find div#rackful-content in request body.'
  end
  if content.length > 1
    raise HTTP400BadRequest, 'Multiple instances of div#rackful-content found in request body.'
  end

  # Initialize @base_url. Only <base> elements that actually carry an href are
  # considered; a <base> without href would otherwise yield a nil attribute
  # and crash on `.text`.
  bases = root.xpath('/html:html/html:head/html:base[@href]', xhtml)
  if bases.empty?
    @base_url = request.canonical_uri.dup
  else
    @base_url = URI(bases.first.attribute('href').text).normalize
    @base_url = request.canonical_uri + @base_url if @base_url.relative?
  end

  # Parse the content element itself:
  resource.to_rackful = parse_recursive(content.first)
end
parse_object(node)
click to toggle source
@api private
# File lib/rackful/parser.rb, line 201
# Converts the children of a definition-list node into a Hash.
#
# Each XHTML +dt+ child supplies a key: its text is stripped, split on
# whitespace, re-joined with underscores and turned into a Symbol. The XHTML
# +dd+ child that follows supplies the value, parsed with #parse_recursive.
# All other children are ignored.
#
# NOTE(review): a trailing +dt+ that is never followed by a +dd+ is silently
# dropped — confirm whether that should be rejected instead.
#
# @param node the list node whose children are inspected
# @return [Hash] Symbol keys mapped to parsed values
# @raise [HTTP400BadRequest] on two consecutive +dt+ elements, or on a +dd+
#   that has no preceding +dt+
# @api private
def parse_object node
  xhtml_ns = 'http://www.w3.org/1999/xhtml'
  pending_key = nil
  result = {}
  node.children.each do |child|
    # A child may report no namespace at all (nil). Treat it as "not XHTML"
    # instead of calling #href on nil.
    namespace = child.namespace
    next unless namespace && xhtml_ns == namespace.href

    case child.name
    when 'dt'
      if pending_key
        raise HTTP400BadRequest, 'Can’t parse:<br/>' + Rack::Utils.escape_html(node.to_xml)
      end
      pending_key = child.inner_text.strip.split(' ').join('_').to_sym
    when 'dd'
      unless pending_key
        raise HTTP400BadRequest, 'Can’t parse:<br/>' + Rack::Utils.escape_html(node.to_xml)
      end
      result[pending_key] = parse_recursive(child)
      pending_key = nil
    end
  end
  result
end
parse_object_list(node)
click to toggle source
@api private
# File lib/rackful/parser.rb, line 225
# Converts a table node into an Array of Hashes that all share the same keys.
#
# The keys come from the +th+ cells under +thead/tr+: each cell's text is
# stripped, split on whitespace, re-joined with underscores and symbolized.
# Every +tbody/tr+ row must hold exactly as many +td+ cells as there are keys;
# each cell is parsed with #parse_recursive.
#
# @param node the table node
# @return [Array<Hash>] one Hash per body row, in document order
# @raise [HTTP400BadRequest] if there are no header cells, or if a row's cell
#   count differs from the number of headers
# @api private
def parse_object_list node
  xhtml = { 'html' => 'http://www.w3.org/1999/xhtml' }

  keys = node.xpath('html:thead/html:tr/html:th', xhtml).map do |header|
    header.inner_text.strip.split(' ').join('_').to_sym
  end
  if keys.empty?
    raise HTTP400BadRequest, 'Can’t parse:<br/>' + Rack::Utils.escape_html(node.to_xml)
  end

  node.xpath('html:tbody/html:tr', xhtml).map do |row|
    cells = row.xpath('html:td', xhtml)
    if cells.length != keys.length
      raise HTTP400BadRequest, 'Can’t parse:<br/>' + Rack::Utils.escape_html(row.to_xml)
    end

    record = {}
    keys.each_with_index do |key, index|
      record[key] = parse_recursive(cells[index])
    end
    record
  end
end
parse_recursive(node)
click to toggle source
@api private
# File lib/rackful/parser.rb, line 140
# Parses one node into a Ruby value, choosing the representation from the
# node's XHTML children. The first matching rule wins:
#
# 1. exactly one +a+ child     -> a URI (relative ones are resolved against @base_url)
# 2. exactly one +dl+ child    -> a Hash, via #parse_object
# 3. exactly one +table+ child -> an Array of Hashes, via #parse_object_list
# 4. exactly one +ul+ child    -> an Array, each +li+ parsed recursively
# 5. a +type+ attribute in the XML Schema namespace whose value is
#    "prefix:typename", with the prefix bound to that same namespace on the
#    node -> a simple value, via #parse_simple_type
#
# Error messages are HTML fragments, so any text taken from the request is
# escaped before being embedded.
#
# @param node the node to interpret
# @return the parsed value (URI, Hash, Array, or a simple value)
# @raise [HTTP400BadRequest] if no rule matches, if the +a+ element has no
#   +href+ attribute, or if the type attribute is malformed or uses an
#   unexpected namespace
# @api private
def parse_recursive node
  xhtml = { 'html' => 'http://www.w3.org/1999/xhtml' }
  xsd = 'http://www.w3.org/2001/XMLSchema'

  # A URI:
  links = node.xpath('html:a', xhtml)
  if links.length == 1
    href = links.first.attribute('href')
    # An anchor without href yields nil here; report it as a bad request
    # instead of failing on nil.
    unless href
      raise HTTP400BadRequest, 'Can’t parse:<br/>' + Rack::Utils.escape_html(node.to_xml)
    end
    uri = URI(href.text)
    return uri.relative? ? @base_url + uri : uri
  end

  # An Object (AKA a Hash):
  definition_lists = node.xpath('html:dl', xhtml)
  return parse_object(definition_lists.first) if definition_lists.length == 1

  # A list of Objects with identical keys:
  tables = node.xpath('html:table', xhtml)
  return parse_object_list(tables.first) if tables.length == 1

  # A list of things (AKA an Array):
  lists = node.xpath('html:ul', xhtml)
  if lists.length == 1
    return lists.first.xpath('html:li', xhtml).map { |item| parse_recursive(item) }
  end

  # A simple type:
  type = node.attribute_with_ns('type', xsd)
  unless type
    raise HTTP400BadRequest, 'Can’t parse:<br/>' + Rack::Utils.escape_html(node.to_xml)
  end
  prefix, typename = type.text.split(':', 2)
  unless typename && xsd == node.namespaces["xmlns:#{prefix}"]
    raise HTTP400BadRequest, 'Unknown XML Schema type: ' + Rack::Utils.escape_html(type.text)
  end
  parse_simple_type(node, typename)
end
parse_simple_type(node, typename)
click to toggle source
@api private
# File lib/rackful/parser.rb, line 176
# Converts the text of +node+ into a Ruby value according to +typename+:
#
# * 'boolean'      -> true / false for the stripped text 'true' / 'false', otherwise nil
# * 'integer'      -> stripped text converted with String#to_i
# * 'numeric'      -> stripped text converted with String#to_f
# * 'dateTime'     -> stripped text parsed with Time.xmlschema
# * 'base64Binary' -> text decoded with Base64.decode64
# * 'string'       -> the text, unmodified
#
# @param node the node whose inner text holds the value
# @param typename [String] the local type name (without namespace prefix)
# @return the converted value
# @raise [HTTP400BadRequest] if +typename+ is none of the names listed above
# @api private
def parse_simple_type node, typename
  case typename
  when 'boolean'
    # Anything other than the two literals falls through and yields nil.
    case node.inner_text.strip
    when 'true' then true
    when 'false' then false
    end
  when 'integer'
    node.inner_text.strip.to_i
  when 'numeric'
    node.inner_text.strip.to_f
  when 'dateTime'
    Time.xmlschema(node.inner_text.strip)
  when 'base64Binary'
    Base64.decode64(node.inner_text)
  when 'string'
    node.inner_text
  else
    # Report the offending name through +typename+, the variable that is
    # actually in scope here. It is escaped because error messages in this
    # parser are HTML fragments.
    raise HTTP400BadRequest, 'Unknown XML Schema type: ' + Rack::Utils.escape_html(typename.to_s)
  end
end