class Rackful::Parser::XHTML

Parses XHTML as generated by {Serializer::XHTML}.

Public Instance Methods

parse(response, resource)

@see Parser#parse

# File lib/rackful/parser.rb, line 108
# Parses the XHTML request body and stores the result on +resource+.
#
# Locates the single <div id="rackful-content"/> element of the document,
# establishes +@base_url+ (used to resolve relative links while parsing) and
# hands the content element to #parse_recursive.
#
# +@base_url+ is taken from the +href+ of the document's <base/> element.
# When there is no <base/> element, or it carries no +href+ attribute, the
# request's canonical URI is used instead. A relative +href+ is resolved
# against the request's canonical URI.
#
# @param response not used by this method
# @param resource object that receives the parsed value via +to_rackful=+
# @return the parsed value that was assigned to +resource+
# @raise [HTTP400BadRequest] if the content element is missing or ambiguous,
#   or if the <base/> href is not a valid URI
# @see Parser#parse
def parse response, resource
  xhtml = { 'html' => 'http://www.w3.org/1999/xhtml' }
  root = self.document.root

  # Try to find the actual content:
  content = root.xpath( '//html:div[@id="rackful-content"]', xhtml )
  # There must be exactly one element <div id="rackful-content"/> in the document:
  if content.empty?
    raise HTTP400BadRequest, 'Couldn’t find div#rackful-content in request body.'
  end
  if content.length > 1
    raise HTTP400BadRequest, 'Multiple instances of div#rackful-content found in request body.'
  end

  # Initialize @base_url:
  base = root.xpath( '/html:html/html:head/html:base', xhtml )
  # <base/> may legally carry only a target attribute; treat that like no <base/>.
  href = base.empty? ? nil : base.first.attribute('href')
  if href
    begin
      @base_url = URI( href.text ).normalize
    rescue URI::InvalidURIError
      # A malformed href is the client's mistake, not a server fault.
      raise HTTP400BadRequest, 'Invalid base URL in request body.'
    end
    if @base_url.relative?
      @base_url = self.request.canonical_uri + @base_url
    end
  else
    @base_url = self.request.canonical_uri.dup
  end

  # Parse the content element:
  resource.to_rackful = self.parse_recursive content.first
end
parse_object(node)

@api private

# File lib/rackful/parser.rb, line 201
# Builds a Hash from the XHTML <dt>/<dd> children of +node+.
#
# Each <dt> supplies a key: its text is stripped, whitespace-separated words
# are joined with underscores, and the result is converted to a Symbol. The
# <dd> that follows supplies the value, obtained through #parse_recursive.
# Children that are not <dt> or <dd> elements in the XHTML namespace are
# skipped. A trailing <dt> without a <dd> is ignored.
#
# @api private
# @param node the element whose children are inspected
# @return [Hash{Symbol => Object}]
# @raise [HTTP400BadRequest] if a <dt> follows another <dt>, or a <dd>
#   appears without a preceding <dt>
def parse_object node
  xhtml_ns = 'http://www.w3.org/1999/xhtml'
  # Wrapped in a lambda so node.to_xml is only evaluated on failure.
  malformed = lambda do
    raise HTTP400BadRequest, 'Can’t parse:<br/>' + Rack::Utils.escape_html(node.to_xml)
  end

  pending_key = nil
  result = {}
  node.children.each do |child|
    tag = child.name
    next unless 'dt' == tag || 'dd' == tag
    next unless xhtml_ns == child.namespace.href

    if 'dt' == tag
      # Two terms in a row: the first one never got a value.
      malformed.call if pending_key
      pending_key = child.inner_text.strip.split(' ').join('_').to_sym
    else
      # A definition with no term to attach it to.
      malformed.call unless pending_key
      result[pending_key] = self.parse_recursive( child )
      pending_key = nil
    end
  end
  result
end
parse_object_list(node)

@api private

# File lib/rackful/parser.rb, line 225
# Builds an Array of Hashes from a table element.
#
# The <th> cells under thead/tr supply the keys (text stripped,
# whitespace-separated words joined with underscores, converted to Symbols).
# Every tbody/tr row must contain exactly as many <td> cells as there are
# keys; each cell is converted through #parse_recursive.
#
# @api private
# @param node the table element
# @return [Array<Hash{Symbol => Object}>] one Hash per body row
# @raise [HTTP400BadRequest] if there are no header cells, or a row's cell
#   count differs from the number of header cells
def parse_object_list node
  xhtml = { 'html' => 'http://www.w3.org/1999/xhtml' }

  header_cells = node.xpath( 'html:thead/html:tr/html:th', xhtml )
  keys = header_cells.collect { |th| th.inner_text.strip.split(' ').join('_').to_sym }
  if keys.empty?
    raise HTTP400BadRequest, 'Can’t parse:<br/>' + Rack::Utils.escape_html(node.to_xml)
  end

  body_rows = node.xpath( 'html:tbody/html:tr', xhtml )
  body_rows.collect do |row|
    cells = row.xpath( 'html:td', xhtml )
    if cells.length != keys.length
      raise HTTP400BadRequest, 'Can’t parse:<br/>' + Rack::Utils.escape_html(row.to_xml)
    end
    record = {}
    # Pair each key with the cell in the same column.
    keys.each_with_index do |key, column|
      record[key] = self.parse_recursive( cells[column] )
    end
    record
  end
end
parse_recursive(node)

@api private

# File lib/rackful/parser.rb, line 140
# Converts the content of +node+ into the Ruby value it represents.
#
# The following forms are tried in order; the first four require +node+ to
# have exactly one child element of the given kind in the XHTML namespace:
# * <a>     -> a URI built from its +href+, resolved against +@base_url+
#   when relative
# * <dl>    -> a Hash, via #parse_object
# * <table> -> an Array of Hashes, via #parse_object_list
# * <ul>    -> an Array, each <li> converted recursively
# * otherwise, a +type+ attribute in the XML Schema namespace on +node+
#   itself, whose value is a +prefix:typename+ pair with the prefix bound to
#   the XML Schema namespace -> a simple value, via #parse_simple_type
#
# @api private
# @param node the element to convert
# @return [URI, Hash, Array, Object] the converted value
# @raise [HTTP400BadRequest] if +node+ matches none of the forms, if an <a>
#   has no +href+ or an +href+ that is not a valid URI, or if the type
#   attribute does not name an XML Schema type
def parse_recursive node
  xhtml = { 'html' => 'http://www.w3.org/1999/xhtml' }

  # A URI:
  if ( nodelist = node.xpath( 'html:a', xhtml ) ).length == 1
    href = nodelist.first.attribute('href')
    # A missing or malformed href is bad client input, so report it as such
    # instead of failing with NoMethodError / URI::InvalidURIError.
    unless href
      raise HTTP400BadRequest, 'Can’t parse:<br/>' + Rack::Utils.escape_html(node.to_xml)
    end
    begin
      r = URI( href.text )
    rescue URI::InvalidURIError
      raise HTTP400BadRequest, 'Can’t parse:<br/>' + Rack::Utils.escape_html(node.to_xml)
    end
    r.relative? ? @base_url + r : r

  # An Object (AKA a Hash)
  elsif ( nodelist = node.xpath( 'html:dl', xhtml ) ).length == 1
    self.parse_object nodelist.first

  # A list of Objects with identical keys:
  elsif ( nodelist = node.xpath( 'html:table', xhtml ) ).length == 1
    self.parse_object_list nodelist.first

  # A list of things (AKA an Array):
  elsif ( nodelist = node.xpath( 'html:ul', xhtml ) ).length == 1
    nodelist.first.xpath( 'html:li', xhtml ).collect do |n| self.parse_recursive n end

  # A simple type:
  elsif type = node.attribute_with_ns( 'type', 'http://www.w3.org/2001/XMLSchema' )
    prefix, typename = type.text.split(':', 2)
    # The prefix must be bound to the XML Schema namespace in this node's scope.
    unless typename && 'http://www.w3.org/2001/XMLSchema' == node.namespaces["xmlns:#{prefix}"]
      raise HTTP400BadRequest, "Unknown XML Schema type: #{type}"
    end
    self.parse_simple_type node, typename
  else
    raise HTTP400BadRequest, 'Can’t parse:<br/>' + Rack::Utils.escape_html(node.to_xml)
  end
end
parse_simple_type(node, typename)

@api private

# File lib/rackful/parser.rb, line 176
# Converts the text content of +node+ to a Ruby value according to the XML
# Schema simple type named by +typename+.
#
# Supported type names and their results:
# * +boolean+      -> true / false for the stripped text "true" / "false",
#   nil for anything else
# * +integer+      -> Integer via String#to_i on the stripped text
# * +numeric+      -> Float via String#to_f on the stripped text
# * +dateTime+     -> Time via Time.xmlschema on the stripped text
# * +base64Binary+ -> the Base64-decoded text
# * +string+       -> the text, unmodified
#
# @api private
# @param node the element whose +inner_text+ holds the value
# @param typename [String] the type name, without namespace prefix
# @return [true, false, nil, Integer, Float, Time, String]
# @raise [HTTP400BadRequest] if +typename+ is not one of the names above
def parse_simple_type node, typename
  case typename
  when 'boolean'
    case node.inner_text.strip
    when 'true'  then true
    when 'false' then false
    else nil
    end
  when 'integer'
    node.inner_text.strip.to_i
  when 'numeric'
    node.inner_text.strip.to_f
  when 'dateTime'
    Time.xmlschema(node.inner_text.strip)
  when 'base64Binary'
    Base64.decode64(node.inner_text)
  when 'string'
    node.inner_text
  else
    # Report the offending name; the only local holding it here is typename.
    raise HTTP400BadRequest, "Unknown XML Schema type: #{typename}"
  end
end