module RDF::RDFa::Reader::REXML

REXML implementation of an XML parser.

@see www.germane-software.com/software/rexml/

Public Class Methods

library() click to toggle source

Returns the name of the underlying XML library.

@return [Symbol]

# File lib/rdf/rdfa/reader/rexml.rb, line 14
# Identifies the XML library backing this reader implementation.
#
# @return [Symbol] always `:rexml`
def self.library
  :rexml
end

Public Instance Methods

detect_host_language_version(input, **options) click to toggle source

Determine the host language and/or version from options and the input document

# File lib/rdf/rdfa/reader/rexml.rb, line 232
# Determine the host language and/or RDFa version from options and the input document.
#
# Explicit `:host_language` and `:version` options take precedence. Whatever is
# missing is sniffed from the DOCTYPE, the root element and its `version`
# attribute, the content type, and any `<meta>` elements found in `<head>`.
# The results are stored in `@host_language` and `@version`.
#
# NOTE(review): `options[:encoding]` is assigned on the hash built by `**options`,
# which is local to this call; confirm whether callers are expected to see it.
#
# @param [::REXML::Document, #read, #to_s] input
#   a parsed document, a rewindable stream, or anything convertible to a String
# @param [Hash{Symbol => Object}] options
# @option options [#to_sym] :host_language explicit host language
# @option options [#to_sym] :version explicit RDFa version
# @return [Symbol, nil] the host language, or nil when both values were given explicitly
def detect_host_language_version(input, **options)
  @host_language = options[:host_language] ? options[:host_language].to_sym : nil
  @version = options[:version] ? options[:version].to_sym : nil
  return if @host_language && @version

  # Sniff version based on input
  case input
  when ::REXML::Document
    # A document without a root element must not crash detection.
    doc_root = input.root
    doc_type_string = input.doctype.to_s
    version_attr = doc_root ? doc_root.attribute("version").to_s : ""
    root_element = doc_root ? doc_root.name.downcase : ""
    # NOTE(review): this value matches none of the content types handled below, so
    # parsed documents are classified by root element only. It looks like a typo for
    # "application/xhtml+xml", but changing it would alter detection results.
    content_type = "application/xhtml+html" # FIXME: what about other possible XML types?
  else
    content_type = input.content_type if input.respond_to?(:content_type)

    # Determine from head of document
    head = if input.respond_to?(:read)
      input.rewind
      string = input.read(1000)
      input.rewind
      string.to_s
    else
      input.to_s[0..1000]
    end

    doc_type_string = head.match(%r(<!DOCTYPE[^>]*>)m).to_s
    root = head.match(%r(<[^!\?>]*>)m).to_s
    root_element = root.match(%r(^<(\S+)[ >])) ? $1 : ""
    version_attr = root.match(/version\s*=\s*"([^"]+)"/m) ? $1 : ""
    head_element = head.match(%r(<head.*<\/head>)mi)
    head_doc = ::REXML::Document.new(head_element.to_s)

    # May determine content-type and/or charset from meta.
    # Easiest way is to parse head into a document and iterate
    # over the matching elements.
    ::REXML::XPath.each(head_doc, "//meta") do |meta|
      if meta.attribute("http-equiv").to_s.downcase == 'content-type'
        # "type/subtype; charset=..." => media type plus optional parameters
        content_type, params = meta.attribute("content").to_s.downcase.split(";")
        options[:encoding] = $1.downcase if params.to_s =~ /charset=([^\s]*)$/i
      elsif meta.attribute("charset")
        # REXML::Element has no #attr; read the value through #attribute
        options[:encoding] = meta.attribute("charset").to_s.downcase
      end
    end
  end

  # Already using XML parser, determine from DOCTYPE and/or root element
  @version ||= :"rdfa1.0" if doc_type_string =~ /RDFa 1\.0/
  @version ||= :"rdfa1.0" if version_attr =~ /RDFa 1\.0/
  @version ||= :"rdfa1.1" if version_attr =~ /RDFa 1\.1/
  @version ||= :"rdfa1.1"

  @host_language ||= case content_type
  when "application/xml"  then :xml
  when "image/svg+xml"    then :svg
  when "text/html"
    case doc_type_string
    when /html 4/i        then :html4
    when /xhtml/i         then :xhtml1
    when /html/i          then :html5
    else                       :html5
    end
  when "application/xhtml+xml"
    case doc_type_string
    when /html 4/i        then :html4
    when /xhtml/i         then :xhtml1
    when /html/i          then :xhtml5
    else                       :xhtml5
    end
  else
    case root_element
    when /svg/i           then :svg
    else                       :html5
    end
  end
end
doc_base(base) click to toggle source

Find value of document base

@param [String] base Existing base from URI or :base_uri

@return [String]

# File lib/rdf/rdfa/reader/rexml.rb, line 327
# Resolve the effective base of the document.
#
# For HTML-family host languages the `/html/head/base` element's `href`, with any
# fragment removed, is joined onto `base`. For every other host language the root
# element's `xml:base` attribute is joined instead.
#
# @param [#join] base existing base from URI or :base_uri
# @return [Object] the resulting base, or `@base_uri` when no base is available
def doc_base(base)
  if %i[xhtml1 xhtml5 html4 html5].include?(@host_language)
    # Lookup is best-effort: any failure is treated as "no base element"
    base_el = begin
      ::REXML::XPath.first(@doc, "/html/head/base")
    rescue StandardError
      nil
    end

    if base_el
      href = base_el.attribute("href").to_s
      base = base.join(href.split("#").first)
    end
  else
    # Try the namespaced lookup first, then the prefixed attribute name
    xml_base = if root
      root.attribute("base", "http://www.w3.org/XML/1998/namespace") || root.attribute('xml:base')
    end
    base = base.join(xml_base) if xml_base
  end

  base || @base_uri
end
doc_errors() click to toggle source

Document errors

# File lib/rdf/rdfa/reader/rexml.rb, line 318
# Document errors.
#
# This implementation reports none: it always returns a new, empty Array.
#
# @return [Array]
def doc_errors
  []
end
initialize_xml(input, **options) click to toggle source

Initializes the underlying XML library.

@param [Hash{Symbol => Object}] options

@return [void]

# File lib/rdf/rdfa/reader/rexml.rb, line 211
# Set up `@doc` from the given input using REXML.
#
# An already-parsed `::REXML::Document` is used as is. Any other input is read
# (via `#read` when available, otherwise `#to_s`) and parsed as XML.
#
# @param [::REXML::Document, #read, #to_s] input
# @param [Hash{Symbol => Object}] options
# @option options [String] :encoding defaults to the input's charset, then 'utf-8'
# @return [::REXML::Document] the document stored in `@doc`
def initialize_xml(input, **options)
  require 'rexml/document' unless defined?(::REXML)

  if input.is_a?(::REXML::Document)
    @doc = input
  else
    # Prefer the charset advertised by the input, then fall back to utf-8
    options[:encoding] ||= input.charset if input.respond_to?(:charset)
    options[:encoding] ||= 'utf-8'

    # Keep the base URI as a string (nil when there is none);
    # `base_uri` is presumably supplied by the including reader
    uri = base_uri
    @base_uri = uri ? uri.to_s : nil

    # REXML parses XML only; there is no HTML mode
    source = if input.respond_to?(:read)
      input.read
    else
      input.to_s
    end
    @doc = ::REXML::Document.new(source)
  end
end
root() click to toggle source

Return proxy for document root

# File lib/rdf/rdfa/reader/rexml.rb, line 312
# Proxy for the document's root element.
#
# The proxy is created on first use and cached in `@root`.
#
# @return [NodeProxy, nil] nil when there is no document or it has no root
def root
  return unless @doc && @doc.root

  @root ||= NodeProxy.new(@doc.root)
end