module RDF::RDFa::Reader::REXML
REXML implementation of an XML parser.
Public Class Methods
library()
click to toggle source
Returns the name of the underlying XML library.
@return [Symbol]
# File lib/rdf/rdfa/reader/rexml.rb, line 14
#
# Names the XML library backing this reader implementation.
#
# @return [Symbol] always +:rexml+
def self.library
  :rexml
end
Public Instance Methods
detect_host_language_version(input, **options)
click to toggle source
Determine the host language and/or version from options and the input document
# File lib/rdf/rdfa/reader/rexml.rb, line 232
#
# Determine the host language and/or RDFa version from options and the
# input document, storing the results in +@host_language+ and +@version+.
#
# Explicit +:host_language+ / +:version+ options win; anything missing is
# sniffed from the DOCTYPE, the root element and its +version+ attribute,
# the input's content type, and any +<meta>+ elements found in the head.
#
# @param [::REXML::Document, #read, #to_s] input
#   an already parsed document, a rewindable IO-like object, or anything
#   convertible to a String
# @param [Hash{Symbol => Object}] options
# @option options [#to_sym] :host_language explicit host language
# @option options [#to_sym] :version explicit RDFa version
# @return [Symbol, nil] the detected host language, or +nil+ when both the
#   host language and the version were supplied through +options+
def detect_host_language_version(input, **options)
  @host_language = options[:host_language] ? options[:host_language].to_sym : nil
  @version = options[:version] ? options[:version].to_sym : nil
  return if @host_language && @version

  # Sniff version based on input
  case input
  when ::REXML::Document
    doc_type_string = input.doctype.to_s
    version_attr = input.root && input.root.attribute("version").to_s
    # A document without a root element has no name to inspect
    root_element = input.root ? input.root.name.downcase : ""
    content_type = "application/xhtml+html" # FIXME: what about other possible XML types?
  else
    content_type = input.content_type if input.respond_to?(:content_type)

    # Determine from head of document
    head = if input.respond_to?(:read)
      input.rewind
      string = input.read(1000)
      input.rewind
      string.to_s
    else
      input.to_s[0..1000]
    end

    doc_type_string = head.match(%r(<!DOCTYPE[^>]*>)m).to_s
    # First tag that is neither a declaration (<!...>) nor a processing instruction (<?...>)
    root = head.match(%r(<[^!\?>]*>)m).to_s
    root_element = root[%r(^<(\S+)[ >]), 1] || ""
    version_attr = root[/version\s*=\s*"([^"]+)"/m, 1] || ""
    head_element = head.match(%r(<head.*<\/head>)mi)
    head_doc = ::REXML::Document.new(head_element.to_s)

    # May determine content-type and/or charset from meta.
    # Easiest way is to parse the head into a document and iterate
    # over the matching meta elements.
    #
    # NOTE(review): `options` is a new Hash built from the keyword arguments,
    # so the :encoding assignments below are not visible to the caller.
    ::REXML::XPath.each(head_doc, "//meta") do |meta|
      if meta.attribute("http-equiv").to_s.downcase == 'content-type'
        content_type, params = meta.attribute("content").to_s.downcase.split(";")
        charset = params.to_s[/charset=([^\s]*)$/i, 1]
        options[:encoding] = charset.downcase if charset
      elsif meta.attribute("charset")
        # REXML elements expose #attribute; there is no Nokogiri-style #attr
        options[:encoding] = meta.attribute("charset").to_s.downcase
      end
    end
  end

  # Already using XML parser, determine from DOCTYPE and/or root element
  @version ||= :"rdfa1.0" if doc_type_string =~ /RDFa 1\.0/
  @version ||= :"rdfa1.0" if version_attr =~ /RDFa 1\.0/
  @version ||= :"rdfa1.1" if version_attr =~ /RDFa 1\.1/
  @version ||= :"rdfa1.1"

  @host_language ||= case content_type
  when "application/xml" then :xml
  when "image/svg+xml" then :svg
  when "text/html"
    case doc_type_string
    when /html 4/i then :html4
    when /xhtml/i then :xhtml1
    when /html/i then :html5
    else :html5
    end
  when "application/xhtml+xml"
    case doc_type_string
    when /html 4/i then :html4
    when /xhtml/i then :xhtml1
    when /html/i then :xhtml5
    else :xhtml5
    end
  else
    case root_element
    when /svg/i then :svg
    else :html5
    end
  end
end
doc_base(base)
click to toggle source
Find value of document base
@param [String] base Existing base from URI or :base_uri @return [String]
# File lib/rdf/rdfa/reader/rexml.rb, line 327 def doc_base(base) # find if the document has a base element case @host_language when :xhtml1, :xhtml5, :html4, :html5 base_el = ::REXML::XPath.first(@doc, "/html/head/base") rescue nil base = base.join(base_el.attribute("href").to_s.split("#").first) if base_el else xml_base = root.attribute("base", "http://www.w3.org/XML/1998/namespace") || root.attribute('xml:base') if root base = base.join(xml_base) if xml_base end base || @base_uri end
doc_errors()
click to toggle source
Document errors
# File lib/rdf/rdfa/reader/rexml.rb, line 318
#
# Document errors. This implementation reports none.
#
# @return [Array] a new, empty array on every call
def doc_errors
  []
end
initialize_xml(input, **options)
click to toggle source
Initializes the underlying XML library.
@param [Hash{Symbol => Object}] options @return [void]
# File lib/rdf/rdfa/reader/rexml.rb, line 211
#
# Initializes the underlying XML library and stores the document in +@doc+.
#
# An input that is already a +REXML::Document+ is used as is. Anything else
# is read (when it responds to +read+) or converted with +to_s+ and parsed
# as XML; there is no HTML parsing mode.
#
# @param [::REXML::Document, #read, #to_s] input
# @param [Hash{Symbol => Object}] options
# @return [::REXML::Document] the document assigned to +@doc+
def initialize_xml(input, **options)
  require 'rexml/document' unless defined?(::REXML)

  # Nothing to parse when handed a ready-made document
  return @doc = input if input.is_a?(::REXML::Document)

  # Take the charset from the input when it offers one, otherwise utf-8
  options[:encoding] ||= input.charset if input.respond_to?(:charset)
  options[:encoding] ||= 'utf-8'

  # Remember the base URI, if defined, as a string
  @base_uri = (base_uri.to_s if base_uri)

  source = input.respond_to?(:read) ? input.read : input.to_s
  @doc = ::REXML::Document.new(source)
end
root()
click to toggle source
Return proxy for document root
# File lib/rdf/rdfa/reader/rexml.rb, line 312
#
# Return a proxy for the document root.
#
# The proxy is created on first use and cached in +@root+.
#
# @return [NodeProxy, nil] +nil+ when there is no document or it has no root
def root
  document_element = @doc && @doc.root
  return nil unless document_element

  @root ||= NodeProxy.new(document_element)
end