class RDF::Microdata::RdfaReader
Updates the DOM to turn Microdata
into RDFa, then parses it using the RDFa Reader.
Attributes
rdfa[R]
The DOM after its Microdata markup has been transformed into RDFa. @return [Nokogiri::XML::Document] (the HTML/HTML5 document as parsed or as passed in)
Public Class Methods
format(klass = nil)
click to toggle source
Calls superclass method
# File lib/rdf/microdata/rdfa_reader.rb, line 11
##
# Returns the format class for this reader.
#
# @param klass  when non-nil, handed unchanged to the superclass implementation
# @return `RDF::Microdata::Format` when `klass` is nil; otherwise the result
#   of the superclass method
def self.format(klass = nil)
  # Zero-argument `super` forwards `klass` as received.
  return super unless klass.nil?

  RDF::Microdata::Format
end
new(input = $stdin, **options, &block)
click to toggle source
Initializes the RdfaReader
instance.
@param [IO, File, String] input
the input stream to read
@param [Hash{Symbol => Object}] options
any additional options (see `RDF::Reader#initialize`)
@return [reader] @yield [reader] `self` @yieldparam [RDF::Reader] reader @yieldreturn [void] ignored @raise [RDF::ReaderError] if validate
Calls superclass method
# File lib/rdf/microdata/rdfa_reader.rb, line 31
##
# Initializes the RdfaReader instance: parses the input into a DOM (unless it
# already is a Nokogiri document), rewrites its Microdata attributes as RDFa
# attributes in place, and passes the result to the superclass initializer.
#
# @param [IO, File, String, Nokogiri::XML::Document] input
#   the input stream to read, or an already parsed document (used as is)
# @param [Hash{Symbol => Object}] options
#   any additional options (see `RDF::Reader#initialize`)
# @option options [#to_s] :encoding
#   encoding of the input; defaults to `input.charset` when available, else 'utf-8'
# @yield [reader] forwarded, together with the transformed DOM, to the superclass
# @raise [RDF::ReaderError] if validate
def initialize(input = $stdin, **options, &block)
  # NOTE(review): assigned before `super`; presumably so that helpers used
  # below (log_debug, base_uri) can see the options -- confirm.
  @options = options
  log_debug('', "using RDFa transformation reader")

  input = case input
  when ::Nokogiri::XML::Document, ::Nokogiri::HTML::Document then input
  else
    # Try to detect charset from input; otherwise, default is utf-8
    options[:encoding] ||= input.charset if input.respond_to?(:charset)
    options[:encoding] = (options[:encoding] || 'utf-8').to_s

    begin
      input = input.read if input.respond_to?(:read)
      # Re-tag a copy: force_encoding mutates its receiver, which would alter
      # the caller's string and raise FrozenError on a frozen one.
      input = input.dup.force_encoding(options[:encoding])
      ::Nokogiri::HTML5(input, max_parse_errors: 1000)
    rescue LoadError, NoMethodError
      ::Nokogiri::HTML.parse(input, base_uri.to_s, options[:encoding])
    end
  end

  # For all members having @itemscope
  input.css("[itemscope]").each do |item|
    # Get @itemtypes to create @typeof and @vocab
    item.attribute('itemscope').remove

    if item['itemtype']
      # Only absolute URLs
      types = item.attribute('itemtype').
        remove.
        to_s.
        split(/\s+/).
        select { |t| RDF::URI(t).absolute? }

      item['typeof'] = types.join(' ') unless types.empty?

      if (first_type = types.first)
        # The vocabulary is the first type up to and including its last '/' or '#'
        type_vocab = first_type.to_s.sub(/([\/\#])[^\/\#]*$/, '\1')
        registry = Registry.new(type_vocab)
        item['vocab'] = registry.uri.to_s if registry
      end
    end
    item['typeof'] ||= ''

    # Change each itemid attribute to a resource attribute with the same value
    item['resource'] = item.attribute('itemid').remove if item['itemid']
  end

  # Add @resource for all itemprop values of object based on a @data value
  input.css("object[itemprop][data]").each do |item|
    item['resource'] ||= item['data']
  end

  # Replace all @itemprop values with @property
  input.css("[itemprop]").each do |item|
    item['property'] = item.attribute('itemprop').remove
  end

  # Wrap all @itemref properties
  input.css("[itemref]").each do |item|
    # Closest @vocab in scope: the item's own, else the nearest ancestor's.
    # Read the attribute *value* from the ancestor; stringifying the ancestor
    # node itself would yield its serialized markup.
    item_vocab = item['vocab']
    unless item_vocab
      vocab_holder = item.ancestors.detect { |a| a.attribute('vocab') }
      item_vocab = vocab_holder.attribute('vocab').to_s if vocab_holder
    end

    # Drop empty tokens (leading whitespace) so no bare "#" selector is built
    refs = item.attribute('itemref').remove.to_s.split(/\s+/).reject(&:empty?)
    refs.each do |ref|
      referenced = input.css("##{ref}")
      # css returns a NodeSet, which is truthy even when empty; skip ids that
      # match nothing so no dangling rdfa:copy link is emitted.
      next if referenced.empty?

      # Add @vocab to referenced using the closest ancestor having @vocab of item.
      # The wrapper carries resource="#ref" and typeof="rdfa:Pattern" for the
      # referenced element(s).
      referenced.wrap(%(<div vocab="#{item_vocab}" resource="##{ref}" typeof="rdfa:Pattern" />))

      # Add a link child element to the element that represents the item, with
      # a rel="rdfa:copy" attribute and an href of "#" followed by the reference
      link = ::Nokogiri::XML::Node.new('link', input)
      link['rel'] = 'rdfa:copy'
      link['href'] = "##{ref}"
      item << link
    end
  end

  @rdfa = input

  log_debug('', "Transformed document: #{input.to_html}")

  options = options.merge(
    library: :nokogiri,
    reference_folding: true,
    host_language: :html5,
    version: :"rdfa1.1")

  # Rely on RDFa reader
  super(input, **options, &block)
end