class RDF::KV
Constants
- DECLARATION
- DEFAULT_NS
just the classics
- DESIGNATOR
- GENERATED
macros are initially represented as a pair: the macro value and a flag denoting whether or not the macro itself contains macros and to try to dereference it.
- GRAMMAR
- MACRO
- MACROS
- MAP
- MODIFIER
the actual rdf-kv grammar
- NCNAME
- NCNAMECHAR
- NCNSCHAR
some xml grammar
- NOT_MACRO
- NSCHAR
- PARTIAL_STMT
- PREFIX
- RFC5646
- SPECIALS
these should be instance_exec'd
- TERM
- VERSION
Attributes
Public Class Methods
Initialize the processor.
@param subject [RDF::URI] The default subject. Required. @param graph [RDF::URI] The default context. Optional. @param namespaces [Hash] Namespace/prefix mappings. Optional. @param callback [#call] A callback that expects and returns a term.
Optional.
# File lib/rdf/kv.rb, line 257 def initialize subject: nil, graph: nil, namespaces: {}, callback: nil # look at all of our pretty assertions raise ArgumentError, 'subject must be an RDF::Resource' unless subject.is_a? RDF::Resource raise ArgumentError, 'graph must be an RDF::Resource' unless graph.nil? or graph.is_a? RDF::Resource raise ArgumentError, 'namespaces must be hashable' unless namespaces.respond_to? :to_h rase ArgumentError, 'callback must be callable' unless callback.nil? or callback.respond_to? :call @subject = subject @graph = graph @callback = callback @namespaces = DEFAULT_NS.merge(namespaces.to_h.map do |k, v| k = k.to_s.to_sym unless k.is_a? Symbol # coerce to uri v = RDF::URI(v.to_s) unless v.is_a? RDF::Resource # now coerce to vocabulary v = RDF::Vocabulary.new v unless v.is_a? RDF::Vocabulary [k, v] end.to_h) end
Public Instance Methods
@note This operation may change the state of the processor, so
while this object can be reused for multiple hashes, it is unwise to reuse it across requests.
@param data [Hash] The data coming, e.g., from the Web form. @return [RDF::Changeset] A changeset containing the results.
# File lib/rdf/kv.rb, line 290 def process data raise ArgumentError, 'data must be a hash' unless data.is_a? Hash macros = GENERATED.dup maybe = {} # candidates neither = {} # discard pile data.each do |k, *v| # step 0: get the values to a homogeneous list k = k.to_s v = v.flatten.map(&:to_s) # step 1: pull out all the macro declarations if (m = /#{DECLARATION}/o.match k) name = m[1].to_sym sigil = !!(m[2] && !m[2].empty?) # skip over generated macros next if GENERATED.key? name # step 1.0.1: create [content, deref flag] pairs (macros[name] ||= []).concat v.map { |x| [x, sigil] } elsif (m = /(?:^\s*\S+\s+\S+.*?$|[:\$])/.match k) (maybe[k] ||= []).concat v else (neither[k] ||= []).concat v end end # step 2: dereference all the macros (that asked to be dereferenced) begin macros = massage_macros macros rescue e # XXX we should do something more here raise e end # step 3: apply special control macros (which modify self) begin SPECIALS.each do |k, macro| instance_exec macros[k], ¯o if macros[k] end rescue Exception => e # again this should be nicer raise e end # this will be our output patch = RDF::Changeset.new maybe.each do |k, v| # this will return an array now k = deref_content(k, macros).compact v = v.compact.map(&:strip).uniq # this is only this way because of macros k.each do |template| tokens = GRAMMAR.match(template) or next tokens = tokens.captures raise 'INTERNAL ERROR: Regexp captures do not match template' unless tokens.length == MAP.length # i had something much cleverer here but of course it didn't DWIW contents = {} MAP.each_index { |i| contents[MAP[i]] ||= tokens[i] } contents.compact! contents[:modifier] = (contents[:modifier] || '').chars.map do |c| [c, true] end.to_h if contents[:designator] sigil, symbol = contents[:designator].split '', 2 symbol = resolve_term symbol if sigil == ?^ contents[:designator] = symbol.to_s.empty? ? [sigil] : [sigil, symbol] else contents[:designator] = [contents[:modifier][?!] ? ?: : ?'] end %i[term1 term2 graph].filter { |t| contents[t] }.each do |which| contents[which] = resolve_term contents[which] end # these are the values we actually use; ensure they are duplicated values = (contents[:deref] ? deref_content(v, macros) : v).dup g = coerce_term(contents[:graph]) || graph # initialize the triple s, p, o = nil # shorthand for reverse if reverse = !!contents[:modifier][?!] # literals make no sense on reverse statements # (XXX this is a candidate for diagnostics) next unless [?_, ?:].include? contents[:designator].first # these terms have already been resolved/coerced p = contents[:term1] o = contents[:term2] || subject else s, p = (contents[:term2] ? contents.values_at(:term1, :term2) : [subject, contents[:term1]]).map { |t| resolve_term t } end # the operation depends on whether the `-` modifier is present op = contents[:modifier][?-] ? :delete : :insert # if we're deleting triples and the values contain an empty # string then we're deleting a wildcard, same if we `=` overwrite if !reverse and op == :delete && values.include?('') || contents[:modifier][?=] # i can't remember why we don't do this in reverse, probably # because it is too easy to shoot yourself in the foot patch.delete RDF::Statement(s, p, nil, graph_name: g) # nuke these since it will be pointless to evaluate further values.clear if op == :delete end # otherwise the code is basically the same values.each do |x| # get what should be guaranteed to be an RDF term or nil x = coerce_term(x, *contents[:designator]) or next # now we assign the appropriate direction reverse ? s = x : o = x # this will be either insert or delete patch.send op, RDF::Statement(s, p, o, graph_name: g) end end end patch end
Private Instance Methods
may accept and respond with nil
# File lib/rdf/kv.rb, line 218 def coerce_term token, hint = nil, langdt = nil return unless token return token if token.is_a? RDF::Term hint ||= ?: term = nil if [?:, ?_].include? hint return if token.empty? token = '_:' + token if hint == ?_ and !token.start_with? '_:' term = resolve_term token elsif hint == ?@ term = RDF::Literal(token, language: langdt.to_s.to_sym) elsif hint == ?^ raise 'datatype must be an RDF::Resource' unless langdt.is_a? RDF::Resource term = RDF::Literal(token, datatype: langdt) elsif hint == ?' term = RDF::Literal(token) else raise ArgumentError, "Unrecognized hint (#{hint})" end # call the callback if we have one term = callback.call term if callback term end
# File lib/rdf/kv.rb, line 73 def deref_content strings, macros strings = [strings] unless strings.is_a? Array # bail out early if there is nothing to do return strings unless strings.any? { |s| /#{MACRO}/o.match s } out = [] strings.each do |s| # sometimes these are arrays of arrays #s = s.first if s.is_a? Array # chunks are parallel output; each element is a value chunks = [] s.scan(/\G#{MACROS}/o) do |m| pre = m.first macro = m[1] || m[2] post = m[3] # skip if there was no macro unless macro # nothing to do next if pre + post == "" chunks = chunks.empty? ? [pre, post] : chunks.map do |x| "#{x}#{pre}#{post}" end next end # dereference the macro (or noop if unbound) macro = macro.to_sym x = if macros[macro] macros[macro].map do |m| '%s%s%s' % [pre, m.respond_to?(:call) ? m.call : m, post] end else # this is a noop ["#{pre}$#{macro}#{post}"] end # initialize chunks if chunks.empty? chunks = x next elsif !x.empty? # replace chunks with the product of itself and x y = [] chunks.each { |c| x.each { |d| y << "#{c}#{d}" } } chunks = y end end out.concat chunks end out end
# File lib/rdf/kv.rb, line 128 def massage_macros macros seen = {} done = GENERATED.transform_values { |v| v.map { |w| w.first } } pending = macros.reject { |k, _| GENERATED.key? k } queue = pending.keys.slice 0..0 # take a zero-or-one-element slice until queue.empty? k = queue.shift seen[k] = true vals = macros[k] # done and pending macros within the macros dm = {} pm = {} vals.each do |pair| val, deref = pair next unless deref if deref.is_a? Array deref.each do |m| done[m] ? dm[m] = true : pm[m] = true end else m = {} val.scan(/#{MACRO}/o).compact.each do |x| x = x.to_sym next unless macros[x] raise "Self-reference found: #{x}" if x == k m[x] = true done[m] ? dm[m] = true : pm[m] = true end # push the deref pair[1] = m.empty? ? false : m.keys.sort end end # macro values have pending matches if !pm.empty? q = [] pm.keys.each do |m| raise "Cycle detected between #{k} and #{m}" if seen[m] q << m end queue = q + [k] + queue next end unless dm.empty? done[k] = deref_content vals, done else done[k] = vals.map(&:first) end # remember to remove this guy or we'll loop forever pending.delete k # replenish the queue with another pending object queue << pending.keys.first if queue.empty? and !pending.keys.empty? end done end
unconditionally return a uri or bnode
# File lib/rdf/kv.rb, line 198 def resolve_term term return term if term.is_a? RDF::Term term = term.to_s # bnode ahoy return RDF::Node.new term.delete_prefix '_:' if term.start_with? '_:' # ugh now we gotta do urls if m = /^(#{NCNAME}):(\S*)$/o.match(term) prefix, slug = m.captures if !slug.start_with?(?/) and vocab = namespaces[prefix.to_sym] return vocab[slug] end end # now resolve against base RDF::URI((URI(subject.to_s) + term).to_s) end