class RDF::KV

Constants

DECLARATION
DEFAULT_NS

just the classics

DESIGNATOR
GENERATED

macros are initially represented as a pair: the macro value and a flag denoting whether or not the macro itself contains macros and to try to dereference it.

GRAMMAR
MACRO
MACROS
MAP
MODIFIER

the actual rdf-kv grammar

NCNAME
NCNAMECHAR
NCNSCHAR

some xml grammar

NOT_MACRO
NSCHAR
PARTIAL_STMT
PREFIX
RFC5646
SPECIALS

these should be instance_exec'd

TERM
VERSION

Attributes

callback[R]
graph[R]
namespaces[R]
subject[R]

Public Class Methods

new(subject: nil, graph: nil, namespaces: {}, callback: nil) click to toggle source

Initialize the processor.

@param subject [RDF::URI] The default subject. Required.
@param graph [RDF::URI] The default context. Optional.
@param namespaces [Hash] Namespace/prefix mappings. Optional.
@param callback [#call] A callback that expects and returns a term. Optional.
# File lib/rdf/kv.rb, line 257
# Initialize the processor.
#
# @param subject [RDF::Resource] the default subject; required.
# @param graph [RDF::Resource, nil] the default graph (context), if any.
# @param namespaces [#to_h] prefix => namespace mappings; these are
#   coerced to Symbol => RDF::Vocabulary and merged over DEFAULT_NS.
# @param callback [#call, nil] a callback that expects and returns a term.
# @raise [ArgumentError] if any argument fails the checks below.
def initialize subject: nil, graph: nil, namespaces: {}, callback: nil
  # look at all of our pretty assertions
  raise ArgumentError, 'subject must be an RDF::Resource' unless
    subject.is_a? RDF::Resource
  raise ArgumentError, 'graph must be an RDF::Resource' unless
    graph.nil? || graph.is_a?(RDF::Resource)
  raise ArgumentError, 'namespaces must be hashable' unless
    namespaces.respond_to? :to_h
  raise ArgumentError, 'callback must be callable' unless
    callback.nil? || callback.respond_to?(:call)

  @subject    = subject
  @graph      = graph
  @callback   = callback
  @namespaces = DEFAULT_NS.merge(namespaces.to_h.map do |k, v|
    k = k.to_s.to_sym    unless k.is_a? Symbol
    # coerce to uri
    v = RDF::URI(v.to_s) unless v.is_a? RDF::Resource
    # now coerce to vocabulary
    v = RDF::Vocabulary.new v unless v.is_a? RDF::Vocabulary
    [k, v]
  end.to_h)
end

Public Instance Methods

process(data) click to toggle source

@note This operation may change the state of the processor, so while this object can be reused for multiple hashes, it is unwise to reuse it across requests.

@param data [Hash] The data coming, e.g., from the Web form.
@return [RDF::Changeset] A changeset containing the results.

# File lib/rdf/kv.rb, line 290
# Turn a hash of RDF-KV keys and values into a changeset.
#
# Keys are sorted into macro declarations, candidate statement
# templates, and a discard pile. Macros are then dereferenced, the
# control macros in SPECIALS are applied, and each candidate that
# matches GRAMMAR contributes insert/delete statements to the result.
#
# @note The SPECIALS macros are `instance_exec`'d against this object,
#   so processing may change the state of the processor.
#
# @param data [Hash] the data coming, e.g., from a Web form.
# @return [RDF::Changeset] a changeset containing the results.
# @raise [ArgumentError] if data is not a Hash.
def process data
  raise ArgumentError, 'data must be a hash' unless data.is_a? Hash
  macros  = GENERATED.dup
  maybe   = {} # candidates
  neither = {} # discard pile

  data.each do |k, *v|
    # step 0: get the values to a homogeneous list
    k = k.to_s
    v = v.flatten.map(&:to_s)
    # step 1: pull out all the macro declarations
    if (m = /#{DECLARATION}/o.match k)
      name  = m[1].to_sym
      sigil = !!(m[2] && !m[2].empty?)
      # skip over generated macros
      next if GENERATED.key? name
      # step 1.0.1: create [content, deref flag] pairs
      (macros[name] ||= []).concat v.map { |x| [x, sigil] }
    elsif /(?:^\s*\S+\s+\S+.*?$|[:\$])/.match? k
      (maybe[k] ||= []).concat v
    else
      (neither[k] ||= []).concat v
    end
  end

  # step 2: dereference all the macros (that asked to be dereferenced)
  # XXX we should do something more here; for now errors simply
  # propagate to the caller untouched
  macros = massage_macros macros

  # step 3: apply special control macros (which modify self)
  # (again this should be nicer; errors propagate untouched)
  SPECIALS.each do |k, macro|
    instance_exec macros[k], &macro if macros[k]
  end

  # this will be our output
  patch = RDF::Changeset.new

  maybe.each do |k, v|
    # this will return an array now
    k = deref_content(k, macros).compact
    v = v.compact.map(&:strip).uniq

    # this is only this way because of macros
    k.each do |template|
      tokens = GRAMMAR.match(template) or next
      tokens = tokens.captures

      raise 'INTERNAL ERROR: Regexp captures do not match template' unless
        tokens.length == MAP.length

      # i had something much cleverer here but of course it didn't DWIW
      # (the first non-nil capture for each MAP name wins)
      contents = {}
      MAP.each_index { |i| contents[MAP[i]] ||= tokens[i] }
      contents.compact!

      # turn the modifier string into a set of flags keyed by character
      contents[:modifier] = (contents[:modifier] || '').chars.map do |c|
        [c, true]
      end.to_h

      if contents[:designator]
        # first character is the sigil, the remainder is its argument
        sigil, symbol = contents[:designator].split '', 2
        symbol = resolve_term symbol if sigil == ?^
        contents[:designator] = symbol.to_s.empty? ? [sigil] : [sigil, symbol]
      else
        # default to a resource for reverse statements, else a plain literal
        contents[:designator] = [contents[:modifier][?!] ? ?: : ?']
      end

      %i[term1 term2 graph].filter { |t| contents[t] }.each do |which|
        contents[which] = resolve_term contents[which]
      end

      # these are the values we actually use; ensure they are duplicated
      values = (contents[:deref] ? deref_content(v, macros) : v).dup

      g = coerce_term(contents[:graph]) || graph
      # initialize the triple
      s, p, o = nil

      # shorthand for reverse
      reverse = !!contents[:modifier][?!]
      if reverse
        # literals make no sense on reverse statements
        # (XXX this is a candidate for diagnostics)
        next unless [?_, ?:].include? contents[:designator].first
        # these terms have already been resolved/coerced
        p = contents[:term1]
        o = contents[:term2] || subject
      else
        s, p = (contents[:term2] ? contents.values_at(:term1, :term2) :
                [subject, contents[:term1]]).map { |t| resolve_term t }
      end

      # the operation depends on whether the `-` modifier is present
      op = contents[:modifier][?-] ? :delete : :insert

      # if we're deleting triples and the values contain an empty
      # string then we're deleting a wildcard, same if we `=` overwrite
      if !reverse && ((op == :delete && values.include?('')) ||
                      contents[:modifier][?=])
        # i can't remember why we don't do this in reverse, probably
        # because it is too easy to shoot yourself in the foot
        patch.delete RDF::Statement(s, p, nil, graph_name: g)

        # nuke these since it will be pointless to evaluate further
        values.clear if op == :delete
      end

      # otherwise the code is basically the same
      values.each do |x|
        # get what should be guaranteed to be an RDF term or nil
        x = coerce_term(x, *contents[:designator]) or next

        # now we assign the appropriate direction
        reverse ? s = x : o = x

        # this will be either insert or delete
        patch.send op, RDF::Statement(s, p, o, graph_name: g)
      end
    end
  end

  patch
end

Private Instance Methods

coerce_term(token, hint = nil, langdt = nil) click to toggle source

may accept and respond with nil

# File lib/rdf/kv.rb, line 218
# Coerce a token into an RDF term according to a one-character hint.
# May accept and respond with nil.
#
# Hints: `:` (the default) resolves the token as a resource, `_` as a
# blank node, `@` builds a literal with `langdt` as its language, `^`
# a literal with `langdt` as its datatype, and `'` a plain literal.
# If a callback is set, the term is passed through it before returning.
#
# @raise [ArgumentError] if the hint is not one of the above.
# @raise [RuntimeError] if the hint is `^` and langdt is not an RDF::Resource.
def coerce_term token, hint = nil, langdt = nil
  return unless token
  return token if token.is_a? RDF::Term

  designator = hint || ':'
  term = nil

  case designator
  when ':', '_'
    # empty resource tokens yield nothing
    return if token.empty?
    # blank nodes get their `_:` prefix if they lack one
    needs_prefix = designator == '_' && !token.start_with?('_:')
    term = resolve_term(needs_prefix ? '_:' + token : token)
  when '@'
    term = RDF::Literal(token, language: langdt.to_s.to_sym)
  when '^'
    unless langdt.is_a? RDF::Resource
      raise 'datatype must be an RDF::Resource'
    end
    term = RDF::Literal(token, datatype: langdt)
  when "'"
    term = RDF::Literal(token)
  else
    raise ArgumentError, "Unrecognized hint (#{designator})"
  end

  # call the callback if we have one
  callback ? callback.call(term) : term
end
deref_content(strings, macros) click to toggle source
# File lib/rdf/kv.rb, line 73
# Expand macro references in one or more strings.
#
# Each input string may expand to several output strings: a macro bound
# to multiple values multiplies the results, and several macros in one
# string yield the product of their values. References to unbound
# macros are left in place (rewritten as `$name`).
#
# @param strings [String, Array<String>] the content to dereference.
# @param macros [Hash{Symbol=>Array}] macro values; a value that
#   responds to `call` is invoked to obtain its content.
# @return [Array<String>] the expanded strings; the (array-wrapped)
#   input is returned untouched when nothing in it matches MACRO.
def deref_content strings, macros
  strings = [strings] unless strings.is_a? Array
  # bail out early if there is nothing to do
  return strings unless strings.any? { |s| /#{MACRO}/o.match s }

  strings.flat_map do |s|
    # chunks are parallel output; each element is a value
    chunks = []

    # MACROS is expected to capture: leading text, the macro name in
    # either of its two spellings, and trailing text
    s.scan(/\G#{MACROS}/o) do |pre, braced, bare, post|
      name = braced || bare

      # no macro in this segment: it is plain text
      unless name
        literal = "#{pre}#{post}"
        # nothing to do
        next if literal.empty?
        # the text before and after belongs to the same value, so it is
        # appended as one piece rather than as two separate values
        chunks = if chunks.empty?
                   [literal]
                 else
                   chunks.map { |c| "#{c}#{literal}" }
                 end
        next
      end

      # dereference the macro (or noop if unbound)
      key = name.to_sym
      expansions =
        if macros[key]
          macros[key].map do |val|
            '%s%s%s' % [pre, val.respond_to?(:call) ? val.call : val, post]
          end
        else
          # this is a noop
          ["#{pre}$#{key}#{post}"]
        end

      if chunks.empty?
        # initialize chunks
        chunks = expansions
      elsif !expansions.empty?
        # replace chunks with the product of itself and the expansions
        chunks = chunks.product(expansions).map(&:join)
      end
    end

    chunks
  end
end
massage_macros(macros) click to toggle source
# File lib/rdf/kv.rb, line 128
# Resolve macro declarations into their final lists of values.
#
# Each entry in `macros` is a list of `[value, deref]` pairs. A truthy
# `deref` asks for macro references inside the value to be expanded;
# on first inspection the flag is replaced in place with the sorted
# list of macro names the value refers to (or `false` if there are
# none). Macros are processed from a queue, and a macro whose
# dependencies are not yet resolved is re-queued behind them.
#
# @param macros [Hash{Symbol=>Array<Array>}] macro name => pairs.
# @return [Hash{Symbol=>Array}] macro name => resolved values,
#   including the first elements of the GENERATED entries.
# @raise [RuntimeError] on a self-reference or a dependency cycle.
def massage_macros macros
  seen = {}
  done = GENERATED.transform_values { |v| v.map { |w| w.first } }
  pending = macros.reject { |k, _| GENERATED.key? k }
  queue   = pending.keys.slice 0..0 # take a zero-or-one-element slice

  until queue.empty?
    k = queue.shift
    seen[k] = true

    vals = macros[k]

    # done and pending macros within the macros
    dm = {}
    pm = {}

    vals.each do |pair|
      val, deref = pair

      next unless deref

      if deref.is_a? Array
        # already analysed on an earlier pass: just re-sort the references
        deref.each { |ref| (done[ref] ? dm : pm)[ref] = true }
      else
        refs = {}
        # scan yields one array of captures per match when MACRO has
        # groups, so flatten before discarding the unmatched (nil) ones
        val.scan(/#{MACRO}/o).flatten.compact.each do |x|
          x = x.to_sym
          next unless macros[x]
          raise "Self-reference found: #{x}" if x == k

          refs[x] = true

          # file the referenced macro (not the bookkeeping hash)
          (done[x] ? dm : pm)[x] = true
        end
        # push the deref
        pair[1] = refs.empty? ? false : refs.keys.sort
      end
    end

    # macro values have pending matches
    unless pm.empty?
      q = []
      pm.keys.each do |ref|
        raise "Cycle detected between #{k} and #{ref}" if seen[ref]
        q << ref
      end

      # resolve the dependencies first, then come back to this one
      queue = q + [k] + queue
      next
    end

    # expand only the values that asked for it and still have something
    # to expand; everything else is taken verbatim
    done[k] = vals.flat_map do |val, deref|
      deref ? deref_content(val, done) : [val]
    end

    # remember to remove this guy or we'll loop forever
    pending.delete k

    # replenish the queue with another pending object
    queue << pending.keys.first if queue.empty? and !pending.keys.empty?
  end

  done
end
resolve_term(term) click to toggle source

unconditionally return a uri or bnode

# File lib/rdf/kv.rb, line 198
# Unconditionally return a URI or blank node for the given term.
#
# Existing RDF terms are returned untouched. Anything else is
# stringified and then read as a blank node (`_:` prefix), as a
# prefixed name using the configured namespaces, or, failing both, as
# a reference resolved against the default subject.
def resolve_term term
  return term if term.is_a? RDF::Term

  text = term.to_s

  # bnode ahoy
  return RDF::Node.new(text.delete_prefix('_:')) if text.start_with?('_:')

  # ugh now we gotta do urls
  match = /^(#{NCNAME}):(\S*)$/o.match(text)
  if match
    prefix, slug = match.captures
    # a slug with a leading slash is not treated as a prefixed name
    unless slug.start_with?('/')
      vocab = namespaces[prefix.to_sym]
      return vocab[slug] if vocab
    end
  end

  # now resolve against base
  base = URI(subject.to_s)
  RDF::URI((base + text).to_s)
end