class RDF::Normalize::RDFC10::NormalizationState

Attributes

bnode_to_statements[RW]
canonical_issuer[RW]
hash_algorithm[RW]
hash_to_bnodes[RW]
max_calls[RW]
total_calls[RW]

Public Class Methods

new(**options) click to toggle source
# File lib/rdf/normalize/rdfc10.rb, line 173
# Sets up empty normalization state.
#
# @param [Hash{Symbol => Object}] options
# @option options [Symbol, String] :hash_algorithm (:SHA256)
#   name of the constant under `Digest` to use for hashing
def initialize(**options)
  @options = options

  # Resolve the digest class by name; SHA256 unless the caller says otherwise.
  algorithm_name = options.fetch(:hash_algorithm, :SHA256)
  @hash_algorithm = Digest.const_get(algorithm_name)

  @bnode_to_statements = {}
  @hash_to_bnodes = {}
  @canonical_issuer = IdentifierIssuer.new("c14n")

  # No call limit until one is assigned through the accessor.
  @max_calls = nil
  @total_calls = 0
end

Public Instance Methods

add_bnode_hash(node, hash) click to toggle source
# File lib/rdf/normalize/rdfc10.rb, line 185
# Records `node` under `hash` in `hash_to_bnodes`, unless an entry that is
# `eql?` to it is already listed for that hash.
#
# @param [Object] node
# @param [Object] hash key under which the node is filed
# @return [Array, nil] the updated list, or nil when the node was already present
def add_bnode_hash(node, hash)
  bucket = (hash_to_bnodes[hash] ||= [])
  # Compare with eql? (receiver is the stored entry) rather than ==
  return if bucket.any? { |known| known.eql?(node) }

  bucket << node
end
add_statement(node, statement) click to toggle source
# File lib/rdf/normalize/rdfc10.rb, line 180
# Records `statement` under `node` in `bnode_to_statements`, unless a
# statement that is `eql?` to it is already listed for that node.
#
# @param [Object] node
# @param [Object] statement
# @return [Array, nil] the updated list, or nil when the statement was already present
def add_statement(node, statement)
  known_statements = (bnode_to_statements[node] ||= [])
  return if known_statements.any? { |existing| existing.eql?(statement) }

  known_statements << statement
end
hash_first_degree_quads(node) click to toggle source

This algorithm calculates a hash for a given blank node across the quads in a dataset in which that blank node is a component. If the hash uniquely identifies that blank node, no further examination is necessary. Otherwise, a hash will be created for the blank node using the algorithm in [4.9 Hash N-Degree Quads](w3c.github.io/rdf-canon/spec/#hash-nd-quads) invoked via [4.5 Canonicalization Algorithm](w3c.github.io/rdf-canon/spec/#canon-algorithm).

@param [RDF::Node] node The reference blank node identifier @return [String] the hexdigest hash (computed with the configured `hash_algorithm`, SHA256 by default) of statements using this node, with replacements

# File lib/rdf/normalize/rdfc10.rb, line 195
# Hashes the quads recorded for `node` after substituting placeholder blank
# nodes: the reference node becomes `_:a` and every other blank node `_:z`.
#
# @param [RDF::Node] node the reference blank node
# @return [String] hexdigest of the sorted, concatenated N-Quads lines
def hash_first_degree_quads(node)
  nquads = bnode_to_statements[node].map do |statement|
    substituted = statement.to_quad.map do |term|
      # Order matters: the reference node must be tested before the generic blank-node case.
      case term
      when node then RDF::Node("a")
      when RDF::Node then RDF::Node("z")
      else term
      end
    end
    RDF::Statement.from(substituted).to_nquads
  end

  log_debug("log point", "Hash First Degree Quads function (4.7.3).")
  log_debug("nquads:")
  nquads.each { |line| log_debug { "  - #{line.strip}" } }

  # Sort so the digest does not depend on the order the statements were recorded in.
  hexdigest(nquads.sort.join).tap do |digest|
    log_debug("hash") { digest }
  end
end
hash_n_degree_quads(node, issuer) click to toggle source

@param [RDF::Node] node @param [IdentifierIssuer] issuer @return [Array<String,IdentifierIssuer>] the Hash and issuer @raise [MaxCallsExceeded] If total number of calls has exceeded `max_calls` times the number of blank nodes in the dataset.

# File lib/rdf/normalize/rdfc10.rb, line 245
# Computes a hash for `node` from the blank nodes related to it through its
# recorded quads, recursing into related blank nodes that have no identifier
# yet (the "Hash N-Degree Quads" function, 4.9.3, per the log points below).
#
# For every group of related nodes sharing a hash, each permutation of the
# group is turned into a path string and the smallest path is kept.
#
# @param [RDF::Node] node
# @param [IdentifierIssuer] issuer
# @return [Array<String,IdentifierIssuer>] the hash and the issuer in effect after processing
# @raise [MaxCallsExceeded] if `max_calls` is set and `total_calls` has already reached it
def hash_n_degree_quads(node, issuer)
  log_debug("hndq:")
  log_debug("  log point", "Hash N-Degree Quads function (4.9.3).")
  log_debug("  identifier") {node.id}
  log_debug("  issuer") {issuer.inspect}

  # Guard against runaway recursion; only active when a limit has been set.
  if max_calls && total_calls >= max_calls
    raise MaxCallsExceeded, "Exceeded maximum number of calls (#{total_calls}) allowed to hash_n_degree_quads"
  end
  @total_calls += 1

  # hash to related blank nodes map
  hn = {}

  log_debug("  hndq.2:")
  log_debug("    log point", "Quads for identifier (4.9.3 (2)).")
  log_debug("    quads:")
  bnode_to_statements[node].each do |s|
    log_debug {"    - #{s.to_nquads.strip}"}
  end

  # Step 3
  log_debug("  hndq.3:")
  log_debug("    log point", "Hash N-Degree Quads function (4.9.3 (3)).")
  log_debug("    with:") unless bnode_to_statements[node].empty?
  bnode_to_statements[node].each do |statement|
    log_debug {"      - quad: #{statement.to_nquads.strip}"}
    log_debug("        hndq.3.1:")
    log_debug("          log point", "Hash related bnode component (4.9.3 (3.1))")
    # hash_related_statement is handed `hn` so it can record related nodes in it
    log_depth(depth: 10) {hash_related_statement(node, statement, issuer, hn)}
  end
  log_debug("    Hash to bnodes:")
  hn.each do |k,v|
    log_debug("      #{k}:")
    v.each do |vv|
      log_debug("        - #{vv.id}")
    end
  end

  data_to_hash = ""

  # Step 5
  log_debug("  hndq.5:")
  log_debug("    log point", "Hash N-Degree Quads function (4.9.3 (5)), entering loop.")
  log_debug("    with:")
  # Visit related hashes in sorted order so the result is independent of hash insertion order
  hn.keys.sort.each do |hash|
    log_debug("      - related hash", hash)
    log_debug("        data to hash") {data_to_hash.to_json}
    list = hn[hash]
    # Iterate over related nodes
    chosen_path, chosen_issuer = "", nil
    data_to_hash += hash

    log_debug("        hndq.5.4:")
    log_debug("          log point", "Hash N-Degree Quads function (4.9.3 (5.4)), entering loop.")
    log_debug("          with:") unless list.empty?
    list.permutation do |permutation|
      log_debug("          - perm") {permutation.map(&:id).to_json(indent: ' ', space: ' ')}
      # Work on a copy so identifiers issued for a rejected permutation are discarded
      issuer_copy, path, recursion_list = issuer.dup, "", []
      skip_permutation = false

      log_debug("            hndq.5.4.4:")
      log_debug("              log point", "Hash N-Degree Quads function (4.9.3 (5.4.4)), entering loop.")
      log_debug("              with:")
      permutation.each do |related|
        log_debug("                - related") {related.id}
        log_debug("                  path") {path.to_json}
        if canonical_issuer.identifier(related)
          path << '_:' + canonical_issuer.issue_identifier(related)
        else
          # Nodes without an identifier in the copy are recursed into below
          recursion_list << related if !issuer_copy.identifier(related)
          path << '_:' + issuer_copy.issue_identifier(related)
        end

        # Skip to the next permutation if chosen path isn't empty and the path is greater than the chosen path.
        # The length test alone is not enough: a path that is at least as long may still sort lower,
        # so the ordering comparison is required (same test as after the recursion below).
        if !chosen_path.empty? && path.length >= chosen_path.length && path > chosen_path
          skip_permutation = true
          break
        end
      end
      # A path already greater than the chosen one cannot become smaller by appending to it
      next if skip_permutation

      log_debug("            hndq.5.4.5:")
      log_debug("              log point", "Hash N-Degree Quads function (4.9.3 (5.4.5)), before possible recursion.")
      log_debug("              recursion list") {recursion_list.map(&:id).to_json(indent: ' ')}
      log_debug("              path") {path.to_json}
      log_debug("              with:") unless recursion_list.empty?
      recursion_list.each do |related|
        log_debug("                - related") {related.id}
        result = log_depth(depth: 18) do
          hash_n_degree_quads(related, issuer_copy)
        end
        path << '_:' + issuer_copy.issue_identifier(related)
        path << "<#{result.first}>"
        # Continue with the issuer returned by the recursive call
        issuer_copy = result.last
        log_debug("                  hndq.5.4.5.4:")
        log_debug("                    log point", "Hash N-Degree Quads function (4.9.3 (5.4.5.4)), combine result of recursion.")
        log_debug("                    path") {path.to_json}
        log_debug("                    issuer copy") {issuer_copy.inspect}
        # This path can no longer win; the comparison below rejects it
        break if !chosen_path.empty? && path.length >= chosen_path.length && path > chosen_path
      end

      # Keep the smallest path seen so far, together with the issuer that produced it
      if chosen_path.empty? || path < chosen_path
        chosen_path, chosen_issuer = path, issuer_copy
      end
    end

    data_to_hash += chosen_path
    log_debug("        hndq.5.5:")
    log_debug("          log point", "Hash N-Degree Quads function (4.9.3 (5.5). End of current loop with Hn hashes.")
    log_debug("          chosen path") {chosen_path.to_json}
    log_debug("          data to hash") {data_to_hash.to_json}
    # Later hashes build on the identifiers issued for the winning permutation
    issuer = chosen_issuer
  end

  log_debug("  hndq.6:")
  log_debug("    log point", "Leaving Hash N-Degree Quads function (4.9.3).")
  log_debug("    hash") {hexdigest(data_to_hash)}
  log_depth(depth: 4) {log_debug("issuer") {issuer.inspect}}
  return [hexdigest(data_to_hash), issuer]
end
inspect() click to toggle source
# File lib/rdf/normalize/rdfc10.rb, line 362
# Multi-line, human-readable summary of the state.
#
# @return [String]
def inspect
  [
    "NormalizationState:",
    "bnode_to_statements: #{inspect_bnode_to_statements}",
    "hash_to_bnodes: #{inspect_hash_to_bnodes}",
    "canonical_issuer: #{canonical_issuer.inspect}"
  ].join("\n")
end
inspect_bnode_to_statements() click to toggle source
# File lib/rdf/normalize/rdfc10.rb, line 366
# Renders `bnode_to_statements` as "id: [nquads, ...]" entries separated by ", ".
#
# @return [String]
def inspect_bnode_to_statements
  entries = bnode_to_statements.map do |bnode, quads|
    # Strip the trailing newline of each serialized quad
    serialized = quads.map { |quad| quad.to_nquads.strip }
    "#{bnode.id}: #{serialized}"
  end
  entries.join(", ")
end
inspect_hash_to_bnodes() click to toggle source
# File lib/rdf/normalize/rdfc10.rb, line 372
# Renders `hash_to_bnodes` as "hash: [node ids]" entries separated by ", ",
# in the same style as `inspect_bnode_to_statements`.
#
# @return [String] empty when no hashes have been recorded
def inspect_hash_to_bnodes
  hash_to_bnodes.map do |hash, nodes|
    "#{hash}: #{nodes.map(&:id)}"
  end.join(", ")
end

Protected Instance Methods

hexdigest(val) click to toggle source
# File lib/rdf/normalize/rdfc10.rb, line 377
# Hex-encoded digest of `val`, computed with the configured `hash_algorithm`.
#
# @param [String] val
# @return [String]
def hexdigest(val)
  digest_class = hash_algorithm
  digest_class.hexdigest(val)
end