class RDF::Normalize::RDFC10::NormalizationState
Attributes
Public Class Methods
# File lib/rdf/normalize/rdfc10.rb, line 173
# Builds an empty normalization state.
#
# @param [Hash{Symbol => Object}] options
#   retained as-is in `@options`
# @option options [Symbol, String] :hash_algorithm (:SHA256)
#   name of the constant looked up under `Digest` and used for hashing
def initialize(**options)
  algorithm_name = options.fetch(:hash_algorithm, :SHA256)

  @options = options
  @hash_algorithm = Digest.const_get(algorithm_name)

  # Both maps start empty; the canonical issuer hands out "c14n"-prefixed identifiers.
  @bnode_to_statements = {}
  @hash_to_bnodes = {}
  @canonical_issuer = IdentifierIssuer.new("c14n")

  # No call limit is configured here; the call counter starts at zero.
  @max_calls = nil
  @total_calls = 0
end
Public Instance Methods
# File lib/rdf/normalize/rdfc10.rb, line 185
# Records `node` under `hash` in `hash_to_bnodes`, creating the list for that
# hash on first use. A node is appended at most once per hash.
#
# @param node the blank node to record
# @param hash the hash the node is filed under
# @return [Array, nil] the updated list, or nil when the node was already present
def add_bnode_hash(node, hash)
  bucket = (hash_to_bnodes[hash] ||= [])

  # Membership is decided with #eql? rather than ==.
  return if bucket.any? { |member| member.eql?(node) }

  bucket << node
end
# File lib/rdf/normalize/rdfc10.rb, line 180
# Associates `statement` with `node` in `bnode_to_statements`, creating the
# list for that node on first use. A statement already present (per #eql?)
# is not added again.
#
# @param node the blank node the statement mentions
# @param statement the statement to record
# @return [Array, nil] the updated list, or nil when the statement was already present
def add_statement(node, statement)
  known = (bnode_to_statements[node] ||= [])
  known << statement if known.none? { |existing| existing.eql?(statement) }
end
This algorithm calculates a hash for a given blank node across the quads in a dataset in which that blank node is a component. If the hash uniquely identifies that blank node, no further examination is necessary. Otherwise, a hash will be created for the blank node using the algorithm in [4.9 Hash N-Degree Quads](https://w3c.github.io/rdf-canon/spec/#hash-nd-quads) invoked via [4.5 Canonicalization Algorithm](https://w3c.github.io/rdf-canon/spec/#canon-algorithm).
@param [RDF::Node] node The reference blank node identifier
@return [String] the hexdigest (SHA256 unless another `:hash_algorithm` was configured) of the statements using this node, with replacements
# File lib/rdf/normalize/rdfc10.rb, line 195
# Hashes the statements recorded for `node` after masking blank nodes:
# `node` itself is serialized as `_:a`, every other blank node as `_:z`.
# The resulting N-Quads lines are sorted, concatenated and digested.
#
# @param [RDF::Node] node the reference blank node
# @return [String] the hexdigest of the sorted, masked N-Quads
def hash_first_degree_quads(node)
  serialized = bnode_to_statements[node].map do |stmt|
    masked_terms = stmt.to_quad.map do |term|
      case term
      when node then RDF::Node("a")
      when RDF::Node then RDF::Node("z")
      else term
      end
    end
    RDF::Statement.from(masked_terms).to_nquads
  end

  log_debug("log point", "Hash First Degree Quads function (4.7.3).")
  log_debug("nquads:")
  serialized.each { |line| log_debug {" - #{line.strip}"} }

  hexdigest(serialized.sort.join).tap do |digest|
    log_debug("hash") {digest}
  end
end
@param [RDF::Node] node
@param [IdentifierIssuer] issuer
@return [Array<String,IdentifierIssuer>] the Hash and issuer
@raise [MaxCallsExceeded] If total number of calls has exceeded `max_calls` times the number of blank nodes in the dataset.
# File lib/rdf/normalize/rdfc10.rb, line 245
# Computes a hash for `node` from the hashes of its related blank nodes,
# recursing into related nodes that have no identifier yet. The log points
# name the steps of "Hash N-Degree Quads" (4.9.3).
#
# Every invocation (including recursive ones) increments `@total_calls`.
#
# @param [RDF::Node] node the blank node to hash
# @param [IdentifierIssuer] issuer issuer for temporary identifiers; it is
#   only ever duplicated here, never mutated directly
# @return [Array<String,IdentifierIssuer>] the hexdigest of the accumulated
#   data and the issuer chosen by the last related-hash iteration
# @raise [MaxCallsExceeded] when `max_calls` is set and `total_calls` has
#   already reached it on entry
def hash_n_degree_quads(node, issuer)
  log_debug("hndq:")
  log_debug(" log point", "Hash N-Degree Quads function (4.9.3).")
  log_debug(" identifier") {node.id}
  log_debug(" issuer") {issuer.inspect}

  # Call budget: checked before counting this call, and only when a limit is set.
  if max_calls && total_calls >= max_calls
    raise MaxCallsExceeded, "Exceeded maximum number of calls (#{total_calls}) allowed to hash_n_degree_quads"
  end
  @total_calls += 1

  # hash to related blank nodes map
  hn = {}

  log_debug(" hndq.2:")
  log_debug(" log point", "Quads for identifier (4.9.3 (2)).")
  log_debug(" quads:")
  bnode_to_statements[node].each do |s|
    log_debug {" - #{s.to_nquads.strip}"}
  end

  # Step 3
  log_debug(" hndq.3:")
  log_debug(" log point", "Hash N-Degree Quads function (4.9.3 (3)).")
  log_debug(" with:") unless bnode_to_statements[node].empty?
  bnode_to_statements[node].each do |statement|
    log_debug {" - quad: #{statement.to_nquads.strip}"}
    log_debug(" hndq.3.1:")
    log_debug(" log point", "Hash related bnode component (4.9.3 (3.1))")
    # NOTE(review): hash_related_statement is defined outside this view; it
    # receives `hn` and presumably fills it with related-hash => nodes entries.
    log_depth(depth: 10) {hash_related_statement(node, statement, issuer, hn)}
  end

  log_debug(" Hash to bnodes:")
  hn.each do |k,v|
    log_debug(" #{k}:")
    v.each do |vv|
      log_debug(" - #{vv.id}")
    end
  end

  # Accumulates, per related hash in sorted order, the hash followed by its chosen path.
  data_to_hash = ""

  # Step 5
  log_debug(" hndq.5:")
  log_debug(" log point", "Hash N-Degree Quads function (4.9.3 (5)), entering loop.")
  log_debug(" with:")
  hn.keys.sort.each do |hash|
    log_debug(" - related hash", hash)
    log_debug(" data to hash") {data_to_hash.to_json}
    list = hn[hash]
    # Iterate over related nodes
    chosen_path, chosen_issuer = "", nil
    data_to_hash += hash

    log_debug(" hndq.5.4:")
    log_debug(" log point", "Hash N-Degree Quads function (4.9.3 (5.4)), entering loop.")
    log_debug(" with:") unless list.empty?
    # Try every ordering of the related nodes and keep the smallest path.
    list.permutation do |permutation|
      log_debug(" - perm") {permutation.map(&:id).to_json(indent: ' ', space: ' ')}
      # Each permutation works on its own copy of the issuer.
      issuer_copy, path, recursion_list = issuer.dup, "", []

      log_debug(" hndq.5.4.4:")
      log_debug(" log point", "Hash N-Degree Quads function (4.9.3 (5.4.4)), entering loop.")
      log_debug(" with:")
      permutation.each do |related|
        log_debug(" - related") {related.id}
        log_debug(" path") {path.to_json}
        if canonical_issuer.identifier(related)
          # Already canonically identified: use that identifier.
          path << '_:' + canonical_issuer.issue_identifier(related)
        else
          # Not yet known to the issuer copy: remember it for recursion below,
          # then append the identifier obtained from the copy.
          recursion_list << related if !issuer_copy.identifier(related)
          path << '_:' + issuer_copy.issue_identifier(related)
        end

        # Stop extending the path once a path has been chosen and this one is
        # at least as long.
        # NOTE(review): this compares lengths only, and `break` leaves just this
        # inner loop; the recursion and comparison below still run for this
        # permutation. Confirm this matches the intended "skip to the next
        # permutation".
        break if !chosen_path.empty? && path.length >= chosen_path.length
      end

      log_debug(" hndq.5.4.5:")
      log_debug(" log point", "Hash N-Degree Quads function (4.9.3 (5.4.5)), before possible recursion.")
      log_debug(" recursion list") {recursion_list.map(&:id).to_json(indent: ' ')}
      log_debug(" path") {path.to_json}
      log_debug(" with:") unless recursion_list.empty?
      recursion_list.each do |related|
        log_debug(" - related") {related.id}
        # Recurse; the result is a [hash, issuer] pair.
        result = log_depth(depth: 18) do
          hash_n_degree_quads(related, issuer_copy)
        end
        path << '_:' + issuer_copy.issue_identifier(related)
        path << "<#{result.first}>"
        # Continue with the issuer returned by the recursion.
        issuer_copy = result.last
        log_debug(" hndq.5.4.5.4:")
        log_debug(" log point", "Hash N-Degree Quads function (4.9.3 (5.4.5.4)), combine result of recursion.")
        log_debug(" path") {path.to_json}
        log_debug(" issuer copy") {issuer_copy.inspect}
        # This path can no longer win: it is at least as long as, and sorts
        # after, the chosen path. The check below then leaves the choice as is.
        break if !chosen_path.empty? && path.length >= chosen_path.length && path > chosen_path
      end

      # Keep the first path seen, or any path that sorts before the current choice.
      if chosen_path.empty? || path < chosen_path
        chosen_path, chosen_issuer = path, issuer_copy
      end
    end

    data_to_hash += chosen_path

    log_debug(" hndq.5.5:")
    log_debug(" log point", "Hash N-Degree Quads function (4.9.3 (5.5). End of current loop with Hn hashes.")
    log_debug(" chosen path") {chosen_path.to_json}
    log_debug(" data to hash") {data_to_hash.to_json}
    # The issuer chosen for this related hash is the starting point for the next one.
    issuer = chosen_issuer
  end

  log_debug(" hndq.6:")
  log_debug(" log point", "Leaving Hash N-Degree Quads function (4.9.3).")
  log_debug(" hash") {hexdigest(data_to_hash)}
  log_depth(depth: 4) {log_debug("issuer") {issuer.inspect}}
  return [hexdigest(data_to_hash), issuer]
end
# File lib/rdf/normalize/rdfc10.rb, line 362
# Multi-line, human-readable summary of the state: a header line followed by
# one line each for the two maps and the canonical issuer.
#
# @return [String]
def inspect
  [
    "NormalizationState:",
    "bnode_to_statements: #{inspect_bnode_to_statements}",
    "hash_to_bnodes: #{inspect_hash_to_bnodes}",
    "canonical_issuer: #{canonical_issuer.inspect}"
  ].join("\n")
end
# File lib/rdf/normalize/rdfc10.rb, line 366
# Renders `bnode_to_statements` as comma-separated `id: [nquads, ...]` entries,
# where each statement is shown as its stripped N-Quads serialization.
#
# @return [String]
def inspect_bnode_to_statements
  entries = bnode_to_statements.map do |bnode, quads|
    label = bnode.id
    rendered = quads.map { |quad| quad.to_nquads.strip }
    "#{label}: #{rendered}"
  end
  entries.join(", ")
end
# File lib/rdf/normalize/rdfc10.rb, line 372
# Renders `hash_to_bnodes` as comma-separated `hash: [node ids]` entries, in
# the same format as #inspect_bnode_to_statements.
#
# The method body used to be empty, so it returned nil and #inspect always
# showed a blank `hash_to_bnodes:` section.
#
# @return [String] the rendered map; an empty string when the map is empty
def inspect_hash_to_bnodes
  hash_to_bnodes.map do |hash, nodes|
    "#{hash}: #{nodes.map(&:id)}"
  end.join(", ")
end
Protected Instance Methods
# File lib/rdf/normalize/rdfc10.rb, line 377
# Hex-encoded digest of `val`, computed by the configured `hash_algorithm`.
#
# @param val the data to digest
# @return the value returned by `hash_algorithm.hexdigest`
def hexdigest(val)
  digest_class = hash_algorithm
  digest_class.hexdigest(val)
end