class RDF::NTriples::Reader
N-Triples parser.
@example Obtaining an NTriples
reader class
RDF::Reader.for(:ntriples) #=> RDF::NTriples::Reader RDF::Reader.for("etc/doap.nt") RDF::Reader.for(file_name: "etc/doap.nt") RDF::Reader.for(file_extension: "nt") RDF::Reader.for(content_type: "application/n-triples")
@example Parsing RDF
statements from an NTriples
file
RDF::NTriples::Reader.open("etc/doap.nt") do |reader| reader.each_statement do |statement| puts statement.inspect end end
@example Parsing RDF
statements from an NTriples
string
data = StringIO.new(File.read("etc/doap.nt")) RDF::NTriples::Reader.new(data) do |reader| reader.each_statement do |statement| puts statement.inspect end end
** RDF-star
Supports statements as resources using `<<s p o>>`.
@see www.w3.org/TR/rdf-testcases/#ntriples @see www.w3.org/TR/n-triples/
Constants
- BLANK_NODE_LABEL
- COMMENT
- DATATYPE_URI
- ECHAR
- END_OF_STATEMENT
- ESCAPE_CHARS
- ESCAPE_CHARS_ESCAPED
cache constants to optimize escaping the escape chars in self.unescape
- ESCAPE_CHARS_ESCAPED_REGEXP
- IRIREF
- IRI_RANGE
- LANGTAG
LANGTAG is deprecated
- LANG_DIR
- LITERAL
- LITERAL_PLAIN
- LITERAL_WITH_DATATYPE
- LITERAL_WITH_LANGUAGE
- NODEID
- OBJECT
- PN_CHARS
- PN_CHARS_BASE
- PN_CHARS_U
- PREDICATE
- QT_END
- QT_START
- STRING_LITERAL_QUOTE
- SUBJECT
- TT_END
- TT_START
- UCHAR
- UCHAR4
- UCHAR8
- URIREF
- U_CHARS1
Terminals from rdf-turtle.
@see www.w3.org/TR/n-triples/ @see www.w3.org/TR/turtle/
Unicode regular expressions.
- U_CHARS2
Public Class Methods
(see unserialize) @return [RDF::Literal]
# File lib/rdf/ntriples/reader.rb, line 153
#
# Builds an RDF::Literal from a serialized literal string.
#
# The input is tried against LITERAL_WITH_LANGUAGE, LITERAL_WITH_DATATYPE and
# LITERAL_PLAIN, in that order. Capture group 1 is unescaped and used as the
# lexical value; for the first two patterns capture group 4 is passed as the
# `language:` or `datatype:` keyword respectively.
#
# @param input [String] serialized literal
# @param options [Hash] accepted for interface parity; not used here
# @return [RDF::Literal, nil] nil when none of the patterns match
def self.parse_literal(input, **options)
  case input
  when LITERAL_WITH_LANGUAGE
    captures = Regexp.last_match
    RDF::Literal.new(unescape(captures[1]), language: captures[4])
  when LITERAL_WITH_DATATYPE
    captures = Regexp.last_match
    RDF::Literal.new(unescape(captures[1]), datatype: captures[4])
  when LITERAL_PLAIN
    RDF::Literal.new(unescape(Regexp.last_match(1)))
  end
end
(see unserialize) @return [RDF::Node]
# File lib/rdf/ntriples/reader.rb, line 134
#
# Builds an RDF::Node from a serialized blank-node identifier.
#
# @param input [String] serialized node
# @param options [Hash] accepted for interface parity; not used here
# @return [RDF::Node, nil] a node built from capture group 1 of NODEID,
#   or nil when the input does not match NODEID
def self.parse_node(input, **options)
  return unless input =~ NODEID

  RDF::Node.new(Regexp.last_match(1))
end
(see unserialize)
# File lib/rdf/ntriples/reader.rb, line 127
#
# Parses a serialized object term, trying each parser in turn: URI first,
# then blank node, then literal. The first truthy result wins.
#
# @param input [String] serialized term
# @param options [Hash] forwarded unchanged to each parser
# @return [Object, nil] the result of the first parser that succeeds, or the
#   result of parse_literal when the first two return nothing
def self.parse_object(input, **options)
  uri = parse_uri(input, **options)
  return uri if uri

  node = parse_node(input, **options)
  return node if node

  parse_literal(input, **options)
end
(see unserialize) @return [RDF::URI]
# File lib/rdf/ntriples/reader.rb, line 121
#
# Parses a serialized predicate. Delegates to parse_uri and always requests
# an interned URI.
#
# @param input [String] serialized predicate
# @param options [Hash] accepted for interface parity; note that these are
#   NOT forwarded to parse_uri
# @return [RDF::URI, nil] whatever parse_uri returns for the input
def self.parse_predicate(input, **options)
  parse_uri(input, intern: true)
end
(see unserialize) @return [RDF::Resource]
# File lib/rdf/ntriples/reader.rb, line 114
#
# Parses a serialized subject term: a URI if parse_uri succeeds, otherwise
# whatever parse_node returns.
#
# @param input [String] serialized subject
# @param options [Hash] forwarded unchanged to both parsers
# @return [RDF::Resource, nil]
def self.parse_subject(input, **options)
  uri = parse_uri(input, **options)
  return uri if uri

  parse_node(input, **options)
end
(see unserialize) @param [Boolean] intern (false) Use Interned URI
@return [RDF::URI]
# File lib/rdf/ntriples/reader.rb, line 144
#
# Builds an RDF::URI from a serialized URI reference.
#
# Capture group 1 of URIREF is unescaped and handed to either
# RDF::URI.intern or RDF::URI.new, depending on `intern`.
#
# @param input [String] serialized URI reference
# @param intern [Boolean] (false) use RDF::URI.intern instead of RDF::URI.new
# @param options [Hash] accepted for interface parity; not used here
# @return [RDF::URI, nil] nil when the input does not match URIREF
def self.parse_uri(input, intern: false, **options)
  return unless input =~ URIREF

  unescaped = unescape(Regexp.last_match(1))
  intern ? RDF::URI.intern(unescaped) : RDF::URI.new(unescaped)
end
@param [String] string @return [String] @see www.w3.org/TR/rdf-testcases/#ntrip_strings @see blog.grayproductions.net/articles/understanding_m17n @see yehudakatz.com/2010/05/17/encodings-unabridged/
# File lib/rdf/ntriples/reader.rb, line 185
#
# Replaces N-Triples escape sequences in `string` with the characters they
# denote.
#
# The input is walked left to right with a StringScanner. At each position:
# * a sequence matching ESCAPE_CHARS_ESCAPED_REGEXP is replaced by its entry
#   in ESCAPE_CHARS_ESCAPED;
# * a sequence matching UCHAR is replaced by the code point whose hex digits
#   were captured (group 1 or group 2 of UCHAR);
# * anything else is copied through one character at a time.
#
# The input string itself is never mutated; a fresh, unfrozen UTF-8 string is
# always returned.
#
# @param string [String] text possibly containing escape sequences
# @return [String] a new UTF-8 string with the escapes resolved
def self.unescape(string)
  # Only re-tag (on a copy) when the input is not already UTF-8, so the
  # common case does not duplicate the input.
  string = string.dup.force_encoding(Encoding::UTF_8) unless string.encoding == Encoding::UTF_8
  scanner = StringScanner.new(string)

  # Explicitly allocate a mutable buffer: a bare "" literal is frozen when the
  # file opts into `# frozen_string_literal: true`, and `<<` would then raise.
  buffer = String.new(encoding: Encoding::UTF_8)

  until scanner.eos?
    buffer << if scanner.scan(ESCAPE_CHARS_ESCAPED_REGEXP)
                ESCAPE_CHARS_ESCAPED[scanner.matched]
              elsif scanner.scan(UCHAR)
                # The scanner already holds the capture groups of the UCHAR
                # match, so there is no need to run the regexp a second time.
                [(scanner[1] || scanner[2]).hex].pack('U*')
              else
                # Not an escape: copy a single character through.
                scanner.getch
              end
  end

  buffer
end
Reconstructs an RDF
value from its serialized N-Triples representation.
@param [String] input @param [{Symbol => Object}] options
From {RDF::Reader#initialize}
@option options [RDF::Util::Logger] :logger ([]) @return [RDF::Term]
# File lib/rdf/ntriples/reader.rb, line 104
#
# Reconstructs an RDF value from its serialized N-Triples representation by
# instantiating a reader over `input` and calling #read_value on it.
#
# @param input [String, nil] serialized term
# @param options [Hash] forwarded to the reader constructor; a `:logger`
#   entry given here takes precedence over the default `[]`
# @return [RDF::Term, nil] nil when `input` is nil
def self.unserialize(input, **options)
  return nil if input.nil?

  new(input, logger: [], **options).read_value
end
Public Instance Methods
@return [Boolean] @see www.w3.org/TR/rdf-testcases/#ntrip_grammar (comment)
# File lib/rdf/ntriples/reader.rb, line 279
#
# Attempts to read a comment at the current input position by delegating to
# `match` with the COMMENT pattern.
#
# @return [Object] the result of `match(COMMENT)`; truthy when a comment was
#   matched (callers in this class only test it for truthiness)
def read_comment
  match(COMMENT)
end
@return [Boolean] @see www.w3.org/TR/rdf-testcases/#ntrip_grammar (triple)
# File lib/rdf/ntriples/reader.rb, line 338
#
# Attempts to read the end-of-statement marker at the current input position
# by delegating to `match` with the END_OF_STATEMENT pattern.
#
# @return [Object] the result of `match(END_OF_STATEMENT)`; truthy when the
#   terminator was matched
def read_eos
  match(END_OF_STATEMENT)
end
@return [RDF::Literal] @see www.w3.org/TR/rdf-testcases/#ntrip_grammar (literal)
# File lib/rdf/ntriples/reader.rb, line 312
#
# Reads a literal at the current input position.
#
# After matching LITERAL_PLAIN and unescaping it, the method looks for an
# optional suffix:
# * LANG_DIR, split on "--" into language and direction. A direction is only
#   accepted when the :rdfstar option is set; otherwise ArgumentError is
#   raised and handled below;
# * "^^" followed by a URI reference, used as the datatype;
# * nothing, giving a plain literal.
#
# The literal is validated and/or canonicalized when the reader is configured
# to do so. Any ArgumentError raised along the way is reported through
# log_error with RDF::ReaderError as the exception class.
#
# @return [RDF::Literal, nil] nil when no literal starts at the current position
def read_literal
  if literal_str = match(LITERAL_PLAIN)
    literal_str = self.class.unescape(literal_str)
    literal = if lang_dir = match(LANG_DIR)
      language, direction = lang_dir.split('--')
      raise ArgumentError if direction && !@options[:rdfstar]
      RDF::Literal.new(literal_str, language: language, direction: direction)
    elsif match(/^(\^\^)/) # FIXME (carried over from the original source)
      RDF::Literal.new(literal_str, datatype: read_uriref || fail_object)
    else
      RDF::Literal.new(literal_str) # plain string literal
    end
    literal.validate!     if validate?
    literal.canonicalize! if canonicalize?
    literal
  end
rescue ArgumentError
  # Rebuild the text that was read so it can be reported. `+=` allocates a new
  # string, so the unescaped literal itself is left untouched.
  found = literal_str.to_s
  found += "@#{lang_dir}" if lang_dir
  # The token must carry the offending text; it previously was the constant
  # string "#v" because the interpolation braces were missing.
  log_error("Invalid Literal (found: \"#{found}\")", lineno: lineno, token: found, exception: RDF::ReaderError)
end
@return [RDF::Node] @see www.w3.org/TR/rdf-testcases/#ntrip_grammar (nodeID)
# File lib/rdf/ntriples/reader.rb, line 302
#
# Reads a blank node at the current input position.
#
# Nodes are memoized per identifier in @nodes, so the same identifier yields
# the same RDF::Node object on every call made on this reader.
#
# @return [RDF::Node, nil] nil when NODEID does not match
def read_node
  node_id = match(NODEID)
  return unless node_id

  (@nodes ||= {})[node_id] ||= RDF::Node.new(node_id)
end
@return [RDF::Statement] @deprecated Quoted triples are now deprecated
# File lib/rdf/ntriples/reader.rb, line 262
#
# Reads a quoted triple at the current input position.
#
# Only active when the :rdfstar option is set, TT_START does not match and
# QT_START does. Emits a deprecation warning, reads subject, predicate and
# object (quoted triples may nest in subject and object position), and
# reports an error through log_error when QT_END is missing.
#
# @return [RDF::Statement, nil] a statement built with `quoted: true`, or nil
#   when the preconditions above do not hold
# @deprecated Quoted triples are now deprecated
def read_quotedTriple
  return unless @options[:rdfstar] && !match(TT_START) && match(QT_START)

  # Kept directly in this method body so the reported caller location stays
  # the same as before.
  warn "[DEPRECATION] RDF-star quoted triples are deprecated and will be removed in a future version.\n" +
       "Called from #{Gem.location_of_caller.join(':')}"

  subj = read_uriref || read_node || read_quotedTriple || fail_subject
  pred = read_uriref(intern: true) || fail_predicate
  obj  = read_uriref || read_node || read_literal || read_quotedTriple || fail_object

  unless match(QT_END)
    log_error("Expected end of statement (found: #{current_line.inspect})", lineno: lineno, exception: RDF::ReaderError)
  end

  RDF::Statement.new(subj, pred, obj, quoted: true)
end
@return [Array] @see www.w3.org/TR/rdf-testcases/#ntrip_grammar
# File lib/rdf/ntriples/reader.rb, line 222
#
# Reads input lines until one yields a triple.
#
# Blank lines and comment lines are skipped. For any other line the subject,
# predicate and object are read in order; when validating, a missing
# end-of-statement marker is reported through log_error. If an
# RDF::ReaderError is raised while processing a line, @line is restored to the
# value it had right after the line was read, and the error is re-raised.
#
# @return [Array] `[subject, predicate, object]`
def read_triple
  loop do
    readline.strip! # EOFError thrown on end of input
    saved_line = @line # for backtracking input in case of parse error

    begin
      next if blank? || read_comment

      subj = read_uriref || read_node || read_quotedTriple || fail_subject
      pred = read_uriref(intern: true) || fail_predicate
      obj  = read_uriref || read_node || read_literal || read_tripleTerm || read_quotedTriple || fail_object

      if validate? && !read_eos
        log_error("Expected end of statement (found: #{current_line.inspect})", lineno: lineno, exception: RDF::ReaderError)
      end

      return [subj, pred, obj]
    rescue RDF::ReaderError => error
      @line = saved_line # this allows #read_value to work
      raise error
    end
  end
end
@return [RDF::Statement]
# File lib/rdf/ntriples/reader.rb, line 247
#
# Reads a triple term at the current input position.
#
# Only active when the :rdfstar option is set and TT_START matches. Reads
# subject, predicate and object (triple terms may nest in object position),
# and reports an error through log_error when TT_END is missing.
#
# @return [RDF::Statement, nil] a statement built with `tripleTerm: true`, or
#   nil when the preconditions above do not hold
def read_tripleTerm
  return unless @options[:rdfstar] && match(TT_START)

  subj = read_uriref || read_node || fail_subject
  pred = read_uriref(intern: true) || fail_predicate
  obj  = read_uriref || read_node || read_literal || read_tripleTerm || fail_object

  unless match(TT_END)
    log_error("Expected end of statement (found: #{current_line.inspect})", lineno: lineno, exception: RDF::ReaderError)
  end

  RDF::Statement.new(subj, pred, obj, tripleTerm: true)
end
@param [Boolean] intern (false) Use Interned URI
@return [RDF::URI] @see www.w3.org/TR/rdf-testcases/#ntrip_grammar (uriref)
# File lib/rdf/ntriples/reader.rb, line 287
#
# Reads a URI reference at the current input position.
#
# The matched text is unescaped and turned into an RDF::URI. The URI is
# interned only when both the reader's `intern?` setting and the `intern`
# argument are truthy. The URI is validated and/or canonicalized when the
# reader is configured to do so. An ArgumentError raised along the way is
# reported through log_error with RDF::ReaderError as the exception class.
#
# @param intern [Boolean] (false) request an interned URI
# @param options [Hash] accepted for interface parity; not used here
# @return [RDF::URI, nil] nil when URIREF does not match
def read_uriref(intern: false, **options)
  uri_str = match(URIREF)
  return unless uri_str

  uri_str = self.class.unescape(uri_str)
  uri = intern? && intern ? RDF::URI.intern(uri_str) : RDF::URI.new(uri_str)
  uri.validate!     if validate?
  uri.canonicalize! if canonicalize?
  uri
rescue ArgumentError
  log_error("Invalid URI (found: \"<#{uri_str}>\")", lineno: lineno, token: "<#{uri_str}>", exception: RDF::ReaderError)
end
@return [RDF::Term]
# File lib/rdf/ntriples/reader.rb, line 209
#
# Reads a single value from the input.
#
# First tries to read a full statement. If that raises RDF::ReaderError, the
# method falls back to reading a single term (URI, blank node, literal,
# triple term or quoted triple, in that order), calls log_recover, and
# returns the term.
#
# @return [RDF::Term, nil] the statement read, or the fallback term (nil when
#   none of the term readers match)
def read_value
  read_statement
rescue RDF::ReaderError
  recovered = read_uriref || read_node || read_literal || read_tripleTerm || read_quotedTriple
  log_recover
  recovered
end