class RDF::NTriples::Reader

N-Triples parser.

@example Obtaining an NTriples reader class

RDF::Reader.for(:ntriples)     #=> RDF::NTriples::Reader
RDF::Reader.for("etc/doap.nt")
RDF::Reader.for(file_name:      "etc/doap.nt")
RDF::Reader.for(file_extension: "nt")
RDF::Reader.for(content_type:   "application/n-triples")

@example Parsing RDF statements from an NTriples file

RDF::NTriples::Reader.open("etc/doap.nt") do |reader|
  reader.each_statement do |statement|
    puts statement.inspect
  end
end

@example Parsing RDF statements from an NTriples string

data = StringIO.new(File.read("etc/doap.nt"))
RDF::NTriples::Reader.new(data) do |reader|
  reader.each_statement do |statement|
    puts statement.inspect
  end
end

** RDF-star

Supports statements as resources using `<<s p o>>`.

@see https://www.w3.org/TR/rdf-testcases/#ntriples
@see https://www.w3.org/TR/n-triples/

Constants

BLANK_NODE_LABEL
COMMENT

@see www.w3.org/TR/rdf-testcases/#ntrip_grammar

DATATYPE_URI
ECHAR
END_OF_STATEMENT
ESCAPE_CHARS

@see www.w3.org/TR/rdf-testcases/#ntrip_strings

ESCAPE_CHARS_ESCAPED

Cached constants that speed up replacing escape sequences in self.unescape.

ESCAPE_CHARS_ESCAPED_REGEXP
IRIREF
IRI_RANGE
LANGTAG

LANGTAG is deprecated

LANG_DIR
LITERAL
LITERAL_PLAIN
LITERAL_WITH_DATATYPE
LITERAL_WITH_LANGUAGE
NODEID
OBJECT
PN_CHARS
PN_CHARS_BASE
PN_CHARS_U
PREDICATE
QT_END
QT_START
STRING_LITERAL_QUOTE
SUBJECT
TT_END
TT_START
UCHAR
UCHAR4
UCHAR8
URIREF
U_CHARS1

Terminals from rdf-turtle.

@see www.w3.org/TR/n-triples/ @see www.w3.org/TR/turtle/

Unicode regular expressions.

U_CHARS2

Public Class Methods

parse_literal(input, **options) click to toggle source

(see unserialize) @return [RDF::Literal]

# File lib/rdf/ntriples/reader.rb, line 153
# Builds an RDF::Literal from a serialized literal.
#
# The patterns are tried in a fixed order: language-tagged, then datatyped,
# then plain. Capture group 1 is unescaped and used as the lexical value;
# capture group 4 supplies the language or datatype for the first two forms.
#
# @param  [String] input serialized literal
# @param  [Hash{Symbol => Object}] options accepted but not used by this method
# @return [RDF::Literal, nil] `nil` when none of the literal patterns match
def self.parse_literal(input, **options)
  if LITERAL_WITH_LANGUAGE === input
    RDF::Literal.new(unescape(Regexp.last_match(1)), language: Regexp.last_match(4))
  elsif LITERAL_WITH_DATATYPE === input
    RDF::Literal.new(unescape(Regexp.last_match(1)), datatype: Regexp.last_match(4))
  elsif LITERAL_PLAIN === input
    RDF::Literal.new(unescape(Regexp.last_match(1)))
  end
end
parse_node(input, **options) click to toggle source

(see unserialize) @return [RDF::Node]

# File lib/rdf/ntriples/reader.rb, line 134
# Builds an RDF::Node from a serialized blank node.
#
# @param  [String] input serialized blank node
# @param  [Hash{Symbol => Object}] options accepted but not used by this method
# @return [RDF::Node, nil] node created from capture group 1 of NODEID,
#   or `nil` when +input+ does not match
def self.parse_node(input, **options)
  return nil unless input =~ NODEID

  RDF::Node.new(Regexp.last_match(1))
end
parse_object(input, **options) click to toggle source

(see unserialize)

# File lib/rdf/ntriples/reader.rb, line 127
# Parses a serialized object term.
#
# Tries parse_uri, then parse_node, then parse_literal, forwarding +options+
# to each, and stops at the first one that yields a truthy result.
#
# @param  [String] input serialized term
# @param  [Hash{Symbol => Object}] options forwarded to each parser
# @return [Object, nil] result of the first parser that succeeds, otherwise
#   whatever parse_literal returned
def self.parse_object(input, **options)
  term = parse_uri(input, **options)
  term ||= parse_node(input, **options)
  term ||= parse_literal(input, **options)
  term
end
parse_predicate(input, **options) click to toggle source

(see unserialize) @return [RDF::URI]

# File lib/rdf/ntriples/reader.rb, line 121
# Parses a serialized predicate.
#
# Delegates to parse_uri with `intern: true`. Note that +options+ is
# accepted for signature parity but is not forwarded to parse_uri.
#
# @param  [String] input serialized predicate
# @param  [Hash{Symbol => Object}] options accepted but not used by this method
# @return [Object] whatever parse_uri returns for +input+
def self.parse_predicate(input, **options)
  parse_uri(
    input,
    intern: true
  )
end
parse_subject(input, **options) click to toggle source

(see unserialize) @return [RDF::Resource]

# File lib/rdf/ntriples/reader.rb, line 114
# Parses a serialized subject term.
#
# Tries parse_uri first and falls back to parse_node, forwarding +options+
# to both.
#
# @param  [String] input serialized term
# @param  [Hash{Symbol => Object}] options forwarded to each parser
# @return [Object, nil] result of parse_uri when truthy, otherwise the
#   result of parse_node
def self.parse_subject(input, **options)
  resource = parse_uri(input, **options)
  resource ||= parse_node(input, **options)
  resource
end
parse_uri(input, intern: false, **options) click to toggle source

(see unserialize) @param [Boolean] intern (false) Use Interned URI @return [RDF::URI]

# File lib/rdf/ntriples/reader.rb, line 144
# Builds an RDF::URI from a serialized IRI reference.
#
# Capture group 1 of URIREF is unescaped and handed to either
# RDF::URI.intern or RDF::URI.new, depending on +intern+.
#
# @param  [String] input serialized IRI reference
# @param  [Boolean] intern (false) use RDF::URI.intern instead of RDF::URI.new
# @param  [Hash{Symbol => Object}] options accepted but not used by this method
# @return [RDF::URI, nil] `nil` when +input+ does not match URIREF
def self.parse_uri(input, intern: false, **options)
  return nil unless input =~ URIREF

  iri = unescape(Regexp.last_match(1))
  if intern
    RDF::URI.intern(iri)
  else
    RDF::URI.new(iri)
  end
end
unescape(string) click to toggle source

@param [String] string @return [String] @see www.w3.org/TR/rdf-testcases/#ntrip_strings @see blog.grayproductions.net/articles/understanding_m17n @see yehudakatz.com/2010/05/17/encodings-unabridged/

# File lib/rdf/ntriples/reader.rb, line 185
# Replaces escape sequences in +string+ with the characters they denote.
#
# The input is scanned left to right. At each position the scanner tries,
# in order: an entry of ESCAPE_CHARS_ESCAPED (via ESCAPE_CHARS_ESCAPED_REGEXP),
# a UCHAR sequence (whose hexadecimal capture is packed as a single code
# point), and finally a single literal character. A new string is always
# built and returned; the argument itself is never modified.
#
# @param  [String] string text possibly containing escape sequences
# @return [String] a new string with the recognised sequences replaced
def self.unescape(string)
  # Re-tag a copy as UTF-8 when needed so the caller's string is untouched.
  string = string.dup.force_encoding(Encoding::UTF_8) unless string.encoding == Encoding::UTF_8
  scanner = StringScanner.new(string)

  # Unary plus yields a mutable string, so appending below keeps working when
  # string literals are frozen (frozen_string_literal) and does not trigger
  # the "chilled string" deprecation warning on newer Rubies.
  buffer = +""

  until scanner.eos?
    buffer << if scanner.scan(ESCAPE_CHARS_ESCAPED_REGEXP)
      ESCAPE_CHARS_ESCAPED[scanner.matched]
    elsif scanner.scan(UCHAR)
      # Exactly one of the two captures is expected to be set; decode its hex digits.
      scanner.matched.sub(UCHAR) {[($1 || $2).hex].pack('U*')}
    else
      # No escape at this position: copy one character through unchanged.
      scanner.getch
    end
  end

  buffer
end
unserialize(input, **options) click to toggle source

Reconstructs an RDF value from its serialized N-Triples representation.

@param [String] input @param [{Symbol => Object}] options

From {RDF::Reader#initialize}

@option options [RDF::Util::Logger] :logger ([]) @return [RDF::Term]

# File lib/rdf/ntriples/reader.rb, line 104
# Reconstructs an RDF value from its serialized N-Triples representation.
#
# A reader is instantiated over +input+ with `logger: []` as the default
# logger (a `:logger` entry in +options+ overrides it) and asked for a
# single value via #read_value.
#
# @param  [String, nil] input serialized value
# @param  [Hash{Symbol => Object}] options passed on to the reader constructor
# @return [Object, nil] result of #read_value, or `nil` when +input+ is `nil`
def self.unserialize(input, **options)
  return nil if input.nil?

  reader = new(input, logger: [], **options)
  reader.read_value
end

Public Instance Methods

read_comment() click to toggle source

@return [Boolean] @see www.w3.org/TR/rdf-testcases/#ntrip_grammar (comment)

# File lib/rdf/ntriples/reader.rb, line 279
# Attempts to match the COMMENT pattern against the current input.
#
# @return [Object] whatever #match returns for COMMENT (truthy on success)
def read_comment
  comment = match(COMMENT)
  comment
end
read_eos() click to toggle source

@return [Boolean] @see www.w3.org/TR/rdf-testcases/#ntrip_grammar (triple)

# File lib/rdf/ntriples/reader.rb, line 338
# Attempts to match the END_OF_STATEMENT pattern against the current input.
#
# @return [Object] whatever #match returns for END_OF_STATEMENT (truthy on success)
def read_eos
  terminator = match(END_OF_STATEMENT)
  terminator
end
read_literal() click to toggle source

@return [RDF::Literal] @see www.w3.org/TR/rdf-testcases/#ntrip_grammar (literal)

# File lib/rdf/ntriples/reader.rb, line 312
# Reads a literal from the current input position.
#
# After LITERAL_PLAIN matches, the lexical form is unescaped and one of three
# literals is built:
# * language-tagged, when LANG_DIR matches; the match is split on `--` into a
#   language and an optional base direction, and a direction is rejected
#   unless the `:rdfstar` option is set;
# * datatyped, when `^^` follows, using the IRI returned by #read_uriref;
# * plain, otherwise.
# The literal is then validated and/or canonicalized when the reader is
# configured to do so.
#
# Any ArgumentError raised along the way is reported through #log_error with
# `exception: RDF::ReaderError`; the reported token is the offending lexical
# form (plus `@lang` when a language tag was read).
#
# @return [RDF::Literal, nil] `nil` when the input does not start with a literal
def read_literal
  if literal_str = match(LITERAL_PLAIN)
    literal_str = self.class.unescape(literal_str)
    literal = case
      when lang_dir = match(LANG_DIR)
        language, direction = lang_dir.split('--')
        # A base direction is only accepted when the :rdfstar option is set.
        raise ArgumentError if direction && !@options[:rdfstar]
        RDF::Literal.new(literal_str, language: language, direction: direction)
      when match(/^(\^\^)/) # FIXME (marker carried over from the original source)
        RDF::Literal.new(literal_str, datatype: read_uriref || fail_object)
      else
        RDF::Literal.new(literal_str) # plain string literal
    end
    literal.validate!     if validate?
    literal.canonicalize! if canonicalize?
    literal
  end
rescue ArgumentError
  v = literal_str
  v += "@#{lang_dir}" if lang_dir
  # Pass the offending text itself as the token ("#v" was a literal two-character string).
  log_error("Invalid Literal (found: \"#{v}\")", lineno: lineno, token: v, exception: RDF::ReaderError)
end
read_node() click to toggle source

@return [RDF::Node] @see www.w3.org/TR/rdf-testcases/#ntrip_grammar (nodeID)

# File lib/rdf/ntriples/reader.rb, line 302
# Reads a blank node from the current input position.
#
# Nodes are memoized per label in @nodes, so reading the same label twice
# on this reader yields the same RDF::Node instance.
#
# @return [RDF::Node, nil] `nil` when NODEID does not match
def read_node
  label = match(NODEID)
  return nil unless label

  @nodes ||= {}
  @nodes[label] ||= RDF::Node.new(label)
end
read_quotedTriple() click to toggle source

@return [RDF::Statement] @deprecated Quoted triples are now deprecated

# File lib/rdf/ntriples/reader.rb, line 262
# Reads a quoted triple (deprecated RDF-star syntax).
#
# Only active when the `:rdfstar` option is set, TT_START does not match and
# QT_START does. A deprecation warning is emitted, then subject, predicate
# and object are read (subject and object may themselves be quoted triples).
# A missing QT_END is reported through #log_error.
#
# @return [RDF::Statement, nil] statement built with `quoted: true`, or `nil`
#   when the preconditions above do not hold
# @deprecated Quoted triples are now deprecated
def read_quotedTriple
  return nil unless @options[:rdfstar] && !match(TT_START) && match(QT_START)

  call_site = Gem.location_of_caller.join(':')
  warn "[DEPRECATION] RDF-star quoted triples are deprecated and will be removed in a future version.\nCalled from #{call_site}"

  subj = read_uriref || read_node || read_quotedTriple || fail_subject
  pred = read_uriref(intern: true) || fail_predicate
  obj  = read_uriref || read_node || read_literal || read_quotedTriple || fail_object
  unless match(QT_END)
    log_error("Expected end of statement (found: #{current_line.inspect})", lineno: lineno, exception: RDF::ReaderError)
  end
  RDF::Statement.new(subj, pred, obj, quoted: true)
end
read_triple() click to toggle source

@return [Array] @see www.w3.org/TR/rdf-testcases/#ntrip_grammar

# File lib/rdf/ntriples/reader.rb, line 222
# Reads the next triple from the input.
#
# Lines are fetched with #readline until one is neither blank nor a comment.
# On that line a subject, predicate and object are read in turn; when
# validating, a missing end-of-statement is reported through #log_error.
# If an RDF::ReaderError escapes while parsing a line, @line is reset to the
# content it had right after the line was read, and the error is re-raised.
#
# @return [Array] `[subject, predicate, object]`
def read_triple
  loop do
    readline.strip! # per the original note, EOFError is raised at end of input
    saved_line = @line # kept so the line can be restored after a parse error

    begin
      next if blank? || read_comment

      subj = read_uriref || read_node || read_quotedTriple || fail_subject
      pred = read_uriref(intern: true) || fail_predicate
      obj  = read_uriref || read_node || read_literal || read_tripleTerm || read_quotedTriple || fail_object

      log_error("Expected end of statement (found: #{current_line.inspect})", lineno: lineno, exception: RDF::ReaderError) if validate? && !read_eos
      return [subj, pred, obj]
    rescue RDF::ReaderError
      @line = saved_line # this allows #read_value to work
      raise
    end
  end
end
read_tripleTerm() click to toggle source

@return [RDF::Statement]

# File lib/rdf/ntriples/reader.rb, line 247
# Reads a triple term.
#
# Only active when the `:rdfstar` option is set and TT_START matches. The
# subject, predicate and object are then read (the object may itself be a
# triple term), and a missing TT_END is reported through #log_error.
#
# @return [RDF::Statement, nil] statement built with `tripleTerm: true`, or
#   `nil` when the option is off or TT_START does not match
def read_tripleTerm
  return nil unless @options[:rdfstar] && match(TT_START)

  subj = read_uriref || read_node || fail_subject
  pred = read_uriref(intern: true) || fail_predicate
  obj  = read_uriref || read_node || read_literal || read_tripleTerm || fail_object
  unless match(TT_END)
    log_error("Expected end of statement (found: #{current_line.inspect})", lineno: lineno, exception: RDF::ReaderError)
  end
  RDF::Statement.new(subj, pred, obj, tripleTerm: true)
end
read_uriref(intern: false, **options) click to toggle source

@param [Boolean] intern (false) Use Interned URI
@return [RDF::URI]
@see www.w3.org/TR/rdf-testcases/#ntrip_grammar (uriref)

# File lib/rdf/ntriples/reader.rb, line 287
# Reads an IRI reference from the current input position.
#
# The matched text is unescaped and turned into an RDF::URI. The interned
# constructor is used only when both the reader-level #intern? and the
# +intern+ argument are truthy. The URI is validated and/or canonicalized
# when the reader is configured to do so. An ArgumentError raised along the
# way is reported through #log_error with `exception: RDF::ReaderError`.
#
# @param  [Boolean] intern (false) request an interned URI
# @param  [Hash{Symbol => Object}] options accepted but not used by this method
# @return [RDF::URI, nil] `nil` when URIREF does not match
def read_uriref(intern: false, **options)
  uri_str = match(URIREF)
  return nil unless uri_str

  uri_str = self.class.unescape(uri_str)
  uri = if intern? && intern
    RDF::URI.intern(uri_str)
  else
    RDF::URI.new(uri_str)
  end
  uri.validate!     if validate?
  uri.canonicalize! if canonicalize?
  uri
rescue ArgumentError
  log_error("Invalid URI (found: \"<#{uri_str}>\")", lineno: lineno, token: "<#{uri_str}>", exception: RDF::ReaderError)
end
read_value() click to toggle source

@return [RDF::Term]

# File lib/rdf/ntriples/reader.rb, line 209
# Reads a single value.
#
# First tries #read_statement. If that raises RDF::ReaderError, falls back to
# reading one term (IRI, blank node, literal, triple term or quoted triple,
# in that order), calls #log_recover, and returns the term.
#
# @return [Object, nil] result of #read_statement, or the fallback term
#   (`nil` when no term could be read)
def read_value
  read_statement
rescue RDF::ReaderError
  (read_uriref || read_node || read_literal || read_tripleTerm || read_quotedTriple).tap { log_recover }
end