class TAO::RDFizer
Constants
- ERB_ANNOTATIONS_TTL
variable: denotations, relations
- ERB_PREFIXES_TTL
variable: namespaces
- ERB_SPANS_TTL
variable: spans
Public Class Methods
new(mode = nil)
click to toggle source
If mode == :spans, produces span descriptions; if mode == :annotations, produces annotation descriptions; if mode == nil, produces both.
# File lib/tao_rdfizer/tao_rdfizer.rb, line 10
# Prepares the ERB templates used for rendering.
#
# mode:: +:spans+ selects ERB_SPANS_TTL as the body template; any other value
#        (including +nil+ and +:annotations+) selects ERB_ANNOTATIONS_TTL.
#        The value is also stored in @mode for later use by the instance methods.
def initialize(mode = nil)
  @mode = mode
  template = mode == :spans ? ERB_SPANS_TTL : ERB_ANNOTATIONS_TTL

  # trim_mode is passed as a keyword: the positional (safe_level, trim_mode)
  # form of ERB.new is deprecated and emits warnings.
  @tao_ttl_erb = ERB.new(template, trim_mode: '-')
  @prefix_ttl_erb = ERB.new(ERB_PREFIXES_TTL, trim_mode: '-')
end
Public Instance Methods
rdfize(annotations_col, options = nil)
click to toggle source
# File lib/tao_rdfizer/tao_rdfizer.rb, line 21
# Renders a collection of annotation hashes as Turtle text.
#
# annotations_col:: collection of annotation hashes; every element must have a
#                   +:target+ key. Namespaces are read from the first element only.
# options:: optional hash.
#           +:only_prefixes+ - when exactly +true+, only the prefix section is
#           returned (default: false).
#           +:with_prefixes+ - when exactly +true+, the prefix section is
#           prepended to the annotations (default: true); any other supplied
#           value suppresses it.
#
# Returns the rendered Turtle as a String.
# Raises RuntimeError ("'target' is missing") if an element has no +:target+ key.
def rdfize(annotations_col, options = nil)
  options ||= {}

  # Only a literal `true` switches a flag on; other truthy values do not.
  only_prefixes = options.fetch(:only_prefixes, false) == true
  with_prefixes = options.fetch(:with_prefixes, true) == true

  # check the format
  annotations_col.each do |annotations|
    next if annotations.has_key?(:target)

    raise "'target' is missing"
  end

  # namespaces declared by the first element, keyed by prefix
  namespaces = {}
  declared = annotations_col.first[:namespaces]
  declared.each { |entry| namespaces[entry[:prefix]] = entry[:uri] } unless declared.nil?

  # NOTE(review): the prefix section is rendered before get_annotations_ttl
  # runs, and that method adds the project prefix to `namespaces` - so the
  # project prefix never reaches the prefix template. Confirm this is intended.
  prefixes_ttl = @prefix_ttl_erb.result_with_hash(namespaces: namespaces) if only_prefixes || with_prefixes
  return prefixes_ttl if only_prefixes

  annotations_ttl = get_annotations_ttl(annotations_col, namespaces)
  with_prefixes ? prefixes_ttl + annotations_ttl : annotations_ttl
end
Private Instance Methods
find_uri(label, namespaces, prefix_for_this)
click to toggle source
# File lib/tao_rdfizer/tao_rdfizer.rb, line 214
# Converts an annotation label into a Turtle term.
#
# label:: the label to convert (String).
# namespaces:: hash of known prefixes (String keys); the special key '_base'
#              switches unprefixed labels to relative IRIs.
# prefix_for_this:: prefix used for labels that are neither prefixed names
#                   nor http(s) IRIs.
#
# Returns
# * the label itself (brackets escaped) when it starts with a known prefix,
# * "<label>" when it looks like an http(s) IRI,
# * otherwise a cleaned-up local name, or 'SYM' for labels made only of
#   non-word characters, rendered as "<name>" if '_base' is declared or as
#   "prefix_for_this:name" if not.
#
# Runs of whitespace are replaced by a single underscore in every branch.
# (Previously the substitution result was thrown away, so whitespace leaked
# into prefixed names and IRIs.)
def find_uri(label, namespaces, prefix_for_this)
  delimiter_position = label.index(':')

  if delimiter_position && namespaces.key?(label[0...delimiter_position])
    # brackets have to be escaped
    label.gsub(/\s+/, '_').gsub('(', '\(').gsub(')', '\)')
  elsif label =~ %r[^https?://]
    "<#{label.gsub(/\s+/, '_')}>"
  else
    clabel = if label.match(/^\W+$/)
      'SYM'
    else
      # Trim leading/trailing punctuation first so that surrounding whitespace
      # is dropped rather than turned into underscores.
      label.sub(/^\W+/, '').sub(/[^a-zA-Z0-9_)]+$/, '').gsub(/\s+/, '_')
    end

    if namespaces.has_key?('_base')
      "<#{clabel}>"
    else
      "#{prefix_for_this}:#{clabel.gsub('(', '\(').gsub(')', '\)')}"
    end
  end
end
get_annotations_ttl(annotations_col, namespaces)
click to toggle source
# File lib/tao_rdfizer/tao_rdfizer.rb, line 53
# Preprocesses every annotation hash in +annotations_col+ and renders the body
# template (@tao_ttl_erb) over the collected denotations, relations and spans.
#
# annotations_col:: collection of annotation hashes. Keys read here: +:target+,
#                   +:text+, +:denotations+, +:attributes+, +:relations+,
#                   +:tracks+ (spans mode only) and, on the first element,
#                   +:project+ (all modes except spans).
# namespaces:: prefix => uri hash. Unless in spans mode it is MUTATED: the
#              project prefix is added to it.
#
# The input hashes are mutated too: URIs and index information are written
# into the denotation, relation and span hashes.
#
# Raises ArgumentError when the project name is missing or when its derived
# prefix collides with a declared namespace.
#
# NOTE: the template is evaluated with this method's +binding+, so the
# method-level local variable names below are part of the template contract
# and must not be renamed.
def get_annotations_ttl(annotations_col, namespaces)
  anns = annotations_col.first

  unless @mode == :spans
    raise ArgumentError, "A project name has to be specified." unless anns.has_key?(:project)
    prefix_for_this = anns[:project].downcase.gsub(/ /, '_')
    raise ArgumentError, "'#{prefix_for_this}' is a reserved prefix for this project." if namespaces.has_key?(prefix_for_this)
    project_uri = 'http://pubannotation.org/projects/' + anns[:project]
    namespaces[prefix_for_this] = project_uri + '/'
  end

  denotations = []
  attributes = []
  relations = []
  spans = []

  annotations_col.each do |annotations|
    text = annotations[:text]
    text_uri = annotations[:target]

    text_id = begin
      sourcedb, sourceid, divid = get_target_info(text_uri)
      divid.nil? ? "#{sourcedb}-#{sourceid}" : "#{sourcedb}-#{sourceid}-#{divid}"
    end

    # denotations and relations
    _denotations = annotations[:denotations] || []
    _attributes = annotations[:attributes] || []
    _relations = annotations[:relations] || []
    if @mode == :spans && annotations.has_key?(:tracks)
      annotations[:tracks].each do |track|
        _denotations += track[:denotations] if track.has_key? :denotations
        _attributes += track[:attributes] if track.has_key? :attributes
        _relations += track[:relations] if track.has_key? :relations
      end
    end

    # The mode symbol is :spans (as everywhere else in this class). The
    # original compared against :span, which never matched, so spans mode ran
    # the annotation preprocessing with a nil prefix.
    if @mode == :spans
      # denotations preprocessing: spans mode only needs the span URI
      _denotations.each do |d|
        span_uri = "<#{text_uri}/spans/#{d[:span][:begin]}-#{d[:span][:end]}>"
        d[:span_uri] = span_uri
      rescue ArgumentError => e
        raise ArgumentError, "[#{sourcedb}-#{sourceid}-#{d[:id]}] " + e.message
      end
    else
      # index attributes: subject id => objects of every '*_id' attribute
      attributesh = _attributes.inject({}) do |h, a|
        if a[:pred].end_with?('_id')
          subj = a[:subj]
          h[subj] = [] unless h.has_key? subj
          h[subj] << a[:obj]
        end
        h
      end

      # denotations preprocessing
      _denotations.each do |d|
        span_uri = "<#{text_uri}/spans/#{d[:span][:begin]}-#{d[:span][:end]}>"
        d[:span_uri] = span_uri
        d[:obj_uri] = "#{prefix_for_this}:#{text_id}-#{d[:id]}"
        class_uris = (attributesh[d[:id]] || []).push(d[:obj])
        d[:class_uris] = class_uris.map{|uri| find_uri(uri, namespaces, prefix_for_this)}
      rescue ArgumentError => e
        raise ArgumentError, "[#{sourcedb}-#{sourceid}-#{d[:id]}] " + e.message
      end

      # relations preprocessing
      _relations.each do |r|
        r[:subj_uri] = "#{prefix_for_this}:#{text_id}-#{r[:subj]}"
        r[:obj_uri] = "#{prefix_for_this}:#{text_id}-#{r[:obj]}"
        r[:pred_uri] = find_uri(r[:pred], namespaces, prefix_for_this)
      rescue ArgumentError => e
        raise ArgumentError, "[#{sourcedb}-#{sourceid}-#{r[:id]}] " + e.message
      end
    end

    unless @mode == :annotations
      # collect spans
      _spans = _denotations.map{|d| d[:span]}
      _spans.uniq!

      # add information
      _spans.each do |s|
        s[:span_uri] = "<#{text_uri}/spans/#{s[:begin]}-#{s[:end]}>"
        s[:source_uri] = text_uri
        s[:text] = text[s[:begin] ... s[:end]]
      end

      # index spans
      spanh = _spans.inject({}){|r, s| r[s[:span_uri]] = s; r}

      # add denotation information
      _denotations.each do |d|
        span_uri = d[:span_uri]
        if spanh[span_uri][:denotations].nil?
          spanh[span_uri][:denotations] = [d]
        else
          spanh[span_uri][:denotations] << d
        end
      end

      # order by begin ascending, then by end descending (outer spans first)
      _spans.sort!{|a, b| (a[:begin] <=> b[:begin]).nonzero? || b[:end] <=> a[:end]}

      ## begin indexing
      len = text.length
      num = _spans.length

      # initialize the index
      (0 ... num).each do |i|
        _spans[i][:followings] = []
        _spans[i][:children] = []
      end

      (0 ... num).each do |i|
        # spans starting before this one ends are children, unless an already
        # registered child covers them
        j = i + 1

        while j < num && _spans[j][:begin] < _spans[i][:end]
          unless include_parent?(_spans[i][:children], _spans[j])
            _spans[i][:children] << _spans[j]
          end
          j += 1
        end

        # find adjacent positions: skip whitespace after the end of this span
        fp = _spans[i][:end]
        fp += 1 while fp < len && text[fp].match(/\s/)
        next if fp == len

        # index adjacent spans: those starting exactly at that position
        while j < num && _spans[j][:begin] == fp
          _spans[i][:followings] << _spans[j]
          j += 1
        end
      end
    end

    denotations += _denotations
    relations += _relations
    spans += _spans unless @mode == :annotations
  end

  @tao_ttl_erb.result(binding)
end
get_target_info(text_uri)
click to toggle source
# File lib/tao_rdfizer/tao_rdfizer.rb, line 206
# Pulls the source database, source id and division id out of a document URI.
#
# text_uri:: URI containing "/sourcedb/<db>", "/sourceid/<id>" and optionally
#            "/divs/<n>" path segments.
#
# Returns a three-element array [sourcedb, sourceid, divid]; an element is
# +nil+ when the corresponding segment is absent.
def get_target_info(text_uri)
  segment_patterns = [
    %r|/sourcedb/([^/]+)|,
    %r|/sourceid/([^/]+)|,
    %r|/divs/([^/]+)|
  ]

  segment_patterns.map do |pattern|
    text_uri =~ pattern ? Regexp.last_match(1) : nil
  end
end
include_parent?(spans, span)
click to toggle source
# File lib/tao_rdfizer/tao_rdfizer.rb, line 200
# Tells whether any span in +spans+ covers +span+.
#
# spans:: collection of hashes with +:begin+ and +:end+ offsets.
# span:: hash with +:begin+ and +:end+ offsets.
#
# A candidate covers +span+ when it starts at or before it and ends at or
# after it, so a span with identical offsets also counts.
# Returns +true+ or +false+.
def include_parent?(spans, span)
  spans.any? do |candidate|
    candidate[:begin] <= span[:begin] && candidate[:end] >= span[:end]
  end
end
rdf_literal_escape(string)
click to toggle source
# File lib/tao_rdfizer/tao_rdfizer.rb, line 234
# Escapes a string for use inside a double-quoted RDF literal.
#
# string:: the raw text.
#
# Backslash, tab, backspace, newline, carriage return, form feed and the
# double quote are each replaced by their backslash escape.
# Returns a new, frozen String; the argument is not modified.
def rdf_literal_escape(string)
  # Applied in order. The backslash must go first so that the backslashes
  # introduced by the later substitutions are not doubled again.
  substitutions = [
    ['\\', '\\\\\\'],
    ["\t", '\\t'],
    ["\b", '\\b'],
    ["\n", '\\n'],
    ["\r", '\\r'],
    ["\f", '\\f'],
    ['"', '\\"']
  ]

  escaped = substitutions.inject(string) do |partial, (raw, replacement)|
    partial.gsub(raw, replacement)
  end
  escaped.freeze
end