class Mexico::Fiesta::Interfaces::ElanInterface
Constants
- CONSTRAINTS
Public Class Methods
export(doc, io=$stdout, params = {})
click to toggle source
# File lib/mexico/fiesta/interfaces/elan_interface.rb, line 41
# Class-level convenience entry point for exporting: forwards all arguments,
# unchanged, to the +export+ method of the object returned by +instance+.
#
# @param doc the document to export
# @param io the sink handed on to the instance method (defaults to $stdout)
# @param params [Hash] options handed on to the instance method
# @return whatever the instance-level +export+ returns
def self.export(doc, io=$stdout, params = {})
  exporter = instance
  exporter.export(doc, io, params)
end
import(io=$stdin, params = {})
click to toggle source
# File lib/mexico/fiesta/interfaces/elan_interface.rb, line 36
# Class-level convenience entry point for importing: forwards all arguments,
# unchanged, to the +import+ method of the object returned by +instance+.
#
# This method prints nothing itself; it only delegates.
#
# @param io the source handed on to the instance method (defaults to $stdin)
# @param params [Hash] options handed on to the instance method
# @return whatever the instance-level +import+ returns
def self.import(io=$stdin, params = {})
  instance.import(io, params)
end
Public Instance Methods
export(doc, io=$stdout, params = {})
click to toggle source
# File lib/mexico/fiesta/interfaces/elan_interface.rb, line 269
# Serialises +doc+ as an ELAN annotation document (EAF XML) and appends the
# resulting XML string to +io+.
#
# Elements are emitted in this order: the ANNOTATION_DOCUMENT root with its
# attributes, HEADER (with one MEDIA_DESCRIPTOR per referenced media entry),
# TIME_ORDER, one TIER per layer, one LINGUISTIC_TYPE per layer type, the
# static CONSTRAINT entries and finally the CONTROLLED_VOCABULARY entries.
#
# Only the XML is written. +io+ defaults to $stdout, so any diagnostic output
# printed with +puts+ would end up in front of the exported document.
#
# Caveats:
# * only annotations with at least one layer link will be exported.
# * annotations with multiple layer links will be exported multiple times.
#
# @param doc the document to export; it is read through +head+, +items+,
#   +layers+ and +layer_connectors+
# @param io [#<<] sink that receives the XML string (defaults to $stdout)
# @param params [Hash] currently not evaluated by this method
# @return the result of <tt>io << xml_string</tt>
def export(doc, io=$stdout, params = {})
  sections = Mexico::FileSystem::Section
  link_roles = Mexico::FileSystem::ItemLink

  # Linguistic type attributes in output order, paired with the property
  # key that holds their value.
  type_attributes = [
    [:TIME_ALIGNABLE, 'timeAlignable'],
    [:GRAPHIC_REFERENCES, 'graphicReferences'],
    [:CONTROLLED_VOCABULARY_REF, 'controlledVocabulary'],
    [:CONSTRAINTS, 'constraints']
  ]

  # Create an XML builder object that serialises into an XML structure
  builder = Nokogiri::XML::Builder.new do |xml|
    sec_datamodel = doc.head[sections::DATA_MODEL]
    sec_lifecycle = doc.head[sections::LIFECYCLE]
    sec_media = doc.head[sections::MEDIA_CONTEXT]
    sec_types = doc.head[sections::LAYER_TYPES]

    ad_attrs = {
      DATE: sec_lifecycle['creationDate'].value,
      AUTHOR: sec_lifecycle['authorKey'].value,
      FORMAT: sec_datamodel['annotationToolFormat'].value,
      VERSION: sec_datamodel['annotationToolVersion'].value,
      'xmlns:xsi' => 'http://www.w3.org/2001/XMLSchema-instance',
      'xsi:noNamespaceSchemaLocation' => sec_datamodel['xsiNoNamespaceSchemaLocation'].value
    }

    xml.ANNOTATION_DOCUMENT(ad_attrs) do
      # HEADER: primary media file, time units and the referenced media.
      # NOTE(review): import stores 'primaryMediaUrl' inside the
      # 'referencedMedia' property map, whereas it is looked up on the
      # section itself here -- confirm that this lookup finds it.
      primary_media = sec_media['primaryMediaUrl']
      media_file = primary_media.nil? ? '' : primary_media.value
      time_units = sec_datamodel['timeUnits'].value
      xml.HEADER({MEDIA_FILE: media_file, TIME_UNITS: time_units}) do
        sec_media['referencedMedia'].property_maps.each do |m|
          xml.MEDIA_DESCRIPTOR({MEDIA_URL: m['uri'].value,
                                MIME_TYPE: m['mimeType'].value,
                                RELATIVE_MEDIA_URL: m['path'].value})
        end
      end

      # Create the time stamp data structure: every distinct time value
      # gets a slot id "ts1", "ts2", ... in order of first appearance.
      # @todo #254 rework to use unit conversions!
      time_hash = {}
      register_time = lambda do |stamp|
        time_hash[stamp] ||= "ts#{time_hash.size + 1}"
      end
      doc.items.each do |item|
        item.point_links.each { |pl| register_time.call(pl.point) }
        item.interval_links.each do |il|
          register_time.call(il.min)
          register_time.call(il.max)
        end
      end

      xml.TIME_ORDER do
        time_hash.each do |time_value, slot_id|
          xml.TIME_SLOT({'TIME_SLOT_ID' => slot_id, 'TIME_VALUE' => time_value.to_i.to_s})
        end
      end

      # Export layer by layer.
      doc.layers.each do |layer|
        ling_type = layer.properties['elanTierType'].value
        ling_type_object = sec_types.property_maps.find { |m| m.key == ling_type }
        constraint_prop = ling_type_object['constraints']
        constraint = constraint_prop.nil? ? nil : constraint_prop.value

        attrs = {TIER_ID: layer.name, LINGUISTIC_TYPE_REF: ling_type}

        # import only adds the 'annotator' property for tiers that carry an
        # ANNOTATOR attribute, so the property itself may be missing here.
        annotator_prop = layer.properties['annotator']
        annotator = annotator_prop.nil? ? nil : annotator_prop.value
        attrs[:ANNOTATOR] = annotator unless annotator.nil?

        # If this layer is the target of a layer connector, it is the child
        # of another layer: reference the first such parent via PARENT_REF.
        parent_connector = doc.layer_connectors.select { |c| c.target == layer }.first
        attrs[:PARENT_REF] = parent_connector.source.name unless parent_connector.nil?

        xml.TIER(attrs) do
          layer.items.each do |item|
            xml.ANNOTATION do
              # depending on the layer type, use either ALIGNABLE or REF annotations
              if %w(Symbolic_Subdivision Symbolic_Association).include?(constraint)
                ref_attrs = {ANNOTATION_ID: item.identifier}
                parent_ref = item.item_links.find { |l| l.role == link_roles::ROLE_PARENT }
                unless parent_ref.nil?
                  ref_attrs[:ANNOTATION_REF] = parent_ref.target_item.identifier
                end
                pre_ref = item.item_links.find { |l| l.role == link_roles::ROLE_PREDECESSOR }
                unless pre_ref.nil?
                  ref_attrs[:PREVIOUS_ANNOTATION] = pre_ref.target_item.identifier
                end
                xml.REF_ANNOTATION(ref_attrs) do
                  xml.ANNOTATION_VALUE item.data.string_value
                end
              else
                # Only the first interval link of an item is exported; both
                # references stay nil when the item has no interval link.
                tsref1 = tsref2 = nil
                unless item.interval_links.empty?
                  interval = item.interval_links.first
                  tsref1 = time_hash[interval.min]
                  tsref2 = time_hash[interval.max]
                end
                xml.ALIGNABLE_ANNOTATION({'ANNOTATION_ID' => item.identifier,
                                          TIME_SLOT_REF1: tsref1,
                                          TIME_SLOT_REF2: tsref2}) do
                  xml.ANNOTATION_VALUE item.data.string_value
                end
              end
            end
          end
        end
      end

      # @todo #257 a :: LINGUISTIC_TYPE
      # An attribute is written only if the property exists and its value
      # is not nil (a value of false is still written).
      sec_types.property_maps.each do |tp|
        attrs = {LINGUISTIC_TYPE_ID: tp.key}
        type_attributes.each do |attr_name, prop_key|
          next unless tp.has_key?(prop_key)
          value = tp[prop_key].value
          attrs[attr_name] = value unless value.nil?
        end
        xml.LINGUISTIC_TYPE(attrs)
      end

      # Exports ELAN constraints.
      # These are static, so the same for all documents.
      Mexico::Fiesta::Interfaces::ElanInterface::CONSTRAINTS.each do |conk, conv|
        xml.CONSTRAINT STEREOTYPE: conk, DESCRIPTION: conv
      end

      # CONTROLLED_VOCABULARY: each vocabulary map holds an 'info' map
      # (description) and a 'data' map (one property map per entry).
      vocs = doc.head[sections::VOCABULARIES]
      vocs.property_maps.each do |voc|
        metamap = voc['info']
        datamap = voc['data']
        xml.CONTROLLED_VOCABULARY({CV_ID: voc.key, DESCRIPTION: metamap['description'].value}) do
          datamap.property_maps.each do |cv_entry|
            xml.CV_ENTRY(cv_entry['value'].value, {DESCRIPTION: cv_entry['description'].value})
          end
        end
      end
    end
  end

  io << builder.to_xml
end
import(io=$stdin, params = {})
click to toggle source
# File lib/mexico/fiesta/interfaces/elan_interface.rb, line 45
# Reads an ELAN annotation document (EAF XML) from +io+ and converts it into
# a new Mexico::FileSystem::FiestaDocument.
#
# The following parts of the EAF file are imported:
# * root attributes (DATE, AUTHOR, VERSION, FORMAT, schema information)
# * HEADER (MEDIA_FILE, TIME_UNITS, MEDIA_DESCRIPTOR and PROPERTY entries)
# * TIME_ORDER (time slots, resolved to numeric values)
# * CONTROLLED_VOCABULARY and LINGUISTIC_TYPE entries
# * TIER elements (as layers) with their annotations (as items)
#
# @param io the XML source (defaults to $stdin); it is rewound first when
#   it supports that
# @param params [Hash] options
# @option params [String] :encoding encoding handed to the XML parser; when
#   absent the parser determines the encoding itself
# @return [Mexico::FileSystem::FiestaDocument] the imported document
# @raise whatever DateTime.iso8601 raises when the root DATE attribute is
#   missing or is not an ISO 8601 timestamp
def import(io=$stdin, params = {})
  fs = Mexico::FileSystem

  # Start reading at the beginning where the stream allows it.
  if io.respond_to?(:rewind)
    begin
      io.rewind
    rescue Errno::ESPIPE
      # Pipes and terminals (e.g. the default $stdin) cannot seek; they are
      # simply read from their current position.
    end
  end

  xmldoc = ::Nokogiri::XML(io, nil, params[:encoding])
  document = fs::FiestaDocument.new

  sec_datamodel = document.head[fs::Section::DATA_MODEL]
  sec_datamodel.properties << fs::Property.new('sourceFormat', 'ELAN/EAF')
  sec_datamodel.properties << fs::Property.new('converterClass', 'Mexico::Fiesta::Interfaces::ElanInterface')

  # import attributes on root element
  # atts: DATE, AUTHOR, VERSION, FORMAT, xmlns:xsi, xsi:noNamespaceSchemaLocation
  xmldoc.xpath("/ANNOTATION_DOCUMENT").each do |annodoc|
    sec_lifecycle = document.head[fs::Section::LIFECYCLE]
    date = DateTime.iso8601(annodoc['DATE'])
    sec_lifecycle.properties << fs::Property.new('creationDate', date)
    sec_lifecycle.properties << fs::Property.new('authorKey', annodoc['AUTHOR'])
    sec_datamodel.properties << fs::Property.new('annotationToolVersion', annodoc['VERSION'])
    sec_datamodel.properties << fs::Property.new('annotationToolFormat', annodoc['FORMAT'])
    sec_datamodel.properties << fs::Property.new('xmlSchemaDeclaration', xmldoc.namespaces['xmlns:xsi'])
    sec_datamodel.properties << fs::Property.new('xsiNoNamespaceSchemaLocation', annodoc['xsi:noNamespaceSchemaLocation'])
  end

  # import header
  # - @MEDIA_FILE
  # - @TIME_UNITS
  # - MEDIA_DESCRIPTOR
  # - PROPERTY (e.g. @NAME=lastUsedAnnotationId)
  xmldoc.xpath("//HEADER").each do |header|
    media_context = document.head[fs::Section::MEDIA_CONTEXT]
    media_context.property_maps << fs::PropertyMap.new(key: 'referencedMedia')
    ref_media_map = media_context.property_maps.find { |m| m.key == 'referencedMedia' }

    # NOTE(review): export looks 'primaryMediaUrl' up on the media context
    # section itself, not inside this map -- confirm the two agree.
    ref_media_map.properties << fs::Property.new('primaryMediaUrl', header['MEDIA_FILE'])
    # time units should go into another section!
    sec_datamodel.properties << fs::Property.new('timeUnits', header['TIME_UNITS'])

    # One property map per media descriptor, keyed by its position.
    header.xpath('./MEDIA_DESCRIPTOR').each_with_index do |mediafile, n|
      file_map = fs::PropertyMap.new(key: "#{n}")
      file_map.properties << fs::Property.new('path', mediafile['RELATIVE_MEDIA_URL'])
      file_map.properties << fs::Property.new('mimeType', mediafile['MIME_TYPE'])
      file_map.properties << fs::Property.new('uri', mediafile['MEDIA_URL'])
      ref_media_map.property_maps << file_map
    end

    header.xpath('./PROPERTY').each do |property|
      sec_datamodel.properties << fs::Property.new(property['NAME'], property.text)
    end
  end

  # actual data:
  # 1. create a standard timeline
  timeline = document.add_standard_timeline('s')

  # 2. find time slots, store them as id => integer time value
  timeslots = {}
  xmldoc.xpath("//TIME_ORDER/TIME_SLOT").each do |t|
    timeslots[t["TIME_SLOT_ID"]] = t["TIME_VALUE"].to_i
  end

  # read cv entries: one container map per vocabulary with an 'info' map
  # (identifier, description) and a 'data' map (one map per entry)
  xmldoc.xpath("//CONTROLLED_VOCABULARY").each do |c|
    container_map = fs::PropertyMap.new(key: c['CV_ID'])

    metamap = fs::PropertyMap.new(key: 'info')
    metamap.properties << fs::Property.new('identifier', c['CV_ID'])
    metamap.properties << fs::Property.new('description', c['DESCRIPTION'])

    valuemap = fs::PropertyMap.new(key: 'data')
    c.xpath("./CV_ENTRY").each do |entry|
      valprop = fs::PropertyMap.new
      valprop.properties << fs::Property.new('description', entry['DESCRIPTION'])
      valprop.properties << fs::Property.new('value', entry.text)
      valuemap.property_maps << valprop
    end

    container_map.property_maps << metamap
    container_map.property_maps << valuemap
    document.head.section(fs::Section::VOCABULARIES).property_maps << container_map
  end

  # Read ling type entries. They are collected in a hash first, so a type id
  # that occurs more than once is stored once, with the last values read.
  # String keys are used, like for every other property created here and
  # like the keys export looks up.
  ling_types = {}
  xmldoc.xpath("//LINGUISTIC_TYPE").each do |lingtype|
    ling_types[lingtype['LINGUISTIC_TYPE_ID']] = {
      'constraints' => lingtype['CONSTRAINTS'],
      'graphicReferences' => lingtype['GRAPHIC_REFERENCES'] == "true",
      'timeAlignable' => lingtype['TIME_ALIGNABLE'] == "true",
      'controlledVocabulary' => lingtype['CONTROLLED_VOCABULARY_REF']
    }
  end
  layer_types = document.head[fs::Section::LAYER_TYPES]
  ling_types.each do |type_id, entry|
    pmap = fs::PropertyMap.new(key: type_id)
    entry.each do |name, value|
      # attributes absent from the XML are not stored at all
      pmap.properties << fs::Property.new(name, value) unless value.nil?
    end
    layer_types.property_maps << pmap
  end

  # Read tiers as layers, and their annotations as items.
  xmldoc.xpath("//TIER").each do |t|
    # @todo (DEFAULT_LOCALE="en") (LINGUISTIC_TYPE_REF="default-lt")
    tier_id = t["TIER_ID"]
    layer = document.add_layer(identifier: tier_id, name: tier_id)
    layer.add_property fs::Property.new('elanTierType', t['LINGUISTIC_TYPE_REF'])
    if t.attributes.key?('ANNOTATOR')
      layer.add_property fs::Property.new('annotator', t['ANNOTATOR'])
    end

    # Connect the layer to its parent, if one is referenced and can be
    # found among the layers imported so far.
    if t.attributes.key?('PARENT_REF')
      parent_layer = document.get_layer_by_id(Mexico::Util::to_xml_id(t['PARENT_REF']))
      if parent_layer
        layer_connector = fs::LayerConnector.new parent_layer, layer, {
          identifier: "#{parent_layer.identifier}_TO_#{layer.identifier}",
          role: 'PARENT_CHILD',
          document: document
        }
        document.add_layer_connector(layer_connector)
      end
    end

    t.xpath("./ANNOTATION").each do |anno_container|
      anno_container.xpath("child::*").each do |anno|
        # An absent or empty ANNOTATION_VALUE yields the empty string.
        anno_val = anno.xpath("./ANNOTATION_VALUE/text()").first.to_s
        item = document.add_item identifier: anno["ANNOTATION_ID"]

        if anno.name == "ALIGNABLE_ANNOTATION"
          # An unknown time slot reference ends up as 0.0 (nil.to_f).
          item.add_interval_link fs::IntervalLink.new(
            identifier: "#{item.identifier}-int",
            min: timeslots[anno["TIME_SLOT_REF1"]].to_f,
            max: timeslots[anno["TIME_SLOT_REF2"]].to_f,
            target_object: timeline
          )
        end

        if anno.name == "REF_ANNOTATION"
          # The referenced items are looked up among the items that have
          # been imported up to this point.
          item.add_item_link fs::ItemLink.new(
            identifier: "#{item.identifier}-itm",
            target_object: document.items({identifier: anno["ANNOTATION_REF"]}).first,
            role: fs::ItemLink::ROLE_PARENT
          )
          if anno.has_attribute?('PREVIOUS_ANNOTATION')
            item.add_item_link fs::ItemLink.new(
              identifier: "#{item.identifier}-pre",
              target_object: document.items({identifier: anno["PREVIOUS_ANNOTATION"]}).first,
              role: fs::ItemLink::ROLE_PREDECESSOR
            )
          end
        end

        item.add_layer_link fs::LayerLink.new(identifier: "#{item.identifier}-lay", target_object: layer)
        item.data = fs::Data.new(string_value: anno_val)
      end
    end
  end

  document
end