class Libis::Ingester::AcpParser
Attributes
element_path[RW]
element_stack[RW]
Protected Instance Methods
parse_xml(xml_file, item)
click to toggle source
# File lib/libis/ingester/tasks/acp_parser.rb, line 53
#
# Streams through the ACP export XML and calls +create_ie+ once for every
# IE element that was fully read.
#
# The parser block receives an event name plus its arguments. Three events
# are handled: +:start_element_namespace+, +:end_element_namespace+ and
# +:characters+; anything else is ignored.
#
# State kept while parsing:
# * +element_stack+ - every element currently open
# * +element_path+  - open elements matching PATH_ELEMENT whose parent
#   matches CONTAINER_ELEMENT
# * +name_path+     - names of the open NAME_ELEMENT elements; joined with
#   '/' to form the path of an IE
# * +ie+            - Hash with the data collected for the IE being read, or
#   nil when no IE is open or the current one was rejected
#
# @param xml_file the XML file handed to Libis::Ingester::Base::XmlParser
# @param item parent object passed on to +create_ie+
# @raise [WorkflowAbort] when a closing tag does not match the last opened element
def parse_xml(xml_file, item)
  element_stack = [] # stack of all elements traversed
  element_path = [] # stack of relevant elements traversed
  name_path = [] # stack of relevant names
  ie = nil
  Libis::Ingester::Base::XmlParser.new(xml_file) do |function, *args|
    case function
    when :start_element_namespace
      element = args[0]
      element_stack.push(element)
      # Only direct children of a container element are of interest.
      if element_stack[-2] =~ CONTAINER_ELEMENT
        element_path.push(element) if element =~ PATH_ELEMENT
        # Each attribute entry is indexed: position 0 is used as the key and
        # position 3 as the value.
        attrs = args[1].each_with_object({}) { |attr, memo| memo[attr[0]] = attr[3] }
        child_name = attrs['childName']&.gsub(/^cm:/, '')
        child_name = Nokogiri::XML.fragment(child_name)
        case element
        when NAME_ELEMENT
          name_path.push(child_name)
        when IE_ELEMENT
          ie = {}
          ie[:name] = child_name if child_name
          ie[:path] = name_path.join('/')
        else
          # nothing
        end
      end
    when :end_element_namespace
      element = args[0]
      raise WorkflowAbort, "Error processing XML: unexpected closing tag" unless element == element_stack.pop
      if element_stack[-1] =~ CONTAINER_ELEMENT
        element_path.pop if element =~ PATH_ELEMENT
        case element
        when NAME_ELEMENT
          name_path.pop
        when IE_ELEMENT
          # ie is nil when the IE was rejected while reading its properties.
          create_ie ie, item if ie
          ie = nil
        else
          # nothing
        end
      end
    when :characters
      string = args[0]
      if ie
        case element_path[-1]
        when IE_ELEMENT
          case element_stack[-1]
          when 'name'
            ie[:name] = string
          when 'isadMedium'
            # Any medium other than 'Digitaal' rejects the IE being read.
            ie = nil if string != 'Digitaal'
          when 'isadOrigineel'
            ie[:original] = parse_content_string(string)
          when 'content'
            # The meaning of 'content' depends on the kind of IE element.
            case element_path[-1]
            when STUK_ELEMENT
              ie[:derived] = parse_content_string(string)
            when BACKLOG_ELEMENT
              ie[:original] = parse_content_string(string)
            else
              # nothing
            end
          when 'isadRaadpleegformaatNaam'
            ie[:deriv_name] = string
          when 'node-dbid'
            ie[:vp_dbid] = string
          when 'node-uuid'
            ie[:vp_uuid] = string
          when 'isadAanmaakDatum'
            ie[:created] = DateTime.parse(string)
          when 'created'
            # Fallback only: never overrides a date set by 'isadAanmaakDatum'.
            ie[:created] ||= DateTime.parse(string)
          when 'isadTitel'
            ie[:label] = string
          when 'titel'
            ie[:label] = name_path[-1] + ' - ' + string
          when 'checksum'
            ie[:checksum] = string
          when 'isadReferentie'
            ie[:refcode] = string
          else
            # nothing
          end
        when THUMBNAIL_ELEMENT
          case element_stack[-1]
          when 'content'
            ie[:thumbnail] = parse_content_string(string)
          else
            # nothing
          end
        else
          # nothing
        end
      else # ie
        case element_stack[-1]
        when 'exportOf'
          # Strip the 'cm:' prefixes and decode every '_x...._' escape: the
          # text between the underscores is parsed as a hexadecimal number
          # and packed as a single UTF-8 character.
          decoded = string.gsub('/cm:', '/').gsub(/_x[^_]*_/) do |x|
            ["0#{x.tr('_', '')}".to_i(16)].pack('U')
          end
          # drop(3) always returns an Array; the former [3..-1] slice returned
          # nil for paths with fewer than three segments, which broke every
          # later use of name_path.
          name_path = decoded.split('/').drop(3)
        when 'name'
          if element_path[-1] =~ NAME_ELEMENT
            # Replace the name taken from the childName attribute.
            name_path.pop
            name_path.push(string)
          end
        else
          # nothing
        end
      end # if ie
    else
      # nothing
    end # function
  end
end
process(item)
click to toggle source
@param [Libis::Ingester::Run] item
# File lib/libis/ingester/tasks/acp_parser.rb, line 36
#
# Locates the single XML file in the directory given by the +acp_dir+
# parameter, stores its path in +item.options[:export_attachments]+ and
# hands it to +parse_xml+.
#
# @param item object whose +options+ hash receives the XML file path
# @raise [Libis::WorkflowAbort] when the directory does not exist or does
#   not contain exactly one '*.xml' file
def process(item)
  acp_dir = parameter(:acp_dir)
  raise Libis::WorkflowAbort, "ACP directory '#{acp_dir}' cannot be found." unless Dir.exist?(acp_dir)

  candidates = Dir.glob(File.join(acp_dir, '*.xml'))
  raise Libis::WorkflowAbort, "ACP directory should contain only 1 XML file." if candidates.size != 1

  export_file = candidates.first
  item.options[:export_attachments] = export_file
  parse_xml(export_file, item)
end
Private Instance Methods
create_file(source, size, target, date, checksum = nil)
click to toggle source
# File lib/libis/ingester/tasks/acp_parser.rb, line 178
#
# Builds and saves a FileItem for a file located in the +acp_dir+ directory.
#
# The access and modification time of the file on disk are set to +date+
# before the size and checksum are verified.
#
# @param source file path relative to the ACP directory; nil returns nil
# @param size expected value of the file item's 'size' property
# @param target stored as 'original_path'; its basename becomes the label
# @param date must respond to +to_time+; also stored in the access,
#   modification and creation time properties
# @param checksum expected 'checksum_md5' property; not checked when nil
# @return the saved file item, or nil when the file is missing or does not
#   match the expected size or checksum (an error is logged in those cases)
def create_file(source, size, target, date, checksum = nil)
  return nil unless source

  path = File.join(parameter(:acp_dir), source)
  unless File.exist?(path)
    error "Could not find file '#{source}' in the ACP directory"
    return nil
  end

  timestamp = date.to_time
  File.utime(timestamp, timestamp, path)

  # NOTE(review): the 'size' and 'checksum_md5' properties are presumably
  # filled in when the filename is assigned - confirm in FileItem.
  file_item = Libis::Ingester::FileItem.new
  file_item.filename = path

  unless file_item.properties['size'] == size
    error "File #{source} size does not match metadata info [#{file_item.properties['size']} vs #{size}]"
    return nil
  end

  if checksum
    unless file_item.properties['checksum_md5'] == checksum
      error "File #{source} checksum does not match metadata info [#{file_item.properties['checksum_md5']} vs #{checksum}]"
      return nil
    end
  end

  %w[access_time modification_time creation_time].each do |key|
    file_item.properties[key] = date
  end
  file_item.properties['original_path'] = target
  file_item[:label] = File.basename(target)
  file_item[:group_id] = 1
  file_item.save!
  file_item
end
create_ie(data, parent)
click to toggle source
# File lib/libis/ingester/tasks/acp_parser.rb, line 214
#
# Creates and saves an IntellectualEntity from the data collected by
# +parse_xml+ and attaches the original file and, when available, the
# derived and thumbnail files.
#
# Keys read from +data+: :name, :label, :path, :vp_dbid, :vp_uuid, :refcode,
# :created, :checksum, :deriv_name and the content hashes :original,
# :derived and :thumbnail (each with :file, :mime and :size as produced by
# +parse_content_string+).
#
# @param data [Hash] information collected for one IE
# @param parent object assigned as the parent of the new IE
# @return nil when the original file has size 0 (an error is logged and no
#   IE is created)
# @raise [WorkflowError] when +data+ has no :original entry or the original
#   file item could not be created
def create_ie(data, parent)
  raise WorkflowError, "Missing original file information for IE '#{ie_info(data)}'" unless data[:original]

  unless data[:original][:size].to_i > 0
    error "Original contains file with size 0. File '#{data[:original][:file]}' will be skipped and no IE will be created."
    return
  end

  # create IE
  ie = Libis::Ingester::IntellectualEntity.new
  ie.name = data[:name]
  # Without an explicit label, use "<last path segment> - <name>".
  ie.label = data[:label] || (data[:path].split('/')[-1] + ' - ' + data[:name])
  ie.parent = parent
  ie.properties['path'] = data[:path] if data[:path]
  ie.properties['vp_dbid'] = data[:vp_dbid] if data[:vp_dbid]
  ie.properties['vp_uuid'] = data[:vp_uuid] if data[:vp_uuid]
  ie.properties['refcode'] = data[:refcode] if data[:refcode]

  # Dublin Core record carrying the label, identifiers and path.
  record = MetadataRecord.new
  record.format = 'DC'
  dc = Libis::Metadata::DublinCoreRecord.new
  dc.title = ie.label
  # noinspection RubyResolve
  dc.identifier! "refcode:#{data[:refcode]}" if data[:refcode]
  # noinspection RubyResolve
  dc.identifier! "dbid:#{data[:vp_dbid]}" if data[:vp_dbid]
  # noinspection RubyResolve
  dc.identifier! "uuid:#{data[:vp_uuid]}" if data[:vp_uuid]
  # noinspection RubyResolve
  dc.isPartOf = data[:path] if data[:path]
  record.data = dc.to_xml
  ie.metadata_record = record
  debug "Created IE for '#{ie_info(data)}'"
  ie.save!

  created = data[:created]

  # The original file is mandatory and the only one verified by checksum.
  if (original = create_file(data[:original][:file], data[:original][:size], data[:name], created, data[:checksum]))
    original.properties['rep_type'] = 'original'
    original.save!
    ie << original
    ie.save!
    debug "Added original file to IE", ie
  else
    raise WorkflowError, "Failed to create original FileItem for IE '#{ie_info(data)}'"
  end

  # A derived file is added only when it is non-empty and differs from the
  # original in mime type or size. to_i keeps a content string without a
  # size field from raising NoMethodError, matching the original-size check
  # at the top of this method.
  same_as_original = data[:derived] &&
                     data[:derived][:mime] == data[:original][:mime] &&
                     data[:derived][:size] == data[:original][:size]
  if data[:derived] && !same_as_original && data[:derived][:size].to_i > 0
    if (derived = create_file(data[:derived][:file], data[:derived][:size], (data[:deriv_name] || data[:name]), created))
      derived.properties['rep_type'] = 'derived'
      derived.save!
      ie << derived
      ie.save!
      debug "Added derived file to IE", ie
    end
  end

  if data[:thumbnail]&.any?
    # The thumbnail keeps the IE's base name with the thumbnail's extension.
    fname = "#{File.basename data[:name]}#{File.extname data[:thumbnail][:file]}"
    if (thumbnail = create_file(data[:thumbnail][:file], data[:thumbnail][:size], fname, created))
      thumbnail.properties['rep_type'] = 'thumbnail'
      thumbnail.save!
      ie << thumbnail
      ie.save!
      debug "Added thumbnail file to IE", ie
    end
  end
end
ie_info(ie)
click to toggle source
# File lib/libis/ingester/tasks/acp_parser.rb, line 283
#
# Short description of an IE for log and error messages: the name followed
# by the :vp_dbid in brackets, or the :vp_uuid when no :vp_dbid is present.
#
# @param ie [Hash] IE data with :name, :vp_dbid and :vp_uuid entries
# @return [String]
def ie_info(ie)
  dbid = ie[:vp_dbid]
  identifier = dbid.nil? ? ie[:vp_uuid] : dbid.to_s
  "#{ie[:name]} [#{identifier}]"
end
parse_content_string(string)
click to toggle source
# File lib/libis/ingester/tasks/acp_parser.rb, line 164
#
# Extracts the 'contentUrl', 'mimetype' and 'size' fields from a content
# string. Each value runs from its '=' up to the next '|' or the end of the
# string.
#
# @param string [String] content string to scan
# @return [Hash] with :file, :mime and :size (Integer) for the fields that
#   were found; fields that are absent are left out
def parse_content_string(string)
  url = /contentUrl=([^|]*)/.match(string)
  mime = /mimetype=([^|]*)/.match(string)
  size = /size=([^|]*)/.match(string)

  info = {}
  info[:file] = url[1] if url
  info[:mime] = mime[1] if mime
  info[:size] = size[1].to_i if size
  info
end