class Libis::Ingester::FormatValidator
Protected Instance Methods
apply_formats(item, format_list)
click to toggle source
# File lib/libis/ingester/tasks/format_validator.rb, line 102
#
# Copies format identification results onto +item+.
#
# When +item+ is a Libis::Ingester::FileItem, the entry stored under
# +item.fullpath+ in +format_list+ is written to the item's 'mimetype',
# 'puid' and 'format_identification' properties. A missing or empty entry
# falls back to 'application/octet-stream' / 'fmt/unknown' and is reported
# through +warn+. Any other item is iterated with +each+ and every sub-item
# is handled recursively.
#
# item::        a file item, or a container that yields its sub-items from +each+.
# format_list:: identification entries keyed by full path; each entry is read
#               with the keys :mimetype and :puid. May be nil.
def apply_formats(item, format_list)
  if item.is_a? Libis::Ingester::FileItem
    # Treat a nil list, or a path without an entry, as "nothing identified"
    # so the defaults below apply instead of failing on nil.
    format = format_list && format_list[item.fullpath]
    format ||= {}
    if format.empty?
      warn "Could not determine MIME type. Using default 'application/octet-stream'.", item
    else
      debug "MIME type '#{format[:mimetype]}' detected.", item
    end
    item.properties['mimetype'] = format[:mimetype] || 'application/octet-stream'
    item.properties['puid'] = format[:puid] || 'fmt/unknown'
    item.properties['format_identification'] = format
  else
    item.each { |subitem| apply_formats(subitem, format_list) }
  end
end
collect_filepaths(item)
click to toggle source
# File lib/libis/ingester/tasks/format_validator.rb, line 122
#
# Gathers the absolute paths of the file items at or below +item+.
#
# item:: a Libis::Ingester::FileItem, or a container whose sub-items are
#        visited recursively.
#
# Returns a single String when +item+ is itself a file item; otherwise a
# flat Array of the paths found underneath it, with nil entries removed.
def collect_filepaths(item)
  return File.absolute_path(item.fullpath) if item.is_a? Libis::Ingester::FileItem

  # Each recursive call yields either a String or an already-flat Array, so
  # flattening one level per call is enough.
  item.flat_map { |subitem| collect_filepaths(subitem) }.compact
end
process(item)
click to toggle source
# File lib/libis/ingester/tasks/format_validator.rb, line 39
#
# Validates the identified format of +item+ in two steps:
#
# 1. If the item's 'puid' property denotes an encrypted or password protected
#    Microsoft Office document, act according to parameter(:encrypted_doc):
#    'FAIL' raises Libis::WorkflowError, 'WARN' logs a warning, 'DUMMY'
#    replaces the file via replace_with_dummy and logs a warning. Any other
#    value raises Libis::WorkflowAbort.
# 2. If the item's 'format_ext_mismatch' property is set, act according to
#    parameter(:ext_mismatch): 'FAIL' raises Libis::WorkflowError, 'FIX'
#    rewrites the 'original_path' property with the first known extension of
#    the format (raising Libis::WorkflowError when none is known). 'WARN' and
#    any other value only log a warning.
#
# item:: the item to check; its properties, filepath and save! are used.
def process(item)
  check_encrypted_document(item)
  check_extension_mismatch(item)
end

# Returns the description of the protected document format that +puid+ stands
# for, or nil when the PUID is not one of the protected formats.
def encrypted_document_description(puid)
  case puid
  when 'fmt/494' then 'Microsoft Office Encrypted Document'
  when 'fmt/754' then 'password protected Microsoft Word Document'
  when 'fmt/755' then 'password protected Microsoft Word Document Template'
  end
end

# Applies the :encrypted_doc policy when +item+ is a protected document.
def check_encrypted_document(item)
  msg = encrypted_document_description(item.properties['puid'])
  return unless msg

  case parameter(:encrypted_doc)
  when 'FAIL'
    raise Libis::WorkflowError, "Found #{msg}: #{item.filepath}"
  when 'WARN'
    warn "Found #{msg}: #{item.filepath}"
  when 'DUMMY'
    # replace_with_dummy itself renders "File <i>path</i> is a <b>message</b>",
    # so only the description is passed; sending the full sentence would
    # repeat it inside the generated document.
    replace_with_dummy(item, msg)
    warn "#{msg} '#{item.filepath}' was replaced with a dummy file"
  else
    raise Libis::WorkflowAbort, "Unknown value for encrypted_doc parameter encountered."
  end
end

# Applies the :ext_mismatch policy when +item+ is flagged as having an
# extension that does not match its format.
def check_extension_mismatch(item)
  return unless item.properties['format_ext_mismatch']

  # Every other property here is read with a String key; fall back to it when
  # the Symbol lookup finds nothing.
  format_type = item.properties[:format_type] || item.properties['format_type']
  extensions = format_type ? Libis::Format::TypeDatabase.type_extentions(format_type) : []
  version = item.properties['format_version']
  message = 'Found document with wrong extension `%s`; format is %s%s, puid: %s, valid extensions: %s' % [
    File.extname(item.filepath),
    item.properties['format_name'],
    version.blank? ? '' : " (#{version})",
    item.properties['puid'],
    extensions.map { |x| ".#{x}" }.join(' ')
  ]

  case parameter(:ext_mismatch)
  when 'FAIL'
    raise Libis::WorkflowError, message
  when 'FIX'
    warn message
    fix_extension(item, extensions.first)
  else
    # 'WARN' and any unrecognised value only report the mismatch.
    warn message
  end
end

# Stores a corrected name, ending in +ext+, in the item's 'original_path'
# property and saves the item. Raises Libis::WorkflowError when +ext+ is nil.
def fix_extension(item, ext)
  unless ext
    raise Libis::WorkflowError,
          'Could not fix extenstion of file %s as no extension for the format (%s - %s - %s) is known in the type database' % [
            item.filepath,
            item.properties['puid'],
            item.properties['format_name'],
            item.properties['format_version']
          ]
  end

  old_name = item.properties['original_path'] || File.basename(item.properties['filename'])
  new_name = File.join(File.dirname(old_name), "#{File.basename(old_name, '.*')}.#{ext}")
  item.properties['original_path'] = new_name
  item.save!
  warn "File will be renamed to '#{File.basename(new_name)}' in the repository."
end
replace_with_dummy(item, message)
click to toggle source
# File lib/libis/ingester/tasks/format_validator.rb, line 129
#
# Replaces the file behind +item+ with a generated .docx notice and
# re-identifies the item.
#
# The notice is written to <run work_dir>/<item id>/<original basename>.docx
# and contains the sentence "File <i>filepath</i> is a <b>message</b>".
# Afterwards +item.filename+ points at the generated file, the new file is
# identified with Libis::Format::Identifier, and the result is handed to
# process_messages and apply_formats.
#
# item::    the item to replace; get_run, id, filename, filepath and fullpath
#           are used.
# message:: text inserted (as HTML) after "is a" in the notice.
def replace_with_dummy(item, message)
  # to_s so that a non-String id can still be joined into a path.
  work_dir = File.join(item.get_run.work_dir, item.id.to_s)
  # mkpath is a no-op for an existing directory, so no existence check is
  # needed (Dir.exists? is no longer available on Ruby >= 3.2).
  FileUtils.mkpath(work_dir)
  file_path = File.join(work_dir, "#{File.basename(item.filename, '.*')}.docx")
  html = '<html><head></head><body><h1/><h1>%s</h1><h1/>%s</body></html>' % [
    'The preservation system rejected this file for the following reason:',
    "File <i>#{item.filepath}</i> is a <b>#{message}</b>"
  ]
  Htmltoword.config.custom_templates_path = File.join(Libis::Ingester::ROOT_DIR, 'config')
  Htmltoword::Document.create_and_save(html, file_path, 'Warning')
  item.filename = file_path
  result = Libis::Format::Identifier.get(item.fullpath) || {}
  process_messages(result, item)
  apply_formats(item, result[:formats])
end