class Libis::Ingester::ThesisToledoCollector

Protected Instance Methods

process(item) click to toggle source

Process the input directory on the FTP server for new material.

@param [Libis::Ingester::Run] item

# File lib/libis/ingester/tasks/thesis_toledo_collector.rb, line 40
# Collect thesis material from the configured location.
#
# Remembers the work directory of the given item, checks that the +location+
# parameter names an existing directory and then passes every CSV file listed
# in the +value_files+ parameter to #process_csv, paired with the entry of the
# +access_rights+ parameter that sits at the same index.
#
# @param [Libis::Ingester::Run] item
# @raise [Libis::WorkflowError] when the location directory does not exist
def process(item)
  @work_dir = item.work_dir

  # sanity checks
  location = parameter(:location)
  # Dir.exists? was deprecated for years and removed in Ruby 3.2;
  # Dir.exist? is available on every Ruby version.
  unless Dir.exist?(location)
    error 'Location path %s does not exist.', location
    raise Libis::WorkflowError, 'Directory not found'
  end

  # value_files and access_rights are parallel lists: entry N of one belongs
  # to entry N of the other.
  access_rights = parameter(:access_rights)
  parameter(:value_files).each_with_index do |csv_file, index|
    process_csv(File.join(location, csv_file), access_rights[index])
  end
end

Private Instance Methods

add_node(xml, node_name) { || ... } click to toggle source
# File lib/libis/ingester/tasks/thesis_toledo_collector.rb, line 153
# Add one value to a metadata record on a best-effort basis.
#
# The block supplies the value. A +node_name+ that ends in '!' is sent to the
# record unchanged; any other name is turned into its setter form by
# appending '='. Any StandardError raised along the way is reported with
# +warn+ and not propagated.
#
# @param xml the record that receives the value
# @param [Symbol, String] node_name name of the field to fill
# @yieldreturn the value to store
def add_node(xml, node_name)
  content = yield
  is_bang_call = node_name.to_s.end_with?('!')
  # Reassigning node_name is deliberate: the warning below reports the
  # method name that was actually sent.
  node_name = "#{node_name}=" unless is_bang_call
  xml.send(node_name, content)
rescue StandardError
  warn "Could not create metadata field: #{node_name} for #{xml.title.text}"
end
create_metadata(row) click to toggle source

@param [CSV::Row] row

noinspection RubyResolve

# File lib/libis/ingester/tasks/thesis_toledo_collector.rb, line 118
# Build a Dublin Core record from one row of the CSV file.
#
# Single-valued fields are copied from their column by header name. Creator,
# contributor and type may be spread over several columns; every non-blank
# value from a column whose header contains the tag is added through
# #add_node.
#
# @param [CSV::Row] row
# @return [Libis::Metadata::DublinCoreRecord] the populated record
# noinspection RubyResolve
def create_metadata(row)
  xml = ::Libis::Metadata::DublinCoreRecord.new
  xml.identifier = row['<dc:identifier>'].strip
  # Doubled double-quotes in the title are collapsed to a single quote.
  xml.title = row['<dc:title>'].gsub(/""/, '"').strip

  # Iterate over (header, value) pairs instead of looking values up by header
  # name: a lookup by name always returns the FIRST column with that header,
  # so identically named columns would repeat the first value and lose the
  # others. For uniquely named columns the result is the same as a lookup.
  add_repeated = lambda do |pattern, node_name|
    row.each do |header, value|
      next unless header.to_s.match?(pattern)
      next if value.blank?

      add_node(xml, node_name) { value }
    end
  end

  # Statement order is kept as-is; it may determine element order in the
  # generated XML (not verifiable from this file).
  add_repeated.call(/<dc:creator>/, :creator!)

  xml.description = row['<dc:description>']
  xml.publisher = row['<dc:publisher>']

  add_repeated.call(/<dc:contributor>/, :contributor!)

  xml.source = row['<dc:source>']
  xml.rights = row['<dc:rights>']
  xml.date = row['date']

  add_repeated.call(/<dc:type>/, :type!)

  xml.abstract = row['abstract']

  xml
end
process_csv(csv_file, ar) click to toggle source

@param [String] csv_file path to the CSV file

@param [String] ar Access Right name to use

# File lib/libis/ingester/tasks/thesis_toledo_collector.rb, line 59
# Create an IntellectualEntity with its files for every thesis in a CSV file.
#
# Rows whose 'entity type' is 'COMPLEX' describe a thesis; rows whose
# 'relation' equals that thesis' 'vpid' describe its files. A thesis with a
# file that cannot be found in the +unzip_dir+ parameter directory is reported
# and skipped. Every other thesis is added to the workitem and saved.
#
# @param [String] csv_file path to the CSV file
# @param [String] ar Access Right name to use
# @return [nil]
def process_csv(csv_file, ar)
  # Read the file once. The block form closes the handle even when parsing
  # raises, and the in-memory rows replace re-reading the file per thesis.
  rows = CSV.open(csv_file, headers: true, skip_blanks: true, &:to_a)
  files_by_relation = rows.group_by { |r| r['relation'] }

  rows.each do |row|
    next unless row['entity type'] == 'COMPLEX'

    vpid = row['vpid']
    files = collect_thesis_files(files_by_relation.fetch(vpid, []), row['label'])
    next unless files # a file is missing; already reported

    ie_item = Libis::Ingester::IntellectualEntity.new
    # Anything outside [0-9A-Za-z._] in the label is replaced by '_' for the name.
    ie_item.name = row['label'].gsub(/[^0-9A-Za-z._]/, '_')
    ie_item.label = row['label']
    ie_item.properties['entity_type'] = parameter(:entity_type)
    ie_item.properties['access_right'] = ar
    ie_item.properties['vpid'] = vpid
    ie_item.properties['user_a'] = 'Ingest from Toledo'
    ie_item.properties['user_b'] = row['embargo opmerking']
    ie_item.properties['user_c'] = parameter(:user_c)

    # Build the Dublin Core record from the thesis row of the CSV
    # noinspection RubyResolve
    ie_item.metadata_record_attributes = {
        format: 'DC',
        data: create_metadata(row).to_xml
    }

    files.each do |f|
      file_item = Libis::Ingester::FileItem.new
      file_item.filename = f[:path]
      file_item.name = f[:row]['file name']
      file_item.label = f[:row]['label']
      ie_item << file_item
    end

    workitem << ie_item
    ie_item.save!
  end

  nil
end

# Resolve and order the files that belong to one thesis.
#
# Each file is looked up in the +unzip_dir+ parameter directory under its
# 'file name old' value. PDF files sort before other files, and a PDF ranks
# higher for each occurrence of 'eindwerk', 'bachelorproef' or 'masterproef'
# in its lower-cased 'file name'. Files of equal rank keep their CSV order.
#
# @param [Array<CSV::Row>] file_rows rows describing the files of the thesis
# @param [String] label thesis label, used in the error message
# @return [Array<Hash>, nil] entries with :row, :path, :order and :index keys,
#   highest rank first, or nil when a file could not be found
def collect_thesis_files(file_rows, label)
  entries = file_rows.each_with_index.map do |f, index|
    path = File.join(parameter(:unzip_dir), f['file name old'])
    # File.exists? was removed in Ruby 3.2; File.exist? works everywhere.
    unless File.exist?(path)
      error 'File %s could not be found. Thesis %s skipped.', path, label
      return nil
    end

    name = f['file name'].downcase
    order = 0
    if File.extname(name) == '.pdf'
      order = 1 + name.scan(/eindwerk|bachelorproef|masterproef/).count
    end

    { row: f, path: path, order: order, index: index }
  end

  # The index tie-breaker makes the order deterministic for equal ranks.
  entries.sort_by { |entry| [-entry[:order], entry[:index]] }
end