class MetsConverter::YamlBuilder
Attributes
document[R]
options[R]
pages[R]
Public Class Methods
new(mets, **options)
click to toggle source
# File lib/mets_converter/yaml_builder.rb, line 9
# Stores what #build needs: the document and page list exposed by +mets+,
# plus whatever keyword options the caller supplied.
#
# mets    - object responding to #document and #pages.
# options - arbitrary keyword options, kept as a Hash.
def initialize(mets, **options)
  @document, @pages = mets.document, mets.pages
  @options = options
end
Public Instance Methods
build()
click to toggle source
# File lib/mets_converter/yaml_builder.rb, line 15
# Builds the YAML text describing the scan: a fixed set of descriptive and
# technical keys followed by a "pagedata:" mapping with one entry per page.
#
# Reads +document+, +pages+ and +options+. Recognised options:
#   :suprascan          - truthy selects the SupraScan scanner make/model
#                         (otherwise Kirtas).
#   :resolution         - contone resolution in dpi (defaults to 300).
#   :scanning_order_rtl - truthy emits "right-to-left" scanning order.
#   :reading_order_rtl  - truthy emits "right-to-left" reading order.
#
# Returns the YAML document as a String.
def build
  # Capture Date
  # <metsHdr CREATEDATE="2015-07-01T15:26:39" RECORDSTATUS="Complete">
  created = document.xpath("//xmlns:metsHdr")[0].attr("CREATEDATE")
  direction = ->(rtl) { rtl ? "right-to-left" : "left-to-right" }
  scanner_make, scanner_model =
    options[:suprascan] ? ["SupraScan", "Quartz A1"] : ["Kirtas", "APT 1200"]

  # Descriptive and technical information
  out = [
    "capture_date: #{created}-06:00\n",
    "scanner_make: #{scanner_make}\n",
    "scanner_model: #{scanner_model}\n",
    "scanner_user: \"Northwestern University Library: Repository & Digital Curation\"\n",
    "contone_resolution_dpi: #{options[:resolution] || 300}\n",
    "image_compression_date: #{created}-06:00\n",
    "image_compression_agent: northwestern\n",
    "image_compression_tool: [\"LIMB v3.1.0.0\"]\n",
    "scanning_order: #{direction.call(options[:scanning_order_rtl])}\n",
    "reading_order: #{direction.call(options[:reading_order_rtl])}\n",
    # The page entries below are nested under this key, so it must be
    # written regardless of the reading order.
    "pagedata:\n"
  ]

  # These lookups do not depend on the page being processed; run them once.
  cover_divs = document.search('structMap[@TYPE="logical"]//div[@TYPE="cover"]')
  front_cover = cover_divs.first
  back_cover = cover_divs.last
  first_body_page = document.at('structMap[@TYPE="logical"]//div[@TYPE="body"]/div[1]/div[1]')

  # File List
  # Loop through pages within logical structMap
  pages.each do |element|
    # Store the fileid for the jp2
    file_id = element.xpath('./xmlns:fptr[starts-with(@FILEID, "JP2")]')[0]["FILEID"]
    # Store the jp2 filename
    filename = find_filename_by_file_id(file_id)

    label = structural_label(element, front_cover, back_cover, first_body_page)
    next if label == :omit

    # A missing ORDERLABEL attribute is treated like an empty one.
    entry = page_entry(filename, element["ORDERLABEL"].to_s, label)
    out << "    #{entry}" if entry
  end

  out.join
end

# Decides which structural label a page gets.
#
# Since the yaml flattens out the xml structure, only the first child of each
# parent div can carry a label (covers, titles, chapters, etc.).
#
# Returns the label String, nil when the page is listed without a label, or
# :omit when the page must not be listed at all.
def structural_label(element, front_cover, back_cover, first_body_page)
  parent = element.parent
  return nil unless element == parent.first_element_child

  # to_s keeps a div without a LABEL attribute from raising on start_with?.
  section = parent["LABEL"].to_s
  type = parent["TYPE"]
  cover = section == "Cover" && type == "cover"
  chapter = section.start_with?("Chapter")

  if cover && parent == front_cover
    "FRONT_COVER"
  elsif section == "Front Matter" || section == "Back Matter" ||
        (section == "Cover" && type == "appendix")
    nil
  elsif section == "Title"
    "TITLE"
  elsif section == "Contents"
    "TABLE_OF_CONTENTS"
  elsif section == "Preface"
    "PREFACE"
  elsif element == first_body_page && (section == "Introduction" || chapter)
    # First page within the body, can be within a div with label attribute
    # "Introduction" or "Chapter"
    "FIRST_CONTENT_CHAPTER_START"
  elsif chapter || section == "Appendix"
    "CHAPTER_START"
  elsif section == "Notes" || section == "Bibliography"
    "REFERENCES"
  elsif section == "Index"
    "INDEX"
  elsif cover && parent == back_cover
    "BACK_COVER"
  else
    # NOTE(review): a section-leading page under any other label is dropped
    # even when it has an ORDERLABEL. Kept as-is; confirm this is intended.
    :omit
  end
end

# Formats one pagedata entry (without indentation), or returns nil when the
# page has neither a page number nor a label and is therefore skipped.
def page_entry(filename, orderlabel, label)
  fields = []
  fields << "orderlabel: \"#{orderlabel}\"" unless orderlabel.empty?
  fields << "label: \"#{label}\"" if label
  return nil if fields.empty?

  "#{filename}: { #{fields.join(', ')} }\n"
end

private :structural_label, :page_entry
find_filename_by_file_id(id)
click to toggle source
# File lib/mets_converter/yaml_builder.rb, line 143
# Finds the FLocat child of the <file> element whose ID equals +id+ and
# returns its xlink:href with the first seven characters removed
# (presumably a "file://" prefix; confirm against real METS input).
#
# id - the FILEID String to look up.
def find_filename_by_file_id(id)
  locator = document.xpath("//xmlns:file[@ID=\"#{id}\"]/xmlns:FLocat")[0]
  href = locator['xlink:href']
  href[7..-1]
end