class Origami::PDF
Main class representing a PDF
file and its inner contents. A PDF
file contains a set of Revision
.
Constants
- AuthEvent
- BaseVersion
- ByteRange
- CF
- CFM
- Catalog
- Cert
- ContactInfo
- Contents
- Data
Class representing the
Catalog
Dictionary
of a PDF
file.
- Encrypt
- EncryptMetadata
- ExtensionLevel
- Filter
- ID
- Length
- Location
- Metadata
- Name
- OpenAction
- P
- Pages
- Parent
- Prev
- R
- Reason
- Rect
- Reference
- Root
- SigFlags
- Size
- StmF
- SubFilter
- TransformMethod
- TransformParams
- Type
- UR3
- V
- W
- WC
- WP
- XFA
- XRefStm
Attributes
Document header and revisions.
Document header and revisions.
Public Class Methods
Creates a new PDF
and saves it. If a block is passed, the PDF
instance can be processed before saving.
# File lib/origami/pdf.rb, line 143
#
# Builds a new PDF, yields it to the optional block so it can be
# processed, then saves it to +output+ with +options+.
# Returns whatever PDF#save returns.
def create(output, options = {})
  document = PDF.new
  yield document if block_given?

  document.save(output, options)
end
Creates a new PDF
instance.
- parser
-
The
Parser
object creating the document. If none is specified, some default structures are automatically created to get a minimal working document.
# File lib/origami/pdf.rb, line 156
#
# Sets up an empty document: a header, one initial revision with a
# fresh trailer, and the parser (if any) that is producing it.
# When no parser is given, +init+ is called to create default structures.
def initialize(parser = nil)
  @header = PDF::Header.new
  @revisions = []
  @parser = parser
  @loaded = false

  add_new_revision
  @revisions.first.trailer = Trailer.new

  init if parser.nil?
end
Reads and parses a PDF
file from disk.
# File lib/origami/pdf.rb, line 126
#
# Parses the PDF found at +path+. String paths are expanded first.
# options[:lazy] selects PDF::LazyParser; otherwise PDF::LinearParser is used.
def read(path, options = {})
  path = File.expand_path(path) if path.is_a?(::String)

  parser_class = options[:lazy] ? PDF::LazyParser : PDF::LinearParser
  parser_class.new(options).parse(path)
end
Public Instance Methods
Adds a new object to the PDF
file. If this object has no version number, then a new one will be automatically computed and assigned to it.
It returns a Reference
to this Object
.
- object
-
The object to add.
# File lib/origami/pdf.rb, line 317
#
# Adds +object+ to the last revision of this document.
# An object that already belongs to a different document is imported instead.
def <<(object)
  owner = object.document

  # Does the object belong to another PDF?
  return import(object) if owner && !owner.equal?(self)

  add_to_revision(object, @revisions.last)
end
Returns the current Catalog
Dictionary
.
# File lib/origami/catalog.rb, line 40
#
# Returns the object stored under the trailer's :Root key.
# Raises InvalidPDFError when that object is not a Catalog.
def Catalog
  root = trailer_key(:Root)
  return root if root.is_a?(Catalog)

  raise InvalidPDFError, "Broken catalog"
end
Sets the current Catalog
Dictionary
.
# File lib/origami/catalog.rb, line 50
#
# Replaces the catalog referenced by the last revision's trailer.
# The previous :Root object (if any) is deleted before +cat+ is added.
# Raises TypeError unless +cat+ is a Catalog.
def Catalog=(cat)
  raise TypeError, "Must be a Catalog object" unless cat.is_a?(Catalog)

  previous_root = @revisions.last.trailer[:Root]
  delete_object(previous_root) if previous_root

  @revisions.last.trailer.Root = self << cat
end
Add a field to the Acrobat form.
- field
-
The
Field
to add.
# File lib/origami/acroform.rb, line 46
#
# Appends every given Field to the catalog's AcroForm, creating the
# AcroForm and its Fields array when missing, and marks each field
# as indirect. Raises TypeError if any argument is not a Field.
# Returns self.
def add_fields(*fields)
  raise TypeError, "Expected Field arguments" if fields.any? { |f| !f.is_a?(Field) }

  self.Catalog.AcroForm ||= InteractiveForm.new.set_indirect(true)
  self.Catalog.AcroForm.Fields ||= []
  self.Catalog.AcroForm.Fields.concat(fields)

  fields.each { |field| field.set_indirect(true) }

  self
end
Ends the current Revision
, and starts a new one.
# File lib/origami/pdf.rb, line 364
#
# Appends a new Revision with a fresh Trailer. The :Root entry of the
# previous last revision (nil when there was none) is carried over.
# Returns self.
def add_new_revision
  inherited_root = @revisions.empty? ? nil : @revisions.last.trailer[:Root]

  @revisions << Revision.new(self)
  @revisions.last.trailer = Trailer.new
  @revisions.last.trailer.Root = inherited_root

  self
end
Adds a new object to a specific revision. If this object has no version number, then a new one will be automatically computed and assigned to it.
It returns a Reference
to this Object
.
- object
-
The object to add.
- revision
-
The revision to add the object to.
# File lib/origami/pdf.rb, line 350
#
# Marks +object+ as indirect, attaches it to this document, allocates a
# number/generation pair when its number is 0, and stores it in the body
# of +revision+ under its reference. Returns the object's reference.
def add_to_revision(object, revision)
  object.set_indirect(true)
  object.set_document(self)

  if object.no == 0
    number, generation = allocate_new_object_number
    object.no = number
    object.generation = generation
  end

  revision.body[object.reference] = object

  object.reference
end
Returns a new number/generation for future object.
# File lib/origami/pdf.rb, line 519
#
# Returns a [number, generation] pair for a future object: one more than
# the highest object number found (compressed objects included), or 1
# when the document has no objects. The generation is always 0.
def allocate_new_object_number
  highest = self.each_object(compressed: true).max_by { |object| object.no }
  number = highest.nil? ? 1 : highest.no + 1

  [number, 0]
end
Appends a page or list of pages to the end of the page tree.
- page
-
The page to append to the document. Creates a new
Page
if not specified.
Pass the Page
object if a block is present.
# File lib/origami/page.rb, line 31
#
# Pushes +page+ onto the Kids of the root page tree node, increments
# its Count, sets the page's Parent, and yields the page if a block
# is given. Returns self.
def append_page(page = Page.new)
  init_page_tree

  root_node = self.Catalog.Pages

  root_node.Kids ||= [] #:nodoc:
  root_node.Kids.push(page)

  root_node.Count ||= 0
  root_node.Count += 1

  page.Parent = root_node

  yield(page) if block_given?

  self
end
Attaches an embedded file to the PDF
.
- path
-
The path to the file to attach.
- register
-
Whether the file shall be registered in the name directory.
- name
-
The embedded file name of the attachment.
- filter
-
The stream filter used to store the file contents.
# File lib/origami/filespec.rb, line 32
#
# Builds a file specification embedding +path+, which may be a FileSpec,
# an object responding to #read, or a filesystem path.
# +name+ defaults to '' for FileSpec / readable inputs and to the file's
# basename for paths. When +register+ is truthy the result is registered
# under Names::EMBEDDED_FILES. Returns the new FileSpec.
def attach_file(path, register: true, name: nil, filter: :FlateDecode)
  if path.is_a?(FileSpec)
    filespec = path
    name ||= ''
  else
    if path.respond_to?(:read)
      contents = path.read.force_encoding('binary')
      name ||= ''
    else
      contents = File.binread(File.expand_path(path))
      name ||= File.basename(path)
    end

    embedded = EmbeddedFileStream.new
    embedded.data = contents
    embedded.Filter = filter

    filespec = FileSpec.new(:F => embedded)
  end

  fspec = FileSpec.new.setType(:Filespec).setF(name.dup).setEF(filespec)

  # `register` the keyword shadows #register, hence the explicit receiver.
  self.register(Names::EMBEDDED_FILES, name.dup, fspec) if register

  fspec
end
Creates a new AcroForm with specified fields.
# File lib/origami/acroform.rb, line 35
#
# Ensures the catalog has an AcroForm, adds +fields+ to it, and
# returns the AcroForm.
def create_form(*fields)
  acroform = (self.Catalog.AcroForm ||= InteractiveForm.new.set_indirect(true))
  self.add_fields(*fields)

  acroform
end
Modifies or creates a metadata stream.
# File lib/origami/metadata.rb, line 88
#
# Modifies the catalog's metadata stream, or creates one from an XMP
# skeleton when the catalog has no metadata Stream yet.
#
# _info_:: Hash of name => value; each pair is appended as a
#          <pdf:name> element under the rdf:Description node.
#
# Returns the catalog's Metadata stream.
def create_metadata(info = {})
  # The XMP packet wrapper is opened AND closed by an "xpacket"
  # processing instruction; the opening one used to be misspelled "packet".
  skeleton = <<-XMP
  <?xpacket begin="\xef\xbb\xbf" id="W5M0MpCehiHzreSzNTczkc9d"?>
    <x:xmpmeta xmlns:x="adobe:ns:meta/">
      <rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
        <rdf:Description rdf:about="" xmlns:pdf="http://ns.adobe.com/pdf/1.3/">
        </rdf:Description>
      </rdf:RDF>
    </x:xmpmeta>
  <?xpacket end="w"?>
  XMP

  # Start from the existing metadata when there is some.
  xml =
    if self.Catalog.Metadata.is_a?(Stream)
      self.Catalog.Metadata.data
    else
      skeleton
    end

  doc = REXML::Document.new(xml)
  desc = doc.elements['*/*/rdf:Description']

  info.each do |name, value|
    elt = REXML::Element.new "pdf:#{name}"
    elt.text = value

    desc.elements << elt
  end

  xml = ""; doc.write(xml, 4)

  if self.Catalog.Metadata.is_a?(Stream)
    self.Catalog.Metadata.data = xml
  else
    self.Catalog.Metadata = Stream.new(xml)
  end

  self.Catalog.Metadata
end
# File lib/origami/xfa.rb, line 50
#
# Creates a form with +fields+ and sets its XFA entry to a
# FlateDecode-filtered XFAStream built from +xdp+. Returns the form.
def create_xfa_form(xdp, *fields)
  create_form(*fields).tap do |form|
    form.XFA = XFAStream.new(xdp, :Filter => :FlateDecode)
  end
end
# File lib/origami/metadata.rb, line 46
#
# Returns the :CreationDate field of the document information.
def creation_date
  get_document_info_field(:CreationDate)
end
# File lib/origami/metadata.rb, line 44
#
# Returns the :Creator field of the document information.
def creator
  get_document_info_field(:Creator)
end
Decrypts the current document (only RC4 40..128 bits).
- passwd
-
The password to decrypt the document.
# File lib/origami/encryption.rb, line 50
#
# Derives the encryption key from +passwd+, turns this document into an
# Encryption::EncryptedDocument and decrypts its objects. Returns self.
#
# Raises EncryptionError when the document is not encrypted or when the
# document ID is not an Array (unless the handler's V is 5), and
# EncryptionNotSupportedError for a non-Standard security handler.
def decrypt(passwd = "")
  raise EncryptionError, "PDF is not encrypted" unless self.encrypted?

  # Turn the encryption dictionary into a standard encryption dictionary.
  handler = self.cast_object(trailer_key(:Encrypt).reference, Encryption::Standard::Dictionary)

  if handler.Filter != :Standard
    raise EncryptionNotSupportedError, "Unknown security handler : '#{handler.Filter}'"
  end

  doc_id = trailer_key(:ID)
  if doc_id.is_a?(Array)
    doc_id = doc_id.first
  elsif handler.V.to_i != 5
    raise EncryptionError, "Document ID was not found or is invalid"
  end

  encryption_key = handler.derive_encryption_key(passwd, doc_id)

  self.extend(Encryption::EncryptedDocument)
  self.encryption_handler = handler
  self.encryption_key = encryption_key

  decrypt_objects

  self
end
Remove an object.
# File lib/origami/pdf.rb, line 401
#
# Deletes the object designated by +no+ (a Reference, or an Integer
# combined with +generation+) from the body of every revision.
# Raises TypeError for any other kind of +no+.
def delete_object(no, generation = 0)
  target =
    case no
    when Reference then no
    when ::Integer then Reference.new(no, generation)
    else
      raise TypeError, "Invalid parameter type : #{no.class}"
    end

  @revisions.each { |revision| revision.body.delete(target) }
end
Tries to delinearize the document if it has been linearized. This operation is xrefs destructive, should be fixed in the future to merge tables.
# File lib/origami/linearization.rb, line 45
#
# Undoes linearization: removes the hint streams, rebuilds the last
# trailer from the first one (or from the last xref stream), strips all
# xrefs, merges the first revision's body into the last and drops the
# first revision. Returns self.
#
# Raises LinearizationError if the document is not linearized or if no
# trailer information can be found.
def delinearize!
  raise LinearizationError, 'Not a linearized document' unless self.linearized?

  # Keep hold of the first trailer and the linearization dictionary
  # (the object with the lowest file offset in the first revision).
  first_trailer = @revisions.first.trailer
  linear_dict = @revisions.first.objects.min_by { |obj| obj.file_offset }

  # Removes hint streams used by linearization.
  delete_hint_streams(linear_dict)

  # Update the trailer.
  last_trailer = (@revisions.last.trailer ||= Trailer.new)
  last_trailer.dictionary ||= Dictionary.new

  if first_trailer.dictionary?
    last_trailer.dictionary = last_trailer.dictionary.merge(first_trailer.dictionary)
  else
    xrefstm = @revisions.last.xrefstm
    unless xrefstm.is_a?(XRefStream)
      raise LinearizationError, 'Cannot find trailer info while delinearizing document'
    end

    [:Root, :Encrypt, :Info, :ID].each do |key|
      last_trailer.dictionary[key] = xrefstm[key]
    end
  end

  # Remove all xrefs.
  # Fix: Should be merged instead.
  remove_xrefs

  # Remove the linearization revision.
  @revisions.first.body.delete(linear_dict.reference)
  @revisions.last.body.merge!(@revisions.first.body)

  remove_revision(0)

  self
end
Returns the document information dictionary if present.
# File lib/origami/metadata.rb, line 36
#
# Returns the value of the trailer's :Info key.
def document_info
  trailer_key(:Info)
end
Returns true if the document has a document information dictionary.
# File lib/origami/metadata.rb, line 29
#
# Tells whether the trailer has an :Info key.
def document_info?
  trailer_key?(:Info)
end
Iterates over each Acroform Field
.
# File lib/origami/acroform.rb, line 68
#
# Yields each solved field of the AcroForm. Without a block, returns an
# Enumerator whose size is the number of fields (0 when there is no form
# or when Fields is not an Array).
def each_field
  unless block_given?
    return enum_for(__method__) do
      if self.form? && self.Catalog.AcroForm.Fields.is_a?(Array)
        self.Catalog.AcroForm.Fields.length
      else
        0
      end
    end
  end

  return unless self.form? && self.Catalog.AcroForm.Fields.is_a?(Array)

  self.Catalog.AcroForm.Fields.each { |field| yield(field.solve) }
end
Returns an Enumerator of all names under the specified root name directory.
# File lib/origami/catalog.rb, line 157
#
# Walks the name tree found for +root+, passing the block down to
# +names_from_node+. Without a block, returns an Enumerator.
# Returns nil when +root+ has no names tree, self otherwise.
def each_name(root, &block)
  return enum_for(__method__, root) unless block_given?

  tree = get_names_root(root)
  return if tree.nil?

  names_from_node(tree, &block)
  self
end
Calls block for each named destination.
# File lib/origami/destinations.rb, line 34
#
# Iterates over the names registered under Names::DESTINATIONS.
def each_named_dest(&b)
  each_name(Names::DESTINATIONS, &b)
end
Calls block for each named embedded file.
# File lib/origami/filespec.rb, line 74
#
# Iterates over the names registered under Names::EMBEDDED_FILES.
def each_named_embedded_file(&b)
  each_name(Names::EMBEDDED_FILES, &b)
end
Calls block for each named page.
# File lib/origami/page.rb, line 105
#
# Iterates over the names registered under Names::PAGES.
def each_named_page(&b)
  each_name(Names::PAGES, &b)
end
Calls block for each named JavaScript
script.
# File lib/origami/actions.rb, line 34
#
# Iterates over the names registered under Names::JAVASCRIPT.
def each_named_script(&b)
  each_name(Names::JAVASCRIPT, &b)
end
Iterates over the objects of the document. compressed: iterates over the objects inside object streams. recursive: iterates recursively inside objects like arrays and dictionaries.
# File lib/origami/pdf.rb, line 278
#
# Iterates over the objects of every revision of the document.
#
# _compressed_:: also yields the objects stored inside object streams.
# _recursive_::  also walks inside each yielded object via +walk_object+.
#
# Without a block, returns an Enumerator configured with the same options.
def each_object(compressed: false, recursive: false, &block)
  return enum_for(__method__, compressed: compressed, recursive: recursive) unless block_given?

  @revisions.each do |revision|
    revision.each_object do |object|
      block.call(object)
      walk_object(object, &block) if recursive

      next unless compressed && object.is_a?(ObjectStream)

      object.each do |child_obj|
        block.call(child_obj)

        # The block must be forwarded here as well; without it the
        # contents of compressed objects were never yielded.
        walk_object(child_obj, &block) if recursive
      end
    end
  end
end
Iterate through each page, returns self.
# File lib/origami/page.rb, line 80
#
# Initializes the page tree, then delegates the iteration to the
# catalog's Pages node.
def each_page(&b)
  init_page_tree

  self.Catalog.Pages.each_page(&b)
end
Enable the document Usage Rights.
- rights
-
list of rights defined in
UsageRights::Rights
# File lib/origami/signature.rb, line 185
#
# Enables the document Usage Rights by installing a UR3 signature in the
# catalog's Perms dictionary, then freezes the document.
#
# _cert_::   an OpenSSL::X509::Certificate, or data accepted by
#            OpenSSL::X509::Certificate.new.
# _pkey_::   an OpenSSL::PKey::RSA, or data accepted by OpenSSL::PKey::RSA.new.
# _rights_:: lists whose first element is the TransformParams key and
#            whose remaining elements are the rights to add under it.
def enable_usage_rights(cert, pkey, *rights)
  # Always uses a detached PKCS7 signature for UR.
  method = Signature::PKCS7_DETACHED

  #
  # Load key pair
  #
  key = pkey.is_a?(OpenSSL::PKey::RSA) ? pkey : OpenSSL::PKey::RSA.new(pkey)
  certificate = cert.is_a?(OpenSSL::X509::Certificate) ? cert : OpenSSL::X509::Certificate.new(cert)

  #
  # Forge digital signature dictionary
  #
  digsig = Signature::DigitalSignature.new.set_indirect(true)

  self.Catalog.AcroForm ||= InteractiveForm.new
  #self.Catalog.AcroForm.SigFlags = InteractiveForm::SigFlags::APPEND_ONLY

  digsig.Type = :Sig
  digsig.Contents = HexaString.new("\x00" * Signature.required_size(method, certificate, key, []))
  digsig.Filter = :"Adobe.PPKLite"
  digsig.Name = "ARE Acrobat Product v8.0 P23 0002337"
  digsig.SubFilter = Name.new(method)
  digsig.ByteRange = [0, 0, 0, 0]

  sigref = Signature::Reference.new
  sigref.Type = :SigRef
  sigref.TransformMethod = :UR3
  sigref.Data = self.Catalog

  sigref.TransformParams = UsageRights::TransformParams.new
  sigref.TransformParams.P = true
  sigref.TransformParams.Type = :TransformParams
  sigref.TransformParams.V = UsageRights::TransformParams::VERSION

  rights.each do |right|
    sigref.TransformParams[right.first] ||= []
    sigref.TransformParams[right.first].concat(right[1..-1])
  end

  digsig.Reference = [ sigref ]

  self.Catalog.Perms ||= Perms.new
  self.Catalog.Perms.UR3 = digsig

  #
  # Flattening the PDF to get file view.
  #
  compile

  #
  # Creating an empty Xref table to compute signature byte range.
  #
  rebuild_dummy_xrefs

  sig_offset = get_object_offset(digsig.no, digsig.generation) + digsig.signature_offset

  digsig.ByteRange[0] = 0
  digsig.ByteRange[1] = sig_offset
  # The second range starts after the *serialized* Contents string, so
  # measure it the same way PDF#sign does rather than by raw length.
  digsig.ByteRange[2] = sig_offset + digsig.Contents.to_s.bytesize

  # Writing ByteRange[3] can itself change the file size; iterate until stable.
  until digsig.ByteRange[3] == filesize - digsig.ByteRange[2]
    digsig.ByteRange[3] = filesize - digsig.ByteRange[2]
  end

  # From that point on, the file size remains constant

  #
  # Correct Xrefs variations caused by ByteRange modifications.
  #
  rebuild_xrefs

  file_data = output()
  signable_data = file_data[digsig.ByteRange[0],digsig.ByteRange[1]] +
                  file_data[digsig.ByteRange[2],digsig.ByteRange[3]]

  signature = Signature.compute(method, signable_data, certificate, key, [])
  digsig.Contents[0, signature.size] = signature

  #
  # No more modification are allowed after signing.
  #
  self.freeze
end
Encrypts the current document with the provided passwords. The document will be encrypted at writing-on-disk time.
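- userpasswd
-
The user password (passed as options[:user_passwd], empty by default).
- ownerpasswd
-
The owner password (passed as options[:owner_passwd], empty by default).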
- options
-
A set of options to configure encryption.
# File lib/origami/encryption.rb, line 86
#
# Installs a security handler built from +options+ merged over the
# defaults below, and turns this document into an
# Encryption::EncryptedDocument. Returns self.
# Raises EncryptionError if the document is already encrypted.
def encrypt(options = {})
  raise EncryptionError, "PDF is already encrypted" if self.encrypted?

  #
  # Default encryption options.
  #
  defaults = {
    user_passwd: '',
    owner_passwd: '',
    cipher: 'aes',              # :RC4 or :AES
    key_size: 128,              # Key size in bits
    hardened: false,            # Use newer password validation (since Reader X)
    encrypt_metadata: true,     # Metadata shall be encrypted?
    permissions: Encryption::Standard::Permissions::ALL # Document permissions
  }
  params = defaults.merge(options)

  # Get the cryptographic parameters.
  version, revision = crypto_revision_from_options(params)

  # Create the security handler.
  handler, encryption_key = create_security_handler(version, revision, params)

  # Turn this document into an EncryptedDocument instance.
  self.extend(Encryption::EncryptedDocument)
  self.encryption_handler = handler
  self.encryption_key = encryption_key

  self
end
Returns whether the PDF
file is encrypted.
# File lib/origami/encryption.rb, line 42
#
# Tells whether the trailer has an :Encrypt key.
def encrypted?
  trailer_key?(:Encrypt)
end
Executes a JavaScript
script in the current document context.
# File lib/origami/javascript.rb, line 699
#
# Hands +code+ to the document's JavaScript engine and returns the result.
def eval_js(code)
  engine = js_engine
  engine.exec(code)
end
Returns an Array
of Acroform fields.
# File lib/origami/acroform.rb, line 61
#
# Collects everything yielded by #each_field into an Array.
def fields
  self.each_field.to_a
end
Returns true if the document contains an acrobat form.
# File lib/origami/acroform.rb, line 28
#
# Tells whether the catalog has an :AcroForm key.
def form?
  self.Catalog.key?(:AcroForm)
end
Lookup destination in the destination name directory.
# File lib/origami/destinations.rb, line 27
#
# Resolves +name+ within Names::DESTINATIONS.
def get_destination_by_name(name)
  resolve_name(Names::DESTINATIONS, name)
end
Lookup embedded file in the embedded files name directory.
# File lib/origami/filespec.rb, line 67
#
# Resolves +name+ within Names::EMBEDDED_FILES.
def get_embedded_file_by_name(name)
  resolve_name(Names::EMBEDDED_FILES, name)
end
Returns the corresponding named Field
.
# File lib/origami/acroform.rb, line 87
#
# Returns the first field whose solved :T entry equals +name+,
# or nil when no field matches.
def get_field(name)
  self.each_field.find { |field| field[:T].solve == name }
end
Get the n-th Page
object.
# File lib/origami/page.rb, line 89
#
# Initializes the page tree, then asks the catalog's Pages node for
# page +n+.
def get_page(n)
  init_page_tree

  self.Catalog.Pages.get_page(n)
end
Lookup page in the page name directory.
# File lib/origami/page.rb, line 98
#
# Resolves +name+ within Names::PAGES.
def get_page_by_name(name)
  resolve_name(Names::PAGES, name)
end
Lookup script in the scripts name directory.
# File lib/origami/actions.rb, line 27
#
# Resolves +name+ within Names::JAVASCRIPT.
def get_script_by_name(name)
  resolve_name(Names::JAVASCRIPT, name)
end
Similar to PDF#insert
or PDF#<<
, but for an object belonging to another document. Object
will be recursively copied and new version numbers will be assigned. Returns the new reference to the imported object.
- object
-
The object to import.
# File lib/origami/pdf.rb, line 337
#
# Exports +object+ and inserts the exported copy into this document.
# Returns the result of #insert.
def import(object)
  exported = object.export
  self.insert(exported)
end
Return an array of indirect objects.
# File lib/origami/pdf.rb, line 304
#
# Concatenates the objects of every revision, in revision order,
# into a single new Array.
def indirect_objects
  @revisions.each_with_object([]) do |revision, collected|
    collected.concat(revision.objects)
  end
end
Inserts a page at position index into the document.
- index
-
Page
index (starting from zero).
- page
-
The page to insert into the document. Creates a new one if none given.
Pass the Page
object if a block is present.
# File lib/origami/page.rb, line 55
#
# Inserts +page+ at +index+ in the page tree and yields it if a block is
# given. A page owned by another document is exported first.
# Returns self.
def insert_page(index, page = Page.new)
  init_page_tree

  # Page from another document must be exported.
  foreign = page.document && page.document != self
  page = page.export if foreign

  self.Catalog.Pages.insert_page(index, page)

  yield(page) if block_given?

  self
end
Returns the JavaScript
engine (if JavaScript
support is present).
# File lib/origami/javascript.rb, line 706
#
# Returns the memoized PDF::JavaScript::Engine bound to this document,
# creating it on first use.
def js_engine
  @js_engine ||= PDF::JavaScript::Engine.new(self)
end
# File lib/origami/metadata.rb, line 43
#
# Returns the :Keywords field of the document information.
def keywords
  get_document_info_field(:Keywords)
end
Returns whether the current document is linearized.
# File lib/origami/linearization.rb, line 31
#
# True when there is more than one revision and the object with the
# lowest file offset in the first revision is a Dictionary holding a
# :Linearized key. Returns false if that object cannot be determined.
def linearized?
  first_obj =
    begin
      @revisions.first.objects.min_by { |obj| obj.file_offset }
    rescue
      return false
    end

  @revisions.size > 1 &&
    first_obj.is_a?(Dictionary) &&
    first_obj.has_key?(:Linearized)
end
Marks the document as complete. No more objects need to be fetched by the parser.
# File lib/origami/pdf.rb, line 535
#
# Sets the @loaded flag to true.
def loaded!
  @loaded = true
end
Returns an array of Objects whose name (in a Dictionary
) is matching pattern.
# File lib/origami/pdf.rb, line 263
#
# Returns the dictionary values whose key Name matches +pattern+.
# A String pattern is escaped and matched case-insensitively.
# Values are solved unless +follow_references+ is false.
# Raises TypeError when +pattern+ is neither a String nor a Regexp.
def ls(pattern, follow_references: true)
  pattern = Regexp.new(Regexp.escape(pattern), Regexp::IGNORECASE) if pattern.is_a?(::String)
  raise TypeError, "Expected a String or Regexp" unless pattern.is_a?(Regexp)

  matches = self.grep(pattern, streams: false, object_streams: true)

  # Keep only Names that are actual keys of their parent Dictionary.
  keys = matches.select do |object|
    object.is_a?(Name) && object.parent.is_a?(Dictionary) && object.parent.key?(object)
  end

  keys.map do |key|
    value = key.parent[key]
    follow_references ? value.solve : value
  end
end
Returns a Hash of the information found in the metadata stream
# File lib/origami/metadata.rb, line 59
#
# Parses the catalog's metadata stream and returns a Hash built from
# every rdf:Description node: its 'pdf'/'xap'-prefixed attributes and the
# text of its child elements (the first nested rdf:li when present).
# Returns nil when the catalog has no metadata Stream.
def metadata
  metadata_stm = self.Catalog.Metadata
  return unless metadata_stm.is_a?(Stream)

  doc = REXML::Document.new(metadata_stm.data)
  info = {}

  doc.elements.each('*/*/rdf:Description') do |description|
    description.attributes.each_attribute do |attr|
      info[attr.name] = attr.value if ['pdf', 'xap'].include?(attr.prefix)
    end

    description.elements.each('*') do |element|
      text_node = element.elements['.//rdf:li'] || element
      info[element.name] = text_node.text.to_s
    end
  end

  info
end
Returns true if the document has a catalog metadata stream.
# File lib/origami/metadata.rb, line 52
#
# Tells whether the catalog's Metadata entry is a Stream.
def metadata?
  catalog_metadata = self.Catalog.Metadata
  catalog_metadata.is_a?(Stream)
end
# File lib/origami/metadata.rb, line 47
#
# Returns the :ModDate field of the document information.
def mod_date
  get_document_info_field(:ModDate)
end
Returns a Hash of all names under the specified root name directory.
# File lib/origami/catalog.rb, line 150
#
# Converts the pairs enumerated by #each_name for +root+ into a Hash.
def names(root)
  pairs = self.each_name(root)
  pairs.to_h
end
Sets an action to run on document closing.
- action
# File lib/origami/catalog.rb, line 80
#
# Stores +action+ as the WC entry of the catalog's additional actions
# (AA), creating the AA dictionary when missing. Returns self.
# Raises TypeError unless +action+ is an Action::JavaScript or a
# Reference, and InvalidPDFError when there is no catalog.
def onDocumentClose(action)
  accepted = action.is_a?(Action::JavaScript) || action.is_a?(Reference)
  raise TypeError, "An Action::JavaScript object must be passed." unless accepted

  raise InvalidPDFError, "A catalog object must exist to add this action." unless self.Catalog

  self.Catalog.AA ||= CatalogAdditionalActions.new
  self.Catalog.AA.WC = action

  self
end
Sets an action to run on document opening.
# File lib/origami/catalog.rb, line 62
#
# Stores +action+ as the catalog's OpenAction. Returns self.
# Raises TypeError unless +action+ is an Action, a Destination or a
# Reference, and InvalidPDFError when there is no catalog.
def onDocumentOpen(action)
  accepted = action.is_a?(Action) || action.is_a?(Destination) || action.is_a?(Reference)
  raise TypeError, "An Action object must be passed." unless accepted

  raise InvalidPDFError, "A catalog object must exist to add this action." unless self.Catalog

  self.Catalog.OpenAction = action

  self
end
Sets an action to run on document printing.
- action
# File lib/origami/catalog.rb, line 99
#
# Stores +action+ as the WP entry of the catalog's additional actions
# (AA), creating the AA dictionary when missing. Returns self.
# Raises TypeError unless +action+ is an Action::JavaScript or a
# Reference, and InvalidPDFError when there is no catalog.
def onDocumentPrint(action)
  accepted = action.is_a?(Action::JavaScript) || action.is_a?(Reference)
  raise TypeError, "An Action::JavaScript object must be passed." unless accepted

  raise InvalidPDFError, "A catalog object must exist to add this action." unless self.Catalog

  self.Catalog.AA ||= CatalogAdditionalActions.new
  self.Catalog.AA.WP = action

  self
end
Original data parsed to create this document, nil if created from scratch.
# File lib/origami/pdf.rb, line 185
#
# Returns the parser's target data, or nil when there is no parser.
def original_data
  return nil unless @parser

  @parser.target_data
end
Original file name if parsed from disk, nil otherwise.
# File lib/origami/pdf.rb, line 171
#
# Returns the parser's target filename, or nil when there is no parser.
def original_filename
  return nil unless @parser

  @parser.target_filename
end
Original file size if parsed from a data stream, nil otherwise.
# File lib/origami/pdf.rb, line 178
#
# Returns the parser's target file size, or nil when there is no parser.
def original_filesize
  return nil unless @parser

  @parser.target_filesize
end
Returns an Enumerator of Page
# File lib/origami/page.rb, line 71
#
# Initializes the page tree, then returns the pages of the catalog's
# Pages node.
def pages
  init_page_tree

  self.Catalog.Pages.pages
end
# File lib/origami/outputintents.rb, line 42
#
# True when the catalog has an OutputIntents array containing a PDF/A-1
# intent, a metadata stream is present, and that metadata holds an
# rdf:Description declaring pdfaid conformance "A" and part "1".
def pdfa1?
  return false unless self.Catalog.OutputIntents.is_a?(Array)

  has_intent = self.Catalog.OutputIntents.any? do |intent|
    intent.solve.S == OutputIntent::Intent::PDFA1
  end
  return false unless has_intent

  return false unless self.metadata?

  doc = REXML::Document.new(self.Catalog.Metadata.data)
  REXML::XPath.match(doc, "*/*/rdf:Description[@xmlns:pdfaid]").any? do |desc|
    desc.elements["pdfaid:conformance"].text == "A" &&
      desc.elements["pdfaid:part"].text == "1"
  end
end
Returns true if the document behaves as a portfolio for embedded files.
# File lib/origami/collections.rb, line 27
#
# Tells whether the catalog's Collection entry is a Dictionary.
def portfolio?
  collection = self.Catalog.Collection
  collection.is_a?(Dictionary)
end
# File lib/origami/metadata.rb, line 45
#
# Returns the :Producer field of the document information.
def producer
  get_document_info_field(:Producer)
end
Registers an object into a specific Names
root dictionary.
- root
-
The root dictionary (see Names::Root)
- name
-
The value name.
- value
-
The value to associate with this name.
# File lib/origami/catalog.rb, line 120
#
# Appends the +name+ / +value+ pair to the :Names array of the name tree
# stored under +root+ in the catalog's Names dictionary. The Names
# dictionary and the tree node are created when missing, and +value+ is
# made indirect unless it is a Reference.
def register(root, name, value)
  self.Catalog.Names ||= Names.new

  value.set_indirect(true) unless value.is_a?(Reference)

  existing_node = self.Catalog.Names[root]
  unless existing_node.nil?
    return existing_node.solve[:Names] << name << value
  end

  new_node = NameTreeNode.new(:Names => []).set_indirect(true)
  self.Catalog.Names[root] = new_node
  new_node.Names << name << value
end
Removes a whole document revision.
- index
-
Revision
index, first is 0.
# File lib/origami/pdf.rb, line 378
#
# Removes a whole document revision.
#
# _index_:: Revision index, first is 0.
#
# Raises IndexError when +index+ does not designate an existing revision,
# and InvalidPDFError when the document only has one revision left.
# Returns self.
def remove_revision(index)
  # Valid indexes are 0...size. The upper bound used to be tested with
  # `>`, which let index == size through and made delete_at a silent no-op.
  if index < 0 or index >= @revisions.size
    raise IndexError, "Not a valid revision index"
  end

  if @revisions.size == 1
    raise InvalidPDFError, "Cannot remove last revision"
  end

  @revisions.delete_at(index)

  self
end
Tries to strip any xrefs information off the document.
# File lib/origami/xreftable.rb, line 27
#
# Going from the last revision to the first, deletes each revision's
# xref stream object, deletes the XRefStream found at the offset given by
# the trailer's XRefStm entry, and clears the revision's xrefstm and
# xreftable.
def remove_xrefs
  @revisions.reverse_each do |revision|
    delete_object(revision.xrefstm.reference) if revision.has_xrefstm?

    if revision.trailer.XRefStm.is_a?(Integer)
      hybrid_stream = get_object_by_offset(revision.trailer.XRefStm)

      delete_object(hybrid_stream.reference) if hybrid_stream.is_a?(XRefStream)
    end

    revision.xreftable = nil
    revision.xrefstm = nil
  end
end
Retrieve the corresponding value associated with name in the specified root name directory, or nil if the value does not exist.
# File lib/origami/catalog.rb, line 140
#
# Looks +name+ up in the name tree found for +root+.
# Returns nil when +root+ has no name tree.
def resolve_name(root, name)
  tree = get_names_root(root)

  tree.nil? ? nil : resolve_name_from_node(tree, name)
end
Saves the current document.
- path
-
The path where to save this
PDF
.
# File lib/origami/pdf.rb, line 193
#
# Saves the current document.
#
# _path_::   A filesystem path, or any object responding to #write.
# _params_:: Options merged over the defaults (delinearize: true,
#            recompile: true, decrypt: false) and passed on to
#            #compile and #output.
#
# A file opened by this method is always closed, even when compiling or
# writing raises; an object passed in by the caller is never closed.
# Returns self.
def save(path, params = {})
  options = {
    delinearize: true,
    recompile: true,
    decrypt: false
  }
  options.update(params)

  if self.frozen? # incompatible flags with frozen doc (signed)
    options[:recompile] = options[:rebuild_xrefs] = options[:noindent] = options[:obfuscate] = false
  end

  if path.respond_to?(:write)
    fd = path
  else
    path = File.expand_path(path)
    fd = File.open(path, 'w').binmode
    close = true
  end

  load_all_objects unless @loaded

  intents_as_pdfa1 if options[:intent] =~ /pdf[\/-]?A1?/i
  self.delinearize! if options[:delinearize] and self.linearized?
  compile(options) if options[:recompile]

  fd.write output(options)

  self
ensure
  # Only close what we opened ourselves; `close` is nil otherwise.
  fd.close if close
end
Saves the file up to given revision number. This can be useful to visualize the modifications over different incremental updates.
- revision
-
The revision number to save.
- filename
-
The path where to save this
PDF
.
# File lib/origami/pdf.rb, line 236
#
# Saves to +filename+, passing +revision+ to #save as the
# :up_to_revision option.
def save_upto(revision, filename)
  save(filename, :up_to_revision => revision)
end
Sets PDF
extension level and version. Only supported values are “1.7” and 3.
# File lib/origami/catalog.rb, line 27
#
# Stores a new DeveloperExtension under the :ADBE key of the catalog's
# Extensions (created when missing), with BaseVersion set to +version+
# and ExtensionLevel set to +level+. Returns self.
def set_extension_level(version, level)
  extensions = (self.Catalog.Extensions ||= Extensions.new)

  extensions[:ADBE] = DeveloperExtension.new
  extensions[:ADBE].BaseVersion = Name.new(version)
  extensions[:ADBE].ExtensionLevel = level

  self
end
Sign the document with the given key and x509 certificate.
- certificate
-
The X509 certificate containing the public key.
- key
-
The private key associated with the certificate.
- method
-
The
PDF
signature identifier.
- ca
-
Optional CA certificates used to sign the user certificate.
- annotation
-
Annotation
associated with the signature.
- issuer
-
Issuer name.
- location
-
Signature
location.
- contact
-
Signer contact.
- reason
-
Signing reason.
# File lib/origami/signature.rb, line 67
#
# Signs the document with +certificate+ and +key+, then freezes it.
# A signature widget annotation (a zero-sized one when none is given) is
# added to the form, the signature dictionary is filled in, and the
# signature is computed over the two byte ranges surrounding Contents.
#
# Raises TypeError when +certificate+, +key+, +ca+ or +annotation+ is
# not of the expected type.
def sign(certificate, key,
         method: Signature::PKCS7_DETACHED,
         ca: [],
         annotation: nil,
         issuer: nil,
         location: nil,
         contact: nil,
         reason: nil)
  raise TypeError, "A OpenSSL::X509::Certificate object must be passed." unless certificate.is_a?(OpenSSL::X509::Certificate)
  raise TypeError, "A OpenSSL::PKey::RSA object must be passed." unless key.is_a?(OpenSSL::PKey::RSA)
  raise TypeError, "Expected an Array of CA certificate." unless ca.is_a?(::Array)

  if !annotation.nil? && !annotation.is_a?(Annotation::Widget::Signature)
    raise TypeError, "Expected a Annotation::Widget::Signature object."
  end

  digsig = Signature::DigitalSignature.new.set_indirect(true)

  if annotation.nil?
    annotation = Annotation::Widget::Signature.new
    annotation.Rect = Rectangle[:llx => 0.0, :lly => 0.0, :urx => 0.0, :ury => 0.0]
  end

  annotation.V = digsig
  add_fields(annotation)
  self.Catalog.AcroForm.SigFlags =
    InteractiveForm::SigFlags::SIGNATURES_EXIST | InteractiveForm::SigFlags::APPEND_ONLY

  digsig.Type = :Sig
  digsig.Contents = HexaString.new("\x00" * Signature.required_size(method, certificate, key, ca))
  digsig.Filter = :"Adobe.PPKLite"
  digsig.SubFilter = Name.new(method)
  digsig.ByteRange = [0, 0, 0, 0]
  digsig.Name = issuer

  digsig.Location = HexaString.new(location) if location
  digsig.ContactInfo = HexaString.new(contact) if contact
  digsig.Reason = HexaString.new(reason) if reason

  # PKCS1 signatures require a Cert entry.
  if method == Signature::PKCS1_RSA_SHA1
    signer_cert = HexaString.new(certificate.to_der)

    digsig.Cert =
      if ca.empty?
        signer_cert
      else
        [ signer_cert ] + ca.map { |crt| HexaString.new(crt.to_der) }
      end
  end

  #
  # Flattening the PDF to get file view.
  #
  compile

  #
  # Creating an empty Xref table to compute signature byte range.
  #
  rebuild_dummy_xrefs

  sig_offset = get_object_offset(digsig.no, digsig.generation) + digsig.signature_offset

  digsig.ByteRange[0] = 0
  digsig.ByteRange[1] = sig_offset
  digsig.ByteRange[2] = sig_offset + digsig.Contents.to_s.bytesize

  # Writing ByteRange[3] can itself change the file size; iterate until stable.
  until digsig.ByteRange[3] == filesize - digsig.ByteRange[2]
    digsig.ByteRange[3] = filesize - digsig.ByteRange[2]
  end

  # From that point on, the file size remains constant

  #
  # Correct Xrefs variations caused by ByteRange modifications.
  #
  rebuild_xrefs

  file_data = output()
  head = file_data[digsig.ByteRange[0], digsig.ByteRange[1]]
  tail = file_data[digsig.ByteRange[2], digsig.ByteRange[3]]
  signable_data = head + tail

  #
  # Computes and inserts the signature.
  #
  signature = Signature.compute(method, signable_data, certificate, key, ca)
  digsig.Contents[0, signature.size] = signature

  #
  # No more modification are allowed after signing.
  #
  self.freeze
end
# File lib/origami/signature.rb, line 276
#
# Returns the V entry of the first field whose FT is :Sig and whose V is
# a Dictionary. Raises SignatureError when the document is not signed or
# when no such field exists.
def signature
  raise SignatureError, "Not a signed document" unless self.signed?

  sig_field = self.each_field.find do |field|
    field.FT == :Sig && field.V.is_a?(Dictionary)
  end
  raise SignatureError, "Cannot find digital signature" if sig_field.nil?

  sig_field.V
end
Returns whether the document contains a digital signature.
# File lib/origami/signature.rb, line 171
#
# True when the AcroForm is a Dictionary whose Integer SigFlags has the
# SIGNATURES_EXIST bit set. An InvalidReferenceError raised while
# checking is treated as "not signed".
def signed?
  self.Catalog.AcroForm.is_a?(Dictionary) &&
    self.Catalog.AcroForm.SigFlags.is_a?(Integer) &&
    ((self.Catalog.AcroForm.SigFlags & InteractiveForm::SigFlags::SIGNATURES_EXIST) != 0)
rescue InvalidReferenceError
  false
end
# File lib/origami/metadata.rb, line 42
#
# Returns the :Subject field of the document information.
def subject
  get_document_info_field(:Subject)
end
# File lib/origami/metadata.rb, line 40
#
# Returns the :Title field of the document information.
def title
  get_document_info_field(:Title)
end
Returns the current trailer. This might be either a Trailer
or XRefStream
.
# File lib/origami/trailer.rb, line 29
#
# Returns the last revision's trailer when it has a dictionary, and the
# last revision's xref stream otherwise.
# Raises InvalidPDFError when neither is available.
def trailer
  last_revision = @revisions.last

  # A standard trailer dictionary takes precedence over an xref stream.
  current =
    if last_revision.trailer.dictionary?
      @revisions.last.trailer
    else
      @revisions.last.xrefstm
    end

  raise InvalidPDFError, "No trailer found" if current.nil?

  current
end
# File lib/origami/signature.rb, line 271
#
# Tells whether the catalog's Perms dictionary holds a usage rights
# entry, i.e. a :UR3 or a :UR key. Returns false when there is no Perms
# dictionary.
def usage_rights?
  perms = self.Catalog.Perms
  return false if perms.nil?

  # The test used to be negated: it answered true when an entry was
  # missing and false when both were present.
  perms.has_key?(:UR3) || perms.has_key?(:UR)
end
Verify a document signature.
- trusted_certs: an array of trusted X509 certificates. If no argument is passed, embedded certificates are treated as trusted.
# File lib/origami/signature.rb, line 35
#
# Verifies the document's digital signature against a certificate store
# filled with +trusted_certs+. With no trusted certificate the
# OpenSSL::PKCS7::NOVERIFY flag is used.
# Raises SignatureError when the signature Contents is not a String.
# Returns the result of Signature.verify.
def verify(trusted_certs: [])
  digsig = self.signature
  unless digsig.is_a?(Signature::DigitalSignature)
    digsig = digsig.cast_to(Signature::DigitalSignature)
  end

  raise SignatureError, "Invalid digital signature contents" unless digsig[:Contents].is_a?(String)

  store = OpenSSL::X509::Store.new
  trusted_certs.each { |ca| store.add_cert(ca) }

  flags = trusted_certs.empty? ? OpenSSL::PKCS7::NOVERIFY : 0

  data = extract_signed_data(digsig)
  raw_signature = digsig[:Contents]
  subfilter = digsig.SubFilter.value

  Signature.verify(subfilter.to_s, data, raw_signature, store, flags)
end
# File lib/origami/xfa.rb, line 57
#
# True when the document has a form and its AcroForm has an :XFA key.
def xfa_form?
  self.form? && self.Catalog.AcroForm.key?(:XFA)
end
Private Instance Methods
# File lib/origami/pdf.rb, line 759
#
# Flattens a Dictionary or Array in place: every indirect child is
# replaced by its reference (and added to +revision+ and built when the
# document does not know it yet), then every child is built.
# Does nothing for any other kind of object.
def build_compound_object(object, revision, options)
  return unless object.is_a?(Dictionary) || object.is_a?(Array)

  # Flatten the object by adding indirect objects to the revision and
  # replacing them with their reference.
  object.map! do |child|
    if !child.indirect?
      child
    elsif get_object(child.reference)
      child.reference
    else
      new_reference = add_to_revision(child, revision)
      build_object(child, revision, options)
      new_reference
    end
  end

  # Finalize all the children objects.
  object.each { |child| build_object(child, revision, options) }
end
# File lib/origami/pdf.rb, line 739
#
# Builds +object+ between its pre_build and post_build hooks: a Stream's
# dictionary is built, a Dictionary or Array is flattened. The objects
# held by an ObjectStream are built before the stream itself.
def build_object(object, revision, options)
  # Build any compressed object before building the object stream.
  if object.is_a?(ObjectStream)
    object.each { |compressed_obj| build_object(compressed_obj, revision, options) }
  end

  object.pre_build

  case object
  when Stream
    build_object(object.dictionary, revision, options)
  when Dictionary, Array
    build_compound_object(object, revision, options)
  end

  object.post_build
end
This method recomputes, verifies and corrects the main PDF structures in order to output a proper file. It:
- allocates object references;
- sets the required values that are missing on some objects.
# File lib/origami/pdf.rb, line 700
#
# Prepares the document for output: loads every object if needed, appends
# a page when there is none, physicalizes the objects and updates the
# header version (and the extension level when it is greater than zero).
#
# Returns self.
def compile(options = {})
  load_all_objects unless @loaded

  # A valid document must have at least one page.
  append_page if pages.empty?

  # Allocates object numbers, creates references and finalizes objects.
  physicalize(options)

  # The version string is read as "<major>.<minor>".
  version, level = version_required
  major_digit = version[0, 1]
  minor_digit = version[2, 1]
  @header.major_version = major_digit.to_i
  @header.minor_version = minor_digit.to_i

  if level > 0
    set_extension_level(version, level)
  end

  self
end
Installs the standard security dictionary, marking the document as being encrypted. Returns the handler and the encryption key used for protecting contents.
# File lib/origami/encryption.rb, line 123
#
# Creates the standard encryption dictionary, sets its passwords and
# installs it as the Encrypt entry of the trailer.
#
# version::  value stored in the V entry.
# revision:: value stored in the R entry; a crypt filter is added for
#            revisions >= 4 (AESV2 for revision 4, AESV3 otherwise).
# params::   hash read for :key_size, :encrypt_metadata, :user_passwd
#            and :owner_passwd.
#
# Returns [ handler, encryption_key ].
def create_security_handler(version, revision, params)
  # Reuse the trailer ID when present, otherwise generate one.
  doc_id = (trailer_key(:ID) || generate_id).first
  key_bits = params[:key_size]

  handler = Encryption::Standard::Dictionary.new
  handler.Filter = :Standard
  handler.V = version
  handler.R = revision
  handler.Length = key_bits
  handler.P = -1 # params[:Permissions]

  if revision >= 4
    handler.EncryptMetadata = params[:encrypt_metadata]
    handler.CF = Dictionary.new

    std_filter = Encryption::CryptFilterDictionary.new
    std_filter.AuthEvent = :DocOpen
    std_filter.CFM = (revision == 4 ? :AESV2 : :AESV3)
    std_filter.Length = params[:key_size] >> 3 # bits to bytes

    handler.CF[:StdCF] = std_filter
    handler.StmF = handler.StrF = :StdCF
  end

  user_passwd = params[:user_passwd]
  owner_passwd = params[:owner_passwd]

  handler.set_passwords(owner_passwd, user_passwd, doc_id)
  encryption_key = handler.compute_user_encryption_key(user_passwd, doc_id)

  # Adding the handler to the document yields the value stored in Encrypt.
  self.trailer.Encrypt = (self << handler)

  [ handler, encryption_key ]
end
Computes the required standard security handler version based on the AES key size.
- key_size: key size in bits (128 or 256).
- hardened: use the extension level 8 hardened derivation algorithm.
Returns [ version, revision ].
# File lib/origami/encryption.rb, line 207
#
# Maps an AES key size to the security handler [ version, revision ] pair.
#
# key_size:: 128 gives [ 4, 4 ]; 256 gives version 5.
# hardened:: for 256-bit keys, selects revision 6 instead of revision 5.
#
# Raises EncryptionError for any other key size.
def crypto_revision_from_aes_key(key_size, hardened)
  case key_size
  when 128
    [ 4, 4 ]
  when 256
    [ 5, hardened ? 6 : 5 ]
  else
    raise EncryptionError, "Invalid AES key length (Only 128 and 256 bits keys are supported)"
  end
end
Converts the parameters passed to PDF#encrypt into a security handler version, according to the requested cipher. Returns [ version, revision ].
# File lib/origami/encryption.rb, line 171
#
# Picks the security handler version matching the requested cipher.
#
# params:: hash read for :cipher (compared upper-cased), :key_size and,
#          for AES, :hardened.
#
# Raises EncryptionNotSupportedError for any cipher other than RC4 or AES.
def crypto_revision_from_options(params)
  cipher = params[:cipher].upcase

  if 'RC4' == cipher
    crypto_revision_from_rc4_key(params[:key_size])
  elsif 'AES' == cipher
    crypto_revision_from_aes_key(params[:key_size], params[:hardened])
  else
    raise EncryptionNotSupportedError, "Cipher not supported : #{params[:cipher]}"
  end
end
Computes the required standard security handler version based on the RC4 key size.
- key_size: key size in bits (a multiple of 8 between 40 and 128).
Returns [ version, revision ].
# File lib/origami/encryption.rb, line 187
#
# Maps an RC4 key size to the security handler [ version, revision ] pair:
# [ 1, 2 ] for 40-bit keys, [ 2, 3 ] for longer ones.
#
# Raises EncryptionError unless key_size lies within 40..128 and is a
# multiple of 8.
def crypto_revision_from_rc4_key(key_size)
  valid_size = (40..128).cover?(key_size) && (key_size % 8).zero?
  raise EncryptionError, "Invalid RC4 key length" unless valid_size

  key_size > 40 ? [ 2, 3 ] : [ 1, 2 ]
end
Strip the document from Hint streams given a linearization dictionary.
# File lib/origami/linearization.rb, line 102
#
# Deletes the streams designated by the :H entry of a linearization
# dictionary. The entry is walked two items at a time; the first item of
# each pair is used as a file offset and the second one is ignored.
#
# Does nothing unless the :H entry is an Array.
def delete_hint_streams(linearization_dict)
  hint_table = linearization_dict[:H]
  return unless hint_table.is_a?(Array)

  hint_table.each_slice(2) do |position, _size|
    next unless position.is_a?(Integer)

    candidate = get_object_by_offset(position)
    next unless candidate.is_a?(Stream)

    delete_object(candidate.reference)
  end
end
Verifies the ByteRange field of a digital signature and returns the signed data.
# File lib/origami/signature.rb, line 291
#
# Returns the bytes covered by a signature, after checking that its two
# byte ranges span the whole original data except the Contents value.
#
# Raises SignatureError when the ranges do not start at 0, do not end at
# the end of the data, or do not border the Contents value exactly.
def extract_signed_data(digsig)
  # Locate the Contents value by parsing it from its recorded offset.
  start_sig = digsig[:Contents].file_offset

  scanner = StringScanner.new(self.original_data)
  scanner.pos = digsig[:Contents].file_offset
  Object.typeof(scanner).parse(scanner)
  end_sig = scanner.pos
  scanner.terminate

  before, after = digsig.ranges

  ranges_match = before.begin == 0 &&
                 after.end == self.original_data.size &&
                 before.end == start_sig &&
                 after.begin == end_sig
  raise SignatureError, "Invalid signature byte range" unless ranges_match

  self.original_data[before] + self.original_data[after]
end
# File lib/origami/trailer.rb, line 70
#
# Sets the trailer ID to a pair made of the same HexaString, built from
# 16 random bytes. Returns that pair.
def generate_id
  random_bytes = Random.new.bytes(16)
  identifier = HexaString.new(random_bytes)

  self.trailer.ID = [ identifier, identifier ]
end
Instantiates the basic structures required for a valid PDF file.
# File lib/origami/pdf.rb, line 969
#
# Installs a Catalog (the object under the trailer :Root key when there is
# one, a new Catalog otherwise), points the Root entry of the last
# revision trailer at it and marks the document as loaded.
#
# Returns self.
def init
  root = trailer_key(:Root) || Catalog.new
  self.Catalog = root

  @revisions.last.trailer.Root = root.reference
  @loaded = true

  self
end
# File lib/origami/outputintents.rb, line 58
#
# Appends a PDF/A-1 output intent to the Catalog and adds a pdfaid
# description (conformance "A", part "1") to the document XML metadata.
#
# Does nothing when pdfa1? is already true.
def intents_as_pdfa1
  return if self.pdfa1?

  self.Catalog.OutputIntents ||= []
  self.Catalog.OutputIntents << self.insert(
    OutputIntent.new(
      Type: :OutputIntent,
      S: OutputIntent::Intent::PDFA1,
      OutputConditionIdentifier: "RGB"
    )
  )

  metadata = self.create_metadata
  xmp = REXML::Document.new(metadata.data)

  description = REXML::Element.new('rdf:Description')
  description.add_attribute('rdf:about', '')
  description.add_attribute('xmlns:pdfaid', 'http://www.aiim.org/pdfa/ns/id/')
  [ ['pdfaid:conformance', 'A'], ['pdfaid:part', '1'] ].each do |tag, text|
    description.add(REXML::Element.new(tag).add_text(text))
  end

  xmp.elements["*/rdf:RDF"].add(description)

  # Serialize the updated document back into the metadata.
  xml = ""
  xmp.write(xml, 3)
  metadata.data = xml
end
Force the loading of all objects in the document.
# File lib/origami/pdf.rb, line 650
#
# Fetches every object that a revision xref table (or, failing that, its
# xref stream) lists as not free, then marks the document as loaded.
#
# Does nothing when the document is already loaded or has no parser.
def load_all_objects
  return if @loaded || @parser.nil?

  @revisions.each do |rev|
    entries =
      if rev.has_xreftable?
        rev.xreftable
      elsif rev.has_xrefstm?
        rev.xrefstm
      else
        next # no cross-reference data in this revision
      end

    entries.each_with_number do |entry, number|
      next if entry.free?

      self.get_object(number)
    end
  end

  @loaded = true
end
Loads an object from its given file offset. The document must have an associated Parser.
# File lib/origami/pdf.rb, line 611
#
# Parses the object located at +offset+ and adds it to +revision+. The
# parser position is restored afterwards, even if parsing raises.
#
# Returns the parsed object, or nil when the document is already loaded,
# has no parser, or when nothing could be parsed.
def load_object_at_offset(revision, offset)
  return nil if @loaded || @parser.nil?

  saved_position = @parser.pos

  begin
    parsed = @parser.parse_object(offset)
    return nil if parsed.nil?

    make_encrypted_object(parsed) if self.is_a?(Encryption::EncryptedDocument)

    add_to_revision(parsed, revision)
  ensure
    @parser.pos = saved_position
  end

  parsed
end
Method called on encrypted objects loaded into the document.
# File lib/origami/pdf.rb, line 634
#
# Extends a loaded object with the matching encryption module:
# EncryptedString for a String, EncryptedStream for a Stream, and
# EncryptedString for each entry of the strings cache of a Dictionary or
# an Array. Other objects are left untouched.
def make_encrypted_object(object)
  case object
  when String then object.extend(Encryption::EncryptedString)
  when Stream then object.extend(Encryption::EncryptedStream)
  when Dictionary, Array
    object.strings_cache.each { |cached| cached.extend(Encryption::EncryptedString) }
  end
end
Returns the final binary representation of the current document.
# File lib/origami/pdf.rb, line 785
#
# Serializes the document: the header, then for each revision its objects
# followed by its cross-reference table (when one is used) and its trailer.
#
# params:: options merged over the following defaults:
#          rebuild_xrefs::  true; recompute the cross-reference data.
#          noindent::       false; when set, Dictionaries and Streams are
#                           written with indent: 0.
#          obfuscate::      false; when true, the trailer is written with
#                           to_obfuscated_str.
#          use_xrefstm::    defaults to whether the document contains an
#                           ObjectStream.
#          use_xreftable::  defaults to the opposite of use_xrefstm.
#          up_to_revision:: number of revisions to output, capped to the
#                           number of revisions of the document.
#
# Raises InvalidPDFError when two consecutive objects of a sorted revision
# share the same reference.
#
# Returns the resulting string.
def output(params = {})
  has_objstm = self.indirect_objects.any?{|obj| obj.is_a?(ObjectStream)}

  options = {
    rebuild_xrefs: true,
    noindent: false,
    obfuscate: false,
    use_xrefstm: has_objstm,
    use_xreftable: (not has_objstm),
    up_to_revision: @revisions.size
  }
  options.update(params)

  # Never output more revisions than the document holds.
  options[:up_to_revision] = @revisions.size if options[:up_to_revision] > @revisions.size

  # Reset to default params if no xrefs are chosen (hybrid files not supported yet)
  if options[:use_xrefstm] == options[:use_xreftable]
    options[:use_xrefstm] = has_objstm
    options[:use_xreftable] = (not has_objstm)
  end

  # Get trailer dictionary
  trailer_dict = self.trailer.dictionary

  # Offsets carried from one revision to the next.
  prev_xref_offset = nil
  xrefstm_offset = nil

  # Header
  bin = ""
  bin << @header.to_s

  # For each revision
  @revisions[0, options[:up_to_revision]].each do |rev|

    # Create xref table/stream.
    if options[:rebuild_xrefs] == true
      # lastno_*: number of the last object recorded; brange_*: first
      # object number of the subsection being accumulated.
      lastno_table, lastno_stm = 0, 0
      brange_table, brange_stm = 0, 0

      # Both entry lists start with the free entry for object 0.
      xrefs_stm = [ XRef.new(0, 0, XRef::FREE) ]
      xrefs_table = [ XRef.new(0, XRef::FIRSTFREE, XRef::FREE) ]

      if options[:use_xreftable] == true
        xrefsection = XRef::Section.new
      end

      if options[:use_xrefstm] == true
        # Reuse (and empty) the revision xref stream, or add a new one.
        xrefstm = rev.xrefstm || XRefStream.new
        if xrefstm == rev.xrefstm
          xrefstm.clear
        else
          add_to_revision(xrefstm, rev)
        end
      end
    end

    objset = rev.objects

    # With xref streams, objects embedded in object streams are listed too.
    objset.find_all{|obj| obj.is_a?(ObjectStream)}.each do |objstm|
      objset.concat objstm.objects
    end if options[:rebuild_xrefs] == true and options[:use_xrefstm] == true

    previous_obj = nil

    # For each object, in number order
    # Move any XRefStream to the end of the revision.
    objset.sort_by {|obj| [obj.is_a?(XRefStream) ? 1 : 0, obj.no, obj.generation] }
          .each do |obj|

      # Ensures that every object has a unique reference number.
      # Duplicates should never happen in a well-formed revision and will cause breakage of xrefs.
      if previous_obj and previous_obj.reference == obj.reference
        raise InvalidPDFError, "Duplicate object detected, reference #{obj.reference}"
      else
        previous_obj = obj
      end

      # Create xref entry.
      if options[:rebuild_xrefs] == true

        # Adding subsections if needed
        # (a gap in object numbers closes the current subsection).
        if options[:use_xreftable] and (obj.no - lastno_table).abs > 1
          xrefsection << XRef::Subsection.new(brange_table, xrefs_table)

          xrefs_table.clear
          brange_table = obj.no
        end

        if options[:use_xrefstm] and (obj.no - lastno_stm).abs > 1
          xrefs_stm.each do |xref| xrefstm << xref end
          xrefstm.Index ||= []
          xrefstm.Index << brange_stm << xrefs_stm.length

          xrefs_stm.clear
          brange_stm = obj.no
        end

        # Process embedded objects
        if options[:use_xrefstm] and obj.parent != obj and obj.parent.is_a?(ObjectStream)
          index = obj.parent.index(obj.no)

          xrefs_stm << XRefToCompressedObject.new(obj.parent.no, index)

          lastno_stm = obj.no
        else
          # The object is written at the current end of the output.
          xrefs_stm << XRef.new(bin.size, obj.generation, XRef::USED)
          xrefs_table << XRef.new(bin.size, obj.generation, XRef::USED)

          lastno_table = lastno_stm = obj.no
        end
      end

      # Only objects that are not embedded in an object stream are written.
      if obj.parent == obj or not obj.parent.is_a?(ObjectStream)

        # Finalize XRefStm
        if options[:rebuild_xrefs] == true and options[:use_xrefstm] == true and obj == xrefstm
          xrefstm_offset = bin.size

          xrefs_stm.each do |xref| xrefstm << xref end

          # The middle field width is the number of bytes needed to
          # encode the offset of the xref stream itself.
          xrefstm.W = [ 1, (xrefstm_offset.to_s(2).size + 7) >> 3, 2 ]
          if xrefstm.DecodeParms.is_a?(Dictionary) and xrefstm.DecodeParms.has_key?(:Columns)
            xrefstm.DecodeParms[:Columns] = xrefstm.W[0] + xrefstm.W[1] + xrefstm.W[2]
          end

          xrefstm.Index ||= []
          xrefstm.Index << brange_stm << xrefs_stm.size

          # The trailer entries are merged into the xref stream dictionary.
          xrefstm.dictionary = xrefstm.dictionary.merge(trailer_dict)
          xrefstm.Prev = prev_xref_offset
          rev.trailer.dictionary = nil

          add_to_revision(xrefstm, rev)

          xrefstm.pre_build
          xrefstm.post_build
        end

        # Output object code
        if (obj.is_a?(Dictionary) or obj.is_a?(Stream)) and options[:noindent]
          bin << obj.to_s(indent: 0)
        else
          bin << obj.to_s
        end
      end
    end # end each object

    rev.trailer ||= Trailer.new

    # XRef table
    if options[:rebuild_xrefs] == true

      if options[:use_xreftable] == true
        table_offset = bin.size

        xrefsection << XRef::Subsection.new(brange_table, xrefs_table)
        rev.xreftable = xrefsection

        rev.trailer.dictionary = trailer_dict
        rev.trailer.Size = objset.size + 1
        rev.trailer.Prev = prev_xref_offset

        rev.trailer.XRefStm = xrefstm_offset if options[:use_xrefstm] == true
      end

      # startxref targets the table when one is written, else the stream;
      # the same offset becomes the Prev value of the next revision.
      startxref = options[:use_xreftable] == true ? table_offset : xrefstm_offset
      rev.trailer.startxref = prev_xref_offset = startxref

    end

    # Trailer
    bin << rev.xreftable.to_s if options[:use_xreftable] == true
    bin << (options[:obfuscate] == true ? rev.trailer.to_obfuscated_str : rev.trailer.to_s)

  end # end each revision

  bin
end
Converts a logical PDF
view into a physical view ready for writing.
# File lib/origami/pdf.rb, line 730
#
# Builds every indirect object of the document within the revision it is
# paired with by indirect_objects_by_rev.
#
# Returns self.
def physicalize(options = {})
  indirect_objects_by_rev.each { |object, rev| build_object(object, rev, options) }

  self
end
Computes and updates a placeholder XRef::Section (every entry zeroed and marked as free) for each Revision.
# File lib/origami/pdf.rb, line 1006
#
# Gives each revision a placeholder xref table (all entries are created as
# XRef.new(0, 0, XRef::FREE)) and updates the Size and startxref values of
# its trailer.
#
# Returns self.
def rebuild_dummy_xrefs #:nodoc
  # Builds a Section with one placeholder entry per object, starting a new
  # subsection whenever object numbers are not consecutive.
  placeholder_section = lambda do |objects|
    previous_number = 0
    first_number = 0
    entries = [ XRef.new(0, XRef::FIRSTFREE, XRef::FREE) ]
    section = XRef::Section.new

    objects.sort.each do |obj|
      if (obj.no - previous_number).abs > 1
        section << XRef::Subsection.new(first_number, entries)
        first_number = obj.no
        entries.clear
      end

      entries << XRef.new(0, 0, XRef::FREE)
      previous_number = obj.no
    end

    section << XRef::Subsection.new(first_number, entries)
    section
  end

  object_count = 0
  offset = @header.to_s.size

  @revisions.each do |rev|
    # Skip over the serialized objects of the revision.
    offset = rev.objects.inject(offset) { |total, obj| total + obj.to_s.size }
    object_count += rev.body.size

    rev.xreftable = placeholder_section.call(rev.objects)
    rev.trailer ||= Trailer.new
    rev.trailer.Size = object_count + 1
    rev.trailer.startxref = offset

    offset += rev.xreftable.to_s.size + rev.trailer.to_s.size
  end

  self
end
Computes and updates the XRef::Section of each Revision.
# File lib/origami/pdf.rb, line 673
#
# Rebuilds the xref table of each revision with build_xrefs and updates
# the Size and startxref values of its trailer.
#
# Returns self.
def rebuild_xrefs
  object_count = 0
  offset = @header.to_s.size

  @revisions.each do |rev|
    # Skip over the serialized objects of the revision.
    offset = rev.objects.inject(offset) { |total, obj| total + obj.to_s.size }
    object_count += rev.body.size

    rev.xreftable = build_xrefs(rev.objects)
    rev.trailer ||= Trailer.new
    rev.trailer.Size = object_count + 1
    rev.trailer.startxref = offset

    offset += rev.xreftable.to_s.size + rev.trailer.to_s.size
  end

  self
end
Searches through an object, possibly going into object streams. Returns an array of matching strings, names and streams.
# File lib/origami/pdf.rb, line 575
#
# Collects what matches +pattern+ inside +object+.
#
# streams::        when true, a Stream whose data matches is reported too.
# object_streams:: when true, the objects held by an ObjectStream are
#                  searched recursively.
#
# A Stream whose data raises Filter::Error is skipped: only the matches
# found in its dictionary are returned.
#
# Returns an array of matching objects.
def search_object(object, pattern, streams: true, object_streams: true)
  found = []

  # Gathers the cached strings and names of a container matching the pattern.
  collect_from = lambda do |container|
    found.concat(container.strings_cache.select { |str| pattern === str })
    found.concat(container.names_cache.select { |name| pattern === name.value })
  end

  case object
  when Stream
    collect_from.call(object.dictionary)

    begin
      found.push(object) if streams && object.data.match(pattern)
    rescue Filter::Error
      return found # Skip object if a decoding error occured.
    end

    if object.is_a?(ObjectStream) && object_streams
      object.each do |embedded|
        found.concat(search_object(embedded, pattern, streams: streams, object_streams: object_streams))
      end
    end

  when Name, String
    found.push(object) if object.value.match(pattern)

  when Dictionary, Array
    collect_from.call(object)
  end

  found
end
Iterates over the children of an object, avoiding cycles.
# File lib/origami/pdf.rb, line 546
#
# Yields each descendant of +object+ depth-first: the values of a
# Dictionary, the items of an Array, the dictionary of a Stream.
#
# excludes:: objects already walked; an object found in this list is not
#            walked again, which breaks reference cycles. The list is
#            appended to during the walk.
#
# Returns an Enumerator when no block is given.
def walk_object(object, excludes: [], &block)
  return enum_for(__method__, object, excludes: excludes) unless block_given?
  return if excludes.include?(object)

  excludes.push(object)

  # Hands a child to the caller, then walks down into it.
  visit = lambda do |child|
    block.call(child)
    walk_object(child, excludes: excludes, &block)
  end

  case object
  when Dictionary then object.each_value(&visit)
  when Array then object.each(&visit)
  when Stream then visit.call(object.dictionary)
  end
end