class HexaPDF::CLI::Files

Lists or extracts embedded files from a PDF file.

See: HexaPDF::Type::EmbeddedFile

Private Instance Methods

each_file(doc) { |obj, index| ... } click to toggle source

Iterates over all embedded files of the given document, yielding each file object together with its zero-based index.

# File lib/hexapdf/cli/files.rb, line 119
# Yields every embedded file of +doc+ together with its zero-based position.
#
# The file objects come from <tt>doc.files.each</tt>, which receives the value
# of @search as its +search+ keyword argument. Only objects answering true to
# +embedded_file?+ are kept; the index counts the kept objects only.
def each_file(doc, &block) # :yields: obj, index
  candidates = doc.files.each(search: @search)
  embedded = candidates.select { |file| file.embedded_file? }
  embedded.each_with_index(&block)
end
extract_files(doc) click to toggle source

Extracts the embedded files whose one-based indices were given; the index 0 selects all embedded files.

# File lib/hexapdf/cli/files.rb, line 104
# Extracts the embedded files of +doc+ that were selected through @indices.
#
# @indices holds one-based file numbers (matching the numbers printed by the
# listing); if it contains 0, every embedded file is extracted.
#
# Each file is written into the current working directory. Only the last
# component of the embedded file's path is used as the output name: the path
# is read from the file object itself, so a name such as "../x" or an absolute
# path must not be able to place the output outside the working directory.
#
# +maybe_raise_on_existing_file+ is called with the output name before the
# file is opened (presumably to refuse overwriting an existing file - defined
# elsewhere, confirm there).
def extract_files(doc)
  each_file(doc) do |obj, index|
    next unless @indices.include?(index + 1) || @indices.include?(0)

    # Strip any directory part so that the write cannot escape the working
    # directory.
    target = File.basename(obj.path)
    maybe_raise_on_existing_file(target)
    puts "Extracting #{target}..." if command_parser.verbosity_info?
    File.open(target, 'wb') do |file|
      # The decoder is a fiber handing out the stream data piece by piece; it
      # is drained until it has finished or returns a falsy value.
      fiber = obj.embedded_file_stream.stream_decoder
      while fiber.alive? && (data = fiber.resume)
        file << data
      end
    end
  end
end
list_files(doc) click to toggle source

Outputs the list of files embedded in the given PDF document.

# File lib/hexapdf/cli/files.rb, line 86
# Prints the embedded files of +doc+ to $stdout.
#
# Every file gets one line consisting of its one-based number (right-aligned
# in four columns) and its path. If the embedded file stream has a non-empty
# +Params+ entry, the known values among size, MD5 checksum (hex-encoded),
# creation date and modification date are appended in parentheses. A non-empty
# +Desc+ entry of the file object is printed on an indented line of its own.
def list_files(doc)
  each_file(doc) do |obj, index|
    $stdout.write(format("%4i: %s", index + 1, obj.path))
    params = obj.embedded_file_stream[:Params]
    if params && !params.empty?
      details = []
      details << "size: #{params[:Size]}" if params.key?(:Size)
      details << "md5: #{params[:CheckSum].unpack1('H*')}" if params.key?(:CheckSum)
      details << "ctime: #{params[:CreationDate]}" if params.key?(:CreationDate)
      details << "mtime: #{params[:ModDate]}" if params.key?(:ModDate)
      # Params may be non-empty without holding any of the entries above; in
      # that case nothing is appended instead of an empty " ()".
      $stdout.write(" (#{details.join(', ')})") unless details.empty?
    end
    $stdout.puts
    description = obj[:Desc]
    $stdout.puts("      #{description}") if description && !description.empty?
  end
end