class HexaPDF::CLI::Command

Base class for all hexapdf commands. It provides utility methods needed by the individual commands.

Constants

PAGE_MAP

Protected Instance Methods

apply_encryption_options(doc) click to toggle source

Applies the encryption related options to the given HexaPDF::Document instance.

See: define_encryption_options

# File lib/hexapdf/cli/command.rb, line 279
# Applies the encryption settings stored in @out_options to +doc+.
#
# If the encryption mode is :add, +doc.encrypt+ is called with the algorithm, key length,
# force-V4 flag, permissions and owner/user passwords taken from @out_options. If the mode is
# :remove, +doc.encrypt+ is called with <tt>name: nil</tt>. For any other mode nothing is
# done and +nil+ is returned.
def apply_encryption_options(doc)
  mode = @out_options.encryption
  if mode == :add
    settings = {
      algorithm: @out_options.enc_algorithm,
      key_length: @out_options.enc_key_length,
      force_v4: @out_options.enc_force_v4,
      permissions: @out_options.enc_permissions,
      owner_password: @out_options.enc_owner_pwd,
      user_password: @out_options.enc_user_pwd,
    }
    doc.encrypt(**settings)
  elsif mode == :remove
    doc.encrypt(name: nil)
  end
end
apply_optimization_options(doc) click to toggle source

Applies the optimization options to the given HexaPDF::Document instance.

See: define_optimization_options

# File lib/hexapdf/cli/command.rb, line 234
# Runs the :optimize task on +doc+ with the values stored in @out_options and afterwards,
# if needed, post-processes the individual objects.
#
# The per-object pass (optimize_stream followed by optimize_font for every object yielded
# by <tt>doc.each(only_current: false)</tt>) is skipped when the stream mode is :preserve
# and font optimization is not enabled.
def apply_optimization_options(doc)
  doc.task(:optimize,
           compact: @out_options.compact,
           object_streams: @out_options.object_streams,
           xref_streams: @out_options.xref_streams,
           compress_pages: @out_options.compress_pages)
  return if @out_options.streams == :preserve && !@out_options.optimize_fonts

  doc.each(only_current: false) do |object|
    optimize_stream(object)
    optimize_font(object)
  end
end
define_encryption_options() click to toggle source

Defines the encryption options.

See: out_options, apply_encryption_options

# File lib/hexapdf/cli/command.rb, line 180
# Registers the encryption related command line switches on +options+.
#
# --decrypt sets the encryption mode in @out_options to :remove. Every other switch sets
# the mode to :add and stores its value in the matching @out_options attribute. A password
# argument of "-" is replaced by the result of read_password. Permission names are
# converted to symbols; an unknown name raises OptionParser::InvalidArgument.
def define_encryption_options
  # All switches except --decrypt imply that the output file gets encrypted.
  enable = -> { @out_options.encryption = :add }
  # "-" as password value means the password is obtained through read_password.
  resolve_password = ->(value, prompt) { value == '-' ? read_password(prompt) : value }
  password_help = lambda do |kind|
    "The #{kind} password to be set on the output file (use - for reading from standard input)"
  end

  options.separator("")
  options.separator("Encryption options:")

  options.on("--decrypt", "Remove any encryption") { @out_options.encryption = :remove }
  options.on("--encrypt", "Encrypt the output file") { enable.call }

  options.on("--owner-password PASSWORD", String, password_help.call("owner")) do |pwd|
    enable.call
    @out_options.enc_owner_pwd = resolve_password.call(pwd, "Owner password")
  end
  options.on("--user-password PASSWORD", String, password_help.call("user")) do |pwd|
    enable.call
    @out_options.enc_user_pwd = resolve_password.call(pwd, "User password")
  end

  algorithm_help = "The encryption algorithm: aes or arc4 " \
    "(default: #{@out_options.enc_algorithm})"
  options.on("--algorithm ALGORITHM", [:aes, :arc4], algorithm_help) do |algorithm|
    enable.call
    @out_options.enc_algorithm = algorithm
  end

  key_length_help = "The encryption key length in bits " \
    "(default: #{@out_options.enc_key_length})"
  options.on("--key-length BITS", Integer, key_length_help) do |bits|
    enable.call
    @out_options.enc_key_length = bits
  end

  options.on("--force-V4",
             "Force use of encryption version 4 if key length=128 and algorithm=arc4") do
    enable.call
    @out_options.enc_force_v4 = true
  end

  syms = HexaPDF::Encryption::StandardSecurityHandler::Permissions::SYMBOL_TO_PERMISSION.keys
  permissions_help = "Comma separated list of permissions to be set on the output file. " \
    "Possible values: #{syms.join(', ')}"
  options.on("--permissions PERMS", Array, permissions_help) do |perms|
    # Converts the names in place; the first unknown name aborts option parsing.
    perms.map! do |perm|
      sym = perm.to_sym
      next sym if syms.include?(sym)
      raise OptionParser::InvalidArgument, "#{perm} (invalid permission name)"
    end
    enable.call
    @out_options.enc_permissions = perms
  end
end
define_optimization_options() click to toggle source

Defines the optimization options.

See: out_options, apply_optimization_options

# File lib/hexapdf/cli/command.rb, line 145
# Registers the optimization related command line switches on +options+.
#
# Each switch stores its parsed value in the @out_options attribute of the same name. The
# attribute values that are current when this method is called are interpolated into the
# help texts as the defaults.
def define_optimization_options
  options.separator("")
  options.separator("Optimization options:")
  options.on("--[no-]compact", "Delete unnecessary PDF objects (default: " \
             "#{@out_options.compact})") do |c|
    @out_options.compact = c
  end
  options.on("--object-streams MODE", [:generate, :preserve, :delete],
             "Handling of object streams (either generate, preserve or delete; " \
               "default: #{@out_options.object_streams})") do |os|
    @out_options.object_streams = os
  end
  options.on("--xref-streams MODE", [:generate, :preserve, :delete],
             "Handling of cross-reference streams (either generate, preserve or delete; " \
               "default: #{@out_options.xref_streams})") do |x|
    @out_options.xref_streams = x
  end
  options.on("--streams MODE", [:compress, :preserve, :uncompress],
             "Handling of stream data (either compress, preserve or uncompress; default: " \
               "#{@out_options.streams})") do |streams|
    @out_options.streams = streams
  end
  options.on("--[no-]compress-pages", "Recompress page content streams (may take a long " \
             "time; default: #{@out_options.compress_pages})") do |c|
    @out_options.compress_pages = c
  end
  # The default is given in parentheses like for all other switches; previously the text
  # read "...font files; default: false)" with an unmatched closing parenthesis.
  options.on("--[no-]optimize-fonts", "Optimize embedded font files (default: " \
             "#{@out_options.optimize_fonts})") do |o|
    @out_options.optimize_fonts = o
  end
end
maybe_raise_on_existing_file(filename) click to toggle source

Checks whether the given output file exists and raises an error if it does and HexaPDF::CLI#force is not set.

# File lib/hexapdf/cli/command.rb, line 135
# Raises a RuntimeError if +filename+ exists and the +force+ flag of the command parser is
# not set. Returns +nil+ otherwise.
def maybe_raise_on_existing_file(filename)
  return if command_parser.force
  return unless File.exist?(filename)

  raise "Output file '#{filename}' already exists, not overwriting. " \
    "Use --force to force writing"
end
optimize_font(obj) click to toggle source

Optimize the object if it is a font object.

# File lib/hexapdf/cli/command.rb, line 260
# Rebuilds the embedded font file of +obj+ in optimized form.
#
# Nothing is done unless font optimization is enabled in @out_options, +obj+ is a
# HexaPDF::Type::Font, the font is embedded, and it is either a TrueType font or a Type0
# font whose descendant font has the subtype CIDFontType2.
#
# Any StandardError raised during optimization is swallowed, so a problematic font does not
# abort the surrounding operation; a message is written to $stderr only if
# <tt>command_parser.verbosity_info?</tt> is true.
def optimize_font(obj)
  return unless @out_options.optimize_fonts && obj.kind_of?(HexaPDF::Type::Font) &&
    (obj[:Subtype] == :TrueType ||
     (obj[:Subtype] == :Type0 && obj.descendant_font[:Subtype] == :CIDFontType2)) &&
    obj.embedded?

  font = HexaPDF::Font::TrueType::Font.new(StringIO.new(obj.font_file.stream))
  data = HexaPDF::Font::TrueType::Optimizer.build_for_pdf(font)
  obj.font_file.stream = data
  # /Length1 is a length in bytes. String#size counts characters and would be too small
  # for any non-binary string containing multi-byte characters, so bytesize is used.
  obj.font_file[:Length1] = data.bytesize
rescue StandardError => e
  if command_parser.verbosity_info?
    $stderr.puts "Error optimizing font object (#{obj.oid},#{obj.gen}): #{e.message}"
  end
end
optimize_stream(obj) click to toggle source

Applies the chosen stream mode to the given object.

# File lib/hexapdf/cli/command.rb, line 252
# Sets the filter of +obj+ according to the stream mode stored in @out_options.
#
# Nothing is done if the mode is :preserve, if +obj+ does not respond to +set_filter+, or if
# any of the object's current filters has a truthy entry in IGNORED_FILTERS. Otherwise the
# filter is set to :FlateDecode for mode :compress and to +nil+ for every other mode.
def optimize_stream(obj)
  mode = @out_options.streams
  return if mode == :preserve
  return unless obj.respond_to?(:set_filter)
  return if Array(obj[:Filter]).any? {|filter_name| IGNORED_FILTERS[filter_name] }

  new_filter = (mode == :compress ? :FlateDecode : nil)
  obj.set_filter(new_filter)
end
parse_pages_specification(range, count) click to toggle source

Parses the pages specification string and returns an array of tuples, each containing a zero-based page index and a rotation value. The rotation value is either -90, 90, 180, :none or nil: an integer means adding a rotation by that number of degrees, :none means removing any set rotation value and nil means preserving the set rotation value.

The parameter count needs to be the total number of pages in the document.

For details on the pages specification see the hexapdf(1) manual page.

# File lib/hexapdf/cli/command.rb, line 313
# Converts the comma separated pages specification +range+ into an array of
# <tt>[page_index, rotation]</tt> tuples; +count+ is the total number of pages.
#
# Two forms are recognized per entry: a single page, optionally followed by a rotation
# letter, and a "first-last" span with an optional "/step" and rotation letter. Page
# numbers are resolved through PAGE_MAP and rotation letters through ROTATE_MAP. A single
# page beyond +count+ is dropped, the ends of a span are clamped to +count+, and a span
# whose start lies after its end is walked backwards.
#
# Raises OptionParser::InvalidArgument for an entry matching neither form.
def parse_pages_specification(range, count)
  pages = []
  range.split(',').each do |spec|
    if (single = /\A#{PAGE_NUMBER_SPEC}(l|r|d|n)?\z/o.match(spec))
      number = PAGE_MAP[single[1], count]
      next if number > count
      pages << [number - 1, ROTATE_MAP[single[2]]]
    elsif (span = /\A#{PAGE_NUMBER_SPEC}-#{PAGE_NUMBER_SPEC}(?:\/([1-9]\d*))?(l|r|d|n)?\z/o.match(spec))
      first_index = [PAGE_MAP[span[1], count], count].min - 1
      last_index = [PAGE_MAP[span[2], count], count].min - 1
      stride = span[3] ? span[3].to_i : 1
      stride = -stride if first_index > last_index
      rotation = ROTATE_MAP[span[4]]
      first_index.step(to: last_index, by: stride) {|index| pages << [index, rotation] }
    else
      raise OptionParser::InvalidArgument, "invalid page range format: #{spec}"
    end
  end
  pages
end
pdf_options(password) click to toggle source

Returns a hash with HexaPDF::Document options based on the given password and the option switches.

# File lib/hexapdf/cli/command.rb, line 99
# Builds the option hash for creating a HexaPDF::Document from +password+ and the +strict+
# switch of the command parser.
#
# The returned hash holds the decryption password under :decryption_opts and, under
# :config, the values for 'parser.try_xref_reconstruction' (the negated strict flag) and
# 'parser.on_correctable_error'. The latter is a proc that returns +true+ in strict mode;
# otherwise it returns +false+ and, if <tt>command_parser.verbosity_info?</tt> is true,
# writes the problem to $stderr.
#
# Side effect: 'filter.predictor.strict' of HexaPDF::GlobalConfiguration is set to the
# strict flag.
def pdf_options(password)
  strict = command_parser.strict
  HexaPDF::GlobalConfiguration['filter.predictor.strict'] = strict

  error_handler =
    if strict
      proc { true }
    else
      proc do |_, msg, pos|
        if command_parser.verbosity_info?
          details = MalformedPDFError.new(msg, pos: pos).message
          $stderr.puts "Corrected parsing problem: #{details}"
        end
        false
      end
    end

  {
    decryption_opts: {password: password},
    config: {
      'parser.try_xref_reconstruction' => !strict,
      'parser.on_correctable_error' => error_handler,
    },
  }
end
read_password(prompt = "Password") click to toggle source

Reads a password from the standard input and falls back to the console if needed.

The optional argument prompt can be used to customize the prompt when reading from the console.

# File lib/hexapdf/cli/command.rb, line 336
# Returns a password read from standard input or from the console.
#
# If $stdin is a terminal, the result of read_from_console is returned as is. Otherwise one
# line is read from $stdin; if no line is available, read_from_console is used instead. In
# this second case the trailing line separator is removed from the result.
#
# +prompt+ is passed on to read_from_console.
def read_password(prompt = "Password")
  return read_from_console(prompt) if $stdin.tty?

  line = $stdin.gets
  line ||= read_from_console(prompt)
  line.chomp
end
remove_unused_pages(doc) click to toggle source

Removes unused pages and page tree nodes from the document.

# File lib/hexapdf/cli/command.rb, line 345
# Deletes all page and page tree node objects from +doc+ that are not in use.
#
# In use are the pages yielded by <tt>doc.pages</tt> and the page tree root; objects are
# identified by their +data+ value. Every other HexaPDF::Dictionary whose type is :Pages or
# :Page is passed to <tt>doc.delete</tt>.
def remove_unused_pages(doc)
  in_use = {}
  doc.pages.each {|page| in_use[page.data] = true }
  in_use[doc.pages.root.data] = true

  doc.each(only_current: false) do |obj|
    next unless obj.kind_of?(HexaPDF::Dictionary)
    next unless obj.type == :Pages || obj.type == :Page
    doc.delete(obj) unless in_use.key?(obj.data)
  end
end
with_document(file, password: nil, out_file: nil, incremental: false) { |document| ... } click to toggle source

Creates a HexaPDF::Document instance for the PDF file and yields it.

If out_file is given, the document is written to it after yielding.

# File lib/hexapdf/cli/command.rb, line 82
# Creates a HexaPDF::Document for +file+, yields it and afterwards hands it to
# write_document together with +out_file+ and +incremental+.
#
# If +file+ and +out_file+ are the same, the document is created through
# HexaPDF::Document.open. Otherwise +file+ is opened in binary mode and passed as IO object
# to HexaPDF::Document.new; that IO object is closed when the method is left, also in case
# of an exception.
#
# The document options are obtained from pdf_options using +password+.
def with_document(file, password: nil, out_file: nil, incremental: false) #:yield: document
  source_io = nil
  document =
    if file == out_file
      HexaPDF::Document.open(file, **pdf_options(password))
    else
      source_io = File.open(file, 'rb')
      HexaPDF::Document.new(io: source_io, **pdf_options(password))
    end

  yield(document)

  write_document(document, out_file, incremental: incremental)
ensure
  source_io.close if source_io
end
write_document(doc, out_file, incremental: false) click to toggle source

Writes the document to the given file or does nothing if out_file is nil.

# File lib/hexapdf/cli/command.rb, line 119
# Validates +doc+ and writes it to +out_file+; does nothing and returns +nil+ if +out_file+
# is +nil+ or +false+.
#
# Validation runs with <tt>auto_correct: true</tt>. For each reported problem:
#
# * in strict mode a problem that is not correctable raises a RuntimeError;
# * otherwise the problem is written to $stderr if
#   <tt>command_parser.verbosity_info?</tt> is true.
#
# The document is then written with <tt>validate: false</tt>, because validation has
# already been done, and with the given +incremental+ flag.
def write_document(doc, out_file, incremental: false)
  return unless out_file

  doc.validate(auto_correct: true) do |msg, correctable, object|
    if command_parser.strict && !correctable
      raise "Validation error for object (#{object.oid},#{object.gen}): #{msg}"
    end
    next unless command_parser.verbosity_info?

    status = correctable ? 'Corrected' : 'Ignored'
    $stderr.puts "#{status} validation problem for object (#{object.oid},#{object.gen}): #{msg}"
  end
  doc.write(out_file, validate: false, incremental: incremental)
end

Private Instance Methods

read_from_console(prompt) click to toggle source

Displays the given prompt, reads from the console without echo and returns the read string.

# File lib/hexapdf/cli/command.rb, line 359
# Writes "<prompt>: " to the console, reads one line from it with echoing turned off and
# returns that line without its trailing line separator.
#
# A line break is printed via +puts+ afterwards, since the one typed by the user is not
# echoed.
def read_from_console(prompt)
  console = IO.console
  console.write("#{prompt}: ")
  input = console.noecho {|io| io.gets.chomp }
  puts
  input
end