class HexaPDF::CLI::Images

Lists or extracts images from a PDF file.

See: HexaPDF::Type::Image

Private Instance Methods

each_image(doc) { |obj, index, page_index, ppi| ... } click to toggle source

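Iterates over all images: first the images located on each page, in page order, and then, if searching is enabled, the remaining images of the document that were not found on any page.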

# File lib/hexapdf/cli/images.rb, line 164
# Iterates over the images of the document.
#
# For every page, the entries of the page's /XObject resource dictionary whose /Subtype is
# :Image and which have no truthy /ImageMask entry are collected by name and handed to an
# ImageLocationProcessor together with the page's /UserUnit (1 if not set). Each (name, ppi)
# pair of the processor's result is then yielded as
#
#   obj, index, page_index, ppi
#
# where +index+ starts at 1 and is reused when the same object is encountered again,
# +page_index+ is the 1-based page number and +ppi+ is the value reported by the processor
# (#list_images destructures it as an x/y pair).
#
# If @search is set, the entries of doc.images that have not been yielded before follow, each
# with the next free index and with +page_index+ and +ppi+ set to +nil+.
def each_image(doc) # :yields: obj, index, page_index, ppi
  index = 1
  seen = {}

  doc.pages.each_with_index do |page, pindex|
    xobjects = page.resources[:XObject]

    # A page without an /XObject dictionary simply contributes no image names.
    image_names = []
    xobjects&.each do |name, xobject|
      image_names << name if xobject[:Subtype] == :Image && !xobject[:ImageMask]
    end

    processor = ImageLocationProcessor.new(image_names, page[:UserUnit] || 1)
    page.process_contents(processor)
    processor.result.each do |name, ppi|
      xobject = xobjects[name]

      # An object that was already reported keeps the index it got the first time.
      image_index = seen[xobject]
      unless image_index
        image_index = seen[xobject] = index
        index += 1
      end
      yield(xobject, image_index, pindex + 1, ppi)
    end
  end

  return unless @search

  doc.images.each do |image|
    next if seen[image]
    yield(image, index, nil, nil)
    index += 1
  end
end
extract_images(doc) click to toggle source

Extracts the images with the given indices.

# File lib/hexapdf/cli/images.rb, line 146
# Extracts the images with the given indices.
#
# An image yielded by #each_image is selected if its index is contained in @indices or if
# @indices contains 0 (which therefore selects every image). Each selected index is handled
# only once, even if #each_image yields it several times.
#
# A writable image is written to "<@prefix>-<index>.<extension>" after
# #maybe_raise_on_existing_file has been called with that path. For an image that is not
# writable, a single warning is printed to $stderr if the command parser's warning verbosity
# is enabled.
def extract_images(doc)
  done = Set.new
  each_image(doc) do |image, index, _|
    next unless @indices.include?(index) || @indices.include?(0)
    # Set#add? returns nil if the index is already present, i.e. the image has already been
    # extracted or warned about on an earlier occurrence.
    next unless done.add?(index)

    info = image.info
    if info.writable
      path = "#{@prefix}-#{index}.#{info.extension}"
      maybe_raise_on_existing_file(path)
      puts "Extracting #{path}..." if command_parser.verbosity_info?
      image.write(path)
    elsif command_parser.verbosity_warning?
      $stderr.puts "Warning (image #{index}): PDF image format not supported for writing"
    end
  end
end
human_readable_file_size(size) click to toggle source

Returns the human readable file size.

# File lib/hexapdf/cli/images.rb, line 200
# Returns the human readable file size.
#
# Values from 0 to 9999 are shown verbatim with a "B" suffix, values from 10_000 to 999_999
# are divided by 1024 and shown with one decimal place and a "K" suffix, and everything else
# is divided by 1024 * 1024 and shown with one decimal place and an "M" suffix.
def human_readable_file_size(size)
  return "#{size}B" if (0..9999).cover?(size)
  return "#{(size / 1024.to_f).round(1)}K" if (10_000..999_999).cover?(size)

  megabytes = size.to_f / 1024 / 1024
  "#{megabytes.round(1)}M"
end
list_images(doc) click to toggle source

Outputs a table with the images of the PDF document.

# File lib/hexapdf/cli/images.rb, line 130
# Outputs a table with the images of the PDF document.
#
# A header line and a separator line of the same width are printed first, followed by one row
# per item yielded by #each_image. The page column shows "-" when no page index is given, and
# the size column is the /Length of the image plus the /Length of its /SMask entry (if there
# is one), formatted by #human_readable_file_size.
def list_images(doc)
  header = format("%5s %5s %9s %6s %6s %5s %4s %3s %5s %5s %6s %5s %8s",
                  "index", "page", "oid", "width", "height", "color", "comp", "bpc",
                  "x-ppi", "y-ppi", "size", "type", "writable")
  puts(header)
  # Derive the rule from the header so both always have the same width.
  puts("-" * header.length)
  each_image(doc) do |image, index, pindex, (x_ppi, y_ppi)|
    info = image.info
    # Without an /SMask entry the safe navigation gives nil and nil.to_i is 0.
    smask_length = image[:SMask]&.[](:Length).to_i
    size = human_readable_file_size(image[:Length] + smask_length)
    printf("%5i %5s %9s %6i %6i %5s %4i %3i %5s %5s %6s %5s %8s\n",
           index, pindex || '-', "#{image.oid},#{image.gen}", info.width, info.height,
           info.color_space, info.components, info.bits_per_component, x_ppi, y_ppi,
           size, info.type, info.writable)
  end
end