class PDF::Info

Constants

VERSION

Public Class Methods

command_path() click to toggle source
# File lib/pdf/info.rb, line 12
# Returns the command currently stored in the +@@command_path+ class
# variable. The instance method +command+ interpolates this value as the
# executable at the start of the shell command line it runs.
#
# NOTE(review): no default is assigned anywhere in the visible source; if the
# variable has not been set before this is called, Ruby raises NameError.
def self.command_path
  @@command_path
end
command_path=(path) click to toggle source
# File lib/pdf/info.rb, line 8
# Stores +path+ in the +@@command_path+ class variable, which is shared by
# every instance of the class.
#
# The value is later interpolated verbatim (unquoted and unescaped) into a
# shell command line by the instance method +command+, so it must come from a
# trusted source.
def self.command_path=(path)
  @@command_path = path
end
new(pdf_path) click to toggle source
# File lib/pdf/info.rb, line 16
# Remembers the location of the PDF to inspect. Nothing is read or executed
# here; the path is only used later, when +command+ builds its command line.
#
# pdf_path:: path of the PDF file; stored as given in +@pdf_path+.
def initialize(pdf_path)
  @pdf_path = pdf_path
end

Public Instance Methods

command() click to toggle source
# File lib/pdf/info.rb, line 20
# Runs the configured command-line tool (+self.class.command_path+) against
# the PDF at +@pdf_path+ and returns what it printed on standard output.
# The tool's standard error is discarded.
#
# The tool is invoked with <tt>-enc UTF-8 -f 1 -l -1</tt>
# (presumably pdfinfo's "UTF-8 output, first page to last page" switches —
# confirm against the tool's manual).
#
# Returns:: String with the tool's output; byte sequences that are invalid
#           in the string's encoding are removed.
# Raises::  PDF::Info::UnexpectedExitError when the tool does not exit with
#           status 0. The error's +exit_code+ is the Integer exit status, or
#           +nil+ when the process did not exit normally (e.g. it was killed
#           by a signal).
def command
  # Required locally because this method is the only user of Shellwords.
  require 'shellwords'

  # Escape the path so that quotes, spaces, `$(...)`, backticks and similar
  # characters reach the tool as literal file-name characters instead of
  # being interpreted by the shell.
  escaped_path = Shellwords.escape(@pdf_path)
  output = `#{self.class.command_path} -enc UTF-8 -f 1 -l -1 #{escaped_path} 2> /dev/null`

  # $? is a Process::Status whose integer value is the raw wait status
  # (exit code 1 is 256 on POSIX), so comparing it with small integers does
  # not work. #exitstatus yields the real exit code.
  exit_code = $?.exitstatus

  unless exit_code == 0
    exit_error = PDF::Info::UnexpectedExitError.new
    exit_error.exit_code = exit_code
    raise exit_error
  end

  unless output.valid_encoding?
    # It's already UTF-8, so we need to convert to UTF-16 and back to
    # force the bad characters to be replaced.
    output.encode!('UTF-16', :undef => :replace, :invalid => :replace, :replace => "")
    output.encode!('UTF-8')
  end

  output
end
metadata() click to toggle source
# File lib/pdf/info.rb, line 39
# Runs the external tool via +command+ and returns its output parsed by
# +process_output+.
#
# Failures reported by +command+ are translated from the tool's exit status:
#
#   1     -> FileError
#   2     -> OutputError
#   3     -> BadPermissionsError
#   other -> UnknownError
#
# Returns:: whatever +process_output+ returns for the tool's output.
# Raises::  FileError, OutputError, BadPermissionsError or UnknownError as
#           listed above.
def metadata
  process_output(command)
rescue UnexpectedExitError => e
  code = e.exit_code
  # The error may carry a Process::Status (the raw value of $?) rather than
  # an Integer. A Process::Status compares by its raw wait status (256 for
  # exit code 1), so it would never match the branches below; reduce it to
  # the plain exit code first.
  code = code.exitstatus if code.respond_to?(:exitstatus)

  case code
  when 1
    raise FileError
  when 2
    raise OutputError
  when 3
    raise BadPermissionsError
  else
    raise UnknownError
  end
end
process_output(output) click to toggle source
# File lib/pdf/info.rb, line 56
# Parses the tool's textual output, one "Key: value" row per line, into a
# Hash keyed by symbols.
#
# Each row is split at its first colon and both halves are stripped. Rows
# that contain no colon carry no value and are ignored.
#
# Recognised keys:
#
#   "Pages"        -> :page_count         (Integer, via to_i)
#   "Encrypted"    -> :encrypted          (true only when the value is "yes")
#   "Optimized"    -> :optimized          (true only when the value is "yes")
#   "Tagged"       -> :tagged             (true only when the value is "yes")
#   "PDF version"  -> :version            (Float, via to_f)
#   "CreationDate" -> :creation_date      (result of parse_datetime; omitted
#                                          when that returns nil/false)
#   "ModDate"      -> :modification_date  (same rule as :creation_date)
#   "Page ... size" (any key starting with "Page" and ending with "size")
#                  -> :pages gets one [width, height] Float pair appended per
#                     row; :format is set to the row's value when it ends in
#                     a parenthesised word such as "(A4)", otherwise to "".
#                     Later rows overwrite :format.
#
# Every other key is lower-cased, its spaces are replaced with underscores,
# and it is stored as a symbol mapped to the stripped value string.
#
# output:: String holding the tool's output.
# Returns:: Hash of the parsed metadata.
def process_output(output)
  metadata = {}

  output.split("\n").each do |row|
    key, value = row.split(':', 2).map(&:strip)
    # No colon (or an empty row): there is no value to record.
    next if value.nil?

    case key
    when "Pages"
      metadata[:page_count] = value.to_i
    when "Encrypted"
      metadata[:encrypted] = value == 'yes'
    when "Optimized"
      metadata[:optimized] = value == 'yes'
    when "Tagged"
      metadata[:tagged] = value == 'yes'
    when "PDF version"
      metadata[:version] = value.to_f
    when "CreationDate"
      creation_date = parse_datetime(value)
      metadata[:creation_date] = creation_date if creation_date
    when "ModDate"
      modification_date = parse_datetime(value)
      metadata[:modification_date] = modification_date if modification_date
    when /^Page.*size$/
      metadata[:pages] ||= []
      # Only the first two numbers are the dimensions; a trailing paper name
      # such as "(A4)" also contains a digit and must not be picked up.
      metadata[:pages] << value.scan(/[\d.]+/).first(2).map(&:to_f)
      # String#[] returns the matched text (or nil), unlike Array#to_s on a
      # scan result, which yields the array's inspect form.
      metadata[:format] = value[/.*\(\w+\)$/].to_s
    else
      metadata[key.downcase.tr(" ", "_").to_sym] = value
    end
  end

  metadata
end

Private Instance Methods

parse_datetime(value) click to toggle source
# File lib/pdf/info.rb, line 94
# Converts a date string taken from the tool's output into a DateTime.
#
# Two strategies are tried in order; the first one that does not raise a
# StandardError supplies the result:
#
# 1. DateTime.parse, Ruby's heuristic parser.
# 2. DateTime.strptime with the fixed pattern "%m/%d/%Y %k:%M:%S".
#
# value:: the raw date text.
# Returns:: the parsed DateTime, or nil when both strategies raise.
def parse_datetime(value)
  attempts = [
    lambda { DateTime.parse(value) },
    lambda { DateTime.strptime(value, '%m/%d/%Y %k:%M:%S') }
  ]

  attempts.each do |attempt|
    begin
      return attempt.call
    rescue
      # This strategy could not handle the input; fall through to the next.
      next
    end
  end

  nil
end