class Podoff::Document

Attributes

additions[R]
encoding[R]
obj_counters[R]
objs[R]
root[R]
scanner[R]
version[R]
xref[R]

Public Class Methods

load(path, encoding) click to toggle source
# File lib/podoff.rb, line 23
# Reads the file at +path+ in binary mode, tagging what was read with
# +encoding+, and wraps the content in a new document.
#
# @param path [String] location of the PDF file to read
# @param encoding [String] encoding name, appended to the 'rb:' open mode
#   and passed on to the document
# @return [Podoff::Document]
def self.load(path, encoding)

  content = File.open(path, 'rb:' + encoding, &:read)

  Podoff::Document.new(content, encoding)
end
new(s, encoding) click to toggle source
# File lib/podoff.rb, line 47
# Scans the PDF text +s+ and indexes what it finds: the version header,
# the last "startxref" offset, the last "/Root" reference and every
# "N G obj" object.
#
# @param s [String] the whole PDF content
# @param encoding [String] kept as-is in @encoding
# @raise [ArgumentError] when +s+ does not start with a "%PDF-x.y" header
def initialize(s, encoding)

  raise ArgumentError, 'not a PDF file' \
    unless s.match(/\A%PDF-\d+\.\d+\s/)

  @encoding = encoding

  @scanner = ::StringScanner.new(s)
  @version = nil
  @xref = nil
  @objs = {}
  @obj_counters = {}
  @root = nil

  @additions = {}

  @version = @scanner.scan(/%PDF-\d+\.\d+/)

  # skip_until returns nil once nothing more matches, which ends the loop
  while @scanner.skip_until(
    /(startxref\s+\d+|\d+\s+\d+\s+obj|\/Root\s+\d+\s+\d+\s+R)/)

    hit = @scanner.matched

    # the first character is enough to tell the three alternatives apart
    case hit[0]
    when 's'
      @xref = hit.split(' ').last.to_i
    when '/'
      @root = extract_ref(hit)
    else
      # NOTE(review): Obj.extract presumably reads the object from the
      # scanner and advances past it -- confirm in Podoff::Obj
      obj = Podoff::Obj.extract(self)
      @objs[obj.ref] = obj
      @obj_counters[obj.ref] = (@obj_counters[obj.ref] || 0) + 1
    end
  end

  # no /Root met by the main pass: rescan from the start for /Root only,
  # keeping the last occurrence
  if @root.nil?
    @scanner.pos = 0
    while @scanner.skip_until(/\/Root\s+\d+\s+\d+\s+R/)
      @root = extract_ref(@scanner.matched)
    end
  end
end
parse(s) click to toggle source
# File lib/podoff.rb, line 31
# Builds a document straight from the PDF content +s+.
#
# The constructor requires an encoding; when none is given here, the name
# of the string's own encoding is used.
#
# @param s [String] the whole PDF content
# @param encoding [String, nil] encoding name, defaults to s.encoding.name
# @return [Podoff::Document]
# @raise [ArgumentError] when +s+ is not a PDF (raised by the constructor)
def self.parse(s, encoding=nil)

  encoding ||= s.encoding.name

  Podoff::Document.new(s, encoding)
end

Public Instance Methods

add(obj) click to toggle source
# File lib/podoff.rb, line 160
# Registers +obj+ under its ref, both among the document objects and among
# the additions.
#
# @param obj [#ref] the object to register
# @return the same +obj+
def add(obj)

  [ @objs, @additions ].each { |registry| registry[obj.ref] = obj }

  obj
end
add_base_font(name) click to toggle source
# File lib/podoff.rb, line 168
# Adds a Type1 font object for the base font +name+ under a fresh ref.
#
# @param name [String] font name, with or without its leading '/'
# @return whatever #add returns for the new font object
def add_base_font(name)

  font = name[0] == '/' ? name[1..-1] : name
  ref = new_ref

  add(
    Obj.new(
      self,
      ref,
      source:
        "#{ref} obj <</Type /Font /Subtype /Type1 /BaseFont /#{font}>> endobj"))
end
add_stream(src=nil, &block) click to toggle source
# File lib/podoff.rb, line 178
# Adds a stream object under a fresh ref.
#
# With +src+, the object source is built right away around that content
# and the block is ignored. Without +src+, a stream is made via
# #make_stream (evaluating the block, if any) and handed to the new object.
#
# @param src [String, nil] ready-made stream content
# @return the added object when +src+ is given, else the stream
def add_stream(src=nil, &block)

  ref = new_ref

  src =
    src &&
    [
      "#{ref} obj",
      # /Length is a byte count, not a character count; measure exactly
      # what the interpolation writes out
      "<< /Length #{src.to_s.bytesize} >>\nstream\n#{src}\nendstream",
      "endobj"
    ].join("\n")

  str =
    src ?
    nil :
    make_stream(&block)

  obj = add(Obj.new(self, ref, source: src, stream: str))

  str || obj
end
dup() click to toggle source
# File lib/podoff.rb, line 104
# Returns a copy of this document built without running the constructor:
# a new scanner over the same source, and every object duplicated against
# the copy.
#
# @return [Podoff::Document]
def dup

  o = self

  self.class.allocate.instance_eval do

    @encoding = o.encoding

    @scanner = ::StringScanner.new(o.source)
    @version = o.version # was left unset, so copy.version returned nil
    @xref = o.xref

    @objs = o.objs.inject({}) { |h, (k, v)| h[k] = v.dup(self); h }
    @obj_counters = o.obj_counters.dup

    @root = o.root

    # #add stores one and the same instance in objs and additions; keep
    # that sharing in the copy so that a change made through objs also
    # reaches the addition that gets written out
    @additions =
      o.additions.inject({}) { |h, (k, v)|
        h[k] = o.objs[k].equal?(v) ? @objs[k] : v.dup(self)
        h
      }

    self
  end
end
new_ref() click to toggle source
# File lib/podoff.rb, line 153
# Returns a ref not used by the document yet: the highest known object
# number plus one, at generation 0.
#
# Numbering starts at 1, even for a document without objects: object 0 is
# the free entry that #write_xref always emits at the head of the table.
#
# @return [String] a ref like "13 0"
def new_ref

  highest =
    @objs.keys.inject(0) { |max, r| [ max, r.split(' ').first.to_i ].max }

  "#{highest + 1} 0"
end
page(index) click to toggle source
# File lib/podoff.rb, line 142
# Returns a page by 1-based index. Negative indexes count from the end
# (-1 is the last page) and 0 yields nil.
#
# @param index [Integer]
# @return the entry of #pages, or nil when there is none
def page(index)

  return pages[index] if index < 0
  return nil if index == 0

  pages[index - 1]
end
pages() click to toggle source
# File lib/podoff.rb, line 127
# Returns the objects referenced by the :kids attribute of the first
# object whose type is '/Pages'.
#
# @return [Array] one entry per ref, nil for refs the document does not hold
# @raise [ArgumentError] when the document has no '/Pages' object
def pages

  # (selecting every '/Page' object was the discarded alternative)

  tree = @objs.each_value.find { |o| o.type == '/Pages' }

  unless tree
    raise ArgumentError,
      "no /Pages, the PDF is not usable by Podoff as is, you have to do " +
      "`qpdf --object-streams=disable original.pdf unpacked.pdf` " +
      "and use unpacked.pdf instead of original.pdf"
  end

  extract_refs(tree.attributes[:kids]).map { |ref| @objs[ref] }
end
re_add(obj_or_ref) click to toggle source
# File lib/podoff.rb, line 200
# Adds an already known object to the additions, going through a replica
# of it unless the object is a replica itself.
#
# @param obj_or_ref [String, Object] a ref to look up in the document, or
#   the object itself
# @return whatever #add returns
def re_add(obj_or_ref)

  target = obj_or_ref.is_a?(String) ? @objs[obj_or_ref] : obj_or_ref

  add(target.replica? ? target : target.replicate)
end
rewrite(path=:string, encoding=nil) click to toggle source
# File lib/podoff.rb, line 254
def rewrite(path=:string, encoding=nil)

  encoding ||= @encoding

  f =
    case path
      when :string, '-' then StringIO.new
      when String then File.open(path, 'wb')
      else path
    end
  f.set_encoding(encoding)

  v = source.match(/%PDF-\d+\.\d+/)[0]
  f.write(v)
  f.write("\n")

  pointers = {}

  objs.keys.sort.each do |k|
    pointers[k.split(' ').first.to_i] = f.pos
    f.write(objs[k].source.force_encoding(encoding))
    f.write("\n")
  end

  xref = f.pos

  write_xref(f, pointers)

  f.write("trailer\n")
  f.write("<<\n")
  f.write("/Size #{objs.size + 1}\n")
  f.write("/Root #{root} R\n")
  f.write(">>\n")
  f.write("startxref #{xref}\n")
  f.write("%%EOF\n")

  f.close if path.is_a?(String) || path.is_a?(Symbol)

  f.is_a?(StringIO) ? f.string : nil
end
source() click to toggle source
# File lib/podoff.rb, line 94
# Returns the string the document's scanner works on, that is the content
# the scanner was created with.
#
# @return [String]
def source

  @scanner.string
end
updated?() click to toggle source
# File lib/podoff.rb, line 99
# Tells whether at least one object has been registered in the additions.
#
# @return [Boolean]
def updated?

  ! @additions.empty?
end
write(path=:string, encoding=nil) click to toggle source
# File lib/podoff.rb, line 209
def write(path=:string, encoding=nil)

  encoding ||= @encoding

  f =
    case path
      when :string, '-' then StringIO.new
      when String then File.open(path, 'wb')
      else path
    end
  f.set_encoding(encoding) # internal encoding: nil
  #f.set_encoding(encoding, encoding)

  f.write(source)

  if @additions.any?

    pointers = {}

    @additions.values.each do |o|
      f.write("\n")
      pointers[o.ref.split(' ').first.to_i] = f.pos
      f.write(o.to_s.force_encoding(encoding))
    end
    f.write("\n\n")

    xref = f.pos

    write_xref(f, pointers)

    f.write("trailer\n")
    f.write("<<\n")
    f.write("/Prev #{self.xref}\n")
    f.write("/Size #{objs.size + 1}\n")
    f.write("/Root #{root} R\n")
    f.write(">>\n")
    f.write("startxref #{xref}\n")
    f.write("%%EOF\n")
  end

  f.close if path.is_a?(String) || path.is_a?(Symbol)

  f.is_a?(StringIO) ? f.string : nil
end

Protected Instance Methods

extract_ref(s) click to toggle source
# File lib/podoff.rb, line 325
# Reduces +s+ to its digits, separated by single spaces: whitespace runs
# are collapsed, everything that is neither a digit nor a space is
# dropped, and the result is stripped ("/Root 1 0 R" becomes "1 0").
#
# @param s [String]
# @return [String]
def extract_ref(s)

  collapsed = s.gsub(/\s+/, ' ')

  collapsed.delete('^0-9 ').strip
end
extract_refs(s) click to toggle source
# File lib/podoff.rb, line 330
# Lists every "N G" ref that is followed by " R" in +s+, whitespace runs
# counting as a single space.
#
# @param s [String] for instance "[3 0 R 4 0 R]"
# @return [Array<String>] the refs, in order of appearance
def extract_refs(s)

  collapsed = s.gsub(/\s+/, ' ')

  collapsed.scan(/(\d+ \d+) R/).flatten
end
make_stream(&block) click to toggle source
# File lib/podoff.rb, line 317
# Creates a Stream and, when a block is given, evaluates that block with
# the stream as self.
#
# @return [Stream]
def make_stream(&block)

  Stream.new.tap { |stream| stream.instance_exec(&block) if block }
end
write_xref(f, pointers) click to toggle source
# File lib/podoff.rb, line 297
# Writes an xref section to +f+: the free entry for object 0, then one
# subsection per run of consecutive object numbers found in +pointers+.
#
# @param f [#write] the output
# @param pointers [Hash{Integer => Integer}] object number => offset
def write_xref(f, pointers)

  f.write("xref\n")
  f.write("0 1\n")
  f.write("0000000000 65535 f \n")

  # start from no run at all, so that empty pointers yield no subsection
  # (seeding with one empty run produced a bogus " 0" header line)
  runs =
    pointers.keys.sort.inject([]) { |rs, k|
      if rs.empty? || k > rs.last.last + 1
        rs << [ k ]
      else
        rs.last << k
      end
      rs
    }

  runs.each { |run|
    f.write("#{run.first} #{run.size}\n")
    run.each { |k| f.write(sprintf("%010d 00000 n \n", pointers[k])) }
  }
end