# Parses a PostScript text line and returns a line with the individual characters placed (approximately) at the right position.
# File misc/pdfparse.rb, line 391
#
# Renders the operand of a PostScript/PDF text-showing operator into a plain
# string, approximating horizontal positioning with space characters.
#
# The operand is either a single "(text)" string or an array such as
# "[(Hel)-20(lo) 300 (World)]" where the numbers found between the
# parenthesised strings are spacing adjustments.
#
# @param str     [String]  raw operand text; kept unmodified in @raw
# @param x       [Numeric] stored in @x (only used here for the verbose trace)
# @param y       [Numeric] stored in @y (only used here for the verbose trace)
# @param fontx   [Numeric] stored in @fontx
# @param fonty   [Numeric] stored in @fonty
# @param charspc [Numeric] extra spacing added after every character when > 0.1
# @param wordspc [Numeric] extra spacing added after every space when > 0.1
#
# Relies on the CHARWIDTH constant defined elsewhere in this file to convert
# spacing amounts into a number of space characters.
# NOTE(review): the unit of CHARWIDTH is not visible from here; confirm.
def initialize(str, x, y, fontx, fonty, charspc, wordspc)
  @raw, @charspc, @wordspc = str, charspc, wordspc
  @x, @y, @fontx, @fonty = x, y, fontx, fonty

  # An array operand is wrapped in [ ]: drop the brackets.
  str = str[1...-1] if str.start_with?('[')

  @str = ''.dup
  bs = char = false # bs: previous char was a backslash; char: inside ( ... )
  spc = ''.dup      # text seen between two strings (the spacing number)

  # Iterate over one-character Strings rather than bytes: on Ruby >= 1.9 the
  # Integers yielded by each_byte never compare equal to character literals.
  str.each_char do |c|
    unless bs
      # special chars (unescaped)
      case c
      when '(' # new word: honor word spacing
        spc = (-spc.to_f / CHARWIDTH).round
        if spc > 0 && !@str.empty?
          @str << (' ' * spc)
        elsif spc < 0
          # Negative adjustment: eat back trailing spaces, one per unit.
          @str.chop! while @str.end_with?(' ') && (spc += 1) <= 0
        end
        char = true
        next
      when '\\' # backslash: the next character is escaped
        bs = true
        next
      when ')' # end of word
        char = false
        spc = ''.dup
        next
      end
    end

    # octal escape sequence: leave as is (actual char depends on font)
    @str << '\\' if bs && ('0'..'7').include?(c)
    bs = false

    if char
      # update current rendered string, honoring charspc / wordspc
      @str << c
      @str << (' ' * (charspc * 1000 / CHARWIDTH).round) if charspc > 0.1
      @str << (' ' * (wordspc * 1000 / CHARWIDTH).round) if c == ' ' && wordspc > 0.1
    else
      # between strings: accumulate the spacing number
      spc << c
    end
  end

  puts "(#{x}, #{y} #{fontx}, #{fonty}) #@str" if $VERBOSE
end
# File misc/pdfparse.rb, line 439
#
# Returns the rendered text held in @str.
def to_s
  @str
end