class PDFBeads::PageDataProvider::PageData

Collects the data needed to build an individual page of a PDF document and provides access to that data.

Attributes

basename[R]
bg_created[R]
bg_layer[RW]
fg_created[R]
fg_layer[RW]
height[RW]
hocr_path[R]
name[R]
s_type[R]
stencils[R]
width[RW]
x_res[RW]
y_res[RW]

Public Class Methods

fixResolution( img ) click to toggle source
# File lib/pdfbeads/pdfpage.rb, line 149
# Reports the resolution of +img+ expressed in pixels per inch.
#
# When the image declares its resolution in pixels per centimeter, its units
# are switched to pixels per inch and the returned figures are the original
# ones multiplied by 2.54 and rounded. Otherwise the image is left untouched
# and its resolution values are returned as they are.
#
# img:: an object responding to +x_resolution+, +y_resolution+, +units+
#       and +units=+
#
# Returns a two-element array: [ horizontal, vertical ].
def self.fixResolution( img )
  dpi = [ img.x_resolution, img.y_resolution ]
  return dpi unless img.units == PixelsPerCentimeterResolution

  img.units = PixelsPerInchResolution
  dpi.map { |value| ( value * 2.54 ).round }
end
new( path,basename,args,exts,pref ) click to toggle source
# File lib/pdfbeads/pdfpage.rb, line 43
# Sets up an empty page record.
#
# path::     stored as the page name (+@name+)
# basename:: stored as +@basename+
# args::     stored as +@pageargs+
# exts::     stored as +@exts+
# pref::     stored as +@pref+
#
# The page starts with stencil type 'b', an empty stencil list, no
# background or foreground layer, and both "created" flags cleared.
def initialize( path,basename,args,exts,pref )
  @name, @basename = path, basename
  @pageargs, @exts, @pref = args, exts, pref

  @s_type   = 'b'
  @stencils = []

  @bg_layer   = nil
  @fg_layer   = nil
  @bg_created = false
  @fg_created = false
end

Public Instance Methods

addSupplementaryFiles() click to toggle source
# File lib/pdfbeads/pdfpage.rb, line 105
# Looks in the current directory for auxiliary files that belong to this
# page and records the ones it finds:
#
# * a background layer (<basename>.bg.* or <basename>.sep.*), trying the
#   extensions in +@pref+ before those in +@exts+;
# * a <basename>.color.* image, which is handed to separateColor when no
#   background was found, or when :force_update is set and the page type
#   is not 'c';
# * a foreground layer (<basename>.fg.*), only if the page has exactly one
#   stencil and no foreground layer yet;
# * an hOCR/HTML file, only if $has_nokogiri is set and :pages_per_dict is
#   present in the page arguments.
#
# The base name is matched literally (regexp metacharacters in it are
# escaped) and the dots separating the file name parts must be real dots.
def addSupplementaryFiles()
  force = @pageargs[:force_update]
  exts_pattern = @exts.join( '|' )
  pref_pattern = @pref.join( '|' )
  base = Regexp.escape( @basename )

  # The directory is listed anew for every lookup, because separateColor
  # may have written new files in between.
  find_file = lambda do |pattern|
    Dir.entries('.').detect { |f| pattern.match( f ) }
  end

  if @bg_layer.nil?
    bgpath = find_file.call( /\A#{base}\.(bg|sep)\.(#{pref_pattern})\z/i ) ||
             find_file.call( /\A#{base}\.(bg|sep)\.(#{exts_pattern})\z/i )
    @bg_layer = bgpath unless bgpath.nil?

    # If updating auxiliary files is requested and the base image is
    # either bitonal or indexed with just a few colors (i. e. doesn't
    # contain any elements which should be placed to the background layer),
    # then the *.color.* image (if present) takes priority over any existing
    # *.bg.* and *.fg.* images. So we should regenerate them.
    if bgpath.nil? || ( force && !@s_type.eql?( 'c' ) )
      colorpath = find_file.call( /\A#{base}\.color\.(#{exts_pattern})\z/i )
      separateColor( colorpath ) unless colorpath.nil?
    end
  end

  if @fg_layer.nil? && @stencils.length == 1
    fgpath = find_file.call( /\A#{base}\.fg\.(#{exts_pattern})\z/i )
    @fg_layer = fgpath unless fgpath.nil?
  end

  if $has_nokogiri && !@pageargs[:pages_per_dict].nil?
    # Deliberately not anchored at the end, as in the original pattern.
    @hocr_path = find_file.call( /\A#{base}\.(HOCR|HTML?)/i )
  end
end
fillStencilArray() click to toggle source
# File lib/pdfbeads/pdfpage.rb, line 55
# Inspects the page image (+@name+) and fills +@stencils+ accordingly.
#
# The page width and height are taken from the image. The resolution comes
# from the :st_resolution page argument when it is positive and from the
# image otherwise. A 1-bit image without transparency is used as the sole
# stencil as it is; any other image is loaded and passed to processIndexed
# (only if it has a palette) and, if that yields nothing, to processMixed.
#
# Returns the number of stencils reported by those steps, or 0 if the
# inspector could not determine the image width.
def fillStencilArray()
  update_forced = @pageargs[:force_update]
  forced_dpi    = @pageargs[:st_resolution]
  level         = @pageargs[:threshold]

  stencil = { :path => @name, :rgb => [0.0, 0.0, 0.0], :created => false }

  info = ImageInspector.new( @name )
  return 0 if info.width.nil?

  @width  = info.width
  @height = info.height
  if forced_dpi > 0
    @x_res = @y_res = forced_dpi
  else
    @x_res = info.x_dpi
    @y_res = info.y_dpi
  end

  count = 0
  if info.depth == 1 && info.trans.nil?
    @stencils << stencil
    count = 1
  else
    picture = ImageList.new( @name )
    # ImageMagick incorrectly identifies indexed PNG images as DirectClass.
    # It also assigns a strange color value to fully opaque areas. So
    # we have to use an independent approach to recognize indexed images.
    unless info.palette.nil?
      picture.class_type = PseudoClass
      count = processIndexed( picture,@pageargs[:maxcolors],update_forced )
    end
    count = processMixed( picture,level,update_forced,stencil ) if count == 0
    picture.destroy!

    # Make sure there are no more RMagick objects
    GC.start
  end

  $stderr.puts( "Prepared data for processing #{@name}\n" )
  if info.nextImage
    $stderr.puts( "Warning: #{@name} contains multiple images, but only the first one")
    $stderr.puts( "\tis going to be used\n" )
  end

  count
end

Private Instance Methods

processIndexed( img,maxcolors,force ) click to toggle source
# File lib/pdfbeads/pdfpage.rb, line 184
# Splits an indexed image into one bitonal stencil per palette color.
#
# img::       the indexed image to process
# maxcolors:: the image is handled here only if it has at most this many
#             colors
# force::     when true, stencil files are regenerated even if they exist
#
# Every palette entry except the excluded one ('#00000000' if the image has
# an alpha channel, 'white' otherwise) gets a file named
# "<basename>.<color>.tiff" and an entry in +@stencils+ holding the file
# path, the color as RGB fractions of QuantumRange, and a flag telling
# whether the file was written by this call. The page type is set to 'i'
# whenever the color count is within the limit.
#
# Returns the number of stencils added (0 if the image has too many colors).
def processIndexed( img,maxcolors,force )
  ncolors = img.number_colors
  return 0 if ncolors > maxcolors

  @s_type = 'i'
  exc = img.alpha? ? '#00000000' : 'white'
  ret = 0

  ( 0...ncolors ).each do |i|
    color = img.colormap( i )
    next if color.eql? exc

    px = Pixel.from_color( color )
    cpath = "#{@basename}.#{color}.tiff"
    created = false
    # File.exist? is used because File.exists? was removed in Ruby 3.2.
    if !File.exist?( cpath ) || force
      bitonal = img.copy
      # Caution: replacing colors in the colormap currently only works
      # if we save the result into a bilevel TIFF file. Otherwise the
      # changes are ignored or produce a strange effect. We still use
      # this method because it allows to reduce the number of memory
      # allocations.
      ( 0...ncolors ).each do |j|
        crepl = ( j == i ) ? 'black' : 'white'
        bitonal.colormap( j,crepl )
      end
      bitonal.compress_colormap!
      bitonal.write( cpath ) do |curimg|
        curimg.format = 'TIFF'
        curimg.define( 'TIFF','rows-per-strip',img.rows )
        curimg.compression = Group4Compression
      end
      bitonal.destroy!
      created = true
    end

    @stencils << {
      :path => cpath,
      :rgb  => [ px.red.to_f/QuantumRange, px.green.to_f/QuantumRange, px.blue.to_f/QuantumRange ],
      :created => created
    }
    ret += 1
  end

  ret
end
processMixed( img,treshold,force,map ) click to toggle source
# File lib/pdfbeads/pdfpage.rb, line 229
# Separates a non-indexed image into a bitonal stencil and a background
# layer.
#
# img::      the page image; it is destroyed here when the background has
#            to be generated
# treshold:: threshold level on a 0..255 scale, rescaled to QuantumRange
# force::    when true, both files are regenerated even if they exist
# map::      stencil description hash; its :path and :created entries are
#            updated and the hash is appended to +@stencils+
#
# The stencil is written to "<basename>.black.tiff" and the background to
# "<basename>.bg.<format>", where the format is the :bg_format page
# argument (JPG replaces JP2 if ImageMagick lacks JPEG2000 support). Sets
# the page type to 'c' and +@bg_layer+ to the background path.
#
# Always returns 1.
def processMixed( img,treshold,force,map )
  binpath = "#{@basename}.black.tiff"
  # File.exist? is used because File.exists? was removed in Ruby 3.2.
  if !File.exist?( binpath ) || force
    im_copy = img.copy
    bitonal = im_copy.threshold( QuantumRange/255*treshold )
    im_copy.destroy!
    bitonal.write( binpath ) do |curimg|
      curimg.format = 'TIFF'
      curimg.define( 'TIFF','rows-per-strip',img.rows )
      curimg.compression = Group4Compression
    end
    bitonal.destroy!
    map[:created] = true
  end

  bgf = @pageargs[:bg_format]
  bgpath = "#{@basename}.bg.#{bgf.downcase}"

  if !File.exist?( bgpath ) || force
    if treshold > 1
      bk = img.black_threshold( QuantumRange/255*treshold ); img.destroy!; img = bk
    end
    op = img.opaque( 'black','white' ); img.destroy!; img = op
    img.image_type = GrayscaleType if @pageargs[:force_grayscale]
    PageData.fixResolution( img )
    resampled = img.resample( @pageargs[:bg_resolution] ); img.destroy!; img = resampled

    # A hack for some Windows versions of RMagick, which throw an error the
    # first time when Magick.formats is accessed. The counter lives outside
    # the begin block so that a retry does not reset it; otherwise a
    # persistent failure would loop forever.
    retries = 2
    begin
      mfmts = Magick.formats
    rescue
      retry if ( retries -= 1 ) > 0
      mfmts = nil
    end
    # An unavailable format list is treated as "no JPEG2000 support".
    if bgf.eql?( 'JP2' ) && ( mfmts.nil? || !mfmts.key?( 'JP2' ) )
      $stderr.puts( "This version of ImageMagick doesn't support JPEG2000 compression." )
      $stderr.puts( "\tI'll use JPEG compression instead." )
      bgf = 'JPG'
      bgpath = "#{@basename}.bg.#{bgf.downcase}"
    end

    # Flag the background as created only when it has really been written,
    # the same way separateColor does.
    @bg_created = true if writeImage( img,bgpath,bgf )
  end

  map[:path] = binpath
  @stencils << map
  @s_type = 'c'
  @bg_layer = bgpath
  1
end
separateColor( colorpath ) click to toggle source
# File lib/pdfbeads/pdfpage.rb, line 282
# Builds the background layer (and, unless the page is indexed, the
# foreground layer) from a separate color image, using the page image
# (+@name+) as the mask.
#
# colorpath:: path to the color image
#
# The layers are written with writeImage to "<basename>.bg.<format>" and
# "<basename>.fg.<format>", where the format is the :bg_format page
# argument. +@bg_layer+ / +@bg_created+ and +@fg_layer+ / +@fg_created+ are
# updated only for the files that were written successfully.
#
# Returns nil. If either image cannot be read, an error is printed to
# stderr and nothing else is done.
def separateColor( colorpath )
  fmt = @pageargs[:bg_format]
  dpi = @pageargs[:bg_resolution]

  begin
    img = ImageList.new( colorpath )
  rescue ImageMagickError
    $stderr.puts( "Error reading image file #{colorpath}" )
    return nil
  end

  begin
    mask = ImageList.new( @name )
  rescue ImageMagickError
    $stderr.puts( "Error reading image file #{@name}" )
    # The color image has already been loaded; release it before bailing out.
    img.destroy!
    return nil
  end

  imw = img.columns
  imh = img.rows

  if @s_type.eql? 'i'
    mask.class_type = PseudoClass
    exc = ( mask.alpha? ) ? '#00000000' : 'white'
    for i in ( 0...mask.number_colors )
      color = mask.colormap( i )
      unless color.eql? exc
        op = mask.opaque( color,'black' )
        mask.destroy!
        mask = op
      end
    end

    if mask.alpha?
      op = mask.opaque( exc,'white' )
      mask.destroy!
      mask = op
      mask.alpha( DeactivateAlphaChannel )
    end
    mask.compress_colormap!
  end

  PageData.fixResolution( img )
  mask.resize!( imw,imh ) if mask.columns != imw || mask.rows != imh

  no_fg = img.composite( mask,CenterGravity,CopyAlphaCompositeOp )
  bg = no_fg.blur_channel( 0,6,AllChannels )
  bg.alpha( DeactivateAlphaChannel )

  bg.composite!( no_fg,CenterGravity,OverCompositeOp )
  if bg.x_resolution != dpi || bg.y_resolution != dpi
    resampled = bg.resample( dpi ); bg.destroy!; bg = resampled
  end

  bgpath = "#{@basename}.bg." << fmt.downcase
  if writeImage( bg,bgpath,fmt )
    @bg_layer = bgpath
    @bg_created = true
  end

  bg.destroy!
  no_fg.destroy!

  if @bg_layer.nil? || @s_type.eql?( 'i' )
    mask.destroy!
  else
    ksam = mask.negate
    mask.destroy!

    no_bg = img.composite( ksam,CenterGravity,CopyAlphaCompositeOp )
    fg = no_bg.clone

    # Resize the image to a tiny size and then back to the original size
    # to achieve the desired color diffusion. The idea is inspired by
    # Anthony Thyssen's http://www.imagemagick.org/Usage/scripts/hole_fill_shepards
    # script, which is intended just for this purpose (i. e. removing undesired
    # areas from the image). However our approach is a bit more crude (but still
    # effective).
    # The tiny size is clamped to at least one pixel, since the integer
    # division yields 0 for images narrower or shorter than 100 pixels.
    tiny_w = [ imw/100, 1 ].max
    tiny_h = [ imh/100, 1 ].max
    fg.resize!( tiny_w,tiny_h,GaussianFilter )
    fg.resize!( imw,imh,GaussianFilter )
    fg.composite!( no_bg,CenterGravity,OverCompositeOp )
    downs = fg.resample( 100 ); fg.destroy!; fg = downs
    fg.alpha( DeactivateAlphaChannel )

    fgpath = "#{@basename}.fg." << fmt.downcase
    if writeImage( fg,fgpath,fmt )
      @fg_layer = fgpath
      @fg_created = true
    end

    fg.destroy!
    no_bg.destroy!
    ksam.destroy!
  end
  img.destroy!
  # Make sure there are no more RMagick objects still residing in memory
  GC.start
end
writeImage( img,path,fmt ) click to toggle source
# File lib/pdfbeads/pdfpage.rb, line 161
# Saves +img+ to +path+ with encoder settings chosen according to +fmt+.
#
# img::  an object whose +write+ method yields the image being saved
# path:: destination file name
# fmt::  'JP2' and 'JPG' get their own settings; any other value gets
#        Zip compression with quality 95
#
# Returns true on success. If anything raises a StandardError while
# writing, an error is printed to stderr and false is returned.
def writeImage( img,path,fmt )
  img.write( path ) do |target|
    if 'JP2' == fmt
      [ ['mode','real'], ['numrlvls',4], ['rate',0.015625] ].each do |key,value|
        target.define( 'JP2',key,value )
      end
    elsif 'JPG' == fmt
      target.quality = 50
    else
      target.compression = ZipCompression
      target.quality = 95
    end
    target.format = fmt
  end
  true
rescue
  $stderr.puts( "Error: could not write to #{path}" )
  false
end