class PDFBeads::PageDataProvider::PageData
Collects the data needed to build an individual page of a PDF document and provides access to that data.
Attributes
basename[R]
bg_created[R]
bg_layer[RW]
fg_created[R]
fg_layer[RW]
height[RW]
hocr_path[R]
name[R]
s_type[R]
stencils[R]
width[RW]
x_res[RW]
y_res[RW]
Public Class Methods
fixResolution( img )
click to toggle source
# File lib/pdfbeads/pdfpage.rb, line 149
# Reports the resolution of +img+ as an [ x, y ] pair.
#
# When the image declares its resolution in pixels per centimeter, its
# +units+ attribute is switched to pixels per inch and the reported values
# are the ones read beforehand, multiplied by 2.54 and rounded. Otherwise
# the image is left untouched and its resolution values are returned as read.
def self.fixResolution( img )
  x_dpi = img.x_resolution
  y_dpi = img.y_resolution
  return [ x_dpi, y_dpi ] unless img.units == PixelsPerCentimeterResolution

  img.units = PixelsPerInchResolution
  [ (x_dpi * 2.54).round, (y_dpi * 2.54).round ]
end
new( path,basename,args,exts,pref )
click to toggle source
# File lib/pdfbeads/pdfpage.rb, line 43
# Sets up an empty page record.
#
# +path+ is kept as the page name (it is later used as the path of the
# source image), +basename+ is the prefix from which names of auxiliary
# files are derived, +args+ is the hash of processing options, and +exts+
# and +pref+ are lists joined into filename-matching patterns when
# auxiliary files are searched for.
#
# The page starts with source type 'b', no stencils, no background or
# foreground layer, and both "created" flags cleared.
def initialize( path,basename,args,exts,pref )
  @name, @basename = path, basename
  @pageargs, @exts, @pref = args, exts, pref

  @s_type = 'b'
  @stencils = []

  @bg_layer = nil
  @fg_layer = nil
  @bg_created = false
  @fg_created = false
end
Public Instance Methods
addSupplementaryFiles()
click to toggle source
# File lib/pdfbeads/pdfpage.rb, line 105
# Searches the current directory for auxiliary files belonging to this page
# and records the ones it finds:
#
# * a background image named <basename>.bg.<ext> or <basename>.sep.<ext>
#   (extensions from the preferred list are tried first), stored in
#   @bg_layer;
# * a color image named <basename>.color.<ext>, which is passed to
#   separateColor when no background was found or when an update is forced
#   and the page type is not 'c';
# * a foreground image named <basename>.fg.<ext>, stored in @fg_layer, but
#   only when the page has exactly one stencil;
# * a hOCR file (<basename>.hocr / .htm / .html), stored in @hocr_path, but
#   only when $has_nokogiri is set and the :pages_per_dict option is present.
#
# All matching is case-insensitive.
def addSupplementaryFiles()
  force = @pageargs[:force_update]
  exts_pattern = @exts.join( '|' )
  pref_pattern = @pref.join( '|' )

  # The base name is a literal file name prefix, so it must be escaped before
  # being embedded into a regular expression; otherwise names containing
  # characters such as '(', '[' or '+' fail to match their own files.
  base = Regexp.escape( @basename )

  # The directory is re-read on every lookup because separateColor may
  # create new files between lookups.
  find_entry = lambda do |pattern|
    Dir.entries( '.' ).detect { |f| pattern.match( f ) }
  end

  if @bg_layer.nil?
    bgpath = find_entry.call( /\A#{base}\.(bg|sep)\.(#{pref_pattern})\Z/i ) ||
             find_entry.call( /\A#{base}\.(bg|sep)\.(#{exts_pattern})\Z/i )
    @bg_layer = bgpath unless bgpath.nil?

    # If updating auxiliary files is requested and the base image is
    # either bitonal or indexed with just a few colors (i. e. doesn't
    # contain any elements which should be placed to the background layer),
    # then the *.color.* image (if present) takes priority over any existing
    # *.bg.* and *.fg.* images. So we should regenerate them.
    if bgpath.nil? or ( force and not @s_type.eql? 'c' )
      colorpath = find_entry.call( /\A#{base}\.color\.(#{exts_pattern})\Z/i )
      separateColor( colorpath ) unless colorpath.nil?
    end
  end

  if @fg_layer.nil? and @stencils.length == 1
    fgpath = find_entry.call( /\A#{base}\.(fg)\.(#{exts_pattern})\Z/i )
    @fg_layer = fgpath unless fgpath.nil?
  end

  if $has_nokogiri and not @pageargs[:pages_per_dict].nil?
    @hocr_path = find_entry.call( /\A#{base}\.(HOCR|HTML?)/i )
  end
end
fillStencilArray()
click to toggle source
# File lib/pdfbeads/pdfpage.rb, line 55
# Inspects the page's source image (@name) and fills @stencils with the
# bitonal masks to be used for this page.
#
# Records the image size in @width / @height and the resolution in
# @x_res / @y_res; the resolution comes from the :st_resolution option when
# that is positive, otherwise from the inspected image.
#
# An image of depth 1 without transparency is added as a single stencil as
# is. Any other image is loaded and handed to processIndexed (only when it
# has a palette) and, if that produced nothing, to processMixed.
#
# Returns the number of stencils reported for the page, or 0 when the
# image width could not be determined.
def fillStencilArray()
  force = @pageargs[:force_update]
  fixed_res = @pageargs[:st_resolution]
  treshold = @pageargs[:threshold]

  stencil = {
    :path => @name,
    :rgb => [0.0, 0.0, 0.0],
    :created => false
  }

  insp = ImageInspector.new( @name )
  return 0 if insp.width.nil?

  @width = insp.width
  @height = insp.height
  if fixed_res > 0
    @x_res = @y_res = fixed_res
  else
    @x_res = insp.x_dpi
    @y_res = insp.y_dpi
  end

  count = 0
  if insp.depth == 1 and insp.trans.nil?
    @stencils << stencil
    count = 1
  else
    img = ImageList.new( @name )
    # ImageMagick reports indexed PNG images as DirectClass and gives fully
    # opaque areas an odd color value, so the presence of a palette is
    # taken from the inspector rather than from the loaded image.
    unless insp.palette.nil?
      img.class_type = PseudoClass
      count = processIndexed( img,@pageargs[:maxcolors],force )
    end
    count = processMixed( img,treshold,force,stencil ) if count == 0
    img.destroy!

    # Collect any RMagick objects that are no longer referenced
    GC.start
  end

  $stderr.puts( "Prepared data for processing #{@name}\n" )
  if insp.nextImage
    $stderr.puts( "Warning: #{@name} contains multiple images, but only the first one")
    $stderr.puts( "\tis going to be used\n" )
  end

  count
end
Private Instance Methods
processIndexed( img,maxcolors,force )
click to toggle source
# File lib/pdfbeads/pdfpage.rb, line 184
# Splits an indexed image into one bitonal stencil per palette color.
#
# +img+ is the loaded image, +maxcolors+ the largest palette size that is
# still handled this way, and +force+ requests regeneration of stencil
# files that already exist.
#
# When the palette has more than +maxcolors+ entries nothing is done and 0
# is returned. Otherwise the page type is set to 'i' and, for every palette
# color except the excluded one ('#00000000' if the image has an alpha
# channel, 'white' otherwise), a Group4-compressed TIFF named
# <basename>.<color>.tiff is written (unless it already exists and +force+
# is not set) and a stencil entry with the file path, the color as RGB
# fractions of QuantumRange and a "created" flag is appended to @stencils.
#
# Returns the number of stencil entries added.
def processIndexed( img,maxcolors,force )
  ncolors = img.number_colors
  return 0 if ncolors > maxcolors

  @s_type = 'i'
  exc = img.alpha? ? '#00000000' : 'white'
  ret = 0

  (0...ncolors).each do |i|
    color = img.colormap( i )
    next if color.eql? exc

    px = Pixel.from_color( color )
    cpath = "#{@basename}.#{color}.tiff"
    created = false

    # File.exist? is used because File.exists? was removed in Ruby 3.2.
    if !File.exist?( cpath ) || force
      bitonal = img.copy
      # Caution: replacing colors in the colormap currently only works
      # if we save the result into a bilevel TIFF file. Otherwise the
      # changes are ignored or produce a strange effect. We still use
      # this method because it allows to reduce the number of memory
      # allocations.
      (0...ncolors).each do |j|
        bitonal.colormap( j,( j == i ) ? 'black' : 'white' )
      end
      bitonal.compress_colormap!
      bitonal.write( cpath ) do |curimg|
        curimg.format = 'TIFF'
        curimg.define( 'TIFF','rows-per-strip',img.rows )
        curimg.compression = Group4Compression
      end
      bitonal.destroy!
      created = true
    end

    @stencils << {
      :path => cpath,
      :rgb => [px.red.to_f/QuantumRange, px.green.to_f/QuantumRange, px.blue.to_f/QuantumRange],
      :created => created
    }
    ret += 1
  end

  ret
end
processMixed( img,treshold,force,map )
click to toggle source
# File lib/pdfbeads/pdfpage.rb, line 229
# Prepares a page whose source image is neither bitonal nor split by
# processIndexed: a thresholded bitonal stencil plus a background image.
#
# +img+ is the loaded source image, +treshold+ the threshold level on a
# 0..255 scale (it is scaled by QuantumRange/255), +force+ requests
# regeneration of existing files and +map+ is the stencil hash to complete.
#
# The stencil is written to <basename>.black.tiff and the background to
# <basename>.bg.<format>; each file is only produced when it does not exist
# yet or +force+ is set. If the requested background format is 'JP2' but
# Magick.formats does not list it, 'JPG' is used instead.
#
# Note that +img+ itself is destroyed when a background is generated.
#
# Afterwards +map+ (with its :path pointing at the stencil file) is
# appended to @stencils, the page type becomes 'c' and @bg_layer is set.
# @bg_created is set only when the background file was actually written.
#
# Always returns 1.
def processMixed( img,treshold,force,map )
  binpath = "#{@basename}.black.tiff"
  # File.exist? is used because File.exists? was removed in Ruby 3.2.
  if !File.exist?( binpath ) || force
    im_copy = img.copy
    bitonal = im_copy.threshold( QuantumRange/255*treshold )
    im_copy.destroy!
    bitonal.write( binpath ) do |curimg|
      curimg.format = 'TIFF'
      curimg.define( 'TIFF','rows-per-strip',img.rows )
      curimg.compression = Group4Compression
    end
    bitonal.destroy!
    map[:created] = true
  end

  bgf = @pageargs[:bg_format]
  bgpath = "#{@basename}.bg." << bgf.downcase

  if !File.exist?( bgpath ) || force
    if treshold > 1
      bk = img.black_threshold( QuantumRange/255*treshold ); img.destroy!; img = bk
    end
    op = img.opaque( 'black','white' ); img.destroy!; img = op
    img.image_type = GrayscaleType if @pageargs[:force_grayscale]
    PageData.fixResolution( img )
    resampled = img.resample( @pageargs[:bg_resolution] ); img.destroy!; img = resampled

    # A hack for some Windows versions of RMagick, which throw an error the
    # first time when Magick.formats is accessed. The counter lives outside
    # the begin block so that retry cannot reset it; once the attempts are
    # used up the error is re-raised instead of looping forever.
    retries = 2
    begin
      mfmts = Magick.formats
    rescue
      retry if ( retries -= 1 ) > 0
      raise
    end

    if bgf.eql? 'JP2' and not mfmts.has_key? 'JP2'
      $stderr.puts( "This version of ImageMagick doesn't support JPEG2000 compression." )
      $stderr.puts( "\tI'll use JPEG compression instead." )
      bgf = 'JPG'
      bgpath = "#{@basename}.bg." << bgf.downcase
    end

    # Only claim the background as created when the write succeeded.
    @bg_created = true if writeImage( img,bgpath,bgf )
    # The resampled image is local to this method, so release it here.
    img.destroy!
  end

  map[:path] = binpath
  @stencils << map
  @s_type = 'c'
  @bg_layer = bgpath
  1
end
separateColor( colorpath )
click to toggle source
# File lib/pdfbeads/pdfpage.rb, line 282
# Builds the background (and, where applicable, foreground) layer of the
# page from a separate color image.
#
# +colorpath+ is the path of the color image; the page's own source image
# (@name) is loaded as the mask. For pages of type 'i' the mask is first
# reduced to black and white by repainting every palette color except the
# excluded one black.
#
# The background is written to <basename>.bg.<format> at the :bg_resolution
# option's resolution; on success @bg_layer and @bg_created are set. When a
# background layer is known and the page type is not 'i', a foreground is
# written to <basename>.fg.<format> at 100 dpi; on success @fg_layer and
# @fg_created are set.
#
# Returns nil if either image cannot be read; an error message is printed
# to standard error in that case.
def separateColor( colorpath )
  fmt = @pageargs[:bg_format]
  dpi = @pageargs[:bg_resolution]

  begin
    img = ImageList.new( colorpath )
  rescue ImageMagickError
    $stderr.puts( "Error reading image file #{colorpath}" )
    return nil
  end

  begin
    mask = ImageList.new( @name )
  rescue ImageMagickError
    $stderr.puts( "Error reading image file #{@name}" )
    # The color image has already been loaded; release it before bailing out.
    img.destroy!
    return nil
  end

  imw = img.columns
  imh = img.rows

  if @s_type.eql? 'i'
    mask.class_type = PseudoClass
    exc = mask.alpha? ? '#00000000' : 'white'
    (0...mask.number_colors).each do |i|
      color = mask.colormap( i )
      next if color.eql? exc

      op = mask.opaque( color,'black' )
      mask.destroy!
      mask = op
    end

    if mask.alpha?
      op = mask.opaque( exc,'white' )
      mask.destroy!
      mask = op
      mask.alpha( DeactivateAlphaChannel )
    end
    mask.compress_colormap!
  end

  PageData.fixResolution( img )
  mask.resize!( imw,imh ) if mask.columns != imw or mask.rows != imh

  no_fg = img.composite( mask,CenterGravity,CopyAlphaCompositeOp )
  bg = no_fg.blur_channel( 0,6,AllChannels )
  bg.alpha( DeactivateAlphaChannel )

  bg.composite!( no_fg,CenterGravity,OverCompositeOp )
  if bg.x_resolution != dpi or bg.y_resolution != dpi
    resampled = bg.resample( dpi ); bg.destroy!; bg = resampled
  end

  bgpath = "#{@basename}.bg." << fmt.downcase
  if writeImage( bg,bgpath,fmt )
    @bg_layer = bgpath
    @bg_created = true
  end

  bg.destroy!
  no_fg.destroy!

  if @bg_layer.nil? or @s_type.eql? 'i'
    mask.destroy!
  else
    ksam = mask.negate
    mask.destroy!

    no_bg = img.composite( ksam,CenterGravity,CopyAlphaCompositeOp )
    fg = no_bg.clone

    # Resize the image to a tiny size and then back to the original size
    # to achieve the desired color diffusion. The idea is inspired by
    # Anthony Thyssen's http://www.imagemagick.org/Usage/scripts/hole_fill_shepards
    # script, which is intended just for this purpose (i. e. removing undesired
    # areas from the image). However our approach is a bit more crude (but still
    # effective).
    # The tiny size is clamped to one pixel so that images narrower or
    # shorter than 100 pixels do not request a zero-sized result.
    fg.resize!( [imw/100, 1].max,[imh/100, 1].max,GaussianFilter )
    fg.resize!( imw,imh,GaussianFilter )
    fg.composite!( no_bg,CenterGravity,OverCompositeOp )
    downs = fg.resample( 100 ); fg.destroy!; fg = downs
    fg.alpha( DeactivateAlphaChannel )

    fgpath = "#{@basename}.fg." << fmt.downcase
    if writeImage( fg,fgpath,fmt )
      @fg_layer = fgpath
      @fg_created = true
    end

    fg.destroy!
    no_bg.destroy!
    ksam.destroy!
  end
  img.destroy!
  # Make sure there are no more RMagick objects still residing in memory
  GC.start
end
writeImage( img,path,fmt )
click to toggle source
# File lib/pdfbeads/pdfpage.rb, line 161
# Writes +img+ to +path+ in the format named by +fmt+.
#
# Format-specific settings applied while writing:
# * 'JP2' - the JP2 options mode=real, numrlvls=4 and rate=0.015625;
# * 'JPG' - quality 50;
# * anything else - Zip compression with quality 95.
#
# Returns true on success. If writing raises an error, a message is printed
# to standard error and false is returned.
def writeImage( img,path,fmt )
  img.write( path ) do |settings|
    if fmt == 'JP2'
      settings.define( 'JP2','mode','real' )
      settings.define( 'JP2','numrlvls',4 )
      settings.define( 'JP2','rate',0.015625 )
    elsif fmt == 'JPG'
      settings.quality = 50
    else
      settings.compression = ZipCompression
      settings.quality = 95
    end
    settings.format = fmt
  end
  true
rescue
  $stderr.puts( "Error: could not write to #{path}" )
  false
end