class ActiveAnalysis::Analyzer::PDFAnalyzer

Extracts the width and height (in pixels) and the number of pages from a PDF blob.

Example:

ActiveAnalysis::Analyzer::PDFAnalyzer.new(blob).metadata
# => { width: 4104, height: 2736, pages: 10 }

This analyzer requires the poppler system library, which is not provided by Rails.

Public Class Methods

accept?(blob) click to toggle source
# File lib/active_analysis/analyzer/pdf_analyzer.rb, line 16
# Tells whether this analyzer can handle +blob+.
#
# The blob must report the "application/pdf" content type; when it does, the
# answer is whatever +pdfinfo_exists?+ returns. When it does not, the result
# of the content-type comparison is returned and pdfinfo is never probed.
def accept?(blob)
  pdf_blob = blob.content_type == "application/pdf"
  pdf_blob && pdfinfo_exists?
end
pdfinfo_exists?() click to toggle source
# File lib/active_analysis/analyzer/pdf_analyzer.rb, line 24
# Checks whether the pdfinfo executable can be run.
#
# The check runs `<pdfinfo_path> -v` with stdout and stderr discarded and
# stores the outcome in @pdfinfo_exists, so the command is executed at most
# once. Kernel#system yields true on a zero exit status, false on a non-zero
# one and nil when the command could not be executed at all; that value is
# returned as-is.
def pdfinfo_exists?
  unless defined?(@pdfinfo_exists)
    @pdfinfo_exists = system(pdfinfo_path, "-v", out: File::NULL, err: File::NULL)
  end

  @pdfinfo_exists
end
pdfinfo_path() click to toggle source
# File lib/active_analysis/analyzer/pdf_analyzer.rb, line 20
# Path of the pdfinfo executable.
#
# Uses the :pdfinfo entry of ActiveStorage.paths when one is set and falls
# back to the bare command name "pdfinfo" otherwise.
def pdfinfo_path
  configured_path = ActiveStorage.paths[:pdfinfo]
  configured_path || "pdfinfo"
end

Public Instance Methods

metadata() click to toggle source
# File lib/active_analysis/analyzer/pdf_analyzer.rb, line 31
# Builds the metadata hash for the blob.
#
# Collects +width+, +height+ and +pages+ under the keys :width, :height and
# :pages (in that order) and leaves out every entry whose value is nil.
def metadata
  measurements = { width: width, height: height, pages: pages }
  measurements.reject { |_key, value| value.nil? }
end

Private Instance Methods

bottom() click to toggle source
# File lib/active_analysis/analyzer/pdf_analyzer.rb, line 53
# Second CropBox entry (index 1), strictly converted to a Float.
#
# Kernel#Float raises when the entry is missing or not numeric.
def bottom
  lower_edge = cropbox[1]
  Float(lower_edge)
end
cropbox() click to toggle source
# File lib/active_analysis/analyzer/pdf_analyzer.rb, line 65
# The "CropBox" entry of +info+, split on whitespace into an array of strings.
#
# A missing entry yields an empty array. The result is cached in @cropbox
# after the first call.
def cropbox
  unless defined?(@cropbox)
    raw_box = info["CropBox"] || ""
    @cropbox = raw_box.split
  end

  @cropbox
end
height() click to toggle source
# File lib/active_analysis/analyzer/pdf_analyzer.rb, line 45
# Height derived from the CropBox: +top+ minus +bottom+, rounded down.
#
# Returns nil when no CropBox values are available.
def height
  return unless cropbox.present?

  (top - bottom).floor
end
info() click to toggle source
# File lib/active_analysis/analyzer/pdf_analyzer.rb, line 70
# Parsed pdfinfo output for the blob.
#
# On first use the blob is handed to +download_blob_to_tempfile+ and the
# yielded file is passed to +info_from+; the block's result is kept in @info
# and reused while it is truthy.
def info
  return @info if @info

  @info = download_blob_to_tempfile { |tempfile| info_from(tempfile) }
end
info_from(file) click to toggle source
# File lib/active_analysis/analyzer/pdf_analyzer.rb, line 74
# Runs `pdfinfo -box` on +file+ and parses its output.
#
# file - object responding to +path+ with the location of the PDF on disk.
#
# Each "Key: value" line of the output becomes one entry of the returned
# hash (string keys and string values). If the executable cannot be found
# (Errno::ENOENT) a message is logged at info level and an empty hash is
# returned.
def info_from(file)
  command = [self.class.pdfinfo_path, "-box", file.path]
  report = IO.popen(command) { |pipe| pipe.read }

  report.scan(/^(.*?): *(.*)?/).to_h
rescue Errno::ENOENT
  logger.info "Skipping pdf analysis due to an error"
  {}
end
left() click to toggle source
# File lib/active_analysis/analyzer/pdf_analyzer.rb, line 49
# First CropBox entry (index 0), strictly converted to a Float.
#
# Kernel#Float raises when the entry is missing or not numeric.
def left
  left_edge = cropbox[0]
  Float(left_edge)
end
pages() click to toggle source
# File lib/active_analysis/analyzer/pdf_analyzer.rb, line 36
# Page count taken from the "Pages" entry of +info+.
#
# Returns nil when the entry is absent; otherwise the value is strictly
# converted with Kernel#Integer, which raises on non-numeric text.
def pages
  page_count = info["Pages"]
  return unless page_count

  Integer(page_count)
end
right() click to toggle source
# File lib/active_analysis/analyzer/pdf_analyzer.rb, line 57
# Third CropBox entry (index 2), strictly converted to a Float.
#
# Kernel#Float raises when the entry is missing or not numeric.
def right
  right_edge = cropbox[2]
  Float(right_edge)
end
top() click to toggle source
# File lib/active_analysis/analyzer/pdf_analyzer.rb, line 61
# Fourth CropBox entry (index 3), strictly converted to a Float.
#
# Kernel#Float raises when the entry is missing or not numeric.
def top
  upper_edge = cropbox[3]
  Float(upper_edge)
end
width() click to toggle source
# File lib/active_analysis/analyzer/pdf_analyzer.rb, line 41
# Width derived from the CropBox: +right+ minus +left+, rounded down.
#
# Returns nil when no CropBox values are available.
def width
  return unless cropbox.present?

  (right - left).floor
end