class Pdfh::PdfHandler
Handles the Pdf document text extraction and password removal TODO: Replace command utils with this gem
require 'pdf-reader' reader = PDF::Reader.new(temp) reader.pages.each do |page| @text << page.text end
Attributes
file[R]
Public Class Methods
new(file, password)
click to toggle source
@return [self]
# File lib/pdfh/pdf_handler.rb, line 16 def initialize(file, password) @file = file @password_option = password ? "--password=#{password.inspect} " : "" end
Public Instance Methods
extract_text()
click to toggle source
Gets the text from the pdf in order to execute the regular expresion matches @return [String]
# File lib/pdfh/pdf_handler.rb, line 25 def extract_text temp = `mktemp`.chomp Pdfh.verbose_print "~~~~~~~~~~~~~~~~~~ Extract PDF text" Pdfh.verbose_print " --> #{temp.inspect} temporal file assigned." cmd = %(qpdf #{@password_option}--decrypt --stream-data=uncompress #{@file.inspect} #{temp.inspect}) Pdfh.verbose_print " DeCrypt Command: #{cmd}" _result = `#{cmd}` cmd2 = %(pdftotext -enc UTF-8 #{temp.inspect} -) Pdfh.verbose_print " Extract Command: #{cmd2}" text = `#{cmd2}` Pdfh.verbose_print " Text: #{text.inspect}" text end
write_new_pdf(dir_path, full_path)
click to toggle source
@return [void]
# File lib/pdfh/pdf_handler.rb, line 42 def write_new_pdf(dir_path, full_path) Pdfh.verbose_print "~~~~~~~~~~~~~~~~~~ Writing PDFs" raise IOError, "Path #{dir_path} not found." unless Dir.exist?(dir_path) cmd = %(qpdf #{@password_option}--decrypt #{@file.inspect} #{full_path.inspect}) Pdfh.verbose_print " Write PDF Command: #{cmd}" return if Pdfh.dry? _result = `#{cmd}` raise IOError, "New PDF file #{full_path.inspect} was not created." unless File.file?(full_path) end