class TesseractFFI::Tesseract
class Tesseract
Attributes
errors[R]
file_name[RW]
hocr_text[R]
language[RW]
source_resolution[RW]
utf8_text[R]
Public Class Methods
new(file_name: nil, language: 'eng', source_resolution: 72, oem: DEFAULT)
click to toggle source
# File lib/tesseract_ffi/tesseract.rb, line 14
# Builds a Tesseract wrapper for one image file.
#
# file_name::         must be a String naming a file that exists on disk
# language::          stored in @language; setup hands it to tess_init
# source_resolution:: stored in @source_resolution; ocr hands it to
#                     tess_set_source_resolution
# oem::               stored in @oem; setup hands it to tess_init
#
# Raises TessException when file_name is not a String or the file does not
# exist. The problem is logged first.
def initialize(file_name: nil, language: 'eng', source_resolution: 72, oem: DEFAULT)
  usable_file = file_name.is_a?(String) && File.exist?(file_name)
  unless usable_file
    # Interpolate rather than concatenate: file_name may be a non-String
    # (e.g. an Integer), and String#+ would raise TypeError before the
    # intended TessException could be raised.
    log "Error: Tesseract needs a file #{file_name || 'no name given'}"
    raise TessException.new(error_msg: 'file_name must be provided')
  end

  @file_name = file_name
  @language = language
  @source_resolution = source_resolution
  @oem = oem
  @errors = []
end
Public Instance Methods
convert_to_pdf(output_stem)
click to toggle source
# File lib/tesseract_ffi/tesseract.rb, line 69
# Processes @file_name through a PDF renderer.
#
# output_stem:: forwarded as the first argument of tess_pdf_renderer_create
#               (presumably the output path without extension - confirm
#               against the binding).
#
# The work runs inside setup, so the handle is created beforehand and
# released afterwards. Returns whatever tess_process_pages returns.
# NOTE(review): the renderer is not explicitly released here - confirm
# whether the binding needs that.
def convert_to_pdf(output_stem)
  setup do
    renderer = TesseractFFI.tess_pdf_renderer_create(
      output_stem,
      TesseractFFI.tess_get_datapath(@handle),
      false
    )
    # 5000 is presumably a timeout in milliseconds - verify against the binding.
    TesseractFFI.tess_process_pages(@handle, @file_name, nil, 5000, renderer)
  end
end
log(msg)
click to toggle source
Just outputs the message to the console.
# File lib/tesseract_ffi/tesseract.rb, line 28
# Writes msg to standard output.
def log(msg)
  $stdout.puts(msg)
end
ocr()
click to toggle source
rubocop:enable Metrics/AbcSize, Metrics/MethodLength
# File lib/tesseract_ffi/tesseract.rb, line 55
# Runs recognition on the current handle and stores the results.
#
# Applies @source_resolution, calls tess_recognize, then fills @utf8_text
# and @hocr_text from tess_get_utf8 and tess_get_hocr.
#
# Raises TessException ('Recognition Error') when tess_recognize returns
# anything other than 0.
def ocr
  tess_set_source_resolution(@handle, @source_resolution)

  recognition_status = tess_recognize(@handle, 0)
  unless recognition_status == 0
    raise TessException.new(error_msg: 'Recognition Error')
  end

  @utf8_text = tess_get_utf8(@handle, 0)
  @hocr_text = tess_get_hocr(@handle, 0)
end
recognize()
click to toggle source
# File lib/tesseract_ffi/tesseract.rb, line 63
# Runs ocr inside setup, so the handle is created before recognition and
# released afterwards.
def recognize
  setup { ocr }
end
setup() { || ... }
click to toggle source
rubocop:disable Metrics/AbcSize, Metrics/MethodLength
# File lib/tesseract_ffi/tesseract.rb, line 33
# Creates and initialises a handle, loads @file_name as the image, yields
# to the caller's block, and always releases the handle afterwards.
#
# Returns the value of the block.
#
# Raises TessException when the handle cannot be created ('Library Error'),
# when tess_init returns non-zero ('Init Error'), or when tess_set_image
# returns non-zero. Any TessException (including one raised by the block)
# is appended to @errors, logged, and re-raised.
def setup
  @handle = tess_create
  raise TessException.new(error_msg: 'Library Error') unless @handle

  result = tess_init(@handle, 0, @language, @oem)
  raise TessException.new(error_msg: 'Init Error') if result != 0

  # NOTE(review): the image returned by tess_pix_read is not released
  # here - confirm whether the binding frees it.
  @image = tess_pix_read(@file_name)
  image_status = tess_set_image(@handle, @image)
  raise TessException.new(error_msg: "Unable to set image #{@file_name}") if image_status != 0

  yield # run the block for recognition etc
rescue TessException => e
  @errors << "Tesseract Error #{e.error[:error_msg]}"
  log @errors
  raise
ensure
  # Only release a handle that was actually created. On the 'Library Error'
  # path @handle is falsy and must not be passed to the library.
  if @handle
    tess_end(@handle)
    tess_delete(@handle)
    # Forget the released handle so a later failing tess_create cannot
    # cause it to be released a second time.
    @handle = nil
  end
end