class Despeck::Ocr
Extracts text in the desired language from an image or from the pages of a PDF file.
Attributes
lang[R]
source_path[R]
Public Class Methods
new(path)
click to toggle source
# File lib/despeck/ocr.rb, line 8
# Builds an OCR helper for a single source file.
#
# The path is stored as given; nothing is opened or validated here.
#
# @param path [String] location of the file to run OCR on
def initialize(path)
  @source_path = path
end
Public Instance Methods
text(lang: :eng)
click to toggle source
# File lib/despeck/ocr.rb, line 12
# Runs Tesseract over the source file and returns the recognised text.
#
# A source whose path ends in ".pdf" (in any letter case) is processed
# page by page through +for_each_page_image+, and the per-page results are
# concatenated in page order. Any other source is handed to RTesseract
# directly.
#
# @param lang [Symbol, String] language passed through to RTesseract
# @return [String] the recognised text
def text(lang: :eng)
  # Compare case-insensitively so that "scan.PDF" is not mistaken for an image.
  unless source_path.downcase.end_with?('.pdf')
    return RTesseract.new(source_path, lang: lang).to_s
  end

  # Unary plus yields an unfrozen buffer even under frozen_string_literal,
  # so pages can be appended in place instead of reallocating with +=.
  pages_text = +''
  for_each_page_image do |page_path|
    pages_text << RTesseract.new(page_path, lang: lang).to_s
  end
  pages_text
end
Private Instance Methods
for_each_page_image() { |path| ... }
click to toggle source
# File lib/despeck/ocr.rb, line 26
# Writes every image returned by Despeck::PdfTools.pdf_to_images for
# +source_path+ to its own temporary ".jpg" file and yields that file's
# path to the block, one image at a time.
#
# The temporary file is closed and deleted as soon as the block returns or
# raises, so the yielded path is only valid inside the block.
#
# @yieldparam path [String] path of the temporary file for the current image
# @return [Array] always empty; kept so the return value stays unchanged
def for_each_page_image
  Despeck::PdfTools.pdf_to_images(source_path).each do |pic|
    tempfile = Tempfile.new(['despeck_page', '.jpg'])
    begin
      pic.write_to_file(tempfile.path)
      yield tempfile.path
    ensure
      # Without this every page left an open handle and a file on disk
      # until the Tempfile object happened to be garbage collected.
      tempfile.close!
    end
  end
  []
end