class Honyomi::Pdf

Public Class Methods

new(filename)
# File lib/honyomi/pdf.rb, line 10
# Builds a wrapper around a PDF file on disk.
#
# The path is only stored here; nothing is opened or validated until one of
# the instance methods invokes an external tool on it.
#
# @param filename [String] path to the PDF file
def initialize(filename)
  @filename = filename
end

Public Instance Methods

generate_images(output_dir)
# File lib/honyomi/pdf.rb, line 39
# Renders the PDF to JPEG images by running the external `pdftoppm` command.
#
# The output directory is created first (including missing parents). The
# command receives "<output_dir>/book" as its output path argument.
#
# @param output_dir [String] directory that receives the generated images
# @return [Boolean, nil] whatever Kernel#system returns for the command
def generate_images(output_dir)
  FileUtils.mkdir_p(output_dir)

  image_prefix = File.join(output_dir, "book")
  # Argument-list form of system: no shell is involved, so no escaping needed.
  system("pdftoppm", "-jpeg", @filename, image_prefix)
end
pages()
# File lib/honyomi/pdf.rb, line 14
# Extracts the text of the PDF one page at a time.
#
# Runs the external `pdftotext` command (poppler, xpdf) once per page,
# starting at page 1, writing into a scratch file inside a temporary
# directory, and stops at the first page for which the command does not
# exit with status 0.
#
# NOTE(review): the loop ends only on a non-zero exit status, so it relies on
# pdftotext failing for a page number past the end of the document -- confirm
# this holds for the pdftotext build in use.
#
# @return [Array<String>] extracted text, one element per page, in page order
def pages
  texts = []

  Dir.mktmpdir do |tmpdir|
    text_path = File.join(tmpdir, "pdf.txt")

    # Both paths are constant across iterations; escape them once for the shell.
    source_arg = Shellwords.escape(@filename)
    target_arg = Shellwords.escape(text_path)
    page_no = 0

    loop do
      page_no += 1

      # Need pdftotext (poppler, xpdf)
      command = "pdftotext -f #{page_no} -l #{page_no} #{source_arg} #{target_arg}"
      _stdout, _stderr, status = Open3.capture3(command)
      break unless status.exitstatus == 0

      page_text = File.read(text_path, encoding: Encoding::UTF_8)

      # String#scrub is not available on every Ruby; replace invalid bytes
      # with '?' only when it is.
      page_text = page_text.scrub('?') if String.method_defined?(:scrub)

      texts << page_text
    end
  end

  texts
end