class Rocrad

Attributes

src[RW]
tmp[R]
txt[R]

Public Class Methods

new(src="") click to toggle source
# File lib/rocrad.rb, line 16
# Sets up a new instance for the given image source.
#
# src - path or URL string; it is normalized through build_source.
#
# Also creates the UUID generator used for temp-file names, and starts with
# no cropped image and no recognized text.
def initialize(src="")
  @uid = UUID.new
  @src = build_source(src)
  @tmp = nil
  @txt = ""
end

Public Instance Methods

crop!(x, y, w, h) click to toggle source

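Crops the source image to the given rectangle; the cropped copy is written to a temporary file and is what gets converted next.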

# File lib/rocrad.rb, line 46
# Crops the source image and writes the result to a temporary file, whose
# path is kept in @tmp.
#
# x, y, w, h - forwarded unchanged to Magick::Image#crop!.
#
# Clears any previously recognized text. Returns self so calls can be chained.
def crop!(x, y, w, h)
  @txt = ""

  image = Magick::Image.read(@src.to_s).first
  image.crop!(x, y, w, h)

  # Temp name: "<uuid>_<original name>-crop<original extension>".
  ext       = @src.extname
  crop_name = @src.sub(ext, "-crop#{ext}").basename
  @tmp      = Pathname.new(Dir::tmpdir).join("#{@uid.generate}_#{crop_name}")

  image.write(@tmp.to_s)
  self
end
ocr!() click to toggle source
# File lib/rocrad.rb, line 28
# Runs OCR on the current source and returns the recognized text.
#
# The source must be either a Pathname pointing at an existing file or a
# URI::HTTP; anything else raises ImageNotSelectedError.
def ocr!
  if @src.instance_of?(Pathname) && @src.file?
    ocr_via_path
  elsif @src.instance_of?(URI::HTTP)
    ocr_via_http
  else
    raise ImageNotSelectedError
  end
  @txt
end
src=(value="") click to toggle source
# File lib/rocrad.rb, line 23
# Replaces the image source.
#
# value - new source; it is normalized through build_source.
#
# Any previously recognized text is discarded first.
def src=(value="")
  @txt = ""
  @src = build_source(value)
end
to_s() click to toggle source

Returns the recognized text, running OCR first when no text has been recognized yet.

# File lib/rocrad.rb, line 41
# Returns the recognized text. When none is cached yet (@txt is the empty
# string) OCR is run and its result returned.
def to_s
  return @txt unless @txt == ""

  ocr!
end

Private Instance Methods

build_source(src) click to toggle source
# File lib/rocrad.rb, line 74
# Normalizes a user-supplied source into the object the OCR methods work on.
#
# src - String holding an http URL, a file: URL or a filesystem path.
#
# Returns a URI::HTTP for plain http URLs and a Pathname for everything else.
# Strings that URI.parse rejects (for example paths containing spaces) are
# treated as literal filesystem paths instead of raising
# URI::InvalidURIError.
def build_source(src)
  begin
    uri = URI.parse(src)
  rescue URI::InvalidURIError
    # Not parseable as a URI, so it can only be meant as a local path.
    return Pathname.new(src)
  end

  # The class is compared by exact name on purpose: subclasses such as
  # URI::HTTPS keep falling through to the Pathname branch, as before.
  case uri.class.to_s
  when "URI::HTTP"
    uri
  when "URI::Generic", "URI::File"
    # file:// URLs parse as URI::File on newer Rubies and as URI::Generic on
    # older ones; in both cases the path component names the local file.
    Pathname.new(uri.path)
  else
    Pathname.new(src)
  end
end
cco() click to toggle source

Shell redirection that silences stderr: returns "2>/dev/null" when /dev/null exists, otherwise an empty string.

# File lib/rocrad.rb, line 58
# Returns the shell fragment appended to external commands to discard their
# stderr: "2>/dev/null" when /dev/null exists, otherwise an empty string.
def cco
  return "" unless File.exist?("/dev/null")

  "2>/dev/null"
end
image_to_pnm() click to toggle source

Convert image to pnm

# File lib/rocrad.rb, line 97
# Converts the current image (the cropped copy in @tmp when present,
# otherwise @src) to a PNM file in the system temp directory by shelling out
# to the converter that matches the extension of @src.
#
# Returns the Pathname of the PNM file.
# Raises UnsupportedFileTypeError for any other extension.
def image_to_pnm
  # Stdlib; required here so this method does not depend on file-level setup.
  require "shellwords"

  src = @tmp ? @tmp : @src
  pnm = Pathname.new(Dir::tmpdir).join("#{@uid.generate}_#{@src.sub(@src.extname, ".pnm").basename}")

  # Both paths end up inside a shell command line. Escape them so names with
  # spaces or shell metacharacters are passed as a single literal argument.
  input  = Shellwords.escape(src.to_s)
  output = Shellwords.escape(pnm.to_s)

  case @src.extname.downcase
  when ".jpg"
    `djpeg -colors 2 -grayscale -dct float -pnm #{input} > #{output} #{cco}`
  when ".tif"
    `tifftopnm #{input} > #{output} #{cco}`
  when ".png"
    `pngtopnm  #{input} > #{output} #{cco}`
  when ".bmp"
    `bmptopnm #{input} > #{output} #{cco}`
  when ".pdf", ".ps"
    `gs -sDEVICE=pnmraw -r300 -dNOPAUSE -dBATCH -sOutputFile=- -q #{input} > #{output} #{cco}`
  else
    raise UnsupportedFileTypeError
  end
  pnm
end
ocr_via_http() click to toggle source
# File lib/rocrad.rb, line 62
# Downloads the image behind the URI in @src to a temporary file, runs the
# path-based OCR on it and then restores @src and deletes the download.
#
# The recognized text is left in @txt by ocr_via_path; the return value of
# this method is not meaningful.
def ocr_via_http
  remote = @src

  # Name the temp file after the URL *path* only: request_uri would append the
  # query string and corrupt the extension that picks the converter. The name
  # comes from a remote URL, so anything outside a conservative character set
  # is replaced before it is used as a file name.
  safe_name = Pathname.new(remote.path).basename.to_s.gsub(/[^\w.\-]/, "_")
  tmp_path  = Pathname.new(Dir::tmpdir).join("#{@uid.generate}_#{safe_name}")

  begin
    body = Net::HTTP.get(remote)
    # Binary mode keeps image bytes intact; the block form closes the handle
    # even when the write fails.
    File.open(tmp_path.to_s, "wb", 0644) { |file| file.write(body) }
    @src = tmp_path
    ocr_via_path
  ensure
    # Runs on failure too, so @src never stays pointed at the temp file and
    # the download is not left behind.
    @src = remote
    remove_file([tmp_path])
  end
end
ocr_via_path() click to toggle source

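Converts the image to text: turns the source (or its cropped copy) into PNM, runs ocrad on it and stores the result in txt.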

# File lib/rocrad.rb, line 118
# Runs ocrad on the current image (the cropped copy in @tmp when present,
# otherwise @src) and stores the recognized text in @txt.
#
# The intermediate PNM and text files are removed even when conversion or
# reading fails. After a successful run the cropped copy is deleted and @tmp
# is reset. Returns nil.
def ocr_via_path
  # Stdlib; required here so this method does not depend on file-level setup.
  require "shellwords"

  src = @tmp ? @tmp : @src
  txt = Pathname.new(Dir::tmpdir).join("#{@uid.generate}_#{src.sub(src.extname, ".txt").basename}")
  pnm = nil

  begin
    pnm = image_to_pnm
    # Paths are escaped so names with spaces or shell metacharacters reach
    # ocrad as single literal arguments.
    `ocrad #{Shellwords.escape(pnm.to_s)} -l -F utf8 -o #{Shellwords.escape(txt.to_s)} #{cco}`
    @txt = File.read(txt)
  ensure
    # pnm is still nil when image_to_pnm itself raised.
    remove_file([pnm, txt].compact)
  end

  remove_file([@tmp]) if @tmp
  @tmp = nil
end
remove_file(files=[]) click to toggle source

Remove files

# File lib/rocrad.rb, line 86
# Deletes each of the given files, best effort.
#
# files - Array of paths (String or Pathname). Paths that do not exist are
#         skipped.
#
# When File.unlink raises, `rm -f` is tried as a fallback. It is started
# without a shell, so a file name is never interpreted as shell syntax.
# Failures of the fallback are ignored. Returns the files array.
def remove_file(files=[])
  files.each do |file|
    begin
      File.unlink(file) if File.exist?(file)
    rescue StandardError
      # Argument-vector form of system: no shell is involved. Output is
      # discarded, as it was with the previous backtick call.
      system("rm", "-f", file.to_s, :out => File::NULL, :err => File::NULL)
    end
  end
end