class Paperclip::Document::Processors::Reader

This processor extracts the OCR text of the file.

Attributes

clean[RW]
language[RW]
text_column[RW]

Public Class Methods

new(file, options = {}, attachment = nil) click to toggle source
Calls superclass method Paperclip::Document::Processor::new
# File lib/paperclip/document/processors/reader.rb, line 8
# Builds the processor and resolves the column that will receive the text.
#
# When no :text_column option is given and text_column? reports that the
# default column exists, the default column name is used. The :language
# option is stored as given.
#
# Raises Paperclip::Error when no text column could be resolved.
def initialize(file, options = {}, attachment = nil)
  super(file, options, attachment)

  @options[:text_column] = default_text_column if @options[:text_column].nil? && text_column?
  @language = @options[:language]
  @text_column = @options[:text_column]
  raise Paperclip::Error, 'No content text column given' unless @text_column

  # Cleaning is always disabled on Ruby 2.0 and later; on older versions the
  # :clean option is honoured and defaults to true when it is not supplied.
  # NOTE(review): RUBY_VERSION is compared as a string here - confirm intended.
  @clean =
    if RUBY_VERSION >= '2.0'
      false
    elsif options.key?(:clean)
      !!options[:clean]
    else
      true
    end
end

Public Instance Methods

default_text_column() click to toggle source

Returns the name of the default text column

# File lib/paperclip/document/processors/reader.rb, line 47
# Name of the column used when no :text_column option is given:
# the attachment name followed by "_content_text".
def default_text_column
  "#{@attachment.name}_content_text"
end
make() click to toggle source

Extracts the text of the whole document.

# File lib/paperclip/document/processors/reader.rb, line 22
# Extracts the text of the document with Docsplit and stores it in the
# text column of the attachment's record.
#
# The extracted text is written by Docsplit to "<basename>.txt" inside
# tmp_dir, read back, assigned to instance[text_column], and then the
# record's :save callbacks are run around a block that returns false.
#
# The language may be a Proc, in which case it is called with the record
# to obtain the language passed to Docsplit.
#
# Returns a newly opened File on the original file's path; the caller is
# responsible for closing it.
def make
  destination_path = tmp_dir.to_s
  extract_options = { output: destination_path, clean: @clean }
  extract_options[:language] = language.is_a?(Proc) ? language.call(attachment.instance) : language
  Docsplit.extract_text(file_path.to_s, extract_options)

  destination_file = File.join(destination_path, basename + '.txt')
  instance = @attachment.instance
  # File.read opens and closes the handle itself, so nothing stays open if
  # the assignment or a save callback raises.
  instance[text_column] = File.read(destination_file)
  instance.run_callbacks(:save) { false }

  File.open(file.path)
end
text_column?() click to toggle source

Checks whether the default text column is present.

# File lib/paperclip/document/processors/reader.rb, line 39
# Looks for the default text column among the columns of the record's class.
#
# Returns the first column whose name equals default_text_column (a truthy
# value), or nil when there is no such column.
def text_column?
  wanted = default_text_column
  instance.class.columns.find { |col| col.name.to_s == wanted }
end