class Paperclip::Document::Processors::Reader
This processor extracts the OCR text of the file.
Attributes
clean[RW]
language[RW]
text_column[RW]
Public Class Methods
new(file, options = {}, attachment = nil)
click to toggle source
Calls superclass method
Paperclip::Document::Processor::new
# File lib/paperclip/document/processors/reader.rb, line 8
#
# Sets up the reader from the processor options.
#
# When no :text_column option is given and the default column exists
# (see text_column?), the default column name is used.
#
# @param file the file handed to the parent processor
# @param options [Hash] processor options; :language, :text_column and
#   :clean are the keys read here
# @param attachment the attachment handed to the parent processor
# @raise [Paperclip::Error] when no text column could be determined
def initialize(file, options = {}, attachment = nil)
  # NOTE(review): @options is presumably populated by the parent
  # constructor - confirm against Paperclip::Document::Processor.
  super(file, options, attachment)

  if @options[:text_column].nil? && text_column?
    @options[:text_column] = default_text_column
  end

  @language = @options[:language]
  @text_column = @options[:text_column]
  raise Paperclip::Error, 'No content text column given' unless @text_column

  # Cleaning is always disabled on Ruby 2 and later (the reason is not
  # visible in this file); on older rubies it defaults to true unless
  # the :clean option says otherwise.
  # The major version is compared numerically because comparing
  # RUBY_VERSION as a string is lexicographic ("10.0" sorts before "2.0").
  @clean =
    if RUBY_VERSION.to_i >= 2
      false
    elsif options.key?(:clean)
      !!options[:clean]
    else
      true
    end
end
Public Instance Methods
default_text_column()
click to toggle source
Returns the name of the default text column
# File lib/paperclip/document/processors/reader.rb, line 47
#
# Returns the name of the default text column: the attachment name
# followed by "_content_text".
#
# @return [String]
def default_text_column
  "#{@attachment.name}_content_text"
end
make()
click to toggle source
Extracts the text of the whole document.
# File lib/paperclip/document/processors/reader.rb, line 22
#
# Extracts the text of the whole document with Docsplit, stores it in the
# text column of the attachment's instance, and runs the instance's :save
# callbacks around a block that returns false.
#
# The :language option may be a Proc; it is then called with the
# attachment's instance to obtain the language passed to Docsplit.
#
# NOTE(review): tmp_dir, file_path, basename and file are not defined in
# this class - presumably inherited from the parent processor; confirm.
#
# @return [File] a newly opened handle on file.path; closing it is left
#   to the caller
def make
  destination_path = tmp_dir.to_s

  options = { output: destination_path, clean: @clean }
  options[:language] = language.is_a?(Proc) ? language.call(attachment.instance) : language
  Docsplit.extract_text(file_path.to_s, options)

  # The text is expected at "<basename>.txt" inside the output directory.
  destination_file = File.join(destination_path, "#{basename}.txt")

  instance = @attachment.instance
  # File.read closes the handle even when reading fails, so nothing is
  # left open if the assignment or the callbacks below raise.
  instance[text_column] = File.read(destination_file)
  instance.run_callbacks(:save) { false }

  File.open(file.path)
end
text_column?()
click to toggle source
Checks whether the default text column is present.
# File lib/paperclip/document/processors/reader.rb, line 39
#
# Tells whether the class of the instance has a column named like the
# default text column (see default_text_column).
#
# NOTE(review): `instance` is not defined in this class - presumably
# provided by the parent processor; confirm.
#
# @return [Boolean] true when a column with the default name exists
def text_column?
  expected_column = default_text_column
  # any? gives a real boolean, as expected from a predicate method.
  instance.class.columns.any? { |column| column.name.to_s == expected_column }
end