class ExcelImport::ExcelLoader
Public Class Methods
new()
click to toggle source
Calls superclass method
# File lib/excel_import/excel_loader.rb, line 22 def initialize super end
Public Instance Methods
perform_load(options = {})
click to toggle source
# File lib/excel_import/excel_loader.rb, line 37 def perform_load(options = {}) allow_empty_rows = DataShift::Loaders::Configuration.call.allow_empty_rows logger.info "Starting bulk load from Excel : #{file_name}" start(file_name, options) # maps list of headers into suitable calls on the Active Record class bind_headers(headers) is_dummy_run = DataShift::Configuration.call.dummy_run begin puts 'Dummy Run - Changes will be rolled back' if is_dummy_run load_object_class.transaction do @excel.sheet_final.each_with_index do |row, current_row_idx| # next if current_row_idx == headers.idx # Excel num_rows seems to return all 'visible' rows, which appears to be greater than the actual data rows # (TODO - write spec to process .xls with a huge number of rows) # # manually have to detect when actual data ends, this isn't very smart but # got no better idea than ending once we hit the first completely empty row break if !allow_empty_rows && (row.nil? || row.empty?) logger.info "Processing Row #{current_row_idx}" contains_data = false doc_context.progress_monitor.start_monitoring # Iterate over the bindings, # For each column bound to a model operator, create a context from data in associated Excel column @binder.bindings.each do |method_binding| unless method_binding.valid? logger.warn("No binding was found for column (#{current_row_idx})") next end # If binding to a column, get the value from the cell (bindings can be to internal methods) value = method_binding.index ? row[method_binding.index] : nil context = doc_context.create_node_context(method_binding, current_row_idx, value) contains_data ||= context.contains_data? logger.info "Processing Column #{method_binding.index} (#{method_binding.pp})" begin context.process rescue if doc_context.all_or_nothing? logger.error('All or nothing set and Current Column failed so complete Row aborted') break end end end # Excel data rows not accurate, seems to have to manually detect when actual Excel data rows end break if !allow_empty_rows && contains_data == false doc_context.save_and_monitor_progress # unless next operation is update, reset the loader object doc_context.reset unless doc_context.node_context.next_update? end # all rows processed if is_dummy_run puts 'Excel loading stage done - Dummy run so Rolling Back.' raise ActiveRecord::Rollback # Don't actually create/upload to DB if we are doing dummy run end end # TRANSACTION N.B ActiveRecord::Rollback does not propagate outside of the containing transaction block rescue => e puts "ERROR: Excel loading failed : #{e.inspect}" raise e ensure report end puts 'Excel loading stage Complete.' end
run(file_name, load_class, excel_instance)
click to toggle source
# File lib/excel_import/excel_loader.rb, line 26 def run(file_name, load_class, excel_instance) @file_name = file_name @excel = excel_instance setup_load_class(load_class) logger.info("Loading objects of type #{load_object_class}") perform_load end
Private Instance Methods
parse_headers(header_row, header_row_idx = 0)
click to toggle source
# File lib/excel_import/excel_loader.rb, line 131 def parse_headers(header_row, header_row_idx = 0) headers = DataShift::Headers.new(:excel, header_row_idx) # TODO: - make more robust - currently end on first empty column # There is no actual max columns in Excel .. you will run out of memory though at some point (0..::DataShift::ExcelBase.max_columns).each do |column| cell = header_row[column] break unless cell header = cell.to_s.strip break if header.empty? headers << header end headers end
start(file_name, options = {})
click to toggle source
# File lib/excel_import/excel_loader.rb, line 121 def start(file_name, options = {}) open_excel(file_name, options) set_headers(parse_headers(@excel.headers)) if headers.empty? raise MissingHeadersError, "No headers found - Check Sheet #{sheet} is complete and Row #{headers.idx} contains headers" end excel end