class NoSE::Loader::CsvLoader
Load data into an index from a set of CSV files
Public Class Methods
new(workload = nil, backend = nil)
Calls superclass method NoSE::Loader::LoaderBase::new
# File lib/nose/loader/csv.rb, line 11
def initialize(workload = nil, backend = nil)
  super
  @logger = Logging.logger['nose::loader::csvloader']
end
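A minimal construction sketch, assuming a workload and backend have already been built elsewhere; the variable names below are placeholders, not part of this class:

# workload and backend are assumed to exist already; building them is
# outside the scope of CsvLoader.
loader = NoSE::Loader::CsvLoader.new workload, backend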
Public Instance Methods
load(indexes, config, show_progress = false, limit = nil, skip_existing = true)
Load data for all the indexes
# File lib/nose/loader/csv.rb, line 18
def load(indexes, config, show_progress = false, limit = nil,
         skip_existing = true)
  indexes.map!(&:to_id_graph).uniq! if @backend.by_id_graph

  simple_indexes = find_simple_indexes indexes, skip_existing
  simple_indexes.each do |entity, simple_index_list|
    filename = File.join config[:directory], "#{entity.name}.csv"
    total_rows = (limit || 0) - 1 # account for header row
    File.open(filename) { |file| file.each_line { total_rows += 1 } }

    progress = initialize_progress entity, simple_index_list, total_rows if show_progress
    load_file_indexes filename, entity, simple_index_list, progress
  end
end
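A hedged usage sketch: as far as this load path is concerned, the config hash only needs a :directory entry, and that directory is assumed to hold one CSV file per entity, named "#{entity.name}.csv" and starting with a header row. The path below is hypothetical, and loader is the instance from the constructor sketch above.

config = { directory: '/path/to/csv/dumps' }

# Show a progress bar, count every row (no limit), and skip indexes the
# backend has already populated.
loader.load indexes, config, true, nil, true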
Private Instance Methods
find_simple_indexes(indexes, skip_existing)
Find the simple indexes we should populate
@return [Hash<Entity, Index>]
# File lib/nose/loader/csv.rb, line 38
def find_simple_indexes(indexes, skip_existing)
  simple_indexes = indexes.select do |index|
    index.graph.size == 1 &&
      !(skip_existing && !@backend.index_empty?(index))
  end

  simple_indexes.group_by do |index|
    index.hash_fields.first.parent
  end
end
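Because the result comes from group_by, each entity maps to an array of the simple indexes rooted at it. A standalone illustration of that shape, using Structs as stand-ins for NoSE entities and indexes:

Entity = Struct.new(:name)
Index  = Struct.new(:key, :parent)

user  = Entity.new('User')
hotel = Entity.new('Hotel')
indexes = [Index.new('i1', user), Index.new('i2', user), Index.new('i3', hotel)]

indexes.group_by(&:parent)
# => { user => [i1, i2], hotel => [i3] }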
initialize_progress(entity, simple_index_list, total_rows)
Initialize a progress bar to report loading progress
@return [Formatador::ProgressBar]
# File lib/nose/loader/csv.rb, line 51
def initialize_progress(entity, simple_index_list, total_rows)
  @logger.info "Loading simple indexes for #{entity.name}"
  @logger.info simple_index_list.map(&:key).join(', ')

  Formatador.new.redisplay_progressbar 0, total_rows
  Formatador::ProgressBar.new total_rows, started_at: Time.now.utc
end
load_file_indexes(filename, entity, simple_index_list, progress)
Load all indexes for a given file
@return [void]
# File lib/nose/loader/csv.rb, line 61
def load_file_indexes(filename, entity, simple_index_list, progress)
  SmarterCSV.process(filename,
                     downcase_header: false,
                     chunk_size: 1000,
                     convert_values_to_numeric: false) do |chunk|
    Parallel.each(chunk.each_slice(100),
                  finish: (lambda do |_, _, _|
                    next if progress.nil?
                    inc = [progress.total - progress.current, 100].min
                    progress.increment inc
                  end)) do |minichunk|
      load_simple_chunk minichunk, entity, simple_index_list
    end
  end
end
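SmarterCSV yields 1,000-row chunks, each split into 100-row slices that the parallel gem hands to worker processes; the finish: hook runs back in the parent process, which is why the progress bar can be advanced there safely. A minimal standalone sketch of that pattern, with placeholder per-slice work:

require 'parallel'

slices = (1..1000).each_slice(100).to_a
done = 0

Parallel.each(slices, finish: (lambda do |slice, _index, _result|
  done += slice.size # runs in the parent process
  puts "#{done} rows processed"
end)) do |slice|
  slice.each { |row| row.to_s } # placeholder per-row work
end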
load_simple_chunk(chunk, entity, indexes)
Load a chunk of data from a simple entity into its indexes
@return [void]
# File lib/nose/loader/csv.rb, line 79
def load_simple_chunk(chunk, entity, indexes)
  # Prefix all hash keys with the entity name and convert values
  chunk.map! do |row|
    index_row = {}
    row.each_key do |key|
      field_class = entity[key.to_s].class
      value = field_class.value_from_string row[key]
      index_row["#{entity.name}_#{key}"] = value
    end
    index_row
  end

  # Insert the batch into the index
  indexes.each do |index|
    @backend.index_insert_chunk index, chunk
  end
end
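A rough, standalone illustration of the rewrite performed above; the entity name and the value conversion are stand-ins for NoSE's field classes, whose value_from_string does the real type conversion:

entity_name = 'User'
row = { id: '42', name: 'Alice' } # as produced by SmarterCSV

index_row = {}
row.each_key do |key|
  value = key == :id ? Integer(row[key]) : row[key] # stand-in conversion
  index_row["#{entity_name}_#{key}"] = value
end

index_row # => { "User_id" => 42, "User_name" => "Alice" }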