class DataSpork::Importer

Constants

ENCODE_VALUES
SANITIZE_VALUES
VERBOSE
VERBOSE_IO_OPTIONS

Attributes

blank_row[RW]
col_map[R]
col_num[RW]
col_tags[R]
effective_date[RW]
headers[R]
input_type[R]
options[R]
root_tag[R]
row[R]
row_num[R]
row_tag[R]
setup_state[RW]
writers[R]

Public Class Methods

convert(input_type, options = nil) click to toggle source

Entry point to convert the input document and output it to the selected format(s).

@param :input_type symbol identifying the format of the input document (:csv, :xlsx, or :xls)
@param :options hash with options to control the behavior of the conversion

# File lib/data_spork/importer.rb, line 20
# Class-level entry point: builds an importer from the given arguments and
# immediately runs its instance-level #convert.
#
# @param input_type passed unchanged to the constructor
# @param options passed unchanged to the constructor (nil when omitted)
# @return whatever the instance-level #convert returns
def self.convert(input_type, options = nil)
  importer = new(input_type, options)
  importer.convert
end
new(input_type, options = nil) click to toggle source

Constructor

# File lib/data_spork/importer.rb, line 25
# Constructor: remembers the input type, then runs option setup followed by
# writer setup (in that order).
#
# @param input_type stored as-is in @input_type
# @param options forwarded to init_options; may be nil
def initialize(input_type, options = nil)
  @input_type = input_type
  init_options(options)
  init_writers
end

Public Instance Methods

add_writers() click to toggle source
# File lib/data_spork/importer.rb, line 41
# Registers the writers for this importer. This implementation appends a
# single XmlWriter constructed with the importer itself.
#
# @return the writers collection
def add_writers
  xml_writer = XmlWriter.new(self)
  writers.push(xml_writer)
end
append(row) click to toggle source

Appends the specified row to the output. @param :row Array of values parsed from the CSV input.

# File lib/data_spork/importer.rb, line 135
# Makes +row+ the current row, bumps the row counter, then sanitizes and
# outputs it.
#
# @param row the values for one input row; stored in @row
# @return the result of #output
def append(row)
  @row = row
  @row_num = @row_num + 1
  sanitize
  output
end
clip_effective_date?(first_col) click to toggle source

Answer true if the first_col value is the effective date header, and clip the effective date value.

# File lib/data_spork/importer.rb, line 216
# Tests +first_col+ against effective_date_pattern. On a match, the second
# cell of the current row (row[1]) is converted to a string, stripped of
# surrounding whitespace and stored as the effective date.
#
# @param first_col the value to test; must respond to #match
# @return true when the pattern matched (and the date was stored), else false
def clip_effective_date?(first_col)
  return false unless first_col.match(effective_date_pattern)

  self.effective_date = row[1].to_s.strip
  true
end
col_value(index = nil) click to toggle source

Answer the value for the current column of data, or for the specified index.

# File lib/data_spork/importer.rb, line 241
# Looks up a cell in the current row.
#
# @param index position to read; when nil (or omitted) the current col_num
#   is used instead
# @return the element of row at that position
def col_value(index = nil)
  position = index || col_num
  row[position]
end
convert() click to toggle source

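Drives the conversion of the input file (CSV or Excel) to the output produced by the registered writers (XML by default). Takes no arguments; the input location is derived from the options given to the constructor (see input_pathname).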

# File lib/data_spork/importer.rb, line 106
# Runs the whole conversion: #start, then #append for every row yielded by
# #each, then #finish.
#
# @return the result of #finish
def convert
  start
  each { |record| append(record) }
  finish
end
csv?() click to toggle source
# File lib/data_spork/importer.rb, line 96
# Answer true when the input type is :csv.
def csv?
  [:csv].include?(input_type)
end
each(&block) click to toggle source
# File lib/data_spork/importer.rb, line 92
# Iterates the rows supplied by #reader, handing the given block straight
# through to the reader's own #each.
#
# @return whatever the reader's #each returns
def each(&block)
  source = reader
  source.each(&block)
end
effective_date_pattern() click to toggle source
# File lib/data_spork/importer.rb, line 45
# Pattern used to recognise the effective-date header cell: the text
# "effective date" at the start of a line, where the first letter of each
# word may be upper- or lower-case. The trailing "e+" also accepts a
# repeated final "e".
#
# @return [Regexp]
def effective_date_pattern
  %r{^[Ee]ffective [Dd]ate+}
end
file_modifier() click to toggle source
# File lib/data_spork/importer.rb, line 80
# Text that output_filename inserts between the output file's base name and
# its extension. This implementation contributes nothing (empty string).
#
# @return [String]
def file_modifier
  ''
end
finish() click to toggle source
# File lib/data_spork/importer.rb, line 129
# Ends the conversion by broadcasting :finish to every writer via #write.
#
# @return the result of #write
def finish
  write(:finish)
end
get_substitute_value(value) click to toggle source

Overridden by subclasses to substitute field-specific values based on their position in the row. The returned value is substituted for the passed value. This method expects only columns that are included in the output.

@param :value the value to be substituted

# File lib/data_spork/importer.rb, line 176
# Substitution hook invoked by substitute_value for columns that are part of
# the output. This implementation performs no substitution and hands the
# value back untouched.
#
# @param value the value to be substituted
# @return the same +value+ object
def get_substitute_value(value)
  value
end
header(index = nil) click to toggle source

Answer the header for the current column of data, or for the specified index.

# File lib/data_spork/importer.rb, line 246
# Looks up a header.
#
# @param index position to read; when nil (or omitted) the current col_num
#   is used instead
# @return the element of headers at that position
def header(index = nil)
  position = index || col_num
  headers[position]
end
headers?() click to toggle source

Answer true if the headers are already determined.

# File lib/data_spork/importer.rb, line 195
# Answer true once the headers collection holds at least one entry
# (entries that are nil still count).
def headers?
  headers.size > 0
end
init_options(options) click to toggle source
# File lib/data_spork/importer.rb, line 31
# Builds @options by layering the caller's options over the default
# { source_path: '.' }. When an :output_file is requested without an
# :output_path, the output path falls back to the source path.
#
# @param options [Hash, nil] caller-supplied options; nil means "none"
def init_options(options)
  defaults = { source_path: '.' }
  @options = defaults.merge(options || {})
  if @options[:output_file] && @options[:output_path].nil?
    @options[:output_path] = @options[:source_path]
  end
end
init_writers() click to toggle source
# File lib/data_spork/importer.rb, line 36
# Resets @writers to a fresh empty array and then lets add_writers populate it.
#
# @return the result of add_writers
def init_writers
  @writers = Array.new
  add_writers
end
input_pathname() click to toggle source
# File lib/data_spork/importer.rb, line 67
# Full path of the input file: source_name joined onto options[:source_path].
#
# @return [String]
def input_pathname
  source_dir = Pathname(options[:source_path])
  source_dir.join(source_name).to_s
end
on_begin_row() click to toggle source
# File lib/data_spork/importer.rb, line 255
# Row-start callback: broadcasts :begin_put_row to every writer via #write.
#
# @return the result of #write
def on_begin_row
  write(:begin_put_row)
end
on_end_row() click to toggle source
# File lib/data_spork/importer.rb, line 263
# Row-end callback: broadcasts :end_put_row to every writer via #write.
#
# @return the result of #write
def on_end_row
  write(:end_put_row)
end
on_output_column() click to toggle source
# File lib/data_spork/importer.rb, line 259
# Column callback: broadcasts :put_column to every writer via #write.
#
# @return the result of #write
def on_output_column
  write(:put_column)
end
output() click to toggle source

Output the current row of data, which were parsed from the CSV input.

# File lib/data_spork/importer.rb, line 200
# Handles the current row after sanitizing:
# * a rejected row is skipped (and reported through print when VERBOSE);
# * while no headers have been collected, the method named by setup_state
#   is invoked;
# * otherwise the row is emitted with put_row.
#
# @return nil for a rejected row, else the result of the dispatched call
def output
  rejected = reject?
  print "rejected #{row_num}: #{row}" if rejected && VERBOSE
  return if rejected

  if headers.empty?
    send(setup_state)
  else
    put_row
  end
end
output_column?() click to toggle source

Answer true when the current column should be included in the output.

# File lib/data_spork/importer.rb, line 251
# Answer true when the header of the current column is listed in col_tags.
def output_column?
  current_header = header
  col_tags.include?(current_header)
end
output_filename() click to toggle source
# File lib/data_spork/importer.rb, line 75
# Path of the output file: options[:output_file] placed under
# output_pathname, with file_modifier inserted just before the extension.
#
# @return [Pathname]
def output_filename
  target = output_pathname.join(options[:output_file])
  extension = target.extname
  target.sub_ext("#{file_modifier}#{extension}")
end
output_pathname() click to toggle source
# File lib/data_spork/importer.rb, line 71
# Directory that receives output files: the 'output' folder beneath
# options[:output_path].
#
# @return [Pathname]
def output_pathname
  base_dir = Pathname(options[:output_path])
  base_dir.join('output')
end
print(str) click to toggle source
print_error(str) click to toggle source
put_row() click to toggle source

Output the current row, one column at a time.

# File lib/data_spork/importer.rb, line 268
# Emits the current row: fires on_begin_row, then walks the columns in
# order, pointing col_num at each one and firing on_output_column for those
# that output_column? accepts, and finally fires on_end_row.
#
# @return the result of on_end_row
def put_row
  on_begin_row
  row.each_with_index do |_cell, position|
    self.col_num = position
    on_output_column if output_column?
  end
  on_end_row
end
reader() click to toggle source
# File lib/data_spork/importer.rb, line 84
# Builds the row reader matching the input type: a CSV_Reader when csv? is
# true, an XLSX_Reader when xlsx? is true. A new reader is constructed on
# every call.
#
# Previously an unrecognised input type fell through and returned nil, which
# only surfaced later as a NoMethodError on nil inside #each. The unsupported
# type is now reported directly.
#
# @return a reader constructed with this importer
# @raise [ArgumentError] when the input type is neither CSV nor Excel
def reader
  return CSV_Reader.new(self) if csv?
  return XLSX_Reader.new(self) if xlsx?

  raise ArgumentError, "unsupported input type: #{input_type.inspect}"
end
reject?() click to toggle source

Answer true if rules dictate the current row should be discarded from processing.

# File lib/data_spork/importer.rb, line 211
# Answer a truthy value when the current row should be dropped: headers have
# already been collected and the row was flagged as blank.
def reject?
  headers? && blank_row
end
sanitize() click to toggle source

Sanitize the current row of data. The row is modified in place, so the return value is not meaningful.

# File lib/data_spork/importer.rb, line 143
# Cleans the current row in place, one cell at a time, keeping col_num
# pointed at the cell being processed (it ends up equal to the row length).
#
# For each cell:
# * when ENCODE_VALUES is set, the cell is stringified and re-encoded to
#   ISO-8859-1 with XML text escaping;
# * blank_row starts out true and is cleared by the first non-blank value;
# * sanitize_value is called once headers exist and SANITIZE_VALUES is set;
# * the cell is replaced by the result of substitute_value.
#
# NOTE(review): relies on #blank? being available on cell values — it is not
# core Ruby, so presumably ActiveSupport (or similar) is loaded; confirm.
def sanitize
  self.col_num = 0
  self.blank_row = true
  row.map! do |raw|
    value = if ENCODE_VALUES
              raw.to_s.encode('iso-8859-1', xml: :text)
            else
              raw
            end
    self.blank_row = false if blank_row && !value.blank?
    sanitize_value(value) if headers? && SANITIZE_VALUES
    replacement = substitute_value(value)
    self.col_num += 1
    replacement
  end
end
sanitize_value(value) click to toggle source

Sanitize field-specific values based on their position in the row. The values must be modified in place, so there is no need to return a value. This method does not sanitize columns that are not included in the output.

@param :value the value to be sanitized

# File lib/data_spork/importer.rb, line 185
# Per-column sanitizing hook, called by #sanitize with the current cell
# value. This implementation never touches +value+; it only inspects the
# current header, and only for columns that are part of the output.
#
# @param value the value to be sanitized (expected to be modified in place
#   by implementations that do real work)
def sanitize_value(value)
  # Only columns with collected headers that are selected for output qualify.
  if headers? and output_column?
    case header
      # NOTE(review): `nil?` has no receiver here, so it is evaluated on the
      # importer itself (false) and the clause compares header against
      # `false`, not against nil. Presumably a placeholder showing where
      # header-specific branches go — confirm whether `when nil` was intended.
      when nil?
        0
    end
  end
end
setup() click to toggle source

Initializes the headers from the first row and prints them when VERBOSE is true.

# File lib/data_spork/importer.rb, line 233
# Fills the headers from the current row: every cell is translated through
# col_map and appended to headers (cells without a mapping contribute
# whatever col_map returns for them). Prints the headers when VERBOSE is set.
def setup
  mapped = row.map { |label| col_map[label] }
  headers.concat(mapped)
  print "headers: #{row_num}: #{headers}" if VERBOSE
end
setup_writers() click to toggle source

Initializes the xml document and transfers setup_state to :setup

# File lib/data_spork/importer.rb, line 226
# First setup state: broadcasts :start to the writers, advances setup_state
# to :setup, and immediately invokes that next state on the same row.
#
# @return the result of the method named by the new setup_state
def setup_writers
  write(:start)
  self.setup_state = :setup
  # Fall straight through to the next state instead of waiting for another row.
  send(setup_state)
end
source_name() click to toggle source
# File lib/data_spork/importer.rb, line 61
# File name of the input document: DEFAULT_INPUT_NAME followed by a dot and
# the input type used as the extension.
#
# @return [String]
def source_name
  "#{DEFAULT_INPUT_NAME}.#{input_type}"
end
start() click to toggle source
# File lib/data_spork/importer.rb, line 114
# Prepares a conversion run.
#
# * With VERBOSE_IO_OPTIONS set, reports the options and the resolved
#   input/output paths through print_error.
# * When options[:output_file] is given, creates the output directory and
#   points $stdout at the output file.
# * Resets the row counter and headers and selects :setup_writers as the
#   first setup state.
#
# NOTE(review): nothing in this method restores $stdout or closes the opened
# file — confirm whether that is handled elsewhere.
def start
  if VERBOSE_IO_OPTIONS
    print_error "options: #{options}"
    source = input_pathname
    print_error "input_pathname: #{source}, exists: #{File.exist?(source)}"
    print_error "output_filename: #{output_filename}" if options[:output_file]
  end

  if options[:output_file]
    output_pathname.mkpath
    $stdout = File.open(output_filename.to_s, 'w')
  end

  @row_num = 0
  @headers = []
  self.setup_state = :setup_writers
end
substitute_value(value) click to toggle source

Substitute field-specific values based on their position in the row. The returned value is substituted for the passed value. This method does not process columns that are not included in the output.

Subclasses should not override this method, but should override get_substitute_value instead.

@param :value the value to be substituted

# File lib/data_spork/importer.rb, line 163
# Returns the value to store for the current cell. Once headers exist and
# the current column is part of the output, the decision is delegated to
# get_substitute_value; in every other case the value passes through as-is.
#
# @param value the value to be substituted
# @return the substituted (or original) value
def substitute_value(value)
  return value unless headers? && output_column?

  get_substitute_value(value)
end
write(msg) click to toggle source
# File lib/data_spork/importer.rb, line 57
# Broadcasts +msg+ to the writers: each writer, in order, has the method
# named by +msg+ invoked on it with no arguments.
#
# @param msg name of the method to invoke on every writer
# @return the writers collection
def write(msg)
  writers.each do |target|
    target.send(msg)
  end
end
xlsx?() click to toggle source
# File lib/data_spork/importer.rb, line 100
# Answer true when the input type is one of the Excel types, :xlsx or :xls.
def xlsx?
  type = input_type
  type == :xlsx || type == :xls
end