class Dreader::Engine

This is where the real stuff begins

Attributes

colspec[R]

the specification of the columns to process

options[R]

the options we passed (readable for debugging purposes)

table[R]

the data we read

virtualcols[R]

the specification of the virtual columns

Public Class Methods

new() click to toggle source
# File lib/dreader.rb, line 111
# Build an engine with empty options and no column declarations.
#
# @table and @errors start out as empty arrays (rather than being left
# unset) so that the methods reading them -- errors, get_row, process,
# to_s and virtual_columns -- operate on a well-defined value even when
# no file has been read yet.
def initialize
  @options = {}
  @colspec = []
  @virtualcols = []
  @table = []
  @errors = []
end

Private Class Methods

open_spreadsheet(filename) click to toggle source
# File lib/dreader.rb, line 400
# Open +filename+ with the Roo reader matching its extension.
#
# The extension is compared case-insensitively, so "DATA.XLSX" is handled
# exactly like "data.xlsx". The error message reports the extension as it
# was written in +filename+.
#
# @param filename [String] path of the spreadsheet to open
# @return the Roo object built for the file (CSV, OpenOffice, Excel or Excelx)
# @raise [RuntimeError] if the extension is not .csv, .tsv, .ods, .xls or .xlsx
def self.open_spreadsheet(filename)
  extension = File.extname(filename)
  case extension.downcase
  when ".csv" then Roo::CSV.new(filename)
  when ".tsv" then Roo::CSV.new(filename, csv_options: {col_sep: "\t"})
  when ".ods" then Roo::OpenOffice.new(filename)
  when ".xls" then Roo::Excel.new(filename)
  when ".xlsx" then Roo::Excelx.new(filename)
  else raise "Unknown extension: #{extension}"
  end
end

Public Instance Methods

bulk_declare(hash, &block) click to toggle source

bulk declare columns we intend to read

  • hash is a hash in the form { symbolic_name: colref }

i.bulk_declare({name: 'B', age: 'C'}) is equivalent to:

i.column :name do
  colref 'B'
end

i.column :age do
  colref 'C'
end

i.bulk_declare({name: 'B', age: 'C'}) do
  process do |cell|
    cell.strip
  end
end

is equivalent to:

i.column :name do
  colref 'B'
  process do |cell|
    cell.strip
  end
end

i.column :age do
  colref 'C'
  process do |cell|
    cell.strip
  end
end

# File lib/dreader.rb, line 172
# Declare several columns in one call.
#
# For every key of +hash+ a Column is created, its colref is set to the
# value stored under that key and, when a block is given, the block is
# evaluated in the context of the Column. The resulting specification
# (Column#to_hash plus the :name entry) is appended to @colspec.
#
# @param hash [Hash] pairs in the form { symbolic_name: colref }
# @param block optional declarations applied to every declared column
def bulk_declare(hash, &block)
  hash.keys.each do |name|
    spec = Column.new.tap do |col|
      col.colref hash[name]
      col.instance_eval(&block) if block
    end
    @colspec.push(spec.to_hash.merge({name: name}))
  end
end
column(name, &block) click to toggle source

define a DSL for column specification

  • `name` is the name of the column

  • `block` contains two declarations, `process` and `check`, which are used, respectively, to make a cell into the desired data and to check whether the desired data is ok

# File lib/dreader.rb, line 132
# Declare one column through the column DSL.
#
# The block is evaluated in the context of a fresh Column; the resulting
# specification (Column#to_hash plus the :name entry) is appended to
# @colspec.
#
# @param name the name under which the column is registered
# @param block the column declarations
def column(name, &block)
  spec = Column.new.tap { |col| col.instance_eval(&block) }
  @colspec.push(spec.to_hash.merge({name: name}))
end
debug(args = {}) click to toggle source

show to stdout the first `n` records we read from the file given the current configuration

# File lib/dreader.rb, line 292
# Print to stdout the first +n+ rows of the file, as seen through the
# current column specification.
#
# +args+ is merged over @options. Recognised keys read by this method:
# :filename, :sheet (default 0), :first_row (default 1), :n (default 10),
# :process (default true) and :check (default true).
#
# For each cell the raw value is shown and, depending on :process and
# :check, the processed value and the outcome of the check. The check is
# always run on the processed value; when :process already computed it,
# that value is reused so the process proc runs only once per cell.
#
# If +args+ is not a Hash an error is printed and the program exits.
# Exceptions raised by a process/check proc are reported on stdout and
# then re-raised.
#
# @param args [Hash] options overriding the ones stored in @options
def debug(args = {})
  unless args.is_a?(Hash)
    puts "dreader error at #{__callee__}: this function takes a Hash as input"
    exit
  end
  hash = @options.merge(args)

  # apply some defaults, if not defined in the options
  hash[:process] = true unless hash.key?(:process) # shall we apply the process function?
  hash[:check] = true unless hash.key?(:check)     # shall we check the data read?
  hash[:n] ||= 10

  spreadsheet = Dreader::Engine.open_spreadsheet(hash[:filename])
  sheet = spreadsheet.sheet(hash[:sheet] || 0)

  puts "Current configuration:"
  @options.each do |k, v|
    puts "  #{k}: #{v}"
  end

  puts "Configuration used by debug:"
  hash.each do |k, v|
    puts "  #{k}: #{v}"
  end

  n = hash[:n]
  first_row = hash[:first_row] || 1
  last_row = first_row + n - 1

  puts "  Last row (according to roo): #{sheet.last_row}"
  puts "  Number of rows I will read in this session: #{n} (from #{first_row} to #{last_row})"

  (first_row..last_row).each do |row_number|
    puts "Row #{row_number} is:"
    @colspec.each_with_index do |colspec, index|
      colname = colspec[:name]
      cell = sheet.cell(row_number, colspec[:colref])

      processed_str = ""
      checked_str = ""
      processed = nil
      already_processed = false

      if hash[:process]
        begin
          processed = colspec[:process] ? colspec[:process].call(cell) : cell
          already_processed = true
          processed_str = "processed: '#{processed}' (#{processed.class})"
        rescue => e
          puts "dreader error at #{__callee__}: 'process' specification for :#{colname} raised an exception at row #{row_number} (col #{index + 1}, value: #{cell})"
          raise e
        end
      end
      if hash[:check]
        begin
          # the check needs the processed value; compute it only if the
          # process step above was skipped
          unless already_processed
            processed = colspec[:process] ? colspec[:process].call(cell) : cell
          end
          check = colspec[:check] ? colspec[:check].call(processed) : "no check specified"
          checked_str = "checked: '#{check}'"
        rescue => e
          puts "dreader error at #{__callee__}: 'check' specification for #{colname} at row #{row_number} raised an exception (col #{index + 1}, value: #{cell})"
          raise e
        end
      end

      puts "  #{colname} => orig: '#{cell}' (#{cell.class}) #{processed_str} #{checked_str} (column: '#{colspec[:colref]}')"
    end
  end
end
errors() click to toggle source

return an array of strings with all the errors we have encountered; an empty array is good news

# File lib/dreader.rb, line 362
# Return the array of error messages collected so far.
#
# When @errors has not been set yet, an empty array is stored and
# returned, so callers always receive an Array and never nil.
#
# @return [Array] the collected error messages (empty when there are none)
def errors
  @errors ||= []
end
get_row(row_number) click to toggle source

get (processed) row number

  • row_number is the row to get: index starts at 1.

get_row(1) gets the first line read, that is, the row specified by `first_row` in `options` (or in read).

You need to invoke read first

# File lib/dreader.rb, line 279
# Return the row stored at position +row_number+ of @table (1-based).
#
# * If @table is not set or +row_number+ is past its end, an error is
#   printed and the program exits. The nil check makes the "did you
#   invoke read first?" hint reachable instead of failing on nil.
# * If +row_number+ is zero or negative, an error is printed and nil is
#   returned.
#
# @param row_number [Integer] position of the row, starting at 1
# @return the stored row, or nil when +row_number+ is zero or negative
def get_row(row_number)
  if @table.nil? || row_number > @table.size
    puts "dreader error at #{__callee__}: 'row_number' is out of range (did you invoke read first?)"
    exit
  elsif row_number <= 0
    puts "dreader error at #{__callee__}: 'row_number' is zero or negative (first row is 1)."
  else
    @table[row_number - 1]
  end
end
load(args = {})
Alias for: read
mapping(&block) click to toggle source

define what we do with each line we read

  • `block` is the code which takes a `row` as input and processes it. `row` is a hash in which each spreadsheet cell is accessible under its column name. Each cell has the following values: :value, :error, :row_number, :col_number

# File lib/dreader.rb, line 202
# Store +block+ in @mapping; it is the code later applied to each row.
#
# @param block the code to run on a row
# @return the stored block
def mapping(&block)
  @mapping = block
end
process() click to toggle source

apply the mapping code to every row of the table; it makes sense to invoke it only once

the mapping is applied only if it is defined

# File lib/dreader.rb, line 388
# Call the block stored in @mapping on every row of @table, in order.
# Rows are left untouched when no mapping has been stored.
#
# @return @table
def process
  @table.each { |row| @mapping&.call(row) }
end
read(args = {}) click to toggle source

read a file and store it internally

@param args, a hash, possibly overriding any of the parameters set in the initial options. This allows you, for instance, to apply the same column specification to different files and different sheets.

@return the data read from filename, in the form of an array of hashes
# File lib/dreader.rb, line 215
# Read a spreadsheet into @table and return it.
#
# +args+ is merged over @options. Keys read here: :filename, :sheet
# (default 0), :first_row (default 1) and :last_row (default: the last
# row reported by the sheet).
#
# Every row becomes a hash keyed by column name; each entry holds
# :row_number, :col_number (the column reference), :value (the cell after
# the optional process proc) and :error (true when a check proc is
# declared and returns a falsy value). A message is appended to @errors
# for every failed check.
#
# If +args+ is not a Hash an error is printed and the program exits.
# Exceptions raised by a process/check proc are reported on stdout and
# then re-raised.
#
# @param args [Hash] options overriding the ones stored in @options
# @return [Array] the rows read
def read(args = {})
  unless args.class == Hash
    puts "dreader error at #{__callee__}: this function takes a Hash as input"
    exit
  end
  hash = @options.merge(args)

  workbook = Dreader::Engine.open_spreadsheet(hash[:filename])
  sheet = workbook.sheet(hash[:sheet] || 0)

  # reset the state only once the file has been opened
  @table = []
  @errors = []

  first_row = hash[:first_row] || 1
  last_row = hash[:last_row] || sheet.last_row

  (first_row..last_row).each do |row_number|
    row = {}

    @colspec.each_with_index do |colspec, index|
      colname = colspec[:name]
      cell = sheet.cell(row_number, colspec[:colref])
      entry = row[colname] = {row_number: row_number, col_number: colspec[:colref]}

      begin
        processor = colspec[:process]
        entry[:value] = value = processor ? processor.call(cell) : cell
      rescue => e
        puts "dreader error at #{__callee__}: 'process' specification for :#{colname} raised an exception at row #{row_number} (col #{index + 1}, value: #{cell})"
        raise e
      end

      begin
        checker = colspec[:check]
        if !checker || checker.call(value)
          entry[:error] = false
        else
          entry[:error] = true
          @errors << "dreader error at #{__callee__}: value \"#{cell}\" for #{colname} at row #{row_number} (col #{index + 1}) does not pass the check function"
        end
      rescue => e
        puts "dreader error at #{__callee__}: 'check' specification for :#{colname} raised an exception at row #{row_number} (col #{index + 1}, value: #{cell})"
        raise e
      end
    end

    # rows are appended one by one so that @table holds the rows read so far
    @table << row
  end

  @table
end
Also aliased as: load
to_s() click to toggle source
# File lib/dreader.rb, line 394
# String form of the engine: the string conversion of @table.
#
# @return [String]
def to_s
  String(@table)
end
virtual_column(name, &block) click to toggle source

virtual columns define derived attributes; the code specified in the virtual column is executed after reading a row and before applying the mapping function

virtual column declarations are executed in the order in which they are defined

# File lib/dreader.rb, line 190
# Declare a virtual (derived) column through the column DSL.
#
# The block is evaluated in the context of a fresh Column; the resulting
# specification (Column#to_hash plus the :name entry) is appended to
# @virtualcols.
#
# @param name the name under which the virtual column is registered
# @param block the column declarations
def virtual_column(name, &block)
  spec = Column.new.tap { |col| col.instance_eval(&block) }
  @virtualcols.push(spec.to_hash.merge({name: name}))
end
virtual_columns() click to toggle source
# File lib/dreader.rb, line 366
# Compute every declared virtual column for every row of @table.
#
# For each row, the :process proc of each entry of @virtualcols is called
# with the row, in declaration order, and the result is stored in the row
# under the virtual column's name as { value: ..., virtual: true }.
#
# When a proc raises, a message is printed to stdout and the exception is
# re-raised. The row number in that message is taken from the first cell
# of the row carrying a :row_number; it is left blank when no cell has
# one (for instance an empty row), so that building the message can never
# hide the original exception.
#
# @return @table
def virtual_columns
  @table.each do |r|
    @virtualcols.each do |virtualcol|
      begin
        # add the cell to the table
        r[virtualcol[:name]] = {
          value: virtualcol[:process].call(r),
          virtual: true,
        }
      rescue => e
        located = r.values.find { |c| c.is_a?(Hash) && c.key?(:row_number) }
        row_number = located && located[:row_number]
        puts "dreader error at #{__callee__}: 'process' specification for :#{virtualcol[:name]} raised an exception at row #{row_number}"
        raise e
      end
    end
  end
end