class Dreader::Engine
This is where the real stuff begins
Attributes
the specification of the columns to process
readable for debugging purposes the options we passed
the data we read
the specification of the virtual columns
Public Class Methods
# File lib/dreader.rb, line 111
# Set up an engine with an empty configuration.
#
# Besides the options and the (virtual) column specifications, the row table
# and the error list also start out as empty arrays: this way `errors`,
# `to_s`, `process` and `get_row` can be invoked before `read` without
# tripping over a nil instance variable.
def initialize
  @options = {}
  @colspec = []
  @virtualcols = []
  @table = []
  @errors = []
end
Private Class Methods
# File lib/dreader.rb, line 400
# Open +filename+ with the Roo reader matching its extension.
#
# The extension is compared case-insensitively, so that "DATA.CSV" and
# "data.csv" are handled alike.
#
# filename:: path of the spreadsheet (.csv, .tsv, .ods, .xls or .xlsx)
#
# Returns the Roo object built for the file.
# Raises RuntimeError if the extension is not one of those listed above.
def self.open_spreadsheet(filename)
  extension = File.extname(filename)
  case extension.downcase
  when ".csv" then Roo::CSV.new(filename)
  when ".tsv" then Roo::CSV.new(filename, csv_options: {col_sep: "\t"})
  when ".ods" then Roo::OpenOffice.new(filename)
  when ".xls" then Roo::Excel.new(filename)
  when ".xlsx" then Roo::Excelx.new(filename)
  else
    # report the extension exactly as it was written in the file name
    raise "Unknown extension: #{extension}"
  end
end
Public Instance Methods
bulk declare columns we intend to read
-
hash is a hash in the form { symbolic_name: colref }
i.bulk_declare {name: 'B', age: 'C'} is equivalent to:
i.column :name do
colref 'B'
end
i.column :age do
colref 'C'
end
i.bulk_declare {name: 'B', age: 'C'} do
process do |cell| cell.strip end
end
is equivalent to:
i.column :name do
colref 'B'
process do |cell| cell.strip end
end
i.column :age do
colref 'C'
process do |cell| cell.strip end
end
# File lib/dreader.rb, line 172
# Declare several columns in one go.
#
# hash::  maps the symbolic name of each column to its column reference
# block:: optional; when given, it is evaluated (through instance_eval) in
#         the context of every column being declared
#
# One entry per key is appended to the column specification.
def bulk_declare(hash, &block)
  names = hash.keys
  names.each do |name|
    spec = Column.new
    spec.colref hash[name]
    spec.instance_eval(&block) unless block.nil?
    @colspec << spec.to_hash.merge({name: name})
  end
end
define a DSL for column specification
-
`name` is the name of the column
-
`block` contains two declarations, `process` and `check`, which are used, respectively, to make a cell into the desired data and to check whether the desired data is ok
# File lib/dreader.rb, line 132
# Declare a column through a small DSL.
#
# name::  symbolic name under which the column is stored
# block:: the declarations for the column, evaluated (through instance_eval)
#         in the context of a new Column
#
# Raises ArgumentError if no block is given: the check is made up front so
# that the caller gets a message naming the column, rather than the arity
# error instance_eval raises when it receives neither a string nor a block.
def column name, &block
  unless block
    raise ArgumentError, "dreader error at #{__callee__}: a block is required to declare column :#{name}"
  end

  spec = Column.new
  spec.instance_eval(&block)
  @colspec << spec.to_hash.merge({name: name})
end
show to stdout the first `n` records we read from the file given the current configuration
# File lib/dreader.rb, line 292
# Print to stdout what the current configuration reads from the file.
#
# args:: a Hash (or an instance of a Hash subclass) overriding the options
#        currently set. Besides the options, it understands:
#        :process   whether the `process` code is applied (default: true)
#        :check     whether the `check` code is applied (default: true)
#        :n         how many rows are shown (default: 10)
#        :first_row the first row shown (default: 1)
#
# If `args` is not a Hash, an error message is printed and the program exits.
# An exception raised by a `process` or `check` specification is reported on
# stdout and then re-raised.
def debug args = {}
  unless args.is_a?(Hash)
    puts "dreader error at #{__callee__}: this function takes a Hash as input"
    exit
  end
  hash = @options.merge(args)

  # apply some defaults, if not defined in the options
  hash[:process] = true unless hash.key?(:process) # shall we apply the process function?
  hash[:check] = true unless hash.key?(:check) # shall we check the data read?
  hash[:n] ||= 10

  spreadsheet = Dreader::Engine.open_spreadsheet(hash[:filename])
  sheet = spreadsheet.sheet(hash[:sheet] || 0)

  puts "Current configuration:"
  @options.each { |k, v| puts " #{k}: #{v}" }

  puts "Configuration used by debug:"
  hash.each { |k, v| puts " #{k}: #{v}" }

  n = hash[:n]
  first_row = hash[:first_row] || 1
  last_row = first_row + n - 1

  puts " Last row (according to roo): #{sheet.last_row}"
  puts " Number of rows I will read in this session: #{n} (from #{first_row} to #{last_row})"

  (first_row..last_row).each do |row_number|
    puts "Row #{row_number} is:"
    @colspec.each_with_index do |spec, index|
      colname = spec[:name]
      cell = sheet.cell(row_number, spec[:colref])

      processed = nil
      processed_str = ""
      checked_str = ""

      if hash[:process]
        begin
          processed = spec[:process] ? spec[:process].call(cell) : cell
          processed_str = "processed: '#{processed}' (#{processed.class})"
        rescue => e
          # this is a failure of the 'process' step: say so in the message
          puts "dreader error at #{__callee__}: 'process' specification for :#{colname} raised an exception at row #{row_number} (col #{index + 1}, value: #{cell})"
          raise e
        end
      end

      if hash[:check]
        begin
          # the check always works on the processed value; compute it here
          # only if the step above was skipped, so that the process code
          # is not run twice on the same cell
          processed = (spec[:process] ? spec[:process].call(cell) : cell) unless hash[:process]
          check = spec[:check] ? spec[:check].call(processed) : "no check specified"
          checked_str = "checked: '#{check}'"
        rescue => e
          puts "dreader error at #{__callee__}: 'check' specification for #{colname} at row #{row_number} raised an exception (col #{index + 1}, value: #{cell})"
          raise e
        end
      end

      puts " #{colname} => orig: '#{cell}' (#{cell.class}) #{processed_str} #{checked_str} (column: '#{spec[:colref]}')"
    end
  end
end
Returns an array of strings with all the errors we have encountered. An empty array is good news.
# File lib/dreader.rb, line 362
# Return the array of error messages collected so far.
#
# An array is returned even if no error list has been created yet, so
# that the result can always be treated as a (possibly empty) array.
def errors
  @errors ||= []
end
get (processed) row number
-
row_number is the row to get: index starts at 1.
get_row(1) gets the first line read, that is, the row specified by `first_row` in `options` (or in read)
You need to invoke read first
# File lib/dreader.rb, line 279
# Get the (processed) row at position `row_number`.
#
# row_number:: position of the row among those read; the first row is 1
#
# Returns the row stored at that position.
# If `row_number` is beyond the rows available (which includes the case in
# which nothing has been read yet), an error message is printed and the
# program exits. If `row_number` is zero or negative, an error message is
# printed and nil is returned.
def get_row row_number
  # no table at all counts as zero rows available, so that the message
  # below is printed instead of failing on a nil table
  available = @table ? @table.size : 0

  if row_number > available
    puts "dreader error at #{__callee__}: 'row_number' is out of range (did you invoke read first?)"
    exit
  elsif row_number <= 0
    puts "dreader error at #{__callee__}: 'row_number' is zero or negative (first row is 1)."
  else
    @table[row_number - 1]
  end
end
define what we do with each line we read
-
`block` is the code which takes a `row` as input and processes it. `row` is a hash in which each spreadsheet cell is accessible under the column names. Each cell has the following values: :value, :error, :row_number, :col_number
# File lib/dreader.rb, line 202
# Remember the code to be run on each row.
#
# block:: stored as is; `process` later calls it once per row, passing
#         the row as its only argument
def mapping(&block)
  @mapping = block
end
Applies the mapping code to the array of rows. It makes sense to invoke it only once.
The mapping is applied only if it is defined.
# File lib/dreader.rb, line 388
# Run the mapping code on every row of the table.
#
# Rows are visited in order; when no mapping has been set, the rows are
# still walked but nothing is called. Returns the table.
def process
  @table.each do |row|
    next unless @mapping

    @mapping.call(row)
  end
end
read a file and store it internally
@param hash, a hash, possibly overriding any of the parameters
set in the initial options. This allows you, for instance, to apply the same column specification to different files and different sheets
@return the data read from filename, in the form of an array of
hashes
# File lib/dreader.rb, line 215
# Read a file and store its content internally.
#
# args:: a Hash (or an instance of a Hash subclass) overriding any of the
#        options currently set; the keys used here are :filename, :sheet
#        (default: 0), :first_row (default: 1) and :last_row (default: the
#        last row reported by the sheet)
#
# Returns the table read: an array with one hash per row, in which each
# column name maps to a hash with keys :row_number, :col_number (the column
# reference), :value (the processed cell) and :error (true if the value did
# not pass the check).
#
# A message is added to the error list for every value failing its check.
# If `args` is not a Hash, an error message is printed and the program exits.
# An exception raised by a `process` or `check` specification is reported on
# stdout and then re-raised.
def read args = {}
  unless args.is_a?(Hash)
    puts "dreader error at #{__callee__}: this function takes a Hash as input"
    exit
  end
  hash = @options.merge(args)

  spreadsheet = Dreader::Engine.open_spreadsheet(hash[:filename])
  sheet = spreadsheet.sheet(hash[:sheet] || 0)

  # every read starts from scratch
  @table = []
  @errors = []

  first_row = hash[:first_row] || 1
  last_row = hash[:last_row] || sheet.last_row

  (first_row..last_row).each do |row_number|
    row = {}
    @colspec.each_with_index do |spec, index|
      cell = sheet.cell(row_number, spec[:colref])
      colname = spec[:name]

      entry = {row_number: row_number, col_number: spec[:colref]}
      row[colname] = entry

      begin
        value = spec[:process] ? spec[:process].call(cell) : cell
        entry[:value] = value
      rescue => e
        puts "dreader error at #{__callee__}: 'process' specification for :#{colname} raised an exception at row #{row_number} (col #{index + 1}, value: #{cell})"
        raise e
      end

      begin
        if spec[:check] && !spec[:check].call(value)
          entry[:error] = true
          @errors << "dreader error at #{__callee__}: value \"#{cell}\" for #{colname} at row #{row_number} (col #{index + 1}) does not pass the check function"
        else
          entry[:error] = false
        end
      rescue => e
        puts "dreader error at #{__callee__}: 'check' specification for :#{colname} raised an exception at row #{row_number} (col #{index + 1}, value: #{cell})"
        raise e
      end
    end
    @table << row
  end

  @table
end
# File lib/dreader.rb, line 394
# String representation of the engine: that of the table of rows.
def to_s
  @table.to_s
end
Virtual columns define derived attributes. The code specified in the virtual column is executed after reading a row and before applying the mapping function.
Virtual column declarations are executed in the order in which they are defined.
# File lib/dreader.rb, line 190
# Declare a virtual column.
#
# name::  symbolic name under which the virtual column is stored
# block:: the declarations for the column, evaluated (through instance_eval)
#         in the context of a new Column
#
# Raises ArgumentError if no block is given: the check is made up front so
# that the caller gets a message naming the column, rather than the arity
# error instance_eval raises when it receives neither a string nor a block.
def virtual_column name, &block
  unless block
    raise ArgumentError, "dreader error at #{__callee__}: a block is required to declare virtual column :#{name}"
  end

  spec = Column.new
  spec.instance_eval(&block)
  @virtualcols << spec.to_hash.merge({name: name})
end
# File lib/dreader.rb, line 366
# Execute the virtual column specifications on every row of the table.
#
# For each row and each virtual column, in declaration order, the `process`
# code of the virtual column is called with the row, and the result is
# stored in the row under the column name as { value: ..., virtual: true }.
#
# An exception raised by the `process` code is reported on stdout and then
# re-raised.
def virtual_columns
  @table.each do |row|
    @virtualcols.each do |virtualcol|
      begin
        # add the cell to the table
        row[virtualcol[:name]] = {
          value: virtualcol[:process].call(row),
          virtual: true
        }
      rescue => e
        # Take the row number from the first cell carrying one. A row may
        # have no such cell (e.g., it is empty): the lookup must not fail
        # in that case, or it would hide the exception being reported.
        numbered = row.values.find { |cell| cell.is_a?(Hash) && cell.key?(:row_number) }
        row_number = numbered ? numbered[:row_number] : nil
        puts "dreader error at #{__callee__}: 'process' specification for :#{virtualcol[:name]} raised an exception at row #{row_number}"
        raise e
      end
    end
  end
end