class Saxlsx::RowsCollectionParser

Constants

NUM_FORMATS
SECONDS_IN_DAY

Public Class Methods

new(workbook, &block) click to toggle source
# File lib/saxlsx/rows_collection_parser.rb, line 44
def initialize(workbook, &block)
  @base_date      = workbook.base_date
  @auto_format    = workbook.auto_format
  @shared_strings = workbook.shared_strings
  @number_formats = workbook.number_formats
  @block = block
end
parse(index, data, workbook, &block) click to toggle source
# File lib/saxlsx/rows_collection_parser.rb, line 40
def self.parse(index, data, workbook, &block)
  SaxParser.parse self.new(workbook, &block), data
end

Public Instance Methods

attr(name, value) click to toggle source
# File lib/saxlsx/rows_collection_parser.rb, line 71
def attr(name, value)
  if @current_element == :c
    case name
    when :t
      @current_type = value
    when :r
      @current_column = value.gsub(/\d/, '')
    when :s
      @current_number_format = detect_format_type(value.to_i)
    end
  end
end
end_element(name) click to toggle source
# File lib/saxlsx/rows_collection_parser.rb, line 64
def end_element(name)
  if name == :row
    @block.call @current_row
    @current_row = nil
  end
end
start_element(name) click to toggle source
# File lib/saxlsx/rows_collection_parser.rb, line 52
def start_element(name)
  @current_element = name
  case name
  when :row
    @current_row = []
    @next_column = 'A'
  when :c
    @current_type = nil
    @current_number_format = nil
  end
end
text(value) click to toggle source
# File lib/saxlsx/rows_collection_parser.rb, line 84
def text(value)
  if @current_row && (@current_element == :v || @current_element == :t)
    while @next_column != @current_column
      @current_row << nil
      @next_column = ColumnNameGenerator.next_to(@next_column)
    end
    @current_row << value_of(value)
    @next_column = ColumnNameGenerator.next_to(@next_column)
  end
end

Private Instance Methods

detect_custom_format_type(code) click to toggle source

This is the least deterministic part of reading xlsx files. Due to custom styles, you can't know for sure when a date is a date other than looking at its format and gessing. It's not impossible to guess right, though.

stackoverflow.com/questions/4948998/determining-if-an-xlsx-cell-is-date-formatted-for-excel-2007-spreadsheets

# File lib/saxlsx/rows_collection_parser.rb, line 148
def detect_custom_format_type(code)
  code = code.gsub(/\[[^\]]+\]/, '') # Strip meta - [...]
  if code =~ /0/
    :float
  elsif code =~ /[ymdhis]/i
    :date_time
  else
    :unsupported
  end
end
detect_format_type(index) click to toggle source
# File lib/saxlsx/rows_collection_parser.rb, line 137
def detect_format_type(index)
  format = @number_formats[index]
  NUM_FORMATS[format] || detect_custom_format_type(format)
end
value_of(text) click to toggle source
# File lib/saxlsx/rows_collection_parser.rb, line 97
def value_of(text)
  case @current_type
  when 's'
    @shared_strings[text.to_i]
  when 'inlineStr'
    CGI.unescapeHTML(text)
  when 'b'
    BooleanParser.parse text
  else
    case @current_number_format
    when :date
      @base_date + Float(text)
    when :date_time
      # Round time to seconds
      date = @base_date + Rational((Float(text) * SECONDS_IN_DAY).round, SECONDS_IN_DAY)
      DateTime.new(date.year, date.month, date.day, date.hour, date.minute, date.second)
    when :fixnum
      Integer(text, 10)
    when :float, :percentage
      Float(text)
    when :rational
      Rational(text)
    when :bignum
      Float(text) # raises ArgumentError if text is not a number
      BigDecimal(text) # doesn't raise ArgumentError
    else
      if @current_type == 'n'
        Float(text)
      elsif @auto_format && text =~ /\A-?\d+(\.\d+(?:e[+-]\d+)?)?\Z/i
        # Auto convert numbers
        $1 ? Float(text) : Integer(text, 10)
      else
        CGI.unescapeHTML(text)
      end
    end
  end
rescue ArgumentError
  CGI.unescapeHTML(text)
end