class Roo::Excelx
Constants
- ERROR_VALUES
- ExceedsMaxError
Public Class Methods
initialization and opening of a spreadsheet file values for packed: :zip optional cell_max (int) parameter for early aborting attempts to parse enormous documents.
Roo::Base::new
# File lib/roo/excelx.rb, line 36 def initialize(filename_or_stream, options = {}) packed = options[:packed] file_warning = options.fetch(:file_warning, :error) cell_max = options.delete(:cell_max) sheet_options = {} sheet_options[:expand_merged_ranges] = (options[:expand_merged_ranges] || false) sheet_options[:no_hyperlinks] = (options[:no_hyperlinks] || false) sheet_options[:empty_cell] = (options[:empty_cell] || false) shared_options = {} shared_options[:disable_html_wrapper] = (options[:disable_html_wrapper] || false) unless is_stream?(filename_or_stream) file_type_check(filename_or_stream, %w[.xlsx .xlsm], 'an Excel 2007', file_warning, packed) basename = find_basename(filename_or_stream) end # NOTE: Create temp directory and allow Ruby to cleanup the temp directory # when the object is garbage collected. Initially, the finalizer was # created in the Roo::Tempdir module, but that led to a segfault # when testing in Ruby 2.4.0. @tmpdir = self.class.make_tempdir(self, basename, options[:tmpdir_root]) ObjectSpace.define_finalizer(self, self.class.finalize(object_id)) @shared = Shared.new(@tmpdir, shared_options) @filename = local_filename(filename_or_stream, @tmpdir, packed) process_zipfile(@filename || filename_or_stream) @sheet_names = [] @sheets = [] @sheets_by_name = {} workbook.sheets.each_with_index do |sheet, index| next if options[:only_visible_sheets] && sheet['state'] == 'hidden' sheet_name = sheet['name'] @sheet_names << sheet_name @sheets_by_name[sheet_name] = @sheets[index] = Sheet.new(sheet_name, @shared, index, sheet_options) end if cell_max cell_count = ::Roo::Utils.num_cells_in_range(sheet_for(options.delete(:sheet)).dimensions) raise ExceedsMaxError.new("Excel file exceeds cell maximum: #{cell_count} > #{cell_max}") if cell_count > cell_max end super rescue self.class.finalize_tempdirs(object_id) raise end
Public Instance Methods
Returns the content of a spreadsheet-cell. (1,1) is the upper left corner. (1,1), (1,‘A’), (‘A’,1), (‘a’,1) all refers to the cell at the first line and first row.
# File lib/roo/excelx.rb, line 114 def cell(row, col, sheet = nil) key = normalize(row, col) safe_send(sheet_for(sheet).cells[key], :value) end
returns the type of a cell:
-
:float
-
:string,
-
:date
-
:percentage
-
:formula
-
:time
-
:datetime
# File lib/roo/excelx.rb, line 198 def celltype(row, col, sheet = nil) key = normalize(row, col) safe_send(sheet_for(sheet).cells[key], :type) end
returns all values in this column as an array column numbers are 1,2,3,… like in the spreadsheet
# File lib/roo/excelx.rb, line 125 def column(column_number, sheet = nil) if column_number.is_a?(::String) column_number = ::Roo::Utils.letter_to_number(column_number) end sheet_for(sheet).column(column_number) end
returns the comment at (row/col) nil if there is no comment
# File lib/roo/excelx.rb, line 279 def comment(row, col, sheet = nil) key = normalize(row, col) sheet_for(sheet).comments[key] end
true, if there is a comment
# File lib/roo/excelx.rb, line 285 def comment?(row, col, sheet = nil) !!comment(row, col, sheet) end
# File lib/roo/excelx.rb, line 289 def comments(sheet = nil) sheet_for(sheet).comments.map do |(x, y), comment| [x, y, comment] end end
Yield an array of Excelx::Cell
Takes options for sheet, pad_cells, and max_rows
# File lib/roo/excelx.rb, line 297 def each_row_streaming(options = {}) sheet = sheet_for(options.delete(:sheet)) if block_given? sheet.each_row(options) { |row| yield row } else sheet.to_enum(:each_row, options) end end
# File lib/roo/excelx.rb, line 232 def empty?(row, col, sheet = nil) sheet = sheet_for(sheet) key = normalize(row, col) cell = sheet.cells[key] !cell || cell.empty? || (row < sheet.first_row || row > sheet.last_row || col < sheet.first_column || col > sheet.last_column) end
returns the internal format of an excel cell
# File lib/roo/excelx.rb, line 227 def excelx_format(row, col, sheet = nil) key = normalize(row, col) sheet_for(sheet).excelx_format(key) end
returns the internal type of an excel cell
-
:numeric_or_formula
-
:string
Note: this is only available within the Excelx
class
# File lib/roo/excelx.rb, line 207 def excelx_type(row, col, sheet = nil) key = normalize(row, col) safe_send(sheet_for(sheet).cells[key], :cell_type) end
returns the internal value of an excelx cell Note: this is only available within the Excelx
class
# File lib/roo/excelx.rb, line 214 def excelx_value(row, col, sheet = nil) key = normalize(row, col) safe_send(sheet_for(sheet).cells[key], :cell_value) end
returns the number of the first non-empty column
# File lib/roo/excelx.rb, line 143 def first_column(sheet = nil) sheet_for(sheet).first_column end
returns the number of the first non-empty row
# File lib/roo/excelx.rb, line 133 def first_row(sheet = nil) sheet_for(sheet).first_row end
Given a cell, return the cell’s style
# File lib/roo/excelx.rb, line 184 def font(row, col, sheet = nil) key = normalize(row, col) definition_index = safe_send(sheet_for(sheet).cells[key], :style) styles.definitions[definition_index] if definition_index end
returns the internal value of an excelx cell Note: this is only available within the Excelx
class
# File lib/roo/excelx.rb, line 221 def formatted_value(row, col, sheet = nil) key = normalize(row, col) safe_send(sheet_for(sheet).cells[key], :formatted_value) end
Returns the formula at (row,col). Returns nil if there is no formula. The method formula?
checks if there is a formula.
# File lib/roo/excelx.rb, line 163 def formula(row, col, sheet = nil) key = normalize(row, col) safe_send(sheet_for(sheet).cells[key], :formula) end
Predicate methods really should return a boolean value. Hopefully no one was relying on the fact that this previously returned either nil/formula
# File lib/roo/excelx.rb, line 171 def formula?(*args) !!formula(*args) end
returns each formula in the selected sheet as an array of tuples in following format
- [row, col, formula], [row, col, formula],…
# File lib/roo/excelx.rb, line 177 def formulas(sheet = nil) sheet_for(sheet).cells.select { |_, cell| cell.formula }.map do |(x, y), cell| [x, y, cell.formula] end end
returns the hyperlink at (row/col) nil if there is no hyperlink
# File lib/roo/excelx.rb, line 272 def hyperlink(row, col, sheet = nil) key = normalize(row, col) sheet_for(sheet).hyperlinks[key] end
# File lib/roo/excelx.rb, line 266 def hyperlink?(row, col, sheet = nil) !!hyperlink(row, col, sheet) end
# File lib/roo/excelx.rb, line 105 def images(sheet = nil) images_names = sheet_for(sheet).images.map(&:last) images_names.map { |iname| image_files.find { |ifile| ifile[iname] } } end
returns the row,col values of the labelled cell (nil,nil) if label is not defined
# File lib/roo/excelx.rb, line 248 def label(name) labels = workbook.defined_names return [nil, nil, nil] if labels.empty? || !labels.key?(name) [labels[name].row, labels[name].col, labels[name].sheet] end
Returns an array which all labels. Each element is an array with
- labelname, [row,col,sheetname]
# File lib/roo/excelx.rb, line 257 def labels @labels ||= workbook.defined_names.map do |name, label| [ name, [label.row, label.col, label.sheet] ] end end
returns the number of the last non-empty column
# File lib/roo/excelx.rb, line 148 def last_column(sheet = nil) sheet_for(sheet).last_column end
returns the number of the last non-empty row
# File lib/roo/excelx.rb, line 138 def last_row(sheet = nil) sheet_for(sheet).last_row end
Roo::Base#method_missing
# File lib/roo/excelx.rb, line 86 def method_missing(method, *args) if (label = workbook.defined_names[method.to_s]) safe_send(sheet_for(label.sheet).cells[label.key], :value) else # call super for methods like #a1 super end end
# File lib/roo/excelx.rb, line 119 def row(rownumber, sheet = nil) sheet_for(sheet).row(rownumber) end
# File lib/roo/excelx.rb, line 99 def sheet_for(sheet) sheet ||= default_sheet validate_sheet!(sheet) @sheets_by_name[sheet] || @sheets[sheet] end
# File lib/roo/excelx.rb, line 95 def sheets @sheet_names end
shows the internal representation of all cells for debugging purposes
# File lib/roo/excelx.rb, line 242 def to_s(sheet = nil) sheet_for(sheet).cells.inspect end
Private Instance Methods
# File lib/roo/excelx.rb, line 308 def clean_sheet(sheet) @sheets_by_name[sheet].cells.each_pair do |coord, value| next unless value.value.is_a?(::String) @sheets_by_name[sheet].cells[coord].value = sanitize_value(value.value) end @cleaned[sheet] = true end
# File lib/roo/excelx.rb, line 385 def extract_images(entries, tmpdir) img_entries = entries.select { |e| e.name[/media\/image([0-9]+)/] } img_entries.each do |entry| path = "#{@tmpdir}/roo#{entry.name.gsub(/xl\/|\//, "_")}" image_files << path entry.extract(path) end end
Extracts the sheets in order, but it will ignore sheets that are not worksheets.
# File lib/roo/excelx.rb, line 374 def extract_sheets_in_order(entries, sheet_ids, sheets, tmpdir) (sheet_ids & sheets.keys).each_with_index do |id, i| name = sheets[id] entry = entries.find { |e| "/#{e.name}" =~ /#{name}$/ } path = "#{tmpdir}/roo_sheet#{i + 1}" sheet_files << path @sheet_files << path entry.extract(path) end end
Internal: extracts the worksheet_ids from the workbook.xml file. xlsx
documents require a workbook.xml file, so a if the file is missing it is not a valid xlsx file. In these cases, an ArgumentError is raised.
wb - a Zip::Entry for the workbook.xml file. path - A String for Zip::Entry’s destination path.
Examples
extract_worksheet_ids(<Zip::Entry>, 'tmpdir/roo_workbook.xml') # => ["rId1", "rId2", "rId3"]
Returns an Array of Strings.
# File lib/roo/excelx.rb, line 332 def extract_worksheet_ids(entries, path) wb = entries.find { |e| e.name[/workbook.xml$/] } fail ArgumentError 'missing required workbook file' if wb.nil? wb.extract(path) workbook_doc = Roo::Utils.load_xml(path).remove_namespaces! workbook_doc.xpath('//sheet').map { |s| s['id'] } end
Internal
wb_rels - A Zip::Entry for the workbook.xml.rels file. path - A String for the Zip::Entry’s destination path.
Examples
extract_worksheets(<Zip::Entry>, 'tmpdir/roo_workbook.xml.rels') # => { "rId1"=>"worksheets/sheet1.xml", "rId2"=>"worksheets/sheet2.xml", "rId3"=>"worksheets/sheet3.xml" }
Returns a Hash.
# File lib/roo/excelx.rb, line 356 def extract_worksheet_rels(entries, path) wb_rels = entries.find { |e| e.name[/workbook.xml.rels$/] } fail ArgumentError 'missing required workbook file' if wb_rels.nil? wb_rels.extract(path) rels_doc = Roo::Utils.load_xml(path).remove_namespaces! relationships = rels_doc.xpath('//Relationship').select do |relationship| worksheet_types.include? relationship['Type'] end relationships.each_with_object({}) do |relationship, hash| hash[relationship['Id']] = relationship['Target'] end end
Extracts all needed files from the zip file
# File lib/roo/excelx.rb, line 395 def process_zipfile(zipfilename_or_stream) @sheet_files = [] unless is_stream?(zipfilename_or_stream) zip_file = Zip::File.open(zipfilename_or_stream) else zip_file = Zip::CentralDirectory.new zip_file.read_from_stream zipfilename_or_stream end process_zipfile_entries zip_file.to_a.sort_by(&:name) end
# File lib/roo/excelx.rb, line 408 def process_zipfile_entries(entries) # NOTE: When Google or Numbers 3.1 exports to xlsx, the worksheet filenames # are not in order. With Numbers 3.1, the first sheet is always # sheet.xml, not sheet1.xml. With Google, the order of the worksheets is # independent of a worksheet's filename (i.e. sheet6.xml can be the # first worksheet). # # workbook.xml lists the correct order of worksheets and # workbook.xml.rels lists the filenames for those worksheets. # # workbook.xml: # <sheet state="visible" name="IS" sheetId="1" r:id="rId3"/> # <sheet state="visible" name="BS" sheetId="2" r:id="rId4"/> # workbook.xml.rel: # <Relationship Id="rId4" Target="worksheets/sheet5.xml" Type="http://schemas.openxmlformats.org/officeDocument/2006/relationships/worksheet"/> # <Relationship Id="rId3" Target="worksheets/sheet4.xml" Type="http://schemas.openxmlformats.org/officeDocument/2006/relationships/worksheet"/> sheet_ids = extract_worksheet_ids(entries, "#{@tmpdir}/roo_workbook.xml") sheets = extract_worksheet_rels(entries, "#{@tmpdir}/roo_workbook.xml.rels") extract_sheets_in_order(entries, sheet_ids, sheets, @tmpdir) extract_images(entries, @tmpdir) entries.each do |entry| path = case entry.name.downcase when /richdata/ # FIXME: Ignore richData as parsing is not implemented yet and can cause # Zip::DestinationFileExistsError when including a second "styles.xml" entry # see http://schemas.microsoft.com/office/spreadsheetml/2017/richdata2 nil when /sharedstrings.xml$/ "#{@tmpdir}/roo_sharedStrings.xml" when /styles.xml$/ "#{@tmpdir}/roo_styles.xml" when /comments([0-9]+).xml$/ # FIXME: Most of the time, The order of the comment files are the same # the sheet order, i.e. sheet1.xml's comments are in comments1.xml. # In some situations, this isn't true. The true location of a # sheet's comment file is in the sheet1.xml.rels file. SEE # ECMA-376 12.3.3 in "Ecma Office Open XML Part 1". nr = Regexp.last_match[1].to_i comments_files[nr - 1] = "#{@tmpdir}/roo_comments#{nr}" when %r{chartsheets/_rels/sheet([0-9]+).xml.rels$} # NOTE: Chart sheet relationship files were interfering with # worksheets. nil when /sheet([0-9]+).xml.rels$/ # FIXME: Roo seems to use sheet[\d].xml.rels for hyperlinks only, but # it also stores the location for sharedStrings, comments, # drawings, etc. nr = Regexp.last_match[1].to_i rels_files[nr - 1] = "#{@tmpdir}/roo_rels#{nr}" when /drawing([0-9]+).xml.rels$/ # Extracting drawing relationships to make images lists for each sheet nr = Regexp.last_match[1].to_i image_rels[nr - 1] = "#{@tmpdir}/roo_image_rels#{nr}" end entry.extract(path) if path end end
# File lib/roo/excelx.rb, line 469 def safe_send(object, method, *args) object.send(method, *args) if object&.respond_to?(method) end
# File lib/roo/excelx.rb, line 473 def worksheet_types [ 'http://schemas.openxmlformats.org/officeDocument/2006/relationships/worksheet', # OOXML Transitional 'http://purl.oclc.org/ooxml/officeDocument/relationships/worksheet' # OOXML Strict ] end