class Creeker::SharedStrings

Attributes

book[R]
dictionary[R]

Public Class Methods

new(book, multi_thread = false) click to toggle source
# File lib/creeker/shared_strings.rb, line 10
# Stores the owning book and immediately parses its shared strings.
#
# @param book the workbook object whose shared strings should be read
# @param multi_thread [Boolean] forwarded unchanged to
#   parse_shared_shared_strings
def initialize(book, multi_thread = false)
  @book = book
  parse_shared_shared_strings(multi_thread)
end
parse_shared_string_from_document(xml, multi_thread) click to toggle source
# File lib/creeker/shared_strings.rb, line 28
# Builds the shared-string lookup table from a parsed document.
#
# Every <si> element becomes one entry keyed by its zero-based position in
# the document. The value is the concatenated content of the <t> nodes found
# inside that element: one node for plain text, several for rich text
# fragments, and an empty string when there are none.
#
# @param xml [#css] parsed document; css('si') must return a collection
#   that responds to to_a, and each item must respond to css('t')
# @param multi_thread [Boolean] when truthy, text is extracted in slices of
#   10,000 items with one thread per slice; the result is identical to the
#   single-threaded path
# @return [Hash{Integer => String}] position => text
# @raise re-raises any exception raised inside a worker thread
def self.parse_shared_string_from_document(xml, multi_thread)
  items = xml.css('si').to_a
  # Joining handles the single-node and multi-fragment cases uniformly.
  extract_text = ->(si) { si.css('t').map(&:content).join }

  texts =
    if multi_thread
      # Each worker returns its own array, so no state is shared between
      # threads; Thread#value joins the worker and surfaces its errors.
      workers = items.each_slice(10_000).map do |slice|
        Thread.new { slice.map(&extract_text) }
      end
      workers.flat_map(&:value)
    else
      items.map(&extract_text)
    end

  # Slices are concatenated in document order, so the position in `texts`
  # is the global index of the shared string.
  dictionary = {}
  texts.each_with_index { |text, idx| dictionary[idx] = text }
  dictionary
end

Public Instance Methods

parse_shared_shared_strings(multi_thread) click to toggle source
# File lib/creeker/shared_strings.rb, line 15
# Reads "xl/sharedStrings.xml" from the book's files, if that entry exists,
# parses it as an XML document, and hands it to
# parse_shared_string_from_document.
#
# @param multi_thread [Boolean] forwarded unchanged to
#   parse_shared_string_from_document
# @return whatever parse_shared_string_from_document returns, or nil when
#   the entry does not exist
def parse_shared_shared_strings(multi_thread)
  shared_strings_entry = "xl/sharedStrings.xml"
  return unless @book.files.file.exist?(shared_strings_entry)

  stream = @book.files.file.open(shared_strings_entry)
  document = Nokogiri::XML::Document.parse(stream)
  parse_shared_string_from_document(document, multi_thread)
end
parse_shared_string_from_document(xml, multi_thread) click to toggle source
# File lib/creeker/shared_strings.rb, line 24
# Delegates to the class-level parser of the same name and stores its
# result in @dictionary.
#
# @param xml parsed document, passed through unchanged
# @param multi_thread [Boolean] passed through unchanged
# @return the value assigned to @dictionary
def parse_shared_string_from_document(xml, multi_thread)
  parsed = self.class.parse_shared_string_from_document(xml, multi_thread)
  @dictionary = parsed
end