class Roo::Excelx::SharedStrings

Public Instance Methods

[](index) click to toggle source
# File lib/roo/excelx/shared_strings.rb, line 8
# Looks up a shared string by position.
#
# Delegates to the array returned by to_a, so +index+ is interpreted
# exactly as Array#[] interprets it.
def [](index)
  strings = to_a
  strings[index]
end
to_a() click to toggle source
# File lib/roo/excelx/shared_strings.rb, line 12
# Returns the plain-text shared strings as an array.
#
# The array is built by extract_shared_strings on first use and cached
# in @array for subsequent calls.
def to_a
  return @array if @array

  @array = extract_shared_strings
end
to_html() click to toggle source
# File lib/roo/excelx/shared_strings.rb, line 16
# Returns the shared strings rendered as HTML strings, as an array.
#
# The array is built by extract_html on first use and cached in @html
# for subsequent calls.
def to_html
  return @html if @html

  @html = extract_html
end
use_html?(index) click to toggle source

Tells whether the string at index should be read from to_html (it contains HTML formatting) rather than from to_a. TODO: see what is happening with commit???

# File lib/roo/excelx/shared_strings.rb, line 22
# Tells whether the shared string at +index+ contains HTML formatting tags.
#
# Returns false when the :disable_html_wrapper option is set, or when
# to_html has no entry at +index+ (index out of range, or to_html is empty).
# Otherwise returns true if the HTML string contains at least one
# <b>, <i>, <u>, <sup> or <sub> opening tag, and false if it does not.
def use_html?(index)
  return false if @options[:disable_html_wrapper]

  html = to_html[index]
  # A missing entry used to raise NoMethodError on nil; treat it as "no HTML".
  return false if html.nil?

  !html[/<([biu]|sup|sub)>/].nil?
end

Private Instance Methods

create_html(text, formatting) click to toggle source

Returns an HTML string: text wrapped in an opening and closing tag for every entry of formatting whose value is true.

# File lib/roo/excelx/shared_strings.rb, line 137
# Wraps +text+ in HTML tags for every enabled entry of +formatting+.
#
# +formatting+ is iterated as tag => flag pairs; each pair with a truthy
# flag contributes an opening tag, in iteration order, before the text and
# the matching closing tag, in reverse order, after it. The text itself is
# appended unchanged.
def create_html(text, formatting)
  enabled_tags = formatting.each_with_object([]) do |(tag, active), tags|
    tags << tag if active
  end

  html = +""
  enabled_tags.each { |tag| html << "<#{tag}>" }
  html << text
  enabled_tags.reverse_each { |tag| html << "</#{tag}>" }
  html
end
extract_html() click to toggle source
# File lib/roo/excelx/shared_strings.rb, line 58
# Builds one HTML string per <si> entry of the shared strings document.
#
# Returns an empty array when doc_exists? is false. Otherwise every
# /sst/si element becomes a string wrapped in <html>...</html>: <r> children
# are rendered through extract_html_r, the content of <t> children is
# appended as is, and any other child is ignored.
def extract_html
  return [] unless doc_exists?

  # fix_invalid_shared_strings returns a new document when it has to repair
  # anything, so the returned document (not the original doc) must be read.
  document = fix_invalid_shared_strings(doc)
  document.xpath('/sst/si').map do |si|
    html_string = +'<html>'
    si.children.each do |elem|
      case elem.name
      when 'r'
        html_string << extract_html_r(elem)
      when 't'
        html_string << elem.content
      end
    end
    html_string << '</html>'
  end
end
extract_html_r(r_elem) click to toggle source

The goal of this function is to take the following XML code snippet and create the corresponding HTML tags.

r_elem :

XML Element that is in sharedStrings.xml of excel_book.xlsx

{code:xml} <r>

<rPr>
   <i/>
   <b/>
   <u/>
   <vertAlign val="subscript"/>
   <vertAlign val="superscript"/>
</rPr>
<t>TEXT</t>

</r> {code}

Expected Output :

"<html><sub|sup><b><i><u>TEXT</u></i></b></sub|/sup></html>"

# File lib/roo/excelx/shared_strings.rb, line 92
# Renders one <r> (run) element of a shared string as an HTML fragment.
#
# The children of +r_elem+ are processed in order: an <rPr> child switches
# on formatting flags (b, i, u, and sub/sup from vertAlign), and each <t>
# child is passed to create_html together with the flags collected so far.
# Returns the concatenated result; an empty string when there is no <t>.
def extract_html_r(r_elem)
  str = +""
  # Key order here is the order in which create_html receives the flags.
  xml_elems = {
    sub: false,
    sup: false,
    b:   false,
    i:   false,
    u:   false
  }
  r_elem.children.each do |elem|
    case elem.name
    when 'rPr'
      elem.children.each do |rPr_elem|
        case rPr_elem.name
        when 'b', 'i', 'u'
          # Bold, italics and underline map directly onto the tag of the same name.
          xml_elems[rPr_elem.name.to_sym] = true
        when 'vertAlign'
          # A vertAlign without a val attribute is ignored instead of raising
          # NoMethodError on nil.
          case rPr_elem.xpath('@val').first&.value
          when 'subscript'
            # Subscript and superscript are mutually exclusive.
            xml_elems[:sub] = true
            xml_elems[:sup] = false
          when 'superscript'
            xml_elems[:sup] = true
            xml_elems[:sub] = false
          end
        end
      end
    when 't'
      str << create_html(elem.content, xml_elems)
    end
  end
  str
end
extract_shared_strings() click to toggle source
# File lib/roo/excelx/shared_strings.rb, line 37
# Builds the plain-text value of every <si> entry of the shared strings
# document.
#
# Returns an empty array when doc_exists? is false. Otherwise each /sst/si
# element yields one string: a <t> child replaces whatever was collected so
# far with its content, an <r> child appends the content of each of its <t>
# children, and any other child is ignored.
def extract_shared_strings
  return [] unless doc_exists?

  sanitized = fix_invalid_shared_strings(doc)
  sanitized.xpath('/sst/si').map do |si|
    si.children.reduce(+"") do |text, node|
      case node.name
      when 't'
        # A direct <t> replaces the accumulated text rather than appending.
        node.content
      when 'r'
        node.children.each do |run_child|
          text << run_child.content if run_child.name == 't'
        end
        text
      else
        text
      end
    end
  end
end
fix_invalid_shared_strings(doc) click to toggle source
# File lib/roo/excelx/shared_strings.rb, line 29
# Replaces invalid escape sequences in the shared strings document.
#
# +doc+ is serialized with to_s; when the text contains none of the known
# invalid sequences, +doc+ itself is returned unchanged. Otherwise every
# occurrence is replaced ('_x000D_' becomes a newline) and the result is
# re-parsed into a new Nokogiri XML document, which is returned.
def fix_invalid_shared_strings(doc)
  replacements = { '_x000D_'  => "\n" }
  pattern = /#{replacements.keys.join('|')}/
  serialized = doc.to_s

  if serialized[pattern]
    ::Nokogiri::XML(serialized.gsub(pattern, replacements))
  else
    doc
  end
end