module ODT2HTML::AnalyzeContent
Public Instance Methods
# File lib/odt2html/analyze_content.rb, line 4
#
# Drive the conversion of content.xml: set up namespaces and the dispatch
# table, register the automatic styles and list styles, then convert the
# body text into children of @body.
def analyze_content_xml
  # Per the original comment: reads the namespaces from the root element
  # and populates the namespace instance variables / namespace hash.
  get_namespaces
  create_dispatch_table

  root = @doc.root
  automatic_styles = "#{@office_ns}:automatic-styles"

  # style:style elements
  root.elements.each( "#{automatic_styles}/#{@style_ns}:style" ) do |style_el|
    process_style_style( style_el )
  end

  # text:list-style elements
  root.elements.each( "#{automatic_styles}/#{@text_ns}:list-style" ) do |list_style_el|
    process_text_list_style( list_style_el )
  end

  # document body
  root.elements.each( "#{@office_ns}:body/#{@office_ns}:text" ) do |text_el|
    process_children( text_el, @body )
  end
end
Emit an element with the given element_name
and attr_hash
(as attributes) as a child of the output_node
# File lib/odt2html/analyze_content.rb, line 299
#
# Add a child element named +element_name+ to +output_node+.
#
# output_node::  node that receives the new child (must respond to
#                +add_element+)
# element_name:: tag name of the new element
# attr_hash::    optional attribute name => value hash; entries whose value
#                is nil are ignored, and a hash with nothing left is passed
#                on as nil
#
# Returns whatever +output_node.add_element+ returns.
#
# The caller's hash is never modified: nil entries are filtered into a new
# hash instead of being deleted from +attr_hash+ while it is iterated.
def emit_element( output_node, element_name, attr_hash=nil )
  unless attr_hash.nil?
    attr_hash = attr_hash.reject { |_key, value| value.nil? }
    attr_hash = nil if attr_hash.empty?
  end
  output_node.add_element( element_name, attr_hash )
end
Modify the style attribute of output_element
by adding the given property
and value
Algorithm:
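If there's no style attribute, create it with the property and its value. If it exists, append a semicolon followed by the property and its value. (The comment originally described looking for the property first and broke off at "If it does exist,"; the code shown does not search for an existing occurrence of the property and always appends.)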
# File lib/odt2html/analyze_content.rb, line 322
#
# Append the declaration "property:value" to the +style+ attribute of
# +output_element+. When the attribute already exists the declaration is
# added after a ";" separator; otherwise the attribute is created.
# No check is made for an existing occurrence of +property+.
def modify_style_attribute( output_element, property, value )
  declaration = "#{property}:#{value}"
  existing = output_element.attribute("style")
  output_element.attributes["style"] =
    existing.nil? ? declaration : "#{existing.value};#{declaration}"
end
Process an element’s children. node: the context node; output_node: the node to which to add the children; xpath_expr: which children to process (default is all).
Algorithm: If the node is a text node, output to the destination. If it’s an element, munge its name into process_prefix_elementname
. If that method exists, call it to handle the element. Otherwise, process this node’s children recursively.
# File lib/odt2html/analyze_content.rb, line 44
#
# Process the children of +node+ selected by +xpath_expr+, adding output
# to +output_node+.
#
# node::        context node whose children are processed
# output_node:: node that receives generated elements and text
# xpath_expr::  XPath selecting which children to process (default: all)
#
# For each selected element the handler name
# <tt>process_<prefix>_<name></tt> is built, where <prefix> comes from
# @namespace_urn and ":" / "-" in the element name become "_". If the
# class defines that method it is called with (item, output_node);
# otherwise the element's own children are processed recursively.
# Elements whose namespace is not listed in @namespace_urn have no handler
# name and are also processed recursively (previously this raised a
# TypeError while building the name).
#
# Text nodes are copied unless they consist solely of whitespace. The test
# is anchored to the whole string (\A ... \z); the former ^ ... $ anchors
# matched any blank line inside the text and so discarded text that merely
# contained one.
def process_children( node, output_node, xpath_expr="node()" )
  REXML::XPath.each( node, xpath_expr ) do |item|
    if item.kind_of?(REXML::Element)
      prefix = @namespace_urn[item.namespace]
      handler = prefix && "process_#{prefix}_#{item.name.tr_s(':-', '__')}"
      if handler && self.class.method_defined?( handler )
        send( handler, item, output_node )
      else
        process_children( item, output_node )
      end
    elsif item.kind_of?(REXML::Text) && item.value !~ /\A\s*\z/
      output_node.add_text( item.value )
    end
  end

  # If nothing was produced, add an empty string to force separate begin
  # and end tags to be generated.
  unless output_node.has_elements? || output_node.has_text?
    output_node.add_text("")
  end
end
# File lib/odt2html/analyze_content.rb, line 194
#
# Convert a table element into an HTML <table> appended to +output_node+.
#
# element::     the source table element
# output_node:: node that receives the <table>
#
# Column elements are processed first, then header rows (inside a <thead>),
# then the remaining rows (inside a <tbody>).
#
# The <thead> is emitted only when the table actually has a
# table-header-rows child. REXML::XPath.match returns an Array, which is
# truthy even when empty, so the result must be tested with +empty?+;
# testing the array itself produced a <thead> for every table.
def process_table_table( element, output_node )
  style_name = register_style( element )
  table_el = emit_element( output_node, "table",
    {"class" => style_name, "cellpadding" => "0", "cellspacing" => "0"} )
  process_children( element, table_el, "#{@table_ns}:table-column" )

  header_rows = REXML::XPath.match( element, "#{@table_ns}:table-header-rows" )
  unless header_rows.empty?
    thead = emit_element( table_el, "thead" )
    process_children( element, thead,
      "#{@table_ns}:table-header-rows/#{@table_ns}:table-row" )
  end

  tbody = emit_element( table_el, "tbody" )
  process_children( element, tbody, "#{@table_ns}:table-row" )
end
# File lib/odt2html/analyze_content.rb, line 222
#
# Convert a table cell into one or more <td> elements under +output_node+.
# The cell's registered style (if any) becomes the class attribute, the
# number-columns-spanned / number-rows-spanned attributes become colspan /
# rowspan, and number-columns-repeated controls how many identical <td>
# elements are emitted (one when the attribute is absent).
def process_table_table_cell( element, output_node )
  cell_attrs = {}
  css_class = register_style( element )
  cell_attrs["class"] = css_class unless css_class.nil?

  repeated = element.attribute("#{@table_ns}:number-columns-repeated")
  copies = repeated.nil? ? 1 : repeated.value.to_i

  col_span = element.attribute("#{@table_ns}:number-columns-spanned")
  cell_attrs["colspan"] = col_span.value unless col_span.nil?

  row_span = element.attribute("#{@table_ns}:number-rows-spanned")
  cell_attrs["rowspan"] = row_span.value unless row_span.nil?

  (1..copies).each do |_copy|
    cell = emit_element( output_node, "td", cell_attrs )
    process_children( element, cell )
  end
end
# File lib/odt2html/analyze_content.rb, line 207
#
# Convert a table column into a <col> element under +output_node+. The
# registered style name is passed as the class attribute and the value of
# number-columns-repeated (nil when absent) as the span attribute.
def process_table_table_column( element, output_node )
  css_class = register_style( element )
  repeat_attr = element.attribute("#{@table_ns}:number-columns-repeated")
  span_value = repeat_attr.nil? ? nil : repeat_attr.value
  col_attrs = {"class" => css_class, "span" => span_value}
  emit_element( output_node, "col", col_attrs )
end
# File lib/odt2html/analyze_content.rb, line 216
#
# Convert a table row into a <tr> under +output_node+, then process only
# the row's table-cell children into that <tr>.
def process_table_table_row( element, output_node )
  row_attrs = {"class" => register_style( element )}
  row = emit_element( output_node, "tr", row_attrs )
  cell_path = "#{@table_ns}:table-cell"
  process_children( element, row, cell_path )
end
# File lib/odt2html/analyze_content.rb, line 130
#
# Convert a hyperlink element into an <a> under +output_node+. The href is
# taken from the element's href attribute in the @xlink_ns namespace; the
# link's children are processed into the new <a>.
def process_text_a( element, output_node )
  css_class = register_style( element )
  target = element.attribute("#{@xlink_ns}:href").value
  link_attrs = {"class" => css_class, "href" => target}
  anchor = emit_element( output_node, "a", link_attrs )
  process_children( element, anchor )
end
# File lib/odt2html/analyze_content.rb, line 142
#
# A bookmark is handled exactly like a bookmark-start: this method simply
# delegates to process_text_bookmark_start with the same arguments.
def process_text_bookmark( element, output_node )
  process_text_bookmark_start( element, output_node )
end
# File lib/odt2html/analyze_content.rb, line 146
#
# Emit a named anchor (<a name="...">) under +output_node+ for a bookmark.
# The anchor name is the element's name attribute in the @text_ns
# namespace. An empty text node is added to the anchor afterwards.
def process_text_bookmark_start( element, output_node )
  css_class = register_style( element )
  bookmark_name = element.attribute("#{@text_ns}:name").value
  anchor_attrs = {"class" => css_class, "name" => bookmark_name}
  emit_element( output_node, "a", anchor_attrs ).add_text("")
end
Headings are processed as <hn>
elements. The heading level comes from the text:outline-level
attribute, with a maximum of 6.
# File lib/odt2html/analyze_content.rb, line 103
#
# Convert a heading element into an <h1>..<h6> under +output_node+ and
# process its children into the new heading.
#
# element::     the source heading element
# output_node:: node that receives the heading
#
# The level comes from the outline-level attribute (in the @text_ns
# namespace) and is clamped to 1..6, because only <h1> to <h6> exist in
# HTML. A missing attribute is treated as level 1 instead of raising, and
# a value that converts to less than 1 (including non-numeric text, which
# +to_i+ turns into 0) no longer produces an "h0" element.
def process_text_h( element, output_node )
  style_name = register_style( element )
  level_attr = element.attribute("#{@text_ns}:outline-level")
  level = level_attr.nil? ? 1 : level_attr.value.to_i
  level = [[level, 1].max, 6].min
  heading = emit_element( output_node, "h#{level}", {"class" => style_name} )
  process_children( element, heading )
end
# File lib/odt2html/analyze_content.rb, line 138
#
# Emit a <br> element under +output_node+ for a line break. The source
# +element+ itself is not inspected.
def process_text_line_break( element, output_node )
  emit_element( output_node, "br" )
end
# File lib/odt2html/analyze_content.rb, line 154
#
# Convert a list element into a <ul> or <ol> under +output_node+ and
# process its children into the new list element.
#
# element::     the source list element
# output_node:: node that receives the list
#
# The nesting level is one more than the number of ancestor list elements.
# At level 1 the style name comes from the list's own style-name
# attribute; for nested lists it is looked up on an ancestor list via
# XPath. The CSS class is "<style-name>_<level>" with periods changed to
# underscores.
#
# If @style_info has an entry for that class it is marked as used, and the
# list becomes an <ol> when the entry has a list-style-type property whose
# value is not disc, circle or square. A class with no @style_info entry
# is tolerated (as register_style does) and yields a plain <ul>;
# previously it raised NoMethodError on nil.
def process_text_list( element, output_node )
  tag = "ul"
  level = REXML::XPath.match( element, "ancestor::#{@text_ns}:list" ).size + 1

  style_attr =
    if level == 1
      element.attribute("#{@text_ns}:style-name")
    else
      REXML::XPath.match( element,
        "ancestor::#{@text_ns}:list[last()]/@#{@text_ns}:style-name" )[0]
    end

  style_name = nil
  unless style_attr.nil?
    style_name = "#{style_attr.value}_#{level}".tr_s('.', '_')
    style = @style_info[style_name]
    unless style.nil?
      style.block_used = true
      # Determine whether this is a numbered or a bulleted list
      found = style.find { |obj| obj.property == "list-style-type" }
      tag = "ol" if found && !found.value.match(/disc|circle|square/)
    end
  end

  list_el = emit_element( output_node, tag, {"class" => style_name} )
  process_children( element, list_el )
end
List items are easy; just put the children inside a <li>
</li>
pair.
# File lib/odt2html/analyze_content.rb, line 188
#
# Wrap a list item's content in an <li> under +output_node+: emit the
# <li> with the registered style as its class, then process the item's
# children into it.
def process_text_list_item( element, output_node )
  li_attrs = {"class" => register_style( element )}
  list_item = emit_element( output_node, "li", li_attrs )
  process_children( element, list_item )
end
Create styles for each level of a <text:list-style>
element. For bulleted lists, it sets the bullet type by indexing into the marker
array; for numbered lists, it uses the numbering
hash to translate OpenDocument’s style:num-format
to the corresponding CSS list-style-type
.
# File lib/odt2html/analyze_content.rb, line 274
#
# Register a list-style-type for every level child of a list-style
# element. The selector for each level is "<style name>_<level>".
#
# * list-level-style-bullet children get a marker chosen by cycling
#   through circle, disc, square according to the level.
# * list-level-style-number children get the CSS numbering that
#   corresponds to their num-format attribute (nil for formats that are
#   not in the table).
#
# Other children are ignored.
def process_text_list_style( element )
  bullet_markers = ["circle", "disc", "square"]
  number_formats = {
    "1" => "decimal",
    "a" => "lower-alpha", "A" => "upper-alpha",
    "i" => "lower-roman", "I" => "upper-roman"
  }

  base_selector = element.attribute( "#{@style_ns}:name" ).value

  element.elements.each do |child|
    level = child.attribute("#{@text_ns}:level").value
    selector = base_selector + "_" + level

    case child.name
    when "list-level-style-bullet"
      bullet = bullet_markers[(level.to_i - 1) % 3]
      process_normal_style_attr( selector, "list-style-type", bullet )
    when "list-level-style-number"
      format = child.attribute("#{@style_ns}:num-format").value
      process_normal_style_attr( selector, "list-style-type",
        number_formats[format] )
    end
  end
end
Paragraphs are processed as <p>
elements. (The following statement is no longer valid:) A <text:p>
with no children will generate a <br />
.
# File lib/odt2html/analyze_content.rb, line 71
#
# Convert a paragraph element into a <p> under +output_node+.
#
# The class attribute is always supplied (its value may be nil). When the
# paragraph has the same style as the previous paragraph, that style has a
# top border, and the join-border check passes, the two paragraphs are
# merged visually: this one gets "border-top: none" and the previous one
# gets "border-bottom:none" added to its style attribute.
#
# The paragraph and its style are remembered in @previous_para and
# @previous_para_style for the next call. Children are processed only if
# the source paragraph has elements or text; an empty paragraph no longer
# gets a <br> child (that code is disabled).
def process_text_p( element, output_node )
  css_class = register_style( element )
  para_attrs = {"class" => css_class}

  repeats_previous_style = !css_class.nil? && @previous_para_style == css_class
  # NOTE(review): the join-border value is compared with the boolean false;
  # attribute_value is defined outside this view, so whether it can ever
  # return false (rather than the string "false") should be confirmed.
  if repeats_previous_style &&
     @style_info[css_class].has_top_border? &&
     element.attribute_value("#{@style_ns}:join-border") != false
    para_attrs["style"] = "border-top: none"
    modify_style_attribute( @previous_para, "border-bottom", "none" )
  end

  para = emit_element( output_node, "p", para_attrs )
  @previous_para_style = css_class
  @previous_para = para

  process_children( element, para ) if element.has_elements? || element.has_text?
end
# File lib/odt2html/analyze_content.rb, line 126
#
# Convert a space element (<text:s>) into literal space characters added
# to +output_node+.
#
# element::     the source space element
# output_node:: node that receives the text
#
# OpenDocument's text:s may carry a "c" attribute giving the number of
# spaces it stands for. That count is now honoured; previously exactly one
# space was emitted regardless. A missing attribute, or a value that
# converts to less than 1, yields a single space as before.
def process_text_s( element, output_node )
  count_attr = element.attribute("#{@text_ns}:c")
  count = count_attr.nil? ? 1 : count_attr.value.to_i
  count = 1 if count < 1
  output_node.add_text( " " * count )
end
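Text spans cannot produce a newline after their opening tag, so the extra ""
parameter is passed to emit_start_tag. (Note: the method as shown below calls emit_element and passes no such parameter, so this remark appears to describe different code.)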
# File lib/odt2html/analyze_content.rb, line 116
#
# Convert a text span into a <span> under +output_node+, using the
# registered style as its class, and process the span's children into it.
def process_text_span( element, output_node )
  span_attrs = {"class" => register_style( element )}
  html_span = emit_element( output_node, "span", span_attrs )
  process_children( element, html_span )
end
# File lib/odt2html/analyze_content.rb, line 122
#
# A tab element is rendered as a single space added to +output_node+.
# The source +element+ itself is not inspected.
def process_text_tab( element, output_node )
  tab_replacement = " "
  output_node.add_text( tab_replacement )
end
Return the style name for this element, with periods changed to underscores to make it valid CSS.
Side effect: registers this style as “having been used” in the document
# File lib/odt2html/analyze_content.rb, line 254
#
# Look up the style-name attribute of +element+ (using the element's own
# namespace prefix) and return it with periods changed to underscores, or
# nil when the element has no such attribute.
#
# Side effect: when @style_info has an entry for the resulting name, that
# entry's block_used flag is set to true.
def register_style( element )
  name_attr = element.attribute("#{element.prefix}:style-name")
  return nil if name_attr.nil?

  css_name = name_attr.value.tr_s('.', '_')
  info = @style_info[css_name]
  info.block_used = true unless info.nil?
  css_name
end