class Htmltoword::Document
Public Class Methods
content_types_xml_file()
click to toggle source
# File lib/htmltoword/document.rb, line 43 def content_types_xml_file '[Content_Types].xml' end
create(content, template_name = nil, extras = false)
click to toggle source
# File lib/htmltoword/document.rb, line 7 def create(content, template_name = nil, extras = false) template_name += extension if template_name && !template_name.end_with?(extension) document = new(template_file(template_name)) document.replace_files(content, extras) document.generate end
create_and_save(content, file_path, template_name = nil, extras = false)
click to toggle source
# File lib/htmltoword/document.rb, line 14 def create_and_save(content, file_path, template_name = nil, extras = false) File.open(file_path, 'wb') do |out| out << create(content, template_name, extras) end end
create_with_content(template, content, extras = false)
click to toggle source
# File lib/htmltoword/document.rb, line 20 def create_with_content(template, content, extras = false) template += extension unless template.end_with?(extension) document = new(template_file(template)) document.replace_files(content, extras) document.generate end
doc_xml_file()
click to toggle source
# File lib/htmltoword/document.rb, line 31 def doc_xml_file 'word/document.xml' end
extension()
click to toggle source
# File lib/htmltoword/document.rb, line 27 def extension '.docx' end
new(template_path)
click to toggle source
# File lib/htmltoword/document.rb, line 48 def initialize(template_path) @replaceable_files = {} @template_path = template_path @image_files = [] end
numbering_xml_file()
click to toggle source
# File lib/htmltoword/document.rb, line 35 def numbering_xml_file 'word/numbering.xml' end
relations_xml_file()
click to toggle source
# File lib/htmltoword/document.rb, line 39 def relations_xml_file 'word/_rels/document.xml.rels' end
Public Instance Methods
generate()
click to toggle source
Generate a string representing the contents of a docx file.
# File lib/htmltoword/document.rb, line 57 def generate Zip::File.open(@template_path) do |template_zip| buffer = Zip::OutputStream.write_buffer do |out| template_zip.each do |entry| out.put_next_entry entry.name if @replaceable_files[entry.name] && entry.name == Document.doc_xml_file source = entry.get_input_stream.read # Change only the body of document. TODO: Improve this... source = source.sub(/(<w:body>)((.|\n)*?)(<w:sectPr)/, "\\1#{@replaceable_files[entry.name]}\\4") out.write(source) elsif @replaceable_files[entry.name] out.write(@replaceable_files[entry.name]) elsif entry.name == Document.content_types_xml_file raw_file = entry.get_input_stream.read content_types = @image_files.empty? ? raw_file : inject_image_content_types(raw_file) out.write(content_types) else out.write(template_zip.read(entry.name)) end end unless @image_files.empty? #stream the image files into the media folder using open-uri @image_files.each do |hash| out.put_next_entry("word/media/#{hash[:filename]}") URI.open(hash[:url], 'rb') do |f| out.write(f.read) end end end end buffer.string end end
replace_files(html, extras = false)
click to toggle source
# File lib/htmltoword/document.rb, line 92 def replace_files(html, extras = false) html = '<body></body>' if html.nil? || html.empty? original_source = Nokogiri::HTML(html.gsub(/>\s+</, '><')) source = xslt(stylesheet_name: 'cleanup').transform(original_source) transform_and_replace(source, xslt_path('numbering'), Document.numbering_xml_file) transform_and_replace(source, xslt_path('relations'), Document.relations_xml_file) transform_doc_xml(source, extras) local_images(source) end
transform_doc_xml(source, extras = false)
click to toggle source
# File lib/htmltoword/document.rb, line 102 def transform_doc_xml(source, extras = false) transformed_source = xslt(stylesheet_name: 'cleanup').transform(source) transformed_source = xslt(stylesheet_name: 'inline_elements').transform(transformed_source) transform_and_replace(transformed_source, document_xslt(extras), Document.doc_xml_file, extras) end
Private Instance Methods
content_type_from_extension(ext)
click to toggle source
get extension from filename and clean to match content_types
# File lib/htmltoword/document.rb, line 129 def content_type_from_extension(ext) ext == "jpg" ? "jpeg" : ext end
inject_image_content_types(source)
click to toggle source
inject the required content_types into the [content_types].xml fileā¦
# File lib/htmltoword/document.rb, line 134 def inject_image_content_types(source) doc = Nokogiri::XML(source) #get a list of all extensions currently in content_types file existing_exts = doc.css("Default").map { |node| node.attribute("Extension").value }.compact #get a list of extensions we need for our images required_exts = @image_files.map{ |i| i[:ext] } #workout which required extensions are missing from the content_types file missing_exts = (required_exts - existing_exts).uniq #inject missing extensions into document missing_exts.each do |ext| doc.at_css("Types").add_child( "<Default Extension='#{ext}' ContentType='image/#{content_type_from_extension(ext)}'/>") end #return the amended source to be saved into the zip doc.to_s end
local_images(source)
click to toggle source
generates an array of hashes with filename and full url for all images to be embeded in the word document
# File lib/htmltoword/document.rb, line 119 def local_images(source) source.css('img').each_with_index do |image,i| filename = image['data-filename'] ? image['data-filename'] : image['src'].split("/").last ext = File.extname(filename).delete(".").downcase @image_files << { filename: "image#{i+1}.#{ext}", url: image['src'], ext: ext } end end
transform_and_replace(source, stylesheet_path, file, remove_ns = false)
click to toggle source
# File lib/htmltoword/document.rb, line 110 def transform_and_replace(source, stylesheet_path, file, remove_ns = false) stylesheet = xslt(stylesheet_path: stylesheet_path) content = stylesheet.apply_to(source) content.gsub!(/\s*xmlns:(\w+)="(.*?)\s*"/, '') if remove_ns @replaceable_files[file] = content end