class JekyllImport::Importers::WordpressDotCom
Public Class Methods
download_images(title, post_hpricot, assets_folder)
click to toggle source
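Note: this method modifies the post's DOM tree in place — the `src` attribute of each image is rewritten to point at the local copy under the assets folder.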
# File lib/jekyll-import/importers/wordpressdotcom.rb, line 25
#
# Downloads the images referenced by a post into +assets_folder+ and rewrites
# each image's "src" attribute to "{{ site.baseurl }}/<local path>".
# The passed DOM tree is modified in place.
#
# title         - post title; only used in the log output.
# post_hpricot  - parsed post body; queried with `/ "img"` for image elements.
# assets_folder - directory the image files are stored in (created on demand).
#
# Files that already exist in +assets_folder+ are not downloaded again.
# Download errors are logged and do not abort the remaining images.
#
# Returns nil when the post contains no images.
def self.download_images(title, post_hpricot, assets_folder)
  images = (post_hpricot / "img")
  return if images.empty?

  Jekyll.logger.info "Downloading images for ", title
  images.each do |i|
    uri = i["src"]
    # An image without a src has nothing to download; File.basename(nil)
    # would raise a TypeError here, outside of the rescue below.
    next if uri.to_s.empty?

    dst = File.join(assets_folder, File.basename(uri))
    i["src"] = File.join("{{ site.baseurl }}", dst)
    Jekyll.logger.info uri
    if File.exist?(dst)
      Jekyll.logger.info "Already in cache. Clean assets folder if you want a redownload."
      next
    end
    begin
      FileUtils.mkdir_p assets_folder
      OpenURI.open_uri(uri, :allow_redirections => :safe) do |f|
        File.open(dst, "wb") do |out|
          # write, not puts: puts appends "\n" when the data does not end in
          # one, which would alter the bytes of a binary image file.
          out.write f.read
        end
      end
      Jekyll.logger.info "OK!"
    rescue StandardError => e
      Jekyll.logger.error "Error: #{e.message}"
      Jekyll.logger.error e.backtrace.join("\n")
    end
  end
end
process(options)
click to toggle source
# File lib/jekyll-import/importers/wordpressdotcom.rb, line 140
#
# Imports every <item> of a WordPress export file.
#
# options - Hash with the (optional) String keys:
#           "source"          - path of the export XML (default "wordpress.xml")
#           "no_fetch_images" - truthy to skip downloading images (default false)
#           "assets_folder"   - folder for downloaded images (default "assets")
#
# For each item a file named item.file_name is written into
# item.directory_name, containing the YAML header, a "---" line and the
# converted post content. Items that fail are logged and skipped. At the end
# the number of imported items per post type is logged.
def self.process(options)
  source        = options.fetch("source", "wordpress.xml")
  fetch         = !options.fetch("no_fetch_images", false)
  assets_folder = options.fetch("assets_folder", "assets")
  FileUtils.mkdir_p(assets_folder)

  import_count = Hash.new(0)
  doc = Hpricot::XML(File.read(source))

  # Author details from the channel header, keyed by login.
  # Any error while reading them leaves the lookup table empty.
  authors =
    begin
      (doc / :channel / "wp:author").map do |author|
        details = {
          "login"        => author.at("wp:author_login").inner_text.strip,
          "email"        => author.at("wp:author_email").inner_text,
          "display_name" => author.at("wp:author_display_name").inner_text,
          "first_name"   => author.at("wp:author_first_name").inner_text,
          "last_name"    => author.at("wp:author_last_name").inner_text,
        }
        [author.at("wp:author_login").inner_text.strip, details]
      end.to_h
    rescue StandardError
      {}
    end

  (doc / :channel / :item).each do |node|
    item = Item.new(node)

    category_names = node.search('category[@domain="category"]').map(&:inner_text)
    categories = category_names.reject { |name| name == "Uncategorized" }.uniq
    tags = node.search('category[@domain="post_tag"]').map(&:inner_text).uniq

    metas = node.search("wp:postmeta").each_with_object({}) do |meta, collected|
      meta_key = meta.at("wp:meta_key").inner_text
      meta_value = meta.at("wp:meta_value").inner_text
      collected[meta_key] = meta_value
    end

    author_login = item.text_for("dc:creator").strip

    header = {
      "layout"     => item.post_type,
      "title"      => item.title,
      "date"       => item.published_at,
      "type"       => item.post_type,
      "parent_id"  => item.parent_id,
      "published"  => item.published?,
      "password"   => item.post_password,
      "status"     => item.status,
      "categories" => categories,
      "tags"       => tags,
      "meta"       => metas,
      "author"     => authors[author_login],
      "permalink"  => item.permalink,
    }

    begin
      content = Hpricot(item.text_for("content:encoded"))
      if item.excerpt
        header["excerpt"] = item.excerpt
      end

      if fetch
        # Put the images into a /yyyy/mm/ subfolder to reduce clashes
        assets_dir_path =
          item.published_at ? File.join(assets_folder, item.published_at.strftime("/%Y/%m")) : assets_folder
        download_images(item.title, content, assets_dir_path)
      end

      FileUtils.mkdir_p(item.directory_name)
      target = File.join(item.directory_name, item.file_name)
      File.open(target, "w") do |file|
        file.puts header.to_yaml
        file.puts "---"
        file.puts Util.wpautop(content.to_html)
      end
    rescue StandardError => e
      Jekyll.logger.error "Couldn't import post!"
      Jekyll.logger.error "Title: #{item.title}"
      Jekyll.logger.error "Name/Slug: #{item.file_name}\n"
      Jekyll.logger.error "Error: #{e.message}"
    else
      # Only items written without an error are counted.
      import_count[item.post_type] += 1
    end
  end

  import_count.each_pair do |post_type, total|
    Jekyll.logger.info "Imported #{total} #{post_type}s"
  end
end
require_deps()
click to toggle source
# File lib/jekyll-import/importers/wordpressdotcom.rb, line 6
#
# Hands the list of libraries this importer uses to
# JekyllImport.require_with_fallback.
def self.require_deps
  dependencies = [
    "rubygems",
    "fileutils",
    "safe_yaml",
    "hpricot",
    "time",
    "open-uri",
    "open_uri_redirections",
  ]
  JekyllImport.require_with_fallback(dependencies)
end
sluggify(title)
click to toggle source
# File lib/jekyll-import/importers/wordpressdotcom.rb, line 228
#
# Turns a title into a slug: every run of non-alphanumeric characters is
# replaced by a single "-" and the result is lowercased. Dashes at the start
# or end of the result are kept.
def self.sluggify(title)
  dashed = title.gsub(/[^[:alnum:]]+/, "-")
  dashed.downcase
end
specify_options(c)
click to toggle source
# File lib/jekyll-import/importers/wordpressdotcom.rb, line 18
#
# Registers this importer's command-line options on +c+.
#
# c - object responding to option(name, switch, description).
#
# Returns whatever the last c.option call returns.
def self.specify_options(c)
  option_specs = [
    ["source", "--source FILE", "WordPress export XML file (default: 'wordpress.xml')"],
    ["no_fetch_images", "--no-fetch-images", "Do not fetch the images referenced in the posts (default: false)"],
    ["assets_folder", "--assets_folder FOLDER", "Folder where assets such as images will be downloaded to (default: 'assets')"],
  ]
  option_specs.map { |name, switch, description| c.option(name, switch, description) }.last
end