class JekyllImport::Importers::WordpressDotCom

Public Class Methods

download_images(title, post_hpricot, assets_folder) click to toggle source

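Modifies the post DOM tree in place: the src attribute of each img element is rewritten to point at the local copy under the assets folder.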

# File lib/jekyll-import/importers/wordpressdotcom.rb, line 25
# Downloads every image referenced by a post into +assets_folder+ and points
# the matching <img> elements at the local copies.
#
# The post DOM tree is modified in place: each image's "src" is replaced with
# "{{ site.baseurl }}/<assets_folder>/<basename of the original src>". The
# rewrite happens before the download is attempted, so it applies even when
# the file is already cached or the download fails.
#
# title         - post title; used only in the log output.
# post_hpricot  - parsed post body; must answer `/ "img"` with a collection of
#                 elements that support `[]` and `[]=` for attributes.
# assets_folder - directory receiving the downloaded files (created on demand).
#
# Download errors are logged and never raised to the caller.
def self.download_images(title, post_hpricot, assets_folder)
  images = (post_hpricot / "img")
  return if images.empty?

  Jekyll.logger.info "Downloading images for ", title
  images.each do |i|
    uri = i["src"]
    # An <img> without a usable src has nothing to download. Skipping it here
    # also keeps File.basename(nil) from raising outside the rescue below.
    next if uri.to_s.strip.empty?

    dst = File.join(assets_folder, File.basename(uri))
    i["src"] = File.join("{{ site.baseurl }}", dst)
    Jekyll.logger.info uri
    if File.exist?(dst)
      Jekyll.logger.info "Already in cache. Clean assets folder if you want a redownload."
      next
    end
    begin
      FileUtils.mkdir_p assets_folder
      OpenURI.open_uri(uri, :allow_redirections => :safe) do |f|
        # Read the whole payload before touching the destination so that a
        # failed read cannot leave an empty file that later looks "cached".
        payload = f.read
        # Binary write without `puts`: `puts` appends a newline whenever the
        # data does not already end in one, which corrupts image files.
        File.binwrite(dst, payload)
      end
      Jekyll.logger.info "OK!"
    rescue StandardError => e
      Jekyll.logger.error "Error: #{e.message}"
      Jekyll.logger.error e.backtrace.join("\n")
    end
  end
end
process(options) click to toggle source
# File lib/jekyll-import/importers/wordpressdotcom.rb, line 140
# Converts a WordPress export file into one file per item, each consisting of
# a YAML header followed by the item's HTML content.
#
# options - Hash with String keys:
#           "source"          - path of the export XML (default "wordpress.xml")
#           "no_fetch_images" - truthy to skip downloading images (default false)
#           "assets_folder"   - where images are stored (default "assets")
#
# An item that fails inside the content/write step is logged and skipped; it
# is not counted in the per-type totals printed at the end.
def self.process(options)
  source        = options.fetch("source", "wordpress.xml")
  fetch         = !options.fetch("no_fetch_images", false)
  assets_folder = options.fetch("assets_folder", "assets")
  FileUtils.mkdir_p(assets_folder)

  import_count = Hash.new(0)
  doc = Hpricot::XML(File.read(source))

  # Author records from the channel header, keyed by stripped login. Any
  # error while reading them falls back to an empty table.
  authors =
    begin
      (doc / :channel / "wp:author").map do |author|
        login = author.at("wp:author_login").inner_text.strip
        details = {
          "login"        => author.at("wp:author_login").inner_text.strip,
          "email"        => author.at("wp:author_email").inner_text,
          "display_name" => author.at("wp:author_display_name").inner_text,
          "first_name"   => author.at("wp:author_first_name").inner_text,
          "last_name"    => author.at("wp:author_last_name").inner_text,
        }
        [login, details]
      end.to_h
    rescue StandardError
      {}
    end

  (doc / :channel / :item).each do |node|
    item = Item.new(node)

    category_names = node.search('category[@domain="category"]').map(&:inner_text)
    categories     = category_names.reject { |name| name == "Uncategorized" }.uniq
    tags           = node.search('category[@domain="post_tag"]').map(&:inner_text).uniq

    # Later postmeta entries overwrite earlier ones that share a key.
    metas = node.search("wp:postmeta").each_with_object({}) do |meta, collected|
      collected[meta.at("wp:meta_key").inner_text] = meta.at("wp:meta_value").inner_text
    end

    author_login = item.text_for("dc:creator").strip

    header = {
      "layout"     => item.post_type,
      "title"      => item.title,
      "date"       => item.published_at,
      "type"       => item.post_type,
      "parent_id"  => item.parent_id,
      "published"  => item.published?,
      "password"   => item.post_password,
      "status"     => item.status,
      "categories" => categories,
      "tags"       => tags,
      "meta"       => metas,
      "author"     => authors[author_login],
      "permalink"  => item.permalink,
    }

    begin
      content = Hpricot(item.text_for("content:encoded"))
      header["excerpt"] = item.excerpt if item.excerpt

      if fetch
        # Put the images into a /yyyy/mm/ subfolder to reduce clashes
        assets_dir_path =
          item.published_at ? File.join(assets_folder, item.published_at.strftime("/%Y/%m")) : assets_folder

        download_images(item.title, content, assets_dir_path)
      end

      FileUtils.mkdir_p item.directory_name
      target = File.join(item.directory_name, item.file_name)
      File.open(target, "w") do |f|
        f.puts header.to_yaml
        f.puts "---"
        f.puts Util.wpautop(content.to_html)
      end
    rescue StandardError => e
      Jekyll.logger.error "Couldn't import post!"
      Jekyll.logger.error "Title: #{item.title}"
      Jekyll.logger.error "Name/Slug: #{item.file_name}\n"
      Jekyll.logger.error "Error: #{e.message}"
    else
      # Only items written without error are counted.
      import_count[item.post_type] += 1
    end
  end

  import_count.each { |type, total| Jekyll.logger.info "Imported #{total} #{type}s" }
end
require_deps() click to toggle source
# File lib/jekyll-import/importers/wordpressdotcom.rb, line 6
# Hands the list of libraries this importer needs to
# JekyllImport.require_with_fallback and returns whatever that call returns.
def self.require_deps
  dependencies = [
    "rubygems",
    "fileutils",
    "safe_yaml",
    "hpricot",
    "time",
    "open-uri",
    "open_uri_redirections",
  ]
  JekyllImport.require_with_fallback(dependencies)
end
sluggify(title) click to toggle source
# File lib/jekyll-import/importers/wordpressdotcom.rb, line 228
# Builds a slug from +title+: every run of non-alphanumeric characters is
# collapsed into a single "-" and the result is lower-cased. Dashes produced
# at the start or end of the string are kept.
def self.sluggify(title)
  dashed = title.gsub(/[^[:alnum:]]+/, "-")
  dashed.downcase
end
specify_options(c) click to toggle source
# File lib/jekyll-import/importers/wordpressdotcom.rb, line 18
# Registers this importer's command-line options on +c+ by calling
# c.option once per option, in the order: source, no_fetch_images,
# assets_folder. Returns the result of the last c.option call.
def self.specify_options(c)
  c.option(
    "source",
    "--source FILE",
    "WordPress export XML file (default: 'wordpress.xml')"
  )
  c.option(
    "no_fetch_images",
    "--no-fetch-images",
    "Do not fetch the images referenced in the posts (default: false)"
  )
  c.option(
    "assets_folder",
    "--assets_folder FOLDER",
    "Folder where assets such as images will be downloaded to (default: 'assets')"
  )
end