module JekyllImport::Importers::Blogger::BloggerAtomStreamListenerMethods

Attributes

comments[RW]
leave_blogger_info[RW]
original_url_base[R]

Public Instance Methods

post_data_from_in_entry_elem_info() click to toggle source
# File lib/jekyll-import/importers/blogger.rb, line 214
# Builds the data needed to write out the entry currently held in
# @in_entry_elem.
#
# Handles entries whose meta :kind is "post" or "comment". The file name is
# "<published date>-<basename of the original URL path without extension>",
# with a running "-<sequence number>" suffix (tracked in @comment_seq) for
# comments. Draft posts without an original URL are named after their
# lower-cased, CGI-escaped title instead (or just the date when there is no
# title). Whenever an original URL is present, @original_url_base is updated
# to its "scheme://host".
#
# The body is returned with "{{" and "{%" wrapped in Liquid string output
# tags. The escaping works on a copy, so the body stored in @in_entry_elem
# is left untouched and calling this method again for the same entry does
# not escape the text a second time.
#
# Returns nil when no entry is in progress, when the entry has no :meta or
# no :kind, or when the kind is neither "post" nor "comment". Otherwise
# returns a Hash with :filename (String, no extension), :header (Hash of
# front matter values) and :body (String or nil).
#
# Raises RuntimeError ("Original URL is missing") for a comment, or a
# non-draft post, that carries no original URL.
def post_data_from_in_entry_elem_info
  meta = @in_entry_elem && @in_entry_elem[:meta]
  return nil unless meta&.key?(:kind)

  kind = meta[:kind]
  return nil unless kind == "post" || kind == "comment"

  timestamp = Time.parse(meta[:published]).strftime("%Y-%m-%d")

  if meta[:original_url]
    original_uri  = URI.parse(meta[:original_url])
    original_path = original_uri.path.to_s
    slug = File.basename(original_path, File.extname(original_path))

    if kind == "comment"
      # Several comments can share one post URL; the sequence number keeps
      # their file names distinct.
      @comment_seq ||= 1
      filename = format("%s-%s-%s", timestamp, slug, @comment_seq)
      @comment_seq += 1
    else
      filename = format("%s-%s", timestamp, slug)
    end

    @original_url_base = "#{original_uri.scheme}://#{original_uri.host}"
  elsif kind == "post" && meta[:draft]
    # Drafts don't have published urls
    name = meta[:title]
    filename = if name.nil?
                 timestamp
               else
                 format("%s-%s", timestamp, CGI.escape(name.downcase.tr("+/\\:'\"<>{}?%*|.", "-")))
               end
  else
    raise "Original URL is missing"
  end

  header = if kind == "post"
             {
               "layout" => "post",
               "title"  => meta[:title],
               "date"   => meta[:published],
               "author" => meta[:author],
               "tags"   => meta[:category],
             }
           else
             {
               "date"            => meta[:published],
               "author"          => meta[:author],
               "blogger_post_id" => meta[:post_id],
             }
           end
  header["modified_time"] = meta[:updated] if meta[:updated] && meta[:updated] != meta[:published]
  header["thumbnail"] = meta[:thumbnail] if meta[:thumbnail]
  header["blogger_id"] = meta[:id] if @leave_blogger_info
  header["blogger_orig_url"] = meta[:original_url] if @leave_blogger_info && meta[:original_url]

  # body escaping associated with liquid; non-mutating so the parser's copy
  # of the text stays as it was read.
  body = @in_entry_elem[:body]
  body &&= body.gsub("{{", '{{ "{{" }}').gsub("{%", '{{ "{%" }}')

  { :filename => filename, :header => header, :body => body }
end
tag_end(tag) click to toggle source
# File lib/jekyll-import/importers/blogger.rb, line 166
# Stream-listener callback for a closing tag.
#
# When an "entry" element closes, the collected entry is written out as
# "<filename>.html" with a YAML header, a "---" separator and the body:
# posts go to "_posts" (or "_drafts" when the entry is flagged as a draft)
# and comments go to "_comments", the latter only when @comments is truthy.
# Nothing is written when post_data_from_in_entry_elem_info returns nil.
# The in-progress entry is cleared afterwards, and in every case the
# closed tag is popped from @tag_bread.
#
# Raises RuntimeError ("nest entry element") when an "entry" closes while
# no entry is in progress.
def tag_end(tag)
  if tag == "entry"
    raise "nest entry element" unless @in_entry_elem

    meta = @in_entry_elem[:meta]
    destination = nil
    decode = nil

    case meta[:kind]
    when "post"
      destination = meta[:draft] ? "_drafts" : "_posts"
      # Form decoding: turns "+" into a space as well as resolving %XX.
      decode = ->(name) { URI.decode_www_form_component(name) }
    when "comment"
      if @comments
        destination = "_comments"
        # Plain percent-unescaping: "+" is kept as-is.
        decode = ->(name) { URI::DEFAULT_PARSER.unescape(name) }
      end
    end

    entry = destination && post_data_from_in_entry_elem_info
    if entry
      FileUtils.mkdir_p(destination)

      path = File.join(destination, decode.call("#{entry[:filename]}.html"))
      File.open(path, "w") do |io|
        io.flock(File::LOCK_EX)

        io << entry[:header].to_yaml << "---\n\n" << entry[:body]
      end
    end

    @in_entry_elem = nil
  end

  @tag_bread.pop
end
tag_start(tag, attrs) click to toggle source
# File lib/jekyll-import/importers/blogger.rb, line 106
# Stream-listener callback for an opening tag.
#
# Pushes the tag onto @tag_bread (the stack of currently open tags) and,
# while an entry is being collected, records metadata carried in attributes:
#
# * "entry"           - starts a fresh @in_entry_elem
# * "category"        - appends a label to :category, or stores the entry
#                       :kind with the Blogger kind-URL prefix stripped
# * "content"         - stores the content type
# * "link"            - stores :original_url from the "alternate" text/html
#                       link; a "replies" text/html link (minus its
#                       "#comment-form" suffix) is used only when no URL
#                       has been stored yet
# * "media:thumbnail" - stores :thumbnail
# * "thr:in-reply-to" - stores :post_id
#
# tag   - String element name.
# attrs - Hash-like attribute lookup keyed by attribute name. It is only
#         read; attribute values are never modified.
#
# Raises RuntimeError when an "entry" opens inside another entry, or when a
# "title" inside an entry has a type other than "text".
def tag_start(tag, attrs)
  @tag_bread ||= []
  @tag_bread.push(tag)

  case tag
  when "entry"
    raise "nest entry element" if @in_entry_elem

    @in_entry_elem = { :meta => {}, :body => nil }
  when "title"
    raise 'only <title type="text"></title> is supported' if @in_entry_elem && attrs["type"] != "text"
  when "category"
    if @in_entry_elem
      if attrs["scheme"] == "http://www.blogger.com/atom/ns#"
        (@in_entry_elem[:meta][:category] ||= []) << attrs["term"]
      elsif attrs["scheme"] == "http://schemas.google.com/g/2005#kind"
        # Non-mutating `sub`: the caller's attribute value is left intact
        # and a frozen string is accepted.
        @in_entry_elem[:meta][:kind] =
          attrs["term"].sub(%r!^http://schemas\.google\.com/blogger/2008/kind\#!, "")
      end
    end
  when "content"
    @in_entry_elem[:meta][:content_type] = attrs["type"] if @in_entry_elem
  when "link"
    if @in_entry_elem
      if attrs["rel"] == "alternate" && attrs["type"] == "text/html"
        @in_entry_elem[:meta][:original_url] = attrs["href"]
      elsif attrs["rel"] == "replies" && attrs["type"] == "text/html"
        @in_entry_elem[:meta][:original_url] ||= attrs["href"].sub(%r!\#comment-form$!, "")
      end
    end
  when "media:thumbnail"
    @in_entry_elem[:meta][:thumbnail] = attrs["url"] if @in_entry_elem
  when "thr:in-reply-to"
    @in_entry_elem[:meta][:post_id] = attrs["ref"] if @in_entry_elem
  end
end
text(text) click to toggle source
# File lib/jekyll-import/importers/blogger.rb, line 145
# Stream-listener callback for character data.
#
# Ignored unless an entry is being collected. The innermost open tag (the
# last element of @tag_bread) decides where the text is stored:
# "id", "published", "updated" and "title" go to the meta key of the same
# name, "content" becomes the entry body, "name" directly inside "author"
# becomes :author, and "app:draft" directly inside "app:control" marks the
# entry as a draft when the text is "yes".
#
# text - String of character data reported by the parser.
def text(text)
  entry = @in_entry_elem
  return unless entry

  leaf   = @tag_bread.last
  parent = @tag_bread[-2]

  if %w(id published updated title).include?(leaf)
    entry[:meta][leaf.to_sym] = text
  elsif leaf == "content"
    entry[:body] = text
  elsif leaf == "name"
    entry[:meta][:author] = text if parent == "author"
  elsif leaf == "app:draft"
    entry[:meta][:draft] = true if parent == "app:control" && text == "yes"
  end
end