class Splitter_rss

Constants

NS

Public Class Methods

new(document, destination) click to toggle source
# File lib/abelard/load.rb, line 40
# Stores the inputs of a split run.
#
# document::    the parsed feed; split_items reads its root element.
# destination:: path prefix under which every output file is written.
def initialize(document, destination)
  @dest = destination
  @doc = document
end

Public Instance Methods

extract_comments(item) click to toggle source
# File lib/abelard/load.rb, line 45
# Detaches the wp:comment elements from a post item and wraps each approved
# one in a document of its own.
#
# In a WordPress export the comments are nested inside the post's item as
# wp:comment elements. Every comment node found is removed from +item+,
# whether approved or not, and its author email and IP address are stripped.
#
# item:: the post node; it is searched with find("wp:comment", NS) and also
#        handed to Post_id_rss to derive the comment file names.
#
# Returns an Array of Item objects, one per comment whose
# wp:comment_approved content is '1', each targeting
# "#{@dest}/<name from Post_id_rss#as_comment>". An approved comment without
# a wp:comment_id is reported on $stderr and left out, because no file name
# can be derived for it.
def extract_comments(item)
  basename = Post_id_rss.new(item)

  all = []
  item.find("wp:comment", NS).each do |node|
    # Detach the comment from the post and make it the root of its own document.
    comment_doc = LibXML::XML::Document.new
    comment_doc.root = node.remove!

    # Delete some sensitive fields; this happens for every comment, before
    # the approval check.
    %w[comment_author_email comment_author_IP].each do |field|
      sensitive = comment_doc.find_first("/wp:comment/wp:#{field}", NS)
      sensitive.remove! if sensitive
    end

    approved = comment_doc.find_first("/wp:comment/wp:comment_approved", NS)
    next unless approved && approved.content == '1'

    id = comment_doc.find_first("/wp:comment/wp:comment_id", NS)
    unless id
      # Without an id there is nothing to build the file name from; skip the
      # comment instead of failing on nil.
      $stderr.puts("skipping approved comment without wp:comment_id")
      next
    end

    filename = basename.as_comment(id.content)
    all << Item.new(comment_doc, "#{@dest}/#{filename}")
  end
  all
end
save(node) click to toggle source
# File lib/abelard/load.rb, line 119
# Saves one post node, provided its status is :published.
#
# The file name comes from Post_id_rss#to_s and the object to save is built
# by item(node, path), a helper defined outside this block. Posts in any
# other status are not written; a "skipping" line naming the file and the
# status goes to $stderr instead.
#
# Returns whatever the built object's save returns when published, otherwise
# the result of $stderr.puts.
def save(node)
  name = Post_id_rss.new(node).to_s
  post = item(node, "#{@dest}/#{name}")
  return post.save if post.status == :published

  $stderr.puts("skipping #{name} as status #{post.status}")
end
split_items() click to toggle source
# File lib/abelard/load.rb, line 72
# Splits the feed held in @doc into separate files under @dest.
#
# Builds a new document (@parent) whose root has the same name and attributes
# as the source root. For every element child of the source root:
# * a "channel" is added to @parent as an empty clone; its items are saved
#   through save/extract_comments, and its remaining element children are
#   gathered into a channel-N.xml document (unless the channel's self link
#   matches /comments/);
# * any other element is appended to the @parent root directly.
# Finally @parent is written to "#{@dest}/feed.xml".
#
# Side effects: assigns @parent, writes files, and prints the name of every
# channel child (plus skip notices) to $stderr.
def split_items
  # N in channel-N.xml; advanced for every channel, including ones not written.
  channel_count = 1
  rss = @doc.root
  @parent = LibXML::XML::Document.new()
  root = LibXML::XML::Node.new(rss.name)
  @parent.root = root
  # Mirror the source root's attributes onto the new root.
  rss.attributes.each { |a| root.attributes[a.name] = a.value }
  # select builds an array first, so appending children to root below does not
  # disturb this iteration.
  rss.children.select(&:element?).each do |channel|
    if (channel.name == "channel")
      root << channel.clone # shallow copy for feed.xml
      
      # NOTE(review): presumably self_link returns the channel's own feed URL;
      # a match on "comments" marks a comments feed - confirm in XmlUtil.
      channelself = XmlUtil::self_link(channel)
      is_comments = (channelself =~ /comments/)
      
      # Fresh channel element with the same name and attributes; it receives
      # deep copies of every non-item element child.
      copy = LibXML::XML::Node.new(channel.name)
      channel.attributes.each { |a| copy.attributes[a.name] = a.value }
      channel.children.select(&:element?).each do |node|
        $stderr.puts(node.name)
        if (node.name == "item")
          # attachments don't get saved as posts
          # NOTE(review): the namespace is spelled out here while
          # extract_comments passes NS - confirm the two agree.
          if ( node.find("wp:attachment_url", "wp:http://wordpress.org/export/1.2/").length > 0 )
            $stderr.puts("skipping attachment")
          else
            # in a wordpress export file, comments are included inside the post item
            # extract_comments runs before the post is saved; the post is
            # written first and its comments afterwards.
            comments = extract_comments(node)
            save(node)
            comments.each { |c| c.save }
          end
        else
          copy << node.copy(true)
        end
      end
      # Deep copy of the root as built so far (it already holds the empty clone
      # appended above), with the populated channel copy added after it.
      ch_copy = root.copy(true)
      ch_copy << copy
      unless is_comments
        channel_doc = LibXML::XML::Document.new()
        channel_doc.root = ch_copy
        channel_doc.save("#{@dest}/channel-#{channel_count}.xml")
      end
      channel_count = channel_count + 1
    else
      # Non-channel elements are appended as they are, not copied.
      root << channel
    end
  end
  @parent.save("#{@dest}/feed.xml")
end