class SuttyMigration::Wordpress
Brings posts and attachments from a SQLite3 database. You can convert a MySQL/MariaDB dump by using `mysql2sqlite`.
It doesn't convert them into Jekyll
posts but allows you to write a migration plugin where you can convert data by yourself. We may add this feature in the future.
Attributes
Public Class Methods
@param :site [Jekyll::Site] Jekyll site
@param :url [String] Wordpress site URL (must be up for downloads)
@param :database [String] Database path, by default `_data/wordpress.sqlite3`
@param :prefix [String] WP table prefix
@param :limit [Integer] Page length
@param :multisite [SuttyMigration::Wordpress] Parent multisite instance (`blogs` passes itself here); its table prefix is used to find the users tables
# File lib/sutty_migration/wordpress.rb, line 28
#
# Stores the migration settings on the instance. The String arguments
# `prefix` and `url` (and `limit`) are frozen in place.
#
# @param site [Jekyll::Site] Jekyll site; `site.source` is the base of the default database path
# @param url [String] Wordpress site URL
# @param database [String, nil] SQLite3 database path; when omitted it is `_data/wordpress.sqlite3` under `site.source`
# @param prefix [String] WP table prefix
# @param limit [Integer] Page length
# @param multisite [Object, nil] Stored as given. NOTE(review): `blogs` passes the parent instance here — confirm the intended type
def initialize(site:, url:, database: nil, prefix: 'wp_', limit: 10, multisite: nil)
  @site      = site
  @url       = url.freeze
  @prefix    = prefix.freeze
  @limit     = limit.freeze
  @multisite = multisite
  # The default path is only computed when no explicit database was given.
  @database  = database || File.join(site.source, '_data', 'wordpress.sqlite3')
end
Public Instance Methods
Generate database connections for a multisite WP
@return [Hash] { blog_id => blog row merged with { db: SuttyMigration::Wordpress } }
# File lib/sutty_migration/wordpress.rb, line 40
#
# Builds (and memoizes) one migration instance per row of the
# `<prefix>blogs` table.
#
# Blogs whose `blog_id` is greater than 1 get the table prefix
# `<prefix><blog_id>_`; the others reuse this instance's prefix. Every
# child shares this instance's site, database and page length, and
# receives `self` as its `multisite`.
#
# @return [Hash] { blog_id => blog row merged with { db: instance } }
def blogs
  @blogs ||= wp["select * from #{prefix}blogs"].to_a.each_with_object({}) do |blog, by_id|
    blog_id     = blog[:blog_id]
    blog_prefix = blog_id > 1 ? "#{prefix}#{blog_id}_" : prefix
    blog_url    = "https://#{blog[:domain]}#{blog[:path]}"

    child = self.class.new(site: site, url: blog_url, prefix: blog_prefix, database: database,
                           limit: limit, multisite: self)

    by_id[blog_id] = blog.merge(db: child)
  end
end
Downloads a file if needed, optionally showing a progress bar.
@param :url [String] File URL @param :progress [Boolean] Toggle progress bar @return [String] File local path
# File lib/sutty_migration/wordpress.rb, line 80
#
# Downloads a file unless it already exists locally, optionally showing
# a progress bar on stderr.
#
# The local path is the URL path without its leading slash, placed under
# `site.source`. The file is written in binary mode, and it is removed
# again if the transfer does not finish, so that an interrupted download
# is not mistaken for a complete one on the next run.
#
# @param url [String] File URL
# @param progress [Boolean] Toggle progress bar
# @return [String] File local path, relative to `site.source`
def download(url:, progress: true)
  uri = URI(url)
  dest = uri.path.sub(%r{\A/}, '')
  full = File.join(site.source, dest)

  return dest if File.exist? full

  ::Jekyll.logger.info "Downloading #{dest}"

  FileUtils.mkdir_p File.dirname(full)

  bar = nil
  content_length = 0

  if progress
    # A HEAD request provides the total size for the progress bar.
    head = Faraday.head(url)
    content_length = head.headers['content-length'].to_i
    bar = ProgressBar.create(title: File.basename(dest), total: content_length, output: $stderr)
  end

  completed = false

  begin
    # 'wb' keeps binary attachments intact on platforms that translate newlines.
    File.open(full, 'wb') do |f|
      Faraday.get(url) do |req|
        req.options.on_data = proc do |chunk, downloaded_bytes|
          f.write chunk

          # Never report more than the announced total.
          bar.progress = [downloaded_bytes, content_length].min if bar
        end
      end
    end

    completed = true
  ensure
    # Covers exceptions and interrupts alike.
    FileUtils.rm_f(full) unless completed
  end

  dest
end
Download all attachments. Adds the local path to them.
@param :progress [Boolean] Toggle progress bar
@return [Array] The attachment posts
# File lib/sutty_migration/wordpress.rb, line 69
#
# Downloads every post whose layout is `attachment` and stores the local
# path under the `'file_path'` key of its front matter.
#
# An attachment without front matter gets an empty Hash first, so the
# path can always be stored.
#
# @param progress [Boolean] Toggle progress bar
# @return [Array<Hash>] The attachment posts
def download_all(progress: true)
  posts(layout: 'attachment').each do |attachment|
    front_matter = (attachment[:front_matter] ||= {})
    front_matter['file_path'] = download(url: attachment[:guid], progress: progress)
  end
end
List post types
@return [Array]
# File lib/sutty_migration/wordpress.rb, line 115
#
# Lists the distinct `post_type` values found in the `<prefix>posts`
# table. The result is memoized.
#
# @return [Array]
def layouts
  @layouts ||= begin
    rows = wp["select distinct post_type from #{prefix}posts"].to_a
    rows.flat_map(&:values)
  end
end
# File lib/sutty_migration/wordpress.rb, line 52
#
# Reads the `<prefix>options` table into a Hash keyed by the option name
# as a Symbol. The result is memoized.
#
# @return [Hash] { option_name => option_value }
def options
  @options ||= begin
    rows = wp["select option_name, option_value from #{prefix}options"].to_a

    rows.each_with_object({}) do |row, settings|
      name, value = row.values
      settings[name.to_sym] = value
    end
  end
end
Finds all posts optionally filtering by post type. This is not the official Sequel syntax, but it retrieves metadata as objects with a single query (and a sub-query).
@param :layout [String] Layout name, one of layouts
@param :with_meta [Boolean] Toggle metadata pulling and conversion
@return [Array]
# File lib/sutty_migration/wordpress.rb, line 126
#
# Finds all posts, optionally filtering by post type, and converts the
# raw columns of each row in place:
#
# * `:date` and `:last_modified_at` are parsed with
#   `Jekyll::Utils.parse_date` when present.
# * `:front_matter` and `:terms` are decoded from JSON into Hashes with
#   Symbol keys. They are always a Hash: empty when the column is blank
#   or is not valid JSON.
#
# @param layout [String] Layout name, one of `layouts`
# @param with_meta [Boolean] Toggle metadata pulling and conversion
# @return [Array<Hash>]
# @raise [ArgumentError] when `layout` is given but is not one of `layouts`
def posts(**options)
  unless options[:layout].blank? || layouts.include?(options[:layout])
    raise ArgumentError, "#{options[:layout]} must be one of #{layouts.join(', ')}"
  end

  wp[post_query(**options)].each_page(limit).to_a.flat_map(&:to_a).each do |post|
    # Sequel parses dates on localtime, so the query returns them as
    # strings which are parsed here instead.
    %i[date last_modified_at].each do |field|
      post[field] = ::Jekyll::Utils.parse_date(post[field]) unless post[field].blank?
    end

    %i[front_matter terms].each do |column|
      post[column] = json_column_to_hash(post[column])
    end
  end
end

# Decodes a JSON object column into a Hash with Symbol keys, unwrapping
# arrays that hold a single value. Blank or invalid input gives `{}`.
private def json_column_to_hash(raw)
  return {} if raw.blank?

  JSON.parse(raw).transform_keys(&:to_sym).transform_values do |value|
    value.is_a?(Array) && value.size == 1 ? value.first : value
  end
rescue JSON::ParserError
  {}
end
Brings all users.
@param :with_meta [Boolean] include metadata @return [Array]
# File lib/sutty_migration/wordpress.rb, line 168
#
# Brings all users. Metadata is included unless `with_meta` is passed
# explicitly; when it is enabled, each non-blank `:meta` column is
# decoded from JSON into a Hash with Symbol keys.
#
# @param with_meta [Boolean] include metadata
# @return [Array]
def users(**options)
  options = { with_meta: true }.merge(options)

  rows = wp[user_query(**options)].each_page(limit).to_a.flat_map(&:to_a)
  return rows unless options[:with_meta]

  rows.each do |user|
    next if user[:meta].blank?

    user[:meta] = JSON.parse(user[:meta]).transform_keys(&:to_sym)
  end
end
Private Instance Methods
Recover the post meta as a JSON object with multiple values converted to arrays
@return [String]
# File lib/sutty_migration/wordpress.rb, line 243
#
# Builds the SQL that turns the `<prefix>postmeta` table into one row per
# post. The inner query collects the values of each (post, key) pair into
# a JSON array; the outer query folds those pairs into a single JSON
# object aliased as `front_matter`.
#
# @return [String]
def meta_query
  postmeta = "#{prefix}postmeta"

  <<~EOQ
    select
      post_id,
      json_group_object(meta_key, json(meta_values)) as front_matter
    from (
      select
        post_id,
        meta_key,
        json_group_array(meta_value) as meta_values
      from #{postmeta}
      group by post_id, meta_key
    )
    group by post_id
  EOQ
end
Query for posts, optionally bringing metadata as JSON objects.
@param :layout [String] Layout name @param :with_meta [Boolean] Query metadata @return [String]
# File lib/sutty_migration/wordpress.rb, line 205
#
# Builds the SQL for posts, optionally joining the metadata and the terms
# as JSON objects.
#
# A blank layout (nil, empty or whitespace only) means "no filter", which
# matches how `posts` validates it. Single quotes in the layout are
# doubled so the value stays inside its SQL string literal.
#
# @param layout [String] Layout name
# @param with_meta [Boolean] Query metadata
# @return [String]
def post_query(layout: nil, with_meta: true)
  layout_name = layout.to_s
  filter = "where p.post_type = '#{layout_name.gsub("'", "''")}'" unless layout_name.strip.empty?

  <<~EOQ
    select
      p.ID as id,
      strftime('%Y-%m-%d %H:%M:%S UTC', p.post_date_gmt) as date,
      strftime('%Y-%m-%d %H:%M:%S UTC', p.post_modified_gmt) as last_modified_at,
      p.post_author as author,
      p.post_type as layout,
      p.post_name as slug,
      p.post_title as title,
      p.post_content as content,
      p.post_excerpt as excerpt,
      p.post_status as status,
      p.comment_status as comment_status,
      p.ping_status as ping_status,
      p.post_password as password,
      p.to_ping as to_ping,
      p.pinged as pinged,
      p.post_content_filtered as content_filtered,
      p.post_parent as parent,
      p.guid as guid,
      p.menu_order as menu_order,
      p.post_mime_type as mime_type,
      p.comment_count as comment_count
      #{', f.front_matter as front_matter' if with_meta}
      #{', t.terms as terms' if with_meta}
    from #{prefix}posts as p
    #{"left join (#{meta_query}) as f on f.post_id = p.ID" if with_meta}
    #{"left join (#{terms_query}) as t on t.post_id = p.ID" if with_meta}
    #{filter}
    group by p.ID
  EOQ
end
Term taxonomy query
@return [String]
# File lib/sutty_migration/wordpress.rb, line 264
#
# Builds the SQL that returns one row per post with its terms as a JSON
# object of the form { taxonomy => [term names] }.
#
# The inner query groups by post AND taxonomy, so each taxonomy gets its
# own array of term names. Grouping by post alone would merge the terms
# of every taxonomy under a single, arbitrarily chosen taxonomy.
#
# @return [String]
def terms_query
  <<~EOQ
    select
      post_id,
      json_group_object(taxonomy, json(terms)) as terms
    from (
      select
        r.object_id as post_id,
        tt.taxonomy,
        json_group_array(t.name) as terms
      from #{prefix}term_relationships as r
      left join #{prefix}term_taxonomy as tt on tt.term_taxonomy_id = r.term_taxonomy_id
      left join #{prefix}terms as t on t.term_id = tt.term_id
      group by r.object_id, tt.taxonomy)
    group by post_id
  EOQ
end
Finds all users. If it's a multisite WP, we need to check the main table.
@param :with_meta [Boolean] include metadata @return [String]
# File lib/sutty_migration/wordpress.rb, line 187
#
# Builds the SQL for users, optionally aggregating their metadata into a
# JSON object aliased as `meta`.
#
# When `multisite` exposes a `prefix` (the parent instance that `blogs`
# passes to its children), that prefix selects the users tables.
# Otherwise, including when `multisite` is a plain Boolean, this
# instance's own prefix is used.
#
# @param with_meta [Boolean] include metadata
# @return [String]
def user_query(with_meta: true)
  parent_prefix = multisite.prefix if multisite.respond_to?(:prefix)
  pfx = parent_prefix || prefix

  <<~EOQ
    select
      u.*
      #{', json_group_object(m.meta_key, m.meta_value) as meta' if with_meta}
    from #{pfx}users as u
    #{"left join #{pfx}usermeta as m on m.user_id = u.id" if with_meta}
    group by u.id
  EOQ
end