class PgShrink::Database::Postgres
Constants
- DEFAULT_OPTS
Attributes
connection[RW]
Public Class Methods
new(opts)
# File lib/pg_shrink/database/postgres.rb, line 43
def initialize(opts)
  @opts = DEFAULT_OPTS.merge(opts.symbolize_keys)
  @connection = Sequel.connect(connection_string)
end
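A minimal construction sketch, assuming the gem is loaded and the target database is reachable; the credentials and database name are made up, and the option keys mirror those read by connection_string below (anything omitted falls back to DEFAULT_OPTS):

  require 'pg_shrink'

  # Build the connection string from discrete options...
  db = PgShrink::Database::Postgres.new(
    user:     'deploy',
    password: 'secret',
    host:     'localhost',
    database: 'app_shrunk'
  )

  # ...or hand over a full URL, which connection_string passes through unchanged.
  db = PgShrink::Database::Postgres.new(
    postgres_url: 'postgres://deploy:secret@localhost:5432/app_shrunk'
  )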
Public Instance Methods
batch_size()
# File lib/pg_shrink/database/postgres.rb, line 39
def batch_size
  @opts[:batch_size]
end
connection_string()
# File lib/pg_shrink/database/postgres.rb, line 18
def connection_string
  if @opts[:postgres_url]
    @opts[:postgres_url]
  else
    str = "postgres://#{@opts[:user]}"
    str << ":#{@opts[:password]}" if @opts[:password]
    str << "@#{@opts[:host]}"
    str << ":#{@opts[:port]}" if @opts[:port]
    str << "/#{@opts[:database]}"
  end
end
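As a worked example with the hypothetical options from above, and assuming DEFAULT_OPTS supplies no password or port, the optional pieces are simply skipped:

  db = PgShrink::Database::Postgres.new(user: 'deploy', host: 'localhost',
                                        database: 'app_shrunk')
  db.connection_string  # => "postgres://deploy@localhost/app_shrunk"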
database_name()
# File lib/pg_shrink/database/postgres.rb, line 30
def database_name
  if @opts[:postgres_url]
    @opts[:postgres_url] =~ /.*\/([^\/]+)$/
    return $1
  else
    @opts[:database]
  end
end
delete_records(table_name, conditions, exclude_conditions = [])
# File lib/pg_shrink/database/postgres.rb, line 104
def delete_records(table_name, conditions, exclude_conditions = [])
  query = connection.from(table_name)
  Array.wrap(conditions).compact.each do |cond|
    query = query.where(cond)
  end
  Array.wrap(exclude_conditions).compact.each do |exclude_cond|
    query = query.exclude(exclude_cond)
  end
  query.delete
end
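A usage sketch with a hypothetical users table; conditions and exclude_conditions are Sequel-style hashes (or arrays of hashes), applied with where and exclude respectively:

  # Delete soft-deleted users, except any that are admins.
  db.delete_records(:users, {deleted: true}, [{admin: true}])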
get_records(table_name, opts)
# File lib/pg_shrink/database/postgres.rb, line 100
def get_records(table_name, opts)
  self.connection.from(table_name).where(opts).all
end
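For example, with the same hypothetical table, this returns an array of row hashes:

  rows = db.get_records(:users, admin: true)
  rows.first[:email] unless rows.empty?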
propagate_delete(opts)
# File lib/pg_shrink/database/postgres.rb, line 115
def propagate_delete(opts)
  child_table = opts[:child_table]
  parent_table = opts[:parent_table]
  child_key = opts[:child_key]
  parent_key = opts[:parent_key]
  where_clause = opts[:where]
  # What we conceptually want to do is delete the left outer join where the
  # id is null.  That's not working in postgres, so we instead use WHERE NOT
  # EXISTS.  Docs indicate using WHERE NOT EXISTS with SELECT 1 in this case.
  # See:
  # http://www.postgresql.org/docs/current/interactive/functions-subquery.html#FUNCTIONS-SUBQUERY-EXISTS
  query = "DELETE FROM #{child_table} WHERE NOT EXISTS (" +
          "SELECT 1 from #{parent_table} where " +
          "#{child_table}.#{child_key} = " +
          "#{parent_table}.#{parent_key}" +
          ")"
  query_builder = connection.from(child_table)
  query_builder = query_builder.where(where_clause) if where_clause
  # Outside of the join statements, we want to maintain the ease of
  # hash-based conditions.  Do this by using a query builder but then
  # swapping in the delete SQL at the end.
  sql = query_builder.sql.gsub("WHERE", "AND").
          gsub("SELECT * FROM \"#{child_table}\"", query)
  connection[sql].delete
end
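A sketch of the expected options hash, again with hypothetical tables: remove comments whose parent user row no longer exists, optionally narrowed by a :where hash on the child table:

  db.propagate_delete(
    child_table:  :comments,
    parent_table: :users,
    child_key:    :user_id,
    parent_key:   :id,
    where:        {spam: false}   # optional extra condition on comments
  )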
records_in_batches(table_name) { |batch| ... }
WARNING! This assumes the database is not changing during the run. If requirements change, we may need to insert a lock.
# File lib/pg_shrink/database/postgres.rb, line 50
def records_in_batches(table_name)
  table = self.table(table_name)
  primary_key = table.primary_key
  unless primary_key
    raise "Error: Records in batches called on table without a primary key"
  end
  max_id = self.connection["select max(#{primary_key}) from #{table_name}"].
             first[:max]
  i = 0
  while i < max_id do
    sql = "select * from #{table_name} where " +
          "#{primary_key} > #{i} limit #{batch_size}"
    batch = self.connection[sql].all.compact
    yield(batch)
    if batch.any?
      i = batch.last[primary_key]
    else
      break
    end
  end
end
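A usage sketch on a hypothetical users table with an id primary key; each yielded batch is an array of at most batch_size row hashes:

  db.records_in_batches(:users) do |batch|
    puts "processing #{batch.size} rows, up to id #{batch.last[:id]}"
  end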
shrink!()
# File lib/pg_shrink/database/postgres.rb, line 163
def shrink!
  filter!
  vacuum_and_reindex_all!
  sanitize!
  self.log("Shrinking Done!")
end
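End to end, once filters and sanitizers have been configured elsewhere in PgShrink, usage is a single call (filter! and sanitize! are defined outside this class):

  db.shrink!   # filter, vacuum and reindex, then sanitize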
update_records(table_name, old_records, new_records)
# File lib/pg_shrink/database/postgres.rb, line 73
def update_records(table_name, old_records, new_records)
  table = self.table(table_name)
  primary_key = table.primary_key
  old_records_by_key = old_records.index_by {|r| r[primary_key]}
  new_records_by_key = new_records.index_by {|r| r[primary_key]}
  if (new_records_by_key.keys - old_records_by_key.keys).size > 0
    raise "Bad voodoo! New records have primary keys not in old records!"
  end
  deleted_record_ids = old_records_by_key.keys - new_records_by_key.keys
  if deleted_record_ids.any?
    raise "Bad voodoo! Some records missing in new records!"
  end
  # TODO: This can be optimized if performance is too slow.  Will impact
  # the speed of sanitizing the already-filtered dataset.
  new_records.each do |rec|
    if old_records_by_key[rec[primary_key]] != rec
      self.connection.from(table_name).
        where(primary_key => rec[primary_key]).
        update(rec)
    end
  end
end
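A sanitization-style sketch that pairs records_in_batches with update_records on a hypothetical users table; the new records must keep the same primary keys, as the checks above enforce:

  db.records_in_batches(:users) do |old_batch|
    new_batch = old_batch.map do |row|
      row.merge(email: "user#{row[:id]}@example.com")   # scrub real emails
    end
    db.update_records(:users, old_batch, new_batch)
  end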
vacuum_and_reindex!(table_name)
# File lib/pg_shrink/database/postgres.rb, line 147
def vacuum_and_reindex!(table_name)
  self.log("Beginning vacuum on #{table_name}")
  connection["vacuum full #{table_name}"].first
  self.log("Beginning reindex on #{table_name}")
  connection["reindex table #{table_name}"].first
  self.log("done reindexing #{table_name}")
end
vacuum_and_reindex_all!()
# File lib/pg_shrink/database/postgres.rb, line 155
def vacuum_and_reindex_all!
  self.log("Beginning full database vacuum")
  connection["vacuum full"].first
  self.log("beginning full database reindex")
  connection["reindex database #{database_name}"].first
  self.log("done reindexing full database")
end
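Both maintenance helpers can also be called on their own, for example after a large manual delete (table name hypothetical):

  db.vacuum_and_reindex!(:users)   # one table
  db.vacuum_and_reindex_all!       # whole database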