class ARDatabaseDuplicator
Attributes
Public Class Methods
# File lib/ar_database_duplicator.rb, line 257
# Returns the shared duplicator, building it on first use.
#
# Any option not supplied is given a default before the instance is built.
# The schema path may be given as :schema_file (the key read by initialize)
# or under the older :schema key; both end up populated. The options only
# matter on the call that actually builds the instance: later calls return
# the memoized object regardless of what is passed.
def self.instance(options={})
  # Work on a copy so the caller's hash is never modified.
  options = options.dup
  options[:source] ||= 'development'
  options[:destination] ||= 'dev_data'
  # A path given only under :schema used to be dropped, because the
  # constructor looks at :schema_file. Carry it across.
  options[:schema_file] ||= options[:schema] || 'db/schema.rb'
  options[:schema] ||= options[:schema_file]
  options[:force] = false unless options.key?(:force)
  options[:test] = true unless options.key?(:test)
  options[:split_data] = true unless options.key?(:split_data)
  @duplicator ||= new(options)
end
# File lib/ar_database_duplicator.rb, line 125
# Builds a duplicator from an options hash.
#
# Recognised keys and their defaults:
#   :source      - 'development'
#   :destination - 'dev_data'
#   :schema_file - 'db/schema.rb'
#   :force       - false
#   :test        - false
#   :split_data  - true
#
# Raises ArgumentError when the destination is "production" (any case),
# matching the check performed by the destination= writer.
def initialize(options={})
  destination = options[:destination] || 'dev_data'
  # Without this check the constructor was a way around the writer's guard.
  if destination.to_s.downcase == "production"
    raise ArgumentError, "Production is not an allowed duplication destination."
  end

  @source = options[:source] || 'development'
  @destination = destination
  @schema_file = options[:schema_file] || 'db/schema.rb'
  @force = options.fetch(:force) { false }
  @test = options.fetch(:test) { false }
  @split_data = options.fetch(:split_data) { true }
end
# File lib/ar_database_duplicator.rb, line 267
# Drops the memoized duplicator so the next call to instance builds a new one.
def self.reset!
  @duplicator = nil
end
Public Instance Methods
# File lib/ar_database_duplicator.rb, line 167
# Resolves +name+ (camelized) to a top-level constant. When no such constant
# is defined yet, an empty ActiveRecord::Base subclass is created under that
# name first. Returns the constant's value.
def define_class(name)
  constant_name = name.camelize.to_sym
  unless Object.const_defined?(constant_name)
    Object.const_set(constant_name, Class.new(ActiveRecord::Base))
  end
  Object.const_get(constant_name)
end
# File lib/ar_database_duplicator.rb, line 142
# Changes the duplication destination.
#
# Raises ArgumentError when the new value is "production" (any case).
# Also clears the "destination directory exists" flag so the directory is
# checked again for the new destination.
def destination=(new_value)
  if new_value.downcase == "production"
    raise ArgumentError, "Production is not an allowed duplication destination."
  end

  @destination_directory_exists = false
  @destination = new_value
end
Duplicate each record, via ActiveRecord, from the source to the destination database. Field replacements can be given via a hash in the form of :original_field => :pseudo_person_field. If a block is passed, the record will be passed for inspection/alteration before it is saved into the destination database.
# File lib/ar_database_duplicator.rb, line 177
# Copies the records of +klass+ from the source database to the destination.
#
# klass                   - a Class, or a name that define_class can resolve.
# replacements            - Hash of field replacements applied to every record.
# additional_replacements - further Hashes, each applied as its own group.
# block                   - optional; forwarded to transfer for each record.
#
# Nothing is copied (and a message is printed via inform) when:
# * no block is given and the class still has unvetted attributes,
# * the destination already holds records and force is off, or
# * the source holds no records.
#
# Raises ArgumentError if any replacement group is not a Hash.
# Returns the array of single-table-inheritance subclasses that were found
# among the records read.
def duplicate(klass, replacements={}, *additional_replacements, &block)
  klass = define_class(klass) unless klass.is_a?(Class)
  plural_name = plural(klass)

  automatic_replacements = [replacements] + additional_replacements
  unless automatic_replacements.all? { |x| x.is_a?(Hash) }
    raise(ArgumentError, "Each group of replacements must be given as a Hash")
  end

  sti_klasses = []
  set_temporary_vetted_attributes(klass, automatic_replacements)

  begin
    if !block_given? && block_required?(klass)
      # Vetting is guaranteed to fail, so don't bother reading anything.
      inform "Skipping #{plural_name}. The following field(s) were not checked: #{klass.unvetted_attributes.join(', ')}"
    elsif !force && already_duplicated?(klass)
      inform "Skipping #{plural_name}. Records already exist."
    else
      # Connect to the source database
      with_source do
        # Grab a quick count to see if there is anything we need to do.
        estimated_total = klass.count
        if estimated_total > 0
          inform(test ? "Extracting first 1,000 #{plural_name} for testing" : "Extracting all #{plural_name}")

          # Pull in all records. Perhaps later we can enhance this to do it in batches.
          records =
            if singleton?(klass)
              [klass.instance]
            elsif test
              klass.find(:all, :limit => 1000)
            else
              klass.find(:all)
            end

          # Handle any single table inheritance that may have shown up
          records.map(&:class).uniq.each { |k| sti_klasses << k if k != klass }
          sti_klasses.each { |k| set_temporary_vetted_attributes(k, automatic_replacements) }

          # Record the size so we can give some progress indication.
          inform "#{records.size} #{plural_name} read"

          transfer(klass, records, automatic_replacements, &block)
        else
          inform "Skipping #{plural_name}. No records exist."
        end
      end
    end
  ensure
    # Clean things up for the next bit of code that might use this class.
    # Done in an ensure so a failure part-way through cannot leave the
    # temporary safe attributes in place.
    klass.clear_temporary_safe_attributes
    sti_klasses.each { |k| k.clear_temporary_safe_attributes }
  end

  sti_klasses
end
# File lib/ar_database_duplicator.rb, line 153
# Reads every record of +klass+ from the source and transfers it to the
# destination with field vetting switched off.
#
# Raises ArgumentError when the source is "production" (any case).
def load_duplication(klass)
  if source.downcase == "production"
    raise ArgumentError, "Production must be duplicated, not loaded from."
  end

  klass = define_class(klass) unless klass.is_a?(Class)
  records = with_source(klass) do
    klass.all
  end
  puts "#{records.size} #{plural(klass)} read."

  klass.without_field_vetting do
    transfer(klass, records)
  end
end
# File lib/ar_database_duplicator.rb, line 161
# Loads the schema into the destination, using the split or combined loader
# depending on the split_data setting.
def load_schema
  # A SchemaMigration model is needed only so we can ask whether a schema
  # has already been loaded.
  unless Object.const_defined?(:SchemaMigration)
    Object.const_set(:SchemaMigration, Class.new(ActiveRecord::Base))
  end

  if split_data
    load_schema_split
  else
    load_schema_combined
  end
end
# File lib/ar_database_duplicator.rb, line 148
# Sets the split_data flag and clears the "destination directory exists"
# flag, since the destination directory depends on this setting.
def split_data=(new_value)
  @destination_directory_exists = false
  @split_data = new_value
end
# File lib/ar_database_duplicator.rb, line 138
# Switches the connection to the destination (optionally a sub-database).
def use_destination(subname=nil)
  use_connection(destination, subname)
end
# File lib/ar_database_duplicator.rb, line 134
# Switches the connection to the source (optionally a sub-database).
def use_source(subname=nil)
  use_connection(source, subname)
end
# File lib/ar_database_duplicator.rb, line 233
# Runs the block with the silent setting forced to false.
def while_not_silent(&block)
  with_silence_at(false, &block)
end
# File lib/ar_database_duplicator.rb, line 229
# Runs the block with the silent setting forced to true.
def while_silent(&block)
  with_silence_at(true, &block)
end
With a specified connection, connect, execute a block, then restore the connection to its previous state (if any).
# File lib/ar_database_duplicator.rb, line 246
# Switches to the named connection, yields, and then switches back to
# whatever connection was recorded beforehand, even if the block raises.
# Returns the block's value.
def with_connection(name, subname=nil, silent_change=false, &block)
  previous_connection = connection
  begin
    use_connection(name, subname, silent_change)
    yield
  ensure
    use_spec(previous_connection)
  end
end
# File lib/ar_database_duplicator.rb, line 241
# Runs the block while connected to the destination, then restores the
# previous connection.
def with_destination(subname=nil, silent_change=false, &block)
  with_connection(destination, subname, silent_change, &block)
end
# File lib/ar_database_duplicator.rb, line 237
# Runs the block while connected to the source, then restores the previous
# connection.
def with_source(subname=nil, silent_change=false, &block)
  with_connection(source, subname, silent_change, &block)
end
Private Instance Methods
# File lib/ar_database_duplicator.rb, line 582
# True when the destination already holds at least one record of +klass+.
# Singleton models are checked with a count; everything else by fetching
# the first record.
def already_duplicated?(klass)
  with_destination(klass, true) do
    if singleton?(klass)
      klass.count > 0
    else
      !klass.first.nil?
    end
  end
end
# File lib/ar_database_duplicator.rb, line 273
# Memoized path of the "db/duplication" directory under Rails.root.
def base_path
  return @base_path if @base_path

  @base_path = Rails.root + "db" + "duplication"
end
Returns true if we absolutely know that a block will be required for vetting to pass
# File lib/ar_database_duplicator.rb, line 619
# Checks, while silently connected to the source, whether +klass+ still
# reports any unvetted attributes.
def block_required?(klass)
  with_source(nil, true) do
    !klass.unvetted_attributes.empty?
  end
end
Hopefully this will be rails version agnostic. But knowing my luck… Oh well.
# File lib/ar_database_duplicator.rb, line 602
# Calls each known callback accessor on +klass+ and clears what it returns.
# A NoMethodError raised for any individual callback is ignored so the
# remaining ones are still processed.
def clear_callbacks(klass)
  callback_names = [
    :after_initialize, :after_find, :after_touch,
    :before_validation, :after_validation,
    :before_save, :around_save, :after_save,
    :before_create, :around_create, :after_create,
    :before_update, :around_update, :after_update,
    :before_destroy, :around_destroy, :after_destroy,
    :after_commit, :after_rollback
  ]

  callback_names.each do |callback_name|
    begin
      klass.send(callback_name).clear
    rescue NoMethodError
      # Deliberately ignored; move on to the next callback.
    end
  end
end
# File lib/ar_database_duplicator.rb, line 293
# True when the remembered connection equals +name+.
def connected_to?(name)
  connection == name
end
# File lib/ar_database_duplicator.rb, line 285
# The connection spec most recently remembered, or nil if none.
def connection
  @connection
end
# File lib/ar_database_duplicator.rb, line 289
# Remembers +new_name+ as the current connection spec.
def connection=(new_name)
  @connection = new_name
end
# File lib/ar_database_duplicator.rb, line 297
# Creates the destination directory if it is missing and records that it
# now exists.
def create_destination_directory
  directory = destination_directory
  directory.mkpath unless directory.exist?
  @destination_directory_exists = true
end
# File lib/ar_database_duplicator.rb, line 281
# Directory the destination data lives in: a sub-directory named after the
# destination when split_data is on, otherwise base_path itself.
def destination_directory
  if split_data
    base_path + destination
  else
    base_path
  end
end
# File lib/ar_database_duplicator.rb, line 277
# Returns the stored "destination directory exists" flag (nil until set).
def destination_directory_exists?
  @destination_directory_exists
end
# File lib/ar_database_duplicator.rb, line 303
# Memoized PseudoEntity instance.
def entity
  @entity ||= PseudoEntity.new
end
# File lib/ar_database_duplicator.rb, line 307
# Prints +message+ unless the silent setting is on.
def inform(message)
  return if silent

  puts message
end
Load the schema into the destination database
# File lib/ar_database_duplicator.rb, line 312
# Loads the whole schema file into the single destination database, adds a
# "table_schemas" table, and stores each table's captured schema in it.
# Does nothing but print a message when schema_loaded? reports true.
def load_schema_combined
  with_destination do
    if schema_loaded?
      inform 'Skipping schema load. Schema already loaded.'
    else
      captured_schema = CapturedSchema.new(self, schema_file)

      # sqlite3 handles index names at the database level and not at the table level.
      # This can cause issues with adding indexes. Since we wont be depending on them anyway
      # we will just stub this out so we can load the schema without issues.
      #schema_klass = ActiveRecord::Schema
      #
      #def schema_klass.add_index(*args)
      #  say_with_time "add_index(#{args.map(&:inspect).join(', ')})" do
      #    say "skipped", :subitem
      #  end
      #end

      load schema_file

      schema_version = captured_schema.recorded_assume_migrated[1]
      ActiveRecord::Schema.define(:version => schema_version) do
        create_table "table_schemas", :force => true do |t|
          t.string "table_name"
          t.text "schema"
        end
      end

      captured_schema.table_names.each do |table_name|
        TableSchema.create(:table_name => table_name, :schema => captured_schema.schema_for(table_name))
      end
    end
  end
end
Load the schema into the separate destination databases. Each db corresponds to one table.
# File lib/ar_database_duplicator.rb, line 346
# Loads the schema one table at a time, each into its own destination
# database. For every table whose schema is not already loaded, the recorded
# commands for that table are replayed, a "table_schemas" table is created,
# the recorded initialize-schema and assume-migrated calls are replayed, and
# the table's captured schema is stored. Prints a message when no table
# needed loading.
def load_schema_split
  captured_schema = CapturedSchema.new(self, schema_file)
  nothing_loaded = true

  # Now that we know all of the tables, indexes, etc we are ready to split things up into multiple databases for easy transport.
  captured_schema.table_names.sort.each do |table_name|
    next if schema_loaded?(table_name)

    nothing_loaded = false
    with_destination(table_name) do
      table_commands = captured_schema.table_commands_for(table_name)

      ActiveRecord::Schema.define(:version => captured_schema.recorded_assume_migrated[1]) do
        # Each recorded command is an argument list whose last element is the block.
        table_commands.each do |recorded|
          call_args = recorded.dup
          call_block = call_args.pop
          self.send(*call_args, &call_block)
        end

        create_table "table_schemas", :force => true do |t|
          t.string "table_name"
          t.text "schema"
        end

        # Replay the two schema-level recordings, in this order, skipping any that is empty.
        [:recorded_initialize_schema, :recorded_assume_migrated].each do |reader|
          call_args = captured_schema.send(reader).dup
          call_block = call_args.pop
          self.send(*call_args, &call_block) unless call_args.empty?
        end
      end

      TableSchema.create(:table_name => table_name, :schema => captured_schema.schema_for(table_name))
    end
  end

  inform 'Skipping schema load. Schema already loaded.' if nothing_loaded
end
# File lib/ar_database_duplicator.rb, line 568
# Lower-cased form of title_plural(klass), used in progress messages.
def plural(klass)
  titled = title_plural(klass)
  titled.downcase
end
Replace each value in the target if it is already populated.
# File lib/ar_database_duplicator.rb, line 412
# Applies a hash of replacements to +target+.
#
# Each pair maps an attribute name to either a literal value or a Symbol
# naming a method on the pseudo entity. A Symbol prefixed with "encrypted_"
# means: call the method named by the rest of the symbol, then encrypt the
# result. In that case populated "<attribute>_salt" and "<attribute>_iv"
# fields on the target are replaced too and fed into the encryption.
#
# Raises RuntimeError when the entity does not respond to the requested
# method.
def replace(target, hash)
  hash.each do |key, value_key|
    # Assume a plain, unencrypted literal until shown otherwise.
    encrypted = false
    value = value_key

    if value_key.is_a?(Symbol)
      entity_method = value_key
      if value_key.to_s.start_with?('encrypted_')
        encrypted = true
        # Strip the 10-character "encrypted_" prefix to get the real method name.
        entity_method = value_key.to_s[10..-1].to_sym
      end

      unless entity.respond_to?(entity_method)
        raise "No replacement defined for #{entity_method.inspect}"
      end
      value = entity.send(entity_method)
    end

    if encrypted
      salt_method = "#{key}_salt".to_sym
      iv_method = "#{key}_iv".to_sym

      # Only replace a salt that the record already has.
      salt = nil
      if target.respond_to?(salt_method) && !target.send(salt_method).blank?
        salt = entity.reset('salt')
        replace_with(target, salt_method, salt)
      end

      # Likewise for the iv.
      iv = nil
      if target.respond_to?(iv_method) && !target.send(iv_method).blank?
        iv = entity.reset('iv')
        replace_with(target, iv_method, iv)
      end

      # Fixed throwaway key: the value being encrypted is generated data.
      value = value.encrypt(:key => "1234", :salt => salt, :iv => iv)
    end

    replace_with target, key, value
  end
end
# File lib/ar_database_duplicator.rb, line 387
# Applies every automatic replacement group to +record+, then, if a block
# was given, lets the block contribute further replacements.
#
# The entity is reset before each group. A block of arity 1 is yielded the
# freshly reset entity; any other block is yielded the entity and the
# record. If the block returns a non-empty Hash it is applied as one more
# replacement group.
def replace_attributes(record, automatic_replacements, &block)
  automatic_replacements.each do |replacement_hash|
    # The reset happens even for an empty group.
    entity.reset!
    next if replacement_hash.empty?

    replace(record, replacement_hash)
  end

  return unless block_given?

  block_replacements = block.arity == 1 ? yield(entity.reset!) : yield(entity.reset!, record)

  if block_replacements.is_a?(Hash) && !block_replacements.empty?
    replace(record, block_replacements)
  end
end
Replace a value in the target if it is already populated.
# File lib/ar_database_duplicator.rb, line 460
# Writes +value+ to the +key+ attribute of +target+, but only when the
# attribute currently holds a non-blank value. The attribute is then marked
# as vetted if the target supports vet_attribute.
#
# A Proc value is called first to obtain the real value; how many of
# (entity, target, key) it receives depends on its arity.
def replace_with(target, key, value)
  if value.is_a?(Proc)
    arity = value.arity
    value =
      if arity == 0
        value.call
      elsif arity == 1
        value.call(entity)
      elsif arity == 2
        value.call(entity, target)
      else
        value.call(entity, target, key)
      end
  end

  current_value = target.send(key)
  target.send("#{key}=", value) unless current_value.blank?
  target.vet_attribute(key) if target.respond_to?(:vet_attribute)
end
# File lib/ar_database_duplicator.rb, line 478
# Salt taken from a brand-new instance of the entity's class.
def salt
  fresh_entity = entity.class.new
  fresh_entity.salt
end
# File lib/ar_database_duplicator.rb, line 588
# Reports whether the destination (or the given sub-database) already has a
# populated SchemaMigration table. Always false when force is on.
def schema_loaded?(subname=nil)
  return false if force

  define_class('SchemaMigration')
  with_destination(subname, true) do
    SchemaMigration.table_exists? && SchemaMigration.count > 0
  end
end
# File lib/ar_database_duplicator.rb, line 483
# Resets the temporary safe attributes of +klass+ and then marks as
# temporarily safe:
# * the bookkeeping columns duplication never modifies, and
# * every attribute named in the automatic replacements, plus its
#   "_salt" and "_iv" companions when the replacement is an "encrypted_"
#   symbol.
def set_temporary_vetted_attributes(klass, automatic_replacements)
  # Always start from a clean slate rather than trusting earlier cleanup.
  klass.clear_temporary_safe_attributes

  [:id, :created_at, :updated_at, :deleted_at, :lock_version].each do |always_safe|
    klass.mark_attribute_temporarily_safe(always_safe)
  end

  automatic_replacements.each do |replacement_set|
    replacement_set.each do |attr, value|
      # Marked at the class level so an attribute still counts as vetted when
      # an instance has a nil value and no substitution is performed.
      klass.mark_attribute_temporarily_safe(attr)

      next unless value.is_a?(Symbol) && value.to_s.starts_with?("encrypted_")

      # Encrypted replacements also attempt to populate the salt and iv fields.
      klass.mark_attribute_temporarily_safe "#{attr}_salt"
      klass.mark_attribute_temporarily_safe "#{attr}_iv"
    end
  end
end
# File lib/ar_database_duplicator.rb, line 597
# True when a module named 'ActiveRecord::Singleton' is among the modules
# included in +klass+. The comparison is by name.
def singleton?(klass)
  module_names = klass.included_modules.map(&:to_s)
  module_names.include?('ActiveRecord::Singleton')
end
# File lib/ar_database_duplicator.rb, line 564
# Titleized, pluralized form of the class name.
def title_plural(klass)
  titled = klass.name.titleize
  titled.pluralize
end
# File lib/ar_database_duplicator.rb, line 510
# Saves +records+ into the destination database for +klass+.
#
# klass                  - the model class being transferred.
# records                - the records read from the source.
# automatic_replacements - replacement groups handed to replace_attributes.
# block                  - optional; handed to replace_attributes.
#
# Per-record failures are handled as follows:
# * ActiveRecord::StatementInvalid - reported, and the record is collected
#   and dumped to "<destination>.<class name>.yaml" after the loop.
# * ActiveRecord::VettedRecord::UnvettedAttribute - reported and skipped.
# * anything else - reported, temporary safe attributes are cleared on the
#   class and on any other classes found among the records, and the
#   exception is re-raised.
def transfer(klass, records, automatic_replacements={}, &block)
  plural_name = plural(klass)

  inform "Transferring #{plural_name}"
  # Switch to the destination database
  with_destination(klass) do
    problematic_records = []

    # Blow away all callbacks. We are looking at a pure data transfer here.
    clear_callbacks(klass)

    progress_bar = ProgressBar.create(:title => title_plural(klass), :total => records.size, :format => '%t %p%% [%b>>%i] %c/%C %E ', :smoothing => 0.9)

    # Take each record, replace any data required, and save
    records.each do |record|
      replace_attributes(record, automatic_replacements, &block)

      # Trick active record into saving this record all over again in its entirety
      record.instance_variable_set(:@new_record, true)

      # Save without validation as there is no guaranteed order of how the classes will be duplicated. We don't want to trigger any callbacks referencing other tables.
      # Besides, they should have already been validated when they were saved in production.
      begin
        record.vetted_save
      rescue ActiveRecord::StatementInvalid => e
        inform "Problems saving record #{record.id}."
        inform e.message
        inform "Adding record to emergency yaml dump"
        problematic_records << record
      rescue ActiveRecord::VettedRecord::UnvettedAttribute => e
        inform "#{record.class.name}##{record.id} not duplicated for security reasons"
        inform e.message
      rescue => e
        puts "Not good! I just got an #{e.inspect}"
        # Quick cleanup. The subclasses are worked out from the records here:
        # this method has no list of them, and referring to one raised a
        # NameError that hid the real exception.
        klass.clear_temporary_safe_attributes
        (records.map(&:class).uniq - [klass]).each { |k| k.clear_temporary_safe_attributes }
        raise
      end

      # Give an update of the percentage transferred
      progress_bar.increment
    end

    unless problematic_records.blank?
      file_name = "#{destination}.#{klass.name}.yaml"
      inform "Saving #{problematic_records.size} #{plural_name} to #{file_name}"
      # TODO: Change to deal with split data
      File.open( file_name, 'w' ) { |out| YAML.dump(problematic_records, out) }
    end
  end

  inform "All #{plural_name} transferred"
end
# File lib/ar_database_duplicator.rb, line 623
# Works out the connection spec for +name+ and switches to it via use_spec.
#
# A name listed in ActiveRecord::Base.configurations is used as the spec
# directly. Any other name becomes a sqlite3 spec whose database path is:
# * the name itself, when it is not the destination;
# * "<destination>.sqlite3" inside the destination directory, when
#   split_data is off;
# * the sub-name inside the destination directory, when split_data is on
#   and a sub-name is given;
# * "<destination>.sqlite3" in the parent of the destination directory,
#   when split_data is on and no sub-name is given.
#
# A sub-name given as an ActiveRecord::Base subclass is converted to its
# table name. When silent_change is true no display name is passed on.
def use_connection(name, subname=nil, silent_change=false)
  if ActiveRecord::Base.configurations.keys.include?(name)
    # Defined in database.yml: the name doubles as the spec.
    spec = name
    display_name = name
  else
    subname = subname.table_name if subname.is_a?(Class) && subname < ActiveRecord::Base

    database =
      if name != destination
        Pathname(name.to_s)
      elsif !split_data
        # The sqlite3 extension avoids name collisions.
        destination_directory + "#{destination}.sqlite3"
      elsif subname.blank?
        # One directory up, again with an extension to avoid a collision.
        destination_directory.parent + "#{destination}.sqlite3"
      else
        destination_directory + subname
      end

    spec = {:adapter => 'sqlite3',:database => database.to_s, :host => 'localhost', :username => 'root'}
    # Something nice for display: the file name without its extension.
    display_name = database.basename(database.extname)
  end

  use_spec(spec, silent_change ? nil : display_name)
end
# File lib/ar_database_duplicator.rb, line 660
# Connects ActiveRecord::Base to +spec+ unless that spec is already the
# remembered connection. A nil spec only drops the current connections.
# When +name+ is given, the switch is announced via inform.
def use_spec(spec, name=nil)
  return if connected_to?(spec)

  # sqlite3 specs need the destination directory to be in place first.
  needs_directory = spec.is_a?(Hash) && (spec[:adapter] == 'sqlite3') && !destination_directory_exists?
  create_destination_directory if needs_directory

  inform "Switching to #{name}" if name

  # Drop whatever we were connected to before connecting to the new spec.
  ActiveRecord::Base.clear_active_connections! if connection
  ActiveRecord::Base.establish_connection(spec) if spec

  # Remembered so a repeat request for the same spec is a no-op.
  self.connection = spec
end
# File lib/ar_database_duplicator.rb, line 572
# Sets the silent setting to +value+ for the duration of the block and puts
# the earlier setting back afterwards, even if the block raises.
def with_silence_at(value)
  previous_setting = silent
  self.silent = value
  begin
    yield
  ensure
    @silent = previous_setting
  end
end