class Backup

Used to back up a MogileFS domain using a backup profile.

Attributes

db[RW]
db_host[RW]
db_pass[RW]
db_port[RW]
db_user[RW]
domain[RW]
tracker_host[RW]
tracker_port[RW]
workers[RW]

Public Class Methods

new(o={}) click to toggle source

Run validations and prepare the object for a backup @param [Hash] o hash containing the settings for the backup

# File lib/backup.rb, line 8
# Run validations and prepare the object for a backup.
#
# Loads connection settings from the backup profile's settings.yml,
# validates the backup path, creates/connects/migrates the local SQLite
# tracking DB, and connects to the MogileFS database and tracker.
#
# @param [Hash] o hash containing the settings for the backup
#   (:workers => number of worker processes to use, optional)
# @raise [RuntimeError] if the backup path validation fails
def initialize(o={})

  #Load up the settings file
  check_settings_file
  settings = YAML::load(File.open("#{$backup_path}/settings.yml"))
  @db = settings['db']
  @db_host = settings['db_host']
  @db_port = settings['db_port']
  @db_pass = settings['db_pass']
  @db_user = settings['db_user']
  @domain = settings['domain']
  # NOTE(review): the attribute docs list tracker_host, but the ivar set
  # here is @tracker_ip — confirm which name the accessors actually use.
  @tracker_ip = settings['tracker_ip']
  @tracker_port = settings['tracker_port']
  @workers = o[:workers] if o[:workers]

  #run validations and setup
  raise 'backup path validation failed' unless check_backup_path
  create_sqlite_db
  connect_sqlite
  migrate_sqlite
  mogile_db_connect
  mogile_tracker_connect
  check_mogile_domain(domain)

  # Deliberately required late: these ActiveRecord models depend on the
  # database connections established above.
  require('domain')
  require('file')
  require('bakfile')
  require('fileclass')
end

Public Instance Methods

backup(o = {}) click to toggle source

The real logic for backing the domain up. It is pretty careful about making sure that it doesn’t report a file as backed up unless it actually was. Supports the ability to remove deleted files from the backup as well. We grab files from the mogilefs mysql server in groups of 500 * number of workers (default is 1 worker) @param [Hash] o if :no_delete then don’t remove deleted files from the backup (intensive process)

# File lib/backup.rb, line 138
# The real logic for backing the domain up. It is careful about making sure
# that it doesn't report a file as backed up unless it actually was.
# Supports removing deleted files from the backup as well. Files are pulled
# from the MogileFS MySQL server in groups of 500 * number of workers
# (default is 1 worker).
#
# @param [Hash] o options hash:
#   :no_delete - if set, don't remove deleted files from the backup
#                (an intensive process)
#   :non_stop  - if set, loop forever instead of returning after one pass
# @return [true] when terminating because the signal handler requested it
def backup(o = {})

  #Loop over the main backup logic.  We'll break out at the end unless o[:non_stop] is set
  loop do
    files = []
    #first we retry files that we haven't been able to backup successfully, if any.
    BakFile.find_each(:conditions => ['saved = ?', false]) do |bak_file|
      files << bak_file
    end

    launch_backup_workers(files)

    #now back up any new files.  if they fail to be backed up we'll retry them the next time the backup
    #command is run.
    dmid = Domain.find_by_namespace(self.domain)
    Fid.find_in_batches(:conditions => ['dmid = ? AND fid > ?', dmid, BakFile.max_fid], :batch_size => 500 * self.workers.to_i, :include => [:domain, :fileclass]) do |batch|

      #Insert all the files into our bak db with :saved false so that we don't think we backed up something that crashed
      files = []
      batch.each do |file|
        files << BakFile.new(:fid => file.fid,
                             :domain => file.domain.namespace,
                             :dkey => file.dkey,
                             :length => file.length,
                             :classname => file.classname,
                             :saved => false)
      end

      #There is no way to do a bulk insert in sqlite so this generates a lot of inserts.  wrapping all of the inserts
      #inside a single transaction makes it much much faster.
      BakFile.transaction do
        BakFile.import files, :validate => false
      end

      #Fire up the workers now that we have work for them to do
      launch_backup_workers(files)

      #Terminate program if the signal handler says so and this is a clean place to do it
      return true if SignalHandler.instance.should_quit
    end

    #Delete files from the backup that no longer exist in the mogilefs domain.  Unfortunately there is no easy way to detect
    #which files have been deleted from the MogileFS domain.  Our only option is to brute force our way through.  This is a bulk
    #query that checks a thousand files in each query against the MogileFS database server.  The query is kind of tricky because
    #I wanted to do this with nothing but SELECT privileges which meant I couldn't create a temporary table (which would require
    #create temporary table and insert privileges).  You might want to only run this operation every once in a while if you have a
    #very large domain.  In my testing,  it is able to get through domains with millions of files in a matter of seconds.  So
    #all in all it's not so bad
    if !o[:no_delete]
      Log.instance.info("Start: Search for files to delete")
      BakFile.find_in_batches { |bak_files|
        # Build "SELECT fid UNION SELECT fid ..." so the batch can be joined
        # against the MogileFS file table without temp-table privileges.
        union = "SELECT #{bak_files.first.fid} as fid"
        bak_files.shift
        bak_files.each do |bakfile|
          union = "#{union} UNION SELECT #{bakfile.fid}"
        end
        connection = ActiveRecord::Base.connection
        # A NULL file.fid after the LEFT JOIN means the file is gone from MogileFS.
        files = connection.select_values("SELECT t1.fid FROM (#{union}) as t1 LEFT JOIN file on t1.fid = file.fid WHERE file.fid IS NULL")
        launch_delete_workers(files)

        #Terminate program if the signal handler says so and this is a clean place to do it
        return true if SignalHandler.instance.should_quit
      }
      Log.instance.info("End: Search for files to delete")
    end

    #Break out of infinite loop unless o[:non_stop] is set
    break unless o[:non_stop]
    sleep 1
  end

end
bak_file(file) click to toggle source

Create a backup of a file using a BakFile object @param [BakFile] file file that needs to be backed up @return [Bool] file save result

# File lib/backup.rb, line 43
# Create a backup of a file using a BakFile object.
#
# @param [BakFile] file file that needs to be backed up
# @return [Bool] file save result
def bak_file(file)
  saved = file.bak_it

  message = if saved
              "Backed up: FID #{file.fid}"
            else
              "Error - will try again on next run: FID #{file.fid}"
            end
  Log.instance.info(message)

  saved
end
launch_backup_workers(files) click to toggle source

Launch workers to backup an array of BakFiles @param [Array] files must be an array of BakFiles

# File lib/backup.rb, line 56
# Launch workers to backup an array of BakFiles.
#
# @param [Array] files must be an array of BakFiles
def launch_backup_workers(files)

  #This proc will process the results of the child proc: collect the fids
  #that were actually saved and flag them in the local backup DB.
  parent = Proc.new { |results|
    fids = []

    results.each do |result|
      file = result[:file]
      saved = result[:saved]
      fids << file.fid if saved
    end

    #bulk update all the fids.  much faster than doing it one at a time
    BakFile.update_all({:saved => true}, {:fid => fids})

    #release the connection from the connection pool
    SqliteActiveRecord.clear_active_connections!
  }

  #This proc receives an array of BakFiles, processes them, and returns a result
  #array to the parent proc. We will break out early if the signal handler says
  #so. (Block param renamed so it no longer shadows the method's +files+ argument.)
  child = Proc.new { |work_files|
    result = []
    work_files.each do |file|
      break if file.nil?
      break if SignalHandler.instance.should_quit
      saved = bak_file(file)
      result << {:saved => saved, :file => file}
    end
    result
  }

  #launch workers using the above procs and files
  Forkinator.hybrid_fork(self.workers.to_i, files, parent, child)
end
launch_delete_workers(fids) click to toggle source

Launch workers to delete an array of files @param [Array] fids must be an array of FIDs that need to be deleted from the backup

# File lib/backup.rb, line 94
# Launch workers to delete an array of files from the backup.
#
# @param [Array] fids must be an array of FIDs that need to be deleted
def launch_delete_workers(fids)

  #This proc receives an array of fids, deletes each file from the backup
  #filesystem, and returns the handled fids to the parent. Breaks out early
  #if the signal handler says so. (Block param renamed so it no longer
  #shadows the method's +fids+ argument.)
  child = Proc.new { |work_fids|
    result = []
    work_fids.each do |fid|
      break if fid.nil?
      break if SignalHandler.instance.should_quit
      deleted = BakFile.delete_from_fs(fid)
      if deleted
        Log.instance.info("Deleting from backup: FID #{fid}")
      else
        Log.instance.info("Failed to delete from backup: FID #{fid}")
      end

      result << fid
    end
    result
  }

  #This proc will process the results of the child proc: bulk-delete the
  #handled fids from the local backup DB.
  parent = Proc.new { |results|
    handled_fids = []

    results.each do |result|
      handled_fids << result
    end

    BakFile.delete_all({:fid => handled_fids})

    #release the connection from the connection pool
    SqliteActiveRecord.clear_active_connections!
  }

  #launch workers using the above procs and fids
  Forkinator.hybrid_fork(self.workers.to_i, fids, parent, child)

end