class ManageIQ::PostgresHaAdmin::FailoverMonitor

Constants

DB_CHECK_FREQUENCY
FAILOVER_ATTEMPTS
FAILOVER_CHECK_FREQUENCY

Attributes

config_handlers[R]
db_check_frequency[RW]
failover_attempts[RW]
failover_check_frequency[RW]

Public Class Methods

new(config_path = "") click to toggle source
# File lib/manageiq/postgres_ha_admin/failover_monitor.rb, line 15
def initialize(config_path = "")
  initialize_settings(config_path)
  @config_handlers = []
end

Public Instance Methods

active_servers_conninfo(handler, server_store) click to toggle source
# File lib/manageiq/postgres_ha_admin/failover_monitor.rb, line 62
def active_servers_conninfo(handler, server_store)
  servers = server_store.connection_info_list
  current_params = handler.read
  servers.map! { |info| current_params.merge(info) }
end
add_handler(handler) click to toggle source
# File lib/manageiq/postgres_ha_admin/failover_monitor.rb, line 20
def add_handler(handler)
  @config_handlers << [handler, ServerStore.new]
end
monitor() click to toggle source
# File lib/manageiq/postgres_ha_admin/failover_monitor.rb, line 24
def monitor
  config_handlers.each do |handler, server_store|
    begin
      connection = pg_connection(handler.read)
      if connection
        server_store.update_servers(connection)
        connection.finish
        next
      end

      logger.error("Primary Database is not available for #{handler.name}. Starting to execute failover...")
      handler.do_before_failover

      new_conn_info = execute_failover(handler, server_store)
      if new_conn_info
        handler.do_after_failover(new_conn_info)
      else
        logger.error("Failover failed")
      end
    rescue => e
      logger.error("Received #{e.class} error while monitoring #{handler.name}: #{e.message}")
      logger.error(e.backtrace)
    end
  end
end
monitor_loop() click to toggle source
# File lib/manageiq/postgres_ha_admin/failover_monitor.rb, line 50
def monitor_loop
  loop do
    begin
      monitor
    rescue => err
      logger.error("#{err.class}: #{err}")
      logger.error(err.backtrace.join("\n"))
    end
    sleep(db_check_frequency)
  end
end

Private Instance Methods

database_in_recovery?(pg_connection) click to toggle source

Checks if postgres database is in recovery mode

@param pg_connection [PG::Connection] established pg connection @return [Boolean] true if database in recovery mode

# File lib/manageiq/postgres_ha_admin/failover_monitor.rb, line 122
def database_in_recovery?(pg_connection)
  pg_connection.exec("SELECT pg_catalog.pg_is_in_recovery()") do |db_result|
    result = db_result.map_types!(PG::BasicTypeMapForResults.new(pg_connection)).first
    result['pg_is_in_recovery']
  end
end
execute_failover(handler, server_store) click to toggle source
# File lib/manageiq/postgres_ha_admin/failover_monitor.rb, line 84
def execute_failover(handler, server_store)
  failover_attempts.times do
    with_each_standby_connection(handler, server_store) do |connection, params|
      next if database_in_recovery?(connection)
      next unless server_store.host_is_primary?(params[:host], connection)
      logger.info("Failing over to server using conninfo: #{params.reject { |k, _v| k == :password }}")
      server_store.update_servers(connection)
      handler.write(params)
      return params
    end
    sleep(failover_check_frequency)
  end
  false
end
initialize_settings(ha_admin_yml_file) click to toggle source
# File lib/manageiq/postgres_ha_admin/failover_monitor.rb, line 70
def initialize_settings(ha_admin_yml_file)
  ha_admin_yml = {}
  begin
    ha_admin_yml = YAML.load_file(ha_admin_yml_file) if File.exist?(ha_admin_yml_file)
  rescue SystemCallError, IOError => err
    logger.error("#{err.class}: #{err}")
    logger.info("File not loaded: #{ha_admin_yml_file}. Default settings for failover will be used.")
  end
  @failover_attempts = ha_admin_yml['failover_attempts'] || FAILOVER_ATTEMPTS
  @db_check_frequency = ha_admin_yml['db_check_frequency'] || DB_CHECK_FREQUENCY
  @failover_check_frequency = ha_admin_yml['failover_check_frequency'] || FAILOVER_CHECK_FREQUENCY
  logger.info("FAILOVER_ATTEMPTS=#{@failover_attempts} DB_CHECK_FREQUENCY=#{@db_check_frequency} FAILOVER_CHECK_FREQUENCY=#{@failover_check_frequency}")
end
pg_connection(params) click to toggle source
# File lib/manageiq/postgres_ha_admin/failover_monitor.rb, line 111
def pg_connection(params)
  PG::Connection.open(params)
rescue PG::Error => e
  logger.error("Failed to establish PG connection: #{e.message}")
  nil
end
with_each_standby_connection(handler, server_store) { |connection, params| ... } click to toggle source
# File lib/manageiq/postgres_ha_admin/failover_monitor.rb, line 99
def with_each_standby_connection(handler, server_store)
  active_servers_conninfo(handler, server_store).each do |params|
    connection = pg_connection(params)
    next if connection.nil?
    begin
      yield connection, params
    ensure
      connection.finish
    end
  end
end