class Flapjack::Gateways::PagerDuty::AckFinder

Constants

SEM_PAGERDUTY_ACKS_RUNNING
SEM_PAGERDUTY_ACKS_RUNNING_TIMEOUT

Public Class Methods

new(opts = {}) click to toggle source
# File lib/flapjack/gateways/pager_duty.rb, line 168
# Sets up a new acknowledgement finder.
#
# opts - options hash; two keys are read here:
#        :lock   - stored in @lock
#        :config - stored in @config and written to the debug log
#
# Marks the finder as being on its initial pass (@initial = true) and removes
# the SEM_PAGERDUTY_ACKS_RUNNING key from Redis.
def initialize(opts = {})
  @lock, @config = opts[:lock], opts[:config]
  @initial = true

  Flapjack.logger.debug(
    "New PagerDuty::AckFinder pikelet with the following options: #{@config.inspect}"
  )

  # TODO: only clear this if there isn't another PagerDuty gateway instance running
  # or better, include an instance ID in the semaphore key name
  Flapjack.redis.del(SEM_PAGERDUTY_ACKS_RUNNING)
end

Public Instance Methods

check_ids_by_medium(filter_check_ids, opts = {}) click to toggle source
# File lib/flapjack/gateways/pager_duty.rb, line 273
# Maps each 'pagerduty' medium to the subset of filter_check_ids it covers.
#
# filter_check_ids - ids of the checks to consider
# opts             - options hash; :time is forwarded to medium.checks
#
# A check id is assigned to the first medium (in iteration order) that lists
# it, and is left out of every later medium's entry, so no id appears twice.
#
# The work runs inside Flapjack::Data::Medium.lock (with Check and Rule), and
# the value of that lock call is what this method returns.
def check_ids_by_medium(filter_check_ids, opts = {})
  time = opts[:time]

  Flapjack::Data::Medium.lock(Flapjack::Data::Check, Flapjack::Data::Rule) do
    pagerduty_media = Flapjack::Data::Medium.intersect(:transport => 'pagerduty')

    claimed_ids = []
    ids_by_medium = {}

    pagerduty_media.all.each do |medium|
      scope = Flapjack::Data::Check.intersect(:id => filter_check_ids)
      medium_check_ids = medium.checks(:initial_scope => scope, :time => time).ids

      # only ids we were asked about, minus those an earlier medium already took
      unclaimed_ids = (medium_check_ids & filter_check_ids) - claimed_ids
      claimed_ids.concat(unclaimed_ids.to_a)
      ids_by_medium[medium] = unclaimed_ids
    end

    ids_by_medium
  end
end
find_pagerduty_acknowledgements() click to toggle source
# File lib/flapjack/gateways/pager_duty.rb, line 215
# Looks for failing checks that are not in scheduled or unscheduled
# maintenance, asks PagerDuty (once per 'pagerduty' medium) whether incidents
# keyed by those check names have been acknowledged, and creates a Flapjack
# acknowledgement event for each one found.
#
# The event queue name comes from @config['processor_queue'] (default
# 'events'); the acknowledgement duration comes from the medium's
# pagerduty_ack_duration (default four hours).
#
# Returns nothing of interest; returns early when there is nothing to check.
def find_pagerduty_acknowledgements
  Flapjack.logger.debug("looking for acks in PagerDuty for unack'd problems")

  time = Time.now

  unacked_failing_checks = []

  Flapjack::Data::Check.lock(Flapjack::Data::ScheduledMaintenance,
    Flapjack::Data::UnscheduledMaintenance) do

    unacked_failing_checks = Flapjack::Data::Check.
      intersect(:failing => true).reject do |check|

        check.in_unscheduled_maintenance?(time) ||
          check.in_scheduled_maintenance?(time)
    end
  end

  if unacked_failing_checks.empty?
    Flapjack.logger.debug "found no unacknowledged failing checks"
    return
  end

  Flapjack.logger.debug "found unacknowledged failing checks as follows: " +
    unacked_failing_checks.map(&:name).join(', ')

  queue = @config['processor_queue'] || 'events'

  check_ids_by_medium(unacked_failing_checks.map(&:id), :time => time).each_pair do |medium, check_ids|
    next if check_ids.empty?
    checks = unacked_failing_checks.select {|c| check_ids.include?(c.id)}
    next if checks.empty?

    incidents = pagerduty_acknowledgements(time, medium.pagerduty_subdomain,
                                           medium.pagerduty_token,
                                           checks.map(&:name))

    # the lookup yields nothing usable when the medium lacks a subdomain or
    # token; treat that as "no acknowledgements" rather than failing on nil
    (incidents || []).each do |incident|

      inc_key = incident['incident_key']

      # never hand a nil check to the event creator
      check = checks.detect {|c| c.name == inc_key}
      next if check.nil?

      pg_acknowledged_by = incident['last_status_change_by']
      Flapjack.logger.info "#{inc_key} is acknowledged in PagerDuty, creating flapjack acknowledgement... "

      who_text = ""

      if !pg_acknowledged_by.nil? && !pg_acknowledged_by['name'].nil?
        who_text = " by #{pg_acknowledged_by['name']}"
      end

      # default to 4 hours if no duration set in the medium
      ack_duration = medium.pagerduty_ack_duration || (4 * 60 * 60)

      Flapjack::Data::Event.create_acknowledgements(
        queue,
        [check],
        :summary  => "Acknowledged on PagerDuty" + who_text,
        :duration => ack_duration)
    end
  end
end
pagerduty_acknowledgements(time, subdomain, token, check_names) click to toggle source

Returns any PagerDuty acknowledgements for the named checks.

# File lib/flapjack/gateways/pager_duty.rb, line 294
# Collects the acknowledged PagerDuty incidents whose 'incident_key' is one of
# check_names, following pagination until every page has been read.
#
# time        - reference time; converted to UTC and passed to the request
# subdomain   - PagerDuty subdomain for the account
# token       - PagerDuty API token
# check_names - incident keys to keep
#
# Returns an Array of incident hashes. The array is empty when subdomain or
# token is blank, and also when any page request fails (partial results are
# discarded). Clears @initial once a lookup has been attempted.
def pagerduty_acknowledgements(time, subdomain, token, check_names)
  if subdomain.blank? || token.blank?
    # NOTE(review): this message interpolates the API token into the log
    Flapjack.logger.warn("pagerduty_acknowledgements?: Unable to look for acknowledgements on PagerDuty" \
     " as the following options are required: subdomain (#{subdomain}), token (#{token})")
    # callers iterate the result, so hand back an empty list rather than nil
    return []
  end

  t = time.utc

  # handle paginated results
  cumulative_incidents = []
  offset = 0

  loop do
    response = pagerduty_acknowledgements_request(t, subdomain, token, 100, offset)

    if response.nil?
      # a failed page invalidates everything gathered so far
      cumulative_incidents = []
      break
    end

    page = response['incidents']

    cumulative_incidents.concat(page.select do |incident|
      check_names.include?(incident['incident_key'])
    end)

    # an empty page cannot advance the offset; stop instead of re-requesting
    # the same page forever
    break if page.empty?

    offset = (response['offset'] || offset) + page.size

    total = response['total']
    break if total.nil? || offset >= total
  end

  @initial = false

  cumulative_incidents
end
pagerduty_acknowledgements_request(base_time, subdomain, token, limit, offset) click to toggle source
# File lib/flapjack/gateways/pager_duty.rb, line 331
# Performs one GET against https://<subdomain>.pagerduty.com/api/v1/incidents
# for incidents with status 'acknowledged'.
#
# base_time - Time the query window is anchored on
# subdomain - PagerDuty subdomain, used to build the host name
# token     - API token, sent in the Authorization header
# limit     - page size; only sent when it differs from 100 or offset is non-zero
# offset    - pagination offset; only sent under the same condition
#
# The window runs from one week before base_time on the initial pass
# (@initial truthy), or 15 minutes before it afterwards, to one hour after.
#
# Returns the decoded response Hash, or nil when the body is not valid JSON or
# does not carry an 'incidents' Array.
def pagerduty_acknowledgements_request(base_time, subdomain, token, limit, offset)
  # the last week on the first pass, otherwise the last 15 minutes
  since_offset = @initial ? (60 * 60 * 24 * 7) : (60 * 15)
  # always look one hour into the future
  until_offset = 60 * 60

  query = {'fields'       => 'incident_key,incident_number,last_status_change_by',
           'since'        => (base_time - since_offset).iso8601,
           'until'        => (base_time + until_offset).iso8601,
           'status'       => 'acknowledged'}

  if (limit != 100) || (offset != 0)
    query.update('limit' => limit, 'offset' => offset)
  end

  uri = URI::HTTPS.build(:host => "#{subdomain}.pagerduty.com",
                         :path => '/api/v1/incidents',
                         :port => 443,
                         :query => URI.encode_www_form(query))

  request = Net::HTTP::Get.new(uri.request_uri,
    {'Content-type'  => 'application/json',
     'Authorization' => "Token token=#{token}"})

  http = Net::HTTP.new(uri.host, uri.port)
  http.use_ssl = true
  http.verify_mode = OpenSSL::SSL::VERIFY_PEER

  Flapjack.logger.debug("pagerduty_acknowledgements: request to #{uri.request_uri}")
  Flapjack.logger.debug("pagerduty_acknowledgements: query: #{query.inspect}")
  Flapjack.logger.debug("pagerduty_acknowledgements: auth: #{token}")

  http_response = http.request(request)
  Flapjack.logger.debug(http_response.inspect)

  response = nil
  begin
    response = Flapjack.load_json(http_response.body)
  rescue JSON::JSONError => e
    # report the request target and the offending body so the failure can be
    # diagnosed; the nil response is handled just below
    Flapjack.logger.error("failed to parse json from a request to #{uri} (#{e.class}) ... response and body follow...")
    Flapjack.logger.error(http_response.inspect)
    Flapjack.logger.error(http_response.body.inspect)
  end

  Flapjack.logger.debug("pagerduty_acknowledgements: decoded response as: #{response.inspect}")
  if response.nil? || !response.has_key?('incidents') || !response['incidents'].is_a?(Array)
    Flapjack.logger.error('no valid response received from PagerDuty!')
    return
  end

  response
end
start() click to toggle source
# File lib/flapjack/gateways/pager_duty.rb, line 181
# Runs the acknowledgement polling loop.
#
# First waits, retrying every 10 seconds, until
# Flapjack::Gateways::PagerDuty.test_pagerduty_connection is truthy. Then
# points Zermelo at Flapjack's Redis connection and loops indefinitely; each
# pass runs inside @lock.synchronize, tries to take the
# SEM_PAGERDUTY_ACKS_RUNNING semaphore in Redis, runs
# find_pagerduty_acknowledgements if it got it, and sleeps 10 seconds.
#
# The Redis connection is closed (quit) when the loop is left, including by
# an exception.
def start
  until Flapjack::Gateways::PagerDuty.test_pagerduty_connection
    Flapjack.logger.error("Can't connect to the PagerDuty API, retrying after 10 seconds")
    Kernel.sleep(10)
  end

  begin
    Zermelo.redis = Flapjack.redis

    loop do
      @lock.synchronize do
        # ensure we're the only instance of the PagerDuty acknowledgement check running (with a naive
        # timeout of one hour to guard against stale locks caused by crashing code) either in this
        # process or in other processes
        acquired = Flapjack.redis.setnx(SEM_PAGERDUTY_ACKS_RUNNING, 'true')

        # setnx reports "not set" as 0 or as false depending on the client;
        # accept both so the semaphore is honoured either way
        if acquired == 0 || acquired == false
          Flapjack.logger.debug("skipping looking for acks in PagerDuty as this is already happening")
        else
          begin
            Flapjack.redis.expire(SEM_PAGERDUTY_ACKS_RUNNING, SEM_PAGERDUTY_ACKS_RUNNING_TIMEOUT)
            find_pagerduty_acknowledgements
          ensure
            # release the semaphore even when the lookup raises, instead of
            # leaving it to expire
            Flapjack.redis.del(SEM_PAGERDUTY_ACKS_RUNNING)
          end
        end

        # NOTE(review): this sleep happens while @lock is still held — confirm
        # that is intended
        Kernel.sleep 10
      end
    end
  ensure
    Flapjack.redis.quit
  end
end
stop_type() click to toggle source
# File lib/flapjack/gateways/pager_duty.rb, line 211
# Reports how this pikelet should be stopped.
#
# Returns the Symbol :exception.
# NOTE(review): presumably the supervising code stops #start by raising an
# exception in its thread when it sees this value — confirm against the caller.
def stop_type
  :exception
end