class Flapjack::Gateways::PagerDuty::AckFinder
Constants
- SEM_PAGERDUTY_ACKS_RUNNING
- SEM_PAGERDUTY_ACKS_RUNNING_TIMEOUT
Public Class Methods
new(opts = {})
click to toggle source
# File lib/flapjack/gateways/pager_duty.rb, line 168
#
# Sets up a new acknowledgement finder.
#
# opts - Hash of options. The keys read here are:
#        :lock   - kept as @lock
#        :config - kept as @config (also dumped to the debug log)
#
# Also marks the finder as being on its first pass (@initial) and removes
# the SEM_PAGERDUTY_ACKS_RUNNING key from redis.
def initialize(opts = {})
  @lock, @config = opts[:lock], opts[:config]
  @initial = true

  Flapjack.logger.debug("New PagerDuty::AckFinder pikelet with the following options: #{@config.inspect}")

  # TODO: only clear this if there isn't another PagerDuty gateway instance running
  # or better, include an instance ID in the semaphore key name
  Flapjack.redis.del(SEM_PAGERDUTY_ACKS_RUNNING)
end
Public Instance Methods
check_ids_by_medium(filter_check_ids, opts = {})
click to toggle source
# File lib/flapjack/gateways/pager_duty.rb, line 273
#
# Works out, for every medium whose transport is 'pagerduty', which of the
# given check ids should be looked up through that medium.
#
# filter_check_ids - the check ids of interest
# opts             - Hash of options; :time is forwarded to medium.checks
#
# Each check id is assigned to at most one medium: once an id has been
# handed to a medium it is excluded from every later one.
#
# Returns a Hash of medium => check ids (a medium may map to an empty
# collection).
def check_ids_by_medium(filter_check_ids, opts = {})
  at_time = opts[:time]

  Flapjack::Data::Medium.lock(Flapjack::Data::Check, Flapjack::Data::Rule) do
    pagerduty_media = Flapjack::Data::Medium.intersect(:transport => 'pagerduty')

    claimed_ids    = []
    ids_per_medium = {}

    pagerduty_media.all.each do |medium|
      scope = Flapjack::Data::Check.intersect(:id => filter_check_ids)
      medium_check_ids = medium.checks(:initial_scope => scope, :time => at_time).ids

      # only ids we were asked about, minus those an earlier medium took
      unclaimed = (medium_check_ids & filter_check_ids) - claimed_ids
      claimed_ids.push(*unclaimed)

      ids_per_medium[medium] = unclaimed
    end

    ids_per_medium
  end
end
find_pagerduty_acknowledgements()
click to toggle source
# File lib/flapjack/gateways/pager_duty.rb, line 215
#
# Looks for failing checks that are not in scheduled or unscheduled
# maintenance, asks PagerDuty (once per 'pagerduty' medium) whether any of
# them have been acknowledged there, and creates a matching Flapjack
# acknowledgement event for each one that has.
#
# Events go to the queue named by @config['processor_queue'], or 'events'
# when that is not set. The acknowledgement lasts
# medium.pagerduty_ack_duration seconds, or four hours when the medium has
# none.
#
# Returns nil when there are no unacknowledged failing checks; the return
# value is otherwise not meaningful.
def find_pagerduty_acknowledgements
  Flapjack.logger.debug("looking for acks in PagerDuty for unack'd problems")

  time = Time.now

  unacked_failing_checks = []

  Flapjack::Data::Check.lock(Flapjack::Data::ScheduledMaintenance,
                             Flapjack::Data::UnscheduledMaintenance) do

    unacked_failing_checks = Flapjack::Data::Check.
      intersect(:failing => true).reject do |check|
        check.in_unscheduled_maintenance?(time) ||
          check.in_scheduled_maintenance?(time)
      end
  end

  if unacked_failing_checks.empty?
    Flapjack.logger.debug "found no unacknowledged failing checks"
    return
  end

  Flapjack.logger.debug "found unacknowledged failing checks as follows: " +
    unacked_failing_checks.map(&:name).join(', ')

  check_ids_by_medium(unacked_failing_checks.map(&:id), :time => time).each_pair do |medium, check_ids|
    next if check_ids.empty?

    checks = unacked_failing_checks.select {|c| check_ids.include?(c.id)}
    next if checks.empty?

    incidents = pagerduty_acknowledgements(time, medium.pagerduty_subdomain,
                                           medium.pagerduty_token,
                                           checks.map(&:name))

    # The lookup yields nothing usable when the medium has no subdomain or
    # token; skip this medium rather than letting one misconfigured medium
    # abort the pass for all the others.
    next if incidents.nil?

    incidents.each do |incident|
      inc_key = incident['incident_key']

      # never enqueue an acknowledgement for a check we can't identify
      check = checks.detect {|c| c.name == inc_key}
      next if check.nil?

      pg_acknowledged_by = incident['last_status_change_by']
      Flapjack.logger.info "#{inc_key} is acknowledged in PagerDuty, creating flapjack acknowledgement... "

      who_text = ""

      if !pg_acknowledged_by.nil? && !pg_acknowledged_by['name'].nil?
        who_text = " by #{pg_acknowledged_by['name']}"
      end

      # default to 4 hours if no duration set in the medium
      ack_duration = medium.pagerduty_ack_duration || (4 * 60 * 60)

      Flapjack::Data::Event.create_acknowledgements(
        @config['processor_queue'] || 'events',
        [check],
        :summary  => "Acknowledged on PagerDuty" + who_text,
        :duration => ack_duration)
    end
  end
end
pagerduty_acknowledgements(time, subdomain, token, check_names)
click to toggle source
Returns any PagerDuty acknowledgements for the named checks.
# File lib/flapjack/gateways/pager_duty.rb, line 294
#
# Collects the acknowledged PagerDuty incidents whose 'incident_key' is one
# of the given check names, following pagination until every page has been
# read.
#
# time        - base time for the query window (converted to UTC)
# subdomain   - PagerDuty subdomain; required
# token       - PagerDuty API token; required
# check_names - names to match against each incident's 'incident_key'
#
# Returns an Array of incident Hashes. The Array is empty when the
# subdomain or token is blank, and also when any page request fails
# (partial results are discarded).
def pagerduty_acknowledgements(time, subdomain, token, check_names)
  if subdomain.blank? || token.blank?
    # NOTE(review): this message writes the token to the log - consider redacting
    Flapjack.logger.warn("pagerduty_acknowledgements?: Unable to look for acknowledgements on PagerDuty" \
                         " as the following options are required: subdomain (#{subdomain}), token (#{token})")
    # callers iterate the result, so hand back an empty list rather than nil
    return []
  end

  t = time.utc

  # handle paginated results
  cumulative_incidents = []
  offset = 0

  loop do
    response = pagerduty_acknowledgements_request(t, subdomain, token, 100, offset)

    if response.nil?
      # all-or-nothing: a failed page invalidates what was gathered so far
      cumulative_incidents = []
      break
    end

    page = response['incidents']

    cumulative_incidents += page.select do |incident|
      check_names.include?(incident['incident_key'])
    end

    # an empty page cannot advance the offset; stop instead of re-requesting
    # the same page forever
    break if page.empty?

    # fall back to the offset we asked for if the response doesn't echo it
    offset = (response['offset'] || offset) + page.size
    break unless offset < response['total'].to_i
  end

  # subsequent requests use the short look-back window
  @initial = false

  cumulative_incidents
end
pagerduty_acknowledgements_request(base_time, subdomain, token, limit, offset)
click to toggle source
# File lib/flapjack/gateways/pager_duty.rb, line 331
#
# Performs one GET against https://<subdomain>.pagerduty.com/api/v1/incidents
# for incidents with status 'acknowledged' and decodes the JSON reply.
#
# base_time - centre of the query window; the window runs from one week
#             before (while @initial is set) or fifteen minutes before
#             (otherwise) to one hour after
# subdomain - PagerDuty subdomain used to build the host name
# token     - API token sent in the Authorization header
# limit     - page size; only sent when it differs from 100 or offset != 0
# offset    - page offset; only sent when non-zero or limit != 100
#
# Returns the decoded response Hash, or nil when the body is not valid JSON
# or is not a Hash carrying an 'incidents' Array.
def pagerduty_acknowledgements_request(base_time, subdomain, token, limit, offset)
  since_offset, until_offset = if @initial
    # the last week -> one hour in the future
    [(60 * 60 * 24 * 7), (60 * 60)]
  else
    # the last 15 minutes -> one hour in the future
    [(60 * 15), (60 * 60)]
  end

  query = {'fields' => 'incident_key,incident_number,last_status_change_by',
           'since'  => (base_time - since_offset).iso8601,
           'until'  => (base_time + until_offset).iso8601,
           'status' => 'acknowledged'}

  if (limit != 100) || (offset != 0)
    query.update(:limit => limit, :offset => offset)
  end

  uri = URI::HTTPS.build(:host => "#{subdomain}.pagerduty.com",
                         :path => '/api/v1/incidents',
                         :port => 443,
                         :query => URI.encode_www_form(query))

  request = Net::HTTP::Get.new(uri.request_uri,
    {'Content-type'  => 'application/json',
     'Authorization' => "Token token=#{token}"})

  http = Net::HTTP.new(uri.host, uri.port)
  http.use_ssl = true
  http.verify_mode = OpenSSL::SSL::VERIFY_PEER

  Flapjack.logger.debug("pagerduty_acknowledgements: request to #{uri.request_uri}")
  Flapjack.logger.debug("pagerduty_acknowledgements: query: #{query.inspect}")
  # NOTE(review): this writes the API token to the debug log - consider redacting
  Flapjack.logger.debug("pagerduty_acknowledgements: auth: #{token}")

  http_response = http.request(request)

  Flapjack.logger.debug(http_response.inspect)

  response = nil
  begin
    response = Flapjack.load_json(http_response.body)
  rescue JSON::JSONError
    # report against the request URI (this is a GET) and include what came
    # back so the failure can be diagnosed
    Flapjack.logger.error("failed to parse json from a get to #{uri} ... response headers and body follows...")
    Flapjack.logger.error(http_response.inspect)
    Flapjack.logger.error(http_response.body.inspect)
  end

  Flapjack.logger.debug("pagerduty_acknowledgements: decoded response as: #{response.inspect}")

  # anything other than a Hash with an 'incidents' Array is unusable
  unless response.is_a?(Hash) && response['incidents'].is_a?(Array)
    Flapjack.logger.error('no valid response received from PagerDuty!')
    return
  end

  response
end
start()
click to toggle source
# File lib/flapjack/gateways/pager_duty.rb, line 181
#
# Main loop. Waits (retrying every 10 seconds) until
# Flapjack::Gateways::PagerDuty.test_pagerduty_connection succeeds, then
# repeatedly, under @lock, runs find_pagerduty_acknowledgements and sleeps
# for 10 seconds.
#
# A redis key (SEM_PAGERDUTY_ACKS_RUNNING) acts as a semaphore so that only
# one acknowledgement check runs at a time; a pass is skipped when the key
# is already held. The key is given an expiry of
# SEM_PAGERDUTY_ACKS_RUNNING_TIMEOUT as a backstop and is removed once the
# pass finishes, whether it succeeded or raised.
#
# Does not return normally; the redis connection is closed on the way out.
def start
  until Flapjack::Gateways::PagerDuty.test_pagerduty_connection
    Flapjack.logger.error("Can't connect to the PagerDuty API, retrying after 10 seconds")
    Kernel.sleep(10)
  end

  begin
    Zermelo.redis = Flapjack.redis

    loop do
      @lock.synchronize do
        # ensure we're the only instance of the PagerDuty acknowledgement check running (with a naive
        # timeout of one hour to guard against stale locks caused by crashing code) either in this
        # process or in other processes
        acquired = Flapjack.redis.setnx(SEM_PAGERDUTY_ACKS_RUNNING, 'true')

        # Depending on the client, SETNX is reported as true/false or as
        # 1/0; accept either form so a held semaphore is always honoured.
        if acquired == true || acquired == 1
          begin
            Flapjack.redis.expire(SEM_PAGERDUTY_ACKS_RUNNING, SEM_PAGERDUTY_ACKS_RUNNING_TIMEOUT)
            find_pagerduty_acknowledgements
          ensure
            # release even when the pass raises, rather than leaving the
            # key in place until it expires
            Flapjack.redis.del(SEM_PAGERDUTY_ACKS_RUNNING)
          end
        else
          Flapjack.logger.debug("skipping looking for acks in PagerDuty as this is already happening")
        end

        Kernel.sleep 10
      end
    end
  ensure
    Flapjack.redis.quit
  end
end
stop_type()
click to toggle source
# File lib/flapjack/gateways/pager_duty.rb, line 211
#
# Names the way this object is to be stopped.
# NOTE(review): presumably read by whatever supervises the pikelet - confirm
# against the caller.
#
# Returns the Symbol :exception.
def stop_type
  :exception
end