class Notifu::Processor
Attributes
event[RW]
issue[RW]
now[RW]
processing_result[RW]
Public Instance Methods
cleanup!()
click to toggle source
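Cleanup method: enqueues a Notifu::Cleaner job for the issue once the event is OK and the issue's action is "resolve".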
# File lib/notifu/workers/processor.rb, line 354
# Enqueues a Notifu::Cleaner job for the current issue, but only when the
# event is in the OK state and the issue's recorded action is "resolve".
#
# @return whatever Notifu::Cleaner.perform_async returns, or nil when skipped
def cleanup!
  return unless is_ok? && issue.action == "resolve"

  Notifu::Cleaner.perform_async(issue.notifu_id)
end
duty_time?(timerange)
click to toggle source
# File lib/notifu/workers/processor.rb, line 327
# Tells whether the processor's current time (+now+) falls inside a duty
# time range.
#
# @param timerange [String] "ALWAYS", "NEVER", or a range containing
#   "HH:MM-HH:MM" (two-digit hours and minutes, interpreted in local time on
#   the calendar day of +now+)
# @return [Boolean] true for "ALWAYS", false for "NEVER", the result of the
#   range check for an "HH:MM-HH:MM" value, and true for anything else
# @raise [ArgumentError] if the hour/minute values are rejected by Time.local
def duty_time?(timerange)
  case timerange
  when "ALWAYS"
    true
  when "NEVER"
    false
  when /([0-9]{2}):([0-9]{2})-([0-9]{2}):([0-9]{2})/
    # The regex captures hold the four numbers; indexing the string itself
    # would only yield single characters.
    from_hour, from_min, until_hour, until_min = Regexp.last_match.captures.map(&:to_i)
    duty_from = Time.local(now.year, now.month, now.day, from_hour, from_min).to_i
    duty_until = Time.local(now.year, now.month, now.day, until_hour, until_min).to_i
    # Both bounds are inclusive. A range whose end precedes its start
    # (e.g. "22:00-06:00") never matches, because both bounds are placed on
    # the same calendar day.
    now.to_i.between?(duty_from, duty_until)
  else
    # Unrecognized values are treated as "always on duty".
    true
  end
end
enough_occurrences?()
click to toggle source
LOGIC BLOCK ###############################################
# File lib/notifu/workers/processor.rb, line 254
# Tells whether the event has occurred at least as many times as its
# configured trigger threshold.
#
# @return [Boolean]
def enough_occurrences?
  event.occurrences_count >= event.occurrences_trigger
end
escalate_to?(level, sla)
click to toggle source
# File lib/notifu/workers/processor.rb, line 258
# Decides whether the issue should be escalated to the given level.
#
# Escalation is due once +level+ multiplied by the SLA's refresh value has
# elapsed since the issue's creation time, and only while the event is
# critical. Only the SLA's refresh value is consulted here.
#
# @param level [#to_i] escalation level used as the multiplier
# @param sla   object responding to +refresh+
# @return [Boolean]
def escalate_to?(level, sla)
  interval = sla.refresh
  period = level.to_i * interval.to_i
  due_at = issue.time_created.to_i + period

  due_at <= now.to_i && is_critical?
end
first_notification?(sla, group)
click to toggle source
# File lib/notifu/workers/processor.rb, line 306
# Tells whether the issue has no recorded "last notified" time for the given
# group/SLA pair.
#
# @param sla   object responding to +name+
# @param group object responding to +name+
# @return [Boolean] true when issue.time_last_notified? returns nil
def first_notification?(sla, group)
  issue.time_last_notified?(group.name, sla.name).nil?
end
get_silenced()
click to toggle source
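Get the silenced entries from the Sensu API (GET <api_endpoint>/silenced); returns an empty list if the request or JSON parsing fails.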
# File lib/notifu/workers/processor.rb, line 363
# Fetches "<api_endpoint>/silenced" from the Sensu API using the credentials
# in Notifu::CONFIG[:sensu_api] and returns the parsed JSON body.
#
# Ordinary failures (StandardError: connection problems, invalid JSON, ...)
# are logged and answered with an empty list. Non-StandardError exceptions
# such as signals or shutdown requests are deliberately not rescued.
#
# @return [Object] the parsed JSON document, or [] on failure
def get_silenced
  url = "#{event.api_endpoint}/silenced"
  response = Excon.get(url,
                       user: Notifu::CONFIG[:sensu_api][:username],
                       password: Notifu::CONFIG[:sensu_api][:password])
  JSON.parse(response.body)
rescue StandardError => e
  # Log the URL that was actually requested.
  log "error", "Failed to get stashes - GET #{url} (#{e.message})"
  []
end
is_critical?()
click to toggle source
# File lib/notifu/workers/processor.rb, line 302
# Tells whether the event's status code is 2 (critical).
#
# @return [Boolean]
def is_critical?
  event.code == 2
end
is_ok?()
click to toggle source
# File lib/notifu/workers/processor.rb, line 294
# Tells whether the event's status code is 0 (OK).
#
# @return [Boolean]
def is_ok?
  event.code == 0
end
is_warning?()
click to toggle source
# File lib/notifu/workers/processor.rb, line 298
# Tells whether the event's status code is 1 (warning).
#
# @return [Boolean]
def is_warning?
  event.code == 1
end
notify!(sla, group)
click to toggle source
NOTIFICATION METHOD (method for :process! ) ###############
# File lib/notifu/workers/processor.rb, line 207
# Dispatches notifications for one group/SLA pair.
#
# Collects the group's primary contacts and the SLA's primary actors, adds
# the secondary and tertiary tiers when escalate_to? says they are due,
# pushes one Sidekiq job per actor, logs the action and records the
# notification time on the issue.
#
# @param sla   object responding to +name+, +actors+ and +refresh+
# @param group object responding to +name+, +primary+, +secondary+, +tertiary+
# @return [Hash] summary with keys :sla, :group, :actors, :contacts and
#   :escalation_level
def notify!(sla, group)
  actors = []
  contacts = []
  escalation_level = "primary"

  # NOTE(review): eval runs whatever Ruby source is stored in sla.actors.
  # If that value can ever come from an untrusted source this is a code
  # execution risk; consider storing it in a data format instead.
  sla_actors = eval(sla.actors)

  group.primary.each { |contact| contacts << contact.name }
  actors += sla_actors[:primary]

  # Tiers are checked in order; each tier that is due adds its contacts and
  # (when configured) its actors, and the last one that is due names the
  # escalation level.
  [[1, :secondary], [2, :tertiary]].each do |level, tier|
    next unless escalate_to?(level, sla)

    group.public_send(tier).each { |contact| contacts << contact.name }
    tier_actors = sla_actors[tier]
    actors += tier_actors if tier_actors
    escalation_level = tier.to_s
  end

  actors.each do |actor|
    Sidekiq::Client.push(
      'class' => "Notifu::Actors::#{actor.camelize}",
      'args' => [event.notifu_id, contacts],
      'queue' => "actor-#{actor}"
    )
  end

  log "info", "Taking action (#{group.name}:#{sla.name}) NID #{event.notifu_id} [#{event.host}/#{event.service}/#{event.code.to_state}] actor: #{actors.join(', ')}; contacts: #{contacts.join(', ')}; escalation_level: #{escalation_level}"

  issue.time_last_notified!(group.name, sla.name, Time.now.to_i)

  {
    sla: sla.name,
    group: group.name,
    actors: actors,
    contacts: contacts,
    escalation_level: escalation_level
  }
end
perform(*args)
click to toggle source
SIDEKIQ GLUE METHOD #######################################
# File lib/notifu/workers/processor.rb, line 46
# Job entry point: builds the event from the job arguments, loads the
# matching issue (creating it from the event data when none exists), runs
# process! and logs how long the task took.
#
# @param args job arguments, passed as one array to Notifu::Model::Event.new
def perform(*args)
  started_ms = Time.now.to_f * 1000.0
  log "debug", "Task start"

  # Build the event and pin the processing time used by the logic methods.
  self.event = Notifu::Model::Event.new(args)
  self.now = Time.now
  log "debug", "Processing event NID #{event.notifu_id}"

  # Reuse the known issue for this notifu_id, or store a new one.
  self.issue = Notifu::Model::Issue.with(:notifu_id, event.notifu_id) ||
               Notifu::Model::Issue.create(event.data)

  process!

  finished_ms = Time.now.to_f * 1000.0
  log "debug", "Task finish (in #{finished_ms - started_ms}ms)"
end
process!()
click to toggle source
MAIN PROCESSING LOGIC #####################################
# File lib/notifu/workers/processor.rb, line 71
# Main processing logic.
#
# For every group/SLA pair listed in the event's group_sla, decides whether
# a notification has to be sent, sends it through notify! when required,
# stores the outcome on the event and writes an action-log entry describing
# the decision. Afterwards the issue is synchronized with the event, saved,
# and cleanup! is called.
def process!

  self.event.group_sla.each do |gs|

    # group related objects
    begin
      group = Notifu::Model::Group.with(:name, gs[:group])
      sla = Notifu::Model::Sla.with(:name, gs[:sla])
    rescue
      # Bare rescue: any StandardError raised by the lookups skips this pair.
      log "info", "#{self.event.notifu_id} [#{self.event.host}/#{self.event.service}/#{self.event.code.to_state}]: Object init failed. Is Notifu API running?"
      next
    end

    # Default summary used when no notification is sent; replaced by the
    # return value of notify! otherwise.
    notified = {
      sla: String.new,
      group: String.new,
      actors: Array.new,
      contacts: Array.new,
      escalation_level: "none"
    }
    # Decision trail; the last entry is logged as the result and the one
    # before it as the reason.
    result = []

    # logic
    if enough_occurrences? && self.event.action.to_s == "create"
      result << "enough occurrences have passed"
      if ! silenced?
        result << "issue is not silenced"
        if duty_time? sla.timerange_values(self.now)
          result << "duty is active"
          if status_changed?
            # A change of state always triggers a notification.
            result << "issue state has changed"
            notified = notify!(sla, group)
            result << "ACTION"
          else
            result << "issue state hasn't changed"
            case self.event.code
            when 0
              result << "issue is in OK state" << "IDLE"
            when 1
              # Warnings are notified once per group/SLA pair.
              result << "issue is in WARNING state"
              if first_notification?(sla, group)
                result << "issue is new"
                notified = notify!(sla, group)
                result << "ACTION"
              else
                result << "already notified" << "IDLE"
              end
            when 2
              # Criticals are re-notified whenever renotify? says it is time.
              result << "issue is not a warning"
              if renotify?(sla, group)
                result << "it's time to renotify"
                notified = notify!(sla, group)
                result << "ACTION"
              else
                result << "not yet time to renotify or escalate" << "IDLE"
              end
            else
              result << "unknown state (#{self.event.code})" << "IDLE"
            end
          end
        else
          result << "duty is not active at this time" << "IDLE"
        end
      else
        result << "issue is silenced" << "IDLE"
      end
    elsif self.event.action == "resolve" && self.issue.occurrences_count.to_i >= self.event.occurrences_trigger.to_i
      # Recovery: notify unless silenced; when silenced, optionally remove
      # the silence instead.
      # NOTE(review): when the issue is silenced and event.unsilence is
      # falsy, nothing is appended to result, so result/reason are logged
      # as nil below; the unsilence branch appends a single entry only.
      if ! silenced?
        result << "recovery of an event"
        notified = notify!(sla, group)
        result << "ACTION"
      elsif self.event.unsilence
        result << "recovery of an event (with unsilence)"
        unsilence!
      end
    else
      result << "not enough occurrences of this event" << "IDLE"
    end

    self.event.update_process_result!(notified)

    # Structured record of the decision for this group/SLA pair.
    # NOTE(review): self.jid is not defined in the visible source —
    # presumably supplied by the Sidekiq worker mixin; confirm.
    action_log_message = {
      logic: result.join(' -> '),
      result: result[-1],
      reason: result[-2],
      group: group.name,
      sla: sla.name,
      host: self.event.host,
      service: self.event.service,
      message: self.event.message,
      state: self.event.code.to_state,
      contacts: notified[:contacts].to_json,
      actors: notified[:actors].to_json,
      occurrences_trigger: self.event.occurrences_trigger.to_i,
      occurrences_count: self.event.occurrences_count.to_i,
      check_duration: self.event.duration,
      escalation_level: notified[:escalation_level].to_s,
      sidekiq_jid: self.jid,
      notifu_id: self.event.notifu_id,
      :"@timestamp" => self.now.iso8601,
    }

    action_log "processor", action_log_message
  end

  # Copy message, action and process result to the issue only when the event
  # carries a non-empty process result.
  if self.event.process_result.length > 0
    self.issue.message = self.event.message
    self.issue.action = self.event.action
    self.issue.process_result = self.event.process_result
    @issue.save
  end

  # The issue's code and creation time follow the event only on a state
  # change.
  if status_changed?
    self.issue.code = self.event.code
    self.issue.time_created = self.event.time_created
  end

  # These fields are refreshed from the event on every run.
  self.issue.occurrences_trigger = self.event.occurrences_trigger
  self.issue.occurrences_count = self.event.occurrences_count
  self.issue.time_last_event = self.event.time_last_event
  self.issue.sgs = self.event.sgs
  self.issue.aspiring_code = self.event.code
  self.issue.api_endpoint = self.event.api_endpoint
  self.issue.duration = self.event.duration
  @issue.save

  # delayed cleanup job
  cleanup!
end
renotify?(sla, group)
click to toggle source
# File lib/notifu/workers/processor.rb, line 314
# Tells whether enough time has passed since the last notification for the
# given group/SLA pair to notify again. The interval is the SLA's refresh
# value.
#
# @param sla   object responding to +name+ and +refresh+
# @param group object responding to +name+
# @return [Boolean]
def renotify?(sla, group)
  interval = sla.refresh
  last_notified = issue.time_last_notified?(group.name, sla.name)

  last_notified.to_i + interval.to_i <= now.to_i
end
silenced?()
click to toggle source
# File lib/notifu/workers/processor.rb, line 277
# Tells whether one of the entries returned by get_silenced has an "id"
# equal to the silence id of the current event: "silence/<host>" for the
# "keepalive" service, "silence/<host>/<service>" otherwise.
#
# @return [Boolean] true on the first matching entry; false when none
#   matches or when looking up "id" on an entry raises NoMethodError
def silenced?
  silence_id =
    if event.service == "keepalive"
      "silence/#{event.host}"
    else
      "silence/#{event.host}/#{event.service}"
    end

  get_silenced.each do |entry|
    begin
      return true if entry["id"] == silence_id
    rescue NoMethodError
      # An entry that cannot be indexed ends the search with "not silenced".
      return false
    end
  end

  false
end
status_changed?()
click to toggle source
# File lib/notifu/workers/processor.rb, line 310
# Tells whether the event's status code differs from the code stored on the
# issue (both compared as integers).
#
# @return [Boolean]
def status_changed?
  current_code = event.code.to_i
  known_code = issue.code.to_i

  current_code != known_code
end
unsilence!()
click to toggle source
unsilence method
# File lib/notifu/workers/processor.rb, line 377
# Removes the silence for the current event's host/service after a recovery.
#
# Every entry returned by get_silenced whose "path" equals
# "silence/<host>/<service>" is considered. The silence is deleted through
# the Sensu API only when the entry's "expire" value is negative and
# event.unsilence is truthy; otherwise the reason for leaving it in place is
# logged. Ordinary request failures (StandardError) are logged as warnings;
# non-StandardError exceptions such as signals or shutdown requests are
# deliberately not rescued.
#
# NOTE(review): get_silenced queries "/silenced", while this method reads
# "path"/"expire" and deletes under "/stashes" — confirm both endpoints
# return entries of the shape expected here.
#
# @return the collection returned by get_silenced
def unsilence!
  target = "#{event.host}/#{event.service}"
  path = "silence/#{target}"
  url = "#{event.api_endpoint}/stashes/#{path}"

  get_silenced.each do |stash|
    next unless stash["path"] == path

    unless stash["expire"] < 0
      log "info", "#{target} left silenced (auto-unsilence doesn't work when TTL is defined)"
      next
    end

    unless event.unsilence
      log "info", "#{target} left silenced (auto-unsilence disabled)"
      next
    end

    begin
      Excon.delete(url,
                   user: Notifu::CONFIG[:sensu_api][:username],
                   password: Notifu::CONFIG[:sensu_api][:password])
      log "info", "Unsilenced #{target} after recovery"
    rescue StandardError => e
      log "warning", "Failed to unsilence - DELETE #{url} (#{e.message})"
    end
  end
end