class Smith::Riemann
Constants
- SERVICE_NAME
Public Class Methods
new()
click to toggle source
# File bin/riemann-smith, line 23
# Creates the Riemann client used to submit events, pointing it at the
# :host and :port entries of #options.
def initialize
  host, port = options.values_at(:host, :port)
  @riemann = Riemann::Client.new(:host => host, :port => port)
end
Public Instance Methods
alert(service, state, description, extra={})
click to toggle source
Constructs an alert message.
# File bin/riemann-smith, line 99
# Builds the event hash for a single alert and logs it at info level.
#
# service     - value passed through #service to produce the event's :service.
# state       - key passed through #state to produce the event's :state.
# description - stored unchanged as the event's :description.
# extra       - Hash merged over the base event; its keys take precedence.
#
# Returns the merged event Hash.
def alert(service, state, description, extra={})
  state_label = state(state)

  event = {
    :tags        => options[:tags],
    :ttl         => options[:ttl],
    :service     => service(service),
    :state       => state_label,
    :description => description
  }.merge(extra)

  # The log line reports the mapped +state+ argument rather than
  # event[:state], so an override supplied through +extra+ is not reflected.
  logger.info { "#{event[:service]}: #{state_label}" }
  event
end
check(response)
click to toggle source
Checks that the list of agents returned by the list command matches the list of known agents and composes an appropriate alert for each one.
# File bin/riemann-smith, line 70
# Compares the agents reported by the agency's list command with the agents
# that are expected to be running and builds one alert per expected agent.
#
# Agents named in options[:agents] are always checked, once, whether or not
# any agent group is configured. Previously they were only checked as part of
# each group, so they were ignored when no group was given and reported once
# per group otherwise.
#
# response - the raw list-command output; it is handed to #parse_response.
#
# Returns a flat Array of alert hashes: the explicitly listed agents first,
# then each group's agents in the order the groups are configured. When
# #agent_group raises a RuntimeError for a group, that group contributes a
# single :critical alert carrying the error message.
def check(response)
  # The response is the same for every agent, so parse it only once.
  running_agents = parse_response(response)

  explicit_alerts = options[:agents].map do |agent_name|
    agent_alert(agent_name, running_agents)
  end

  group_alerts = options[:group].map do |group|
    begin
      agent_group(group).map { |agent_name| agent_alert(agent_name, running_agents) }
    rescue RuntimeError => e
      alert("Group #{group}: #{e.message}", :critical, e.message)
    end
  end

  (explicit_alerts + group_alerts).flatten
end

# Builds the alert for one expected agent: :running when the parsed response
# has at least one entry for it whose state is "running" and whose pid passes
# #running?, otherwise :critical.
def agent_alert(agent_name, running_agents)
  # key? is checked first so the lookup never triggers a default block on
  # the parsed hash for an agent that was not reported.
  alive = running_agents.key?(agent_name) &&
          running_agents[agent_name].any? { |a| a[:state] == "running" && running?(a[:pid]) }

  if alive
    alert(agent_name, :running, "Agent running")
  else
    alert(agent_name, :critical, "Agent not running")
  end
end
private :agent_alert
on_check(&blk)
click to toggle source
# File bin/riemann-smith, line 64
# Registers the callback that receives each alert produced by a check run.
#
# blk - the block to store; it is later invoked with one alert at a time.
#
# Returns the stored block (nil when no block is given).
def on_check(&blk)
  @on_check = blk
end
options()
click to toggle source
# File bin/riemann-smith, line 117
# Parses the command line once and memoises the result.
#
# The defaults below are overridden by whatever OptionParser finds in ARGV;
# parse! removes the recognised options from ARGV as it goes.
#
# Returns the Hash of settings (:interval, :ttl, :timeout, :agents, :group,
# :tags, :host, :port, :agency_timeout).
def options
  @options ||= begin
    OptionParser.accept(Pathname) {|p,| Pathname.new(p) if p}

    defaults = {
      :interval       => 30,
      :ttl            => nil,
      :timeout        => 11,
      :agents         => [],
      :group          => [],
      :tags           => [],
      :host           => 'localhost',
      :port           => 5555,
      :agency_timeout => 60
    }

    # tap returns +defaults+ itself, so the handlers mutate the very hash
    # that ends up memoised in @options.
    defaults.tap do |settings|
      parser = OptionParser.new do |opts|
        opts.separator "\n"
        opts.set_summary_indent " "

        opts.banner = "\nUsage: #{opts.program_name} OPTIONS"
        opts.on_head "\n Periodically lists the running agents and sends the result to riemann."

        opts.on("--ttl <i>", Integer, "TTL for events") { |t| settings[:ttl] = t }
        opts.on("--interval <i>", Integer, "Polling interval in seconds (default #{settings[:interval]})") { |t| settings[:interval] = t }
        # Fixed: this handler used to overwrite :interval instead of :timeout.
        opts.on("--timeout <i>", Integer, "Agency timeout (default #{settings[:timeout]})") { |t| settings[:timeout] = t }
        opts.on("--host <s>", String, "Riemann host (default #{settings[:host]})") { |v| settings[:host] = v }
        opts.on("--port <i>", Integer, "Riemann port (default #{settings[:port]})") { |v| settings[:port] = v }
        opts.on("--tags <tag1,tag1,...>", Array, "Tags to add to the alert") { |t| settings[:tags] = t }
        # Fixed: the concatenation result used to be discarded, so --agents
        # never had any effect. Repeated flags now accumulate.
        opts.on("--agents <agent1,agent2,...>", Array, "Agents to monitor") { |v| settings[:agents] += v }
        opts.on("--agent-group <group name>>", Array, "The name of an 'agent group' to monitor") { |g| settings[:group] += g }
        opts.separator "\n"
      end

      parser.parse!
    end
  end
end
parse_response(response)
click to toggle source
Returns a nested hash representing the running agents.
# File bin/riemann-smith, line 90
# Turns the text of the agency's list command into a Hash keyed by the last
# column of each row, each value being an Array of row hashes with the keys
# :state, :uuid, :pid and :description.
#
# The first line must start with "total"; otherwise the response is treated
# as containing no agents. A nil or empty response also yields an empty Hash
# instead of raising, and blank lines are skipped so they no longer create a
# nil-keyed entry.
#
# response - the list-command output (String, or nil).
#
# Returns the Hash described above. It has a default block, so reading a
# missing key stores and returns an empty Array.
def parse_response(response)
  header, *rows = response.to_s.split(/\n/).map(&:strip)
  rows = [] unless /^total/ =~ header

  rows.each_with_object(Hash.new { |h, k| h[k] = [] }) do |row, acc|
    next if row.empty?

    # Columns are separated by runs of at least two whitespace characters.
    fields = row.split(/\s +/)
    acc[fields.last] << {
      :state       => fields[0],
      :uuid        => fields[1],
      :pid         => fields[2],
      :description => fields.last
    }
  end
end
run()
click to toggle source
# File bin/riemann-smith, line 27
# Wires everything together: opens a sender on the agency control queue,
# forwards every alert to the Riemann client, stops Smith on a channel error
# and schedules #run_check every options[:interval] seconds.
def run
  control_queue = Smith::QueueDefinitions::Agency_control.call

  Smith::Messaging::Sender.new(control_queue) do |queue|
    # Each alert produced by a check is pushed straight to Riemann.
    on_check { |check| @riemann << check }

    queue.on_error do |_channel, channel_close|
      logger.fatal { "Channel error: #{channel_close.reply_code}: #{channel_close.reply_text}." }
      Smith.stop
    end

    EM.add_periodic_timer(options[:interval]) do
      run_check(queue)
    end
  end
end
run_check(queue)
click to toggle source
# File bin/riemann-smith, line 42
# Performs one polling cycle: emits a heartbeat alert, installs the timeout
# and reply handlers on +queue+, then asks the agency for its agent list if
# the queue has at least one consumer.
#
# queue - the sender yielded by Smith::Messaging::Sender in #run.
def run_check(queue)
  # The heartbeat's TTL is twice the polling interval.
  # NOTE(review): "heatbeat" looks like a typo for "heartbeat"; it is left
  # untouched here because the string is emitted to Riemann.
  heartbeat = alert(:heartbeat, :running, "riemann-smith heatbeat", :ttl => options[:interval] * 2)
  @on_check.call(heartbeat)

  queue.on_timeout(options[:timeout]) do |_message_id|
    @on_check.call(alert(:agency, :timeout, "Agency timeout."))
  end

  # The reply queue name is derived from this process's pid.
  reply_queue_name = "riemann.#{Digest::SHA1.hexdigest(Process.pid.to_s)}"
  queue.on_reply(:auto_ack => true, :reply_queue_name => reply_queue_name) do |reply_payload, _r|
    check(reply_payload.response).each { |agent| @on_check.call(agent) }
  end

  queue.consumer_count do |count|
    if count > 0
      list_command = Smith::ACL::AgencyCommand.new(:command => 'list', :args => ['-a', '-l'])
      queue.publish(list_command)
    else
      # No consumer on the control queue is reported as the agency being down.
      @on_check.call(alert(:agency, :critical, "Agency not running."))
    end
  end
end
running?(pid)
click to toggle source
Return true if the given pid exists in the process table.
# File bin/riemann-smith, line 104
# Reports whether +pid+ refers to a live process.
#
# pid - the process id (converted with to_i), or nil.
#
# Returns false when +pid+ is nil or when Sys::ProcTable.ps returns nil for
# it (presumably meaning no such process — confirm against sys-proctable),
# true otherwise.
def running?(pid)
  return false if pid.nil?

  !Sys::ProcTable.ps(pid.to_i).nil?
end
service(name)
click to toggle source
Return the service name
# File bin/riemann-smith, line 109
# Builds the full Riemann service name for +name+ by prefixing it with
# SERVICE_NAME and a single space.
#
# name - the short name (anything that responds to to_s).
#
# Returns the combined String.
def service(name)
  format("%s %s", SERVICE_NAME, name)
end
state(state)
click to toggle source
# File bin/riemann-smith, line 113
# Maps an internal state key to the state string sent to Riemann.
#
# state - one of :timeout, :running or :critical.
#
# Returns "timeout", "ok" or "critical" respectively, or nil for any other
# value.
def state(state)
  case state
  when :timeout  then "timeout"
  when :running  then "ok"
  when :critical then "critical"
  end
end