class Riemann::Tools::Riak
Public Class Methods
new()
click to toggle source
# File bin/riemann-riak, line 27
# Runs the one-time start-up probes: detects the available command-line
# tools, checks whether the stats endpoint answers over HTTP, and sets
# ERL_AFLAGS from the configured cookie.
def initialize
  detect_features

  # HTTP is assumed usable until the probe request below raises.
  @httpstatus = true
  begin
    endpoint = URI.parse(opts[:riak_host])
    # When the option parses without a host component, use the raw option value.
    endpoint.host ||= opts[:riak_host]

    client = Net::HTTP.new(endpoint.host, opts[:stats_port])
    client.use_ssl = endpoint.scheme == 'https'
    # Certificate verification is switched off for HTTPS endpoints.
    client.verify_mode = OpenSSL::SSL::VERIFY_NONE if client.use_ssl?
    client.start { |conn| conn.get opts[:stats_path] }
  rescue StandardError
    # Any failure while probing means HTTP stats are treated as unavailable.
    @httpstatus = false
  end

  # we're going to override the emulator setting to allow users to
  # dynamically input the cookie
  # this is done only once - hopefully it doesn't get overridden.
  ENV['ERL_AFLAGS'] = "-setcookie #{opts[:cookie]}"
end
Public Instance Methods
check_disk()
click to toggle source
# File bin/riemann-riak, line 145
# Reports the size of the configured data directory, as measured by `du -Ls`,
# under the 'riak disk' service.
def check_disk
  du_output = `du -Ls #{opts[:data_dir]}`
  # The first whitespace-separated field of the du output is the size; it is
  # divided by 1024**2 to obtain the reported GB figure.
  size_field = du_output.split(/\s+/).first
  gb = size_field.to_i / (1024.0**2)

  report(
    host: opts[:riak_host],
    service: 'riak disk',
    state: 'ok',
    metric: gb,
    description: "#{gb} GB in #{opts[:data_dir]}"
  )
end
check_keys()
click to toggle source
# File bin/riemann-riak, line 97
# Runs the riemann-riak-keys helper located next to this file and reports its
# output under 'riak keys'. Purely numeric output is reported as an 'ok'
# metric; anything else is reported with state 'unknown' and no metric.
def check_keys
  helper = "#{File.expand_path(File.dirname(__FILE__))}/riemann-riak-keys"
  keys = `#{helper} #{opts[:node_name]}`.chomp

  event = { host: opts[:riak_host], service: 'riak keys' }
  event =
    if keys =~ /^\d+$/
      event.merge(state: 'ok', metric: keys.to_i, description: keys)
    else
      event.merge(state: 'unknown', description: keys)
    end
  report(event)
end
check_ring()
click to toggle source
# File bin/riemann-riak, line 69
# Reports ring readiness under 'riak ring'. The riemann-riak-ring helper is
# used when escript was detected, otherwise `riak-admin ringready`; when
# neither tool was detected nothing is reported. Output starting a line with
# TRUE is 'ok', anything else is 'warning'.
def check_ring
  output =
    if @escript
      `#{File.expand_path(File.dirname(__FILE__))}/riemann-riak-ring #{opts[:node_name]}`.chomp
    elsif @riakadmin
      `riak-admin ringready`
    end
  return if output.nil?

  ring_state = output =~ /^TRUE/ ? 'ok' : 'warning'
  report(
    host: opts[:riak_host],
    service: 'riak ring',
    state: ring_state,
    description: output
  )
end
check_stats()
click to toggle source
Reports current stats to Riemann
# File bin/riemann-riak, line 256
# Helper for check_stats: reports a 'critical' event, carrying the error
# message, for 'riak' itself, every core service and every FSM
# type/property/percentile combination.
def report_stats_failure(error)
  event = { :state => 'critical', :description => error.message, :host => opts[:riak_host] }
  report(event.merge(:service => 'riak'))
  core_services.each do |s|
    report(event.merge(:service => "riak #{s}"))
  end
  fsm_types.each do |typespec|
    typespec.each do |type, prop|
      fsm_percentiles.each do |percentile|
        report(event.merge(:service => "riak #{type} #{prop} #{percentile}"))
      end
    end
  end
end

# Reports current stats to Riemann.
#
# Fetches the stats hash and emits one event for Riak itself, one per core
# service (value divided by 60 and described as a per-second rate) and one
# per FSM type/property/percentile. If fetching the stats raises, every one
# of those services is reported as 'critical' instead and the method returns
# nil.
def check_stats
  begin
    snapshot = stats
  rescue => e
    report_stats_failure(e)
    return
  end

  # Riak itself
  report(
    :host => opts[:riak_host],
    :service => 'riak',
    :state => 'ok'
  )

  # Gets/puts/rr
  core_services.each do |s|
    rate = snapshot[s].to_i / 60.0
    report(
      :host => opts[:riak_host],
      :service => "riak #{s}",
      :state => 'ok',
      :metric => rate,
      :description => "#{rate}/sec"
    )
  end

  # FSMs
  fsm_types.each do |typespec|
    typespec.each do |type, prop|
      fsm_percentiles.each do |percentile|
        # to_i maps missing (nil) and non-numeric values such as 'undefined' to 0.
        val = snapshot[fsm_stat(type, prop, percentile)].to_i
        if prop == 'time'
          val /= 1000.0 # Convert us to ms
          state = fsm_state(type, percentile, val)
          description = "#{val} ms"
        else
          # Only time values are thresholded and carry a millisecond unit.
          state = 'ok'
          description = val.to_s
        end
        report(
          :host => opts[:riak_host],
          :service => "riak #{type} #{prop} #{percentile}",
          :state => state,
          :metric => val,
          :description => description
        )
      end
    end
  end
end
check_transfers()
click to toggle source
# File bin/riemann-riak, line 117
# Reports pending handoffs for this node under 'riak transfers'.
#
# Runs `riak-admin transfers` (only when riak-admin was detected; otherwise
# nothing is reported and nil is returned). If the output says this node is
# waiting to hand off N partitions, a 'critical' event with metric N is
# reported; otherwise an 'ok' event with metric 0.
def check_transfers
  return unless @riakadmin

  output = `riak-admin transfers`

  # The node name is escaped so that characters such as '.' in
  # 'riak@127.0.0.1' only match themselves.
  node = Regexp.escape(opts[:node_name].to_s)
  match = /'#{node}' waiting to handoff (\d+) partitions/.match(output)

  if match
    pending = match[1]
    report(
      :host => opts[:riak_host],
      :service => 'riak transfers',
      :state => 'critical',
      :metric => pending.to_i,
      :description => "waiting to handoff #{pending} partitions"
    )
  else
    report(
      :host => opts[:riak_host],
      :service => 'riak transfers',
      :state => 'ok',
      :metric => 0,
      :description => "No pending transfers"
    )
  end
end
core_services()
click to toggle source
# File bin/riemann-riak, line 236
# Names of the stats that are reported as per-second core services.
def core_services
  %w[
    vnode_gets
    vnode_puts
    node_gets
    node_puts
    node_gets_set
    node_puts_set
    read_repairs
  ]
end
detect_features()
click to toggle source
Identifies whether escript and riak-admin are installed
# File bin/riemann-riak, line 56
# Identifies whether escript and riak-admin are installed, by checking
# whether `which` prints anything for each of them.
def detect_features
  blank = /^\s*$/

  @escript = true   # Whether escript is present on this machine
  @riakadmin = true # Whether riak-admin is present

  @escript = false if `which escript` =~ blank
  @riakadmin = false if `which riak-admin` =~ blank
end
fsm_percentiles()
click to toggle source
# File bin/riemann-riak, line 251
# Percentiles reported for each FSM stat; 50 is looked up as the median
# by fsm_stat.
def fsm_percentiles
  [
    50,
    95,
    99
  ]
end
fsm_stat(type, property, percentile)
click to toggle source
Returns the riak stat for the given fsm type and percentile.
# File bin/riemann-riak, line 157
# Returns the riak stat name for the given fsm type, property and percentile.
# The 50th percentile is named 'median'; other percentiles use their number.
def fsm_stat(type, property, percentile)
  suffix = percentile == 50 ? 'median' : percentile
  "node_#{type}_fsm_#{property}_#{suffix}"
end
fsm_state(type, percentile, val)
click to toggle source
Returns the alerts state for the given fsm.
# File bin/riemann-riak, line 162
# Returns the alert state for the given fsm value: 'ok' from 0 up to the
# configured "<type>_<percentile>_warning" option, 'warning' up to twice that
# limit, and 'critical' for everything else.
def fsm_state(type, percentile, val)
  limit = opts["#{type}_#{percentile}_warning".to_sym]

  return 'ok' if (0..limit).cover?(val)
  return 'warning' if (limit..limit * 2).cover?(val)

  'critical'
end
fsm_types()
click to toggle source
# File bin/riemann-riak, line 246
# FSM type => property pairs that are reported for every percentile.
def fsm_types
  [
    { 'get' => 'time' },
    { 'put' => 'time' },
    { 'get' => 'set_objsize' }
  ]
end
stats()
click to toggle source
Get current stats as a hash
# File bin/riemann-riak, line 220
# Get current stats as a hash. HTTP is preferred when the start-up probe
# succeeded, then riak-admin when it was detected. With neither available a
# critical 'riak' event is reported and a RuntimeError is raised.
def stats
  return stats_http if @httpstatus
  return stats_riak_admin if @riakadmin

  report(
    host: opts[:riak_host],
    service: 'riak',
    state: 'critical',
    description: "No mechanism for fetching Riak stats: neither HTTP nor riak-admin available."
  )
  raise "No mechanism for fetching Riak stats: neither HTTP nor riak-admin available."
end
stats_http()
click to toggle source
Get current stats via HTTP
# File bin/riemann-riak, line 175
# Get current stats via HTTP.
#
# Requests opts[:stats_path] from opts[:riak_host] on opts[:stats_port] and
# returns the JSON-decoded body of a 200 response.
#
# Raises whatever the request raised (after reporting a critical 'riak'
# event), or a RuntimeError when the response status is not 200 (also after
# reporting a critical 'riak' event).
def stats_http
  begin
    uri = URI.parse(opts[:riak_host])
    # When the option parses without a host component, use the raw option value.
    uri.host = opts[:riak_host] if uri.host.nil?

    http = Net::HTTP.new(uri.host, opts[:stats_port])
    http.use_ssl = uri.scheme == 'https'
    # Certificate verification is switched off for HTTPS endpoints.
    http.verify_mode = OpenSSL::SSL::VERIFY_NONE if http.use_ssl?

    res = http.start do |h|
      h.get opts[:stats_path]
    end
  rescue => e
    report(
      :host => opts[:riak_host],
      :service => 'riak',
      :state => 'critical',
      :description => "error fetching #{opts[:riak_host]}:#{opts[:stats_port]} #{e.class}, #{e.message}"
    )
    raise
  end

  return JSON.parse(res.body) if res.code.to_i == 200

  report(
    :host => opts[:riak_host],
    :service => 'riak',
    :state => 'critical',
    :description => "stats returned HTTP #{res.code}:\n\n#{res.body}"
  )
  # The status is read via res.code; the previous res.core call raised
  # NoMethodError and masked this error.
  raise "Can't fetch stats via HTTP: #{res.code}:\n\n#{res.body}"
end
stats_riak_admin()
click to toggle source
Get current stats via riak-admin
# File bin/riemann-riak, line 213
# Get current stats via riak-admin.
#
# Runs `riak-admin status` and returns a hash built from its "key : value"
# lines; both keys and values are strings.
#
# Raises RuntimeError ("riak-admin failed") when the command does not exit
# successfully.
def stats_riak_admin
  str = `riak-admin status`
  raise "riak-admin failed" unless $?.success?

  pairs = str.split(/\n/).map do |line|
    # Split on the first separator only, so a value that itself contains
    # " : " stays in one piece instead of producing extra fields.
    line.split(/ : /, 2)
  end

  # Lines without a separator carry no key/value pair and are skipped.
  Hash[pairs.select { |pair| pair.size == 2 }]
end
tick()
click to toggle source
# File bin/riemann-riak, line 320
# Runs one round of checks: stats, ring, disk and transfers, in that order.
def tick
  # check_keys is deliberately left out of the round:
  # This can utterly destroy a cluster, so we disable
  # check_keys

  check_stats
  check_ring
  check_disk
  check_transfers
end