class Riemann::Tools::Riak

Public Class Methods

new() click to toggle source
# File bin/riemann-riak, line 27
# Sets up the collector: detects which local tools are available, probes the
# Riak stats endpoint once over HTTP, and exports the Erlang cookie.
#
# After this runs, @httpstatus is true when the probe request completed
# without raising and false otherwise.
def initialize
  detect_features

  # One-shot probe of the stats endpoint. Any StandardError raised while
  # parsing the host, connecting or issuing the GET marks HTTP as unusable.
  @httpstatus =
    begin
      target = URI.parse(opts[:riak_host])
      # A bare hostname parses with no host component; use the raw option.
      target.host = opts[:riak_host] if target.host.nil?
      conn = Net::HTTP.new(target.host, opts[:stats_port])
      conn.use_ssl = target.scheme == 'https'
      conn.verify_mode = OpenSSL::SSL::VERIFY_NONE if conn.use_ssl?
      conn.start { |session| session.get opts[:stats_path] }
      true
    rescue
      false
    end

  # Override the emulator flags so users can supply the cookie dynamically.
  # This happens only once - hopefully nothing overrides it later.
  ENV['ERL_AFLAGS'] = "-setcookie #{opts[:cookie]}"
end

Public Instance Methods

check_disk() click to toggle source
# File bin/riemann-riak, line 145
# Reports the size of the Riak data directory (opts[:data_dir]) in GB as the
# 'riak disk' service.
#
# The size is taken from the first whitespace-separated field of `du` output.
# If du prints nothing on stdout, the metric is 0.
def check_disk
  data_dir = opts[:data_dir].to_s

  # The argv form runs du without a shell, so a path containing spaces or
  # shell metacharacters is passed through as a single argument.
  # -k pins the unit to 1 KiB blocks (POSIX du defaults to 512-byte units),
  # which is what the 1024**2 divisor below assumes.
  output = IO.popen(['du', '-Lsk', data_dir], &:read)

  gb = output.split(/\s+/).first.to_i / (1024.0**2)
  report(
    :host => opts[:riak_host],
    :service => 'riak disk',
    :state => 'ok',
    :metric => gb,
    :description => "#{gb} GB in #{data_dir}"
  )
end
check_keys() click to toggle source
# File bin/riemann-riak, line 97
# Runs the riemann-riak-keys script that sits next to this file, passing
# opts[:node_name], and reports its output as the 'riak keys' service.
#
# The event is 'ok' with an integer metric only when the entire output is a
# single run of digits. Anything else is reported as 'unknown' with the raw
# output as the description.
def check_keys
  script = File.join(File.expand_path(File.dirname(__FILE__)), 'riemann-riak-keys')
  keys = `#{script} #{opts[:node_name]}`.chomp

  event = {
    :host => opts[:riak_host],
    :service => 'riak keys',
    :description => keys
  }

  # \A and \z anchor the whole string. With ^ and $ a multi-line error
  # message containing one all-digit line would have been treated as a count.
  if keys =~ /\A\d+\z/
    report(event.merge(:state => 'ok', :metric => keys.to_i))
  else
    report(event.merge(:state => 'unknown'))
  end
end
check_ring() click to toggle source
# File bin/riemann-riak, line 69
# Reports ring readiness as the 'riak ring' service.
#
# Uses the riemann-riak-ring script next to this file when escript was
# detected, otherwise `riak-admin ringready` when riak-admin was detected.
# Reports nothing when neither is available. The state is 'ok' when a line of
# the output starts with TRUE and 'warning' otherwise.
def check_ring
  output =
    if @escript
      script_dir = File.expand_path(File.dirname(__FILE__))
      `#{script_dir}/riemann-riak-ring #{opts[:node_name]}`.chomp
    elsif @riakadmin
      `riak-admin ringready`
    end
  return if output.nil?

  ring_state = output =~ /^TRUE/ ? 'ok' : 'warning'
  report(
    :host => opts[:riak_host],
    :service => 'riak ring',
    :state => ring_state,
    :description => output
  )
end
check_stats() click to toggle source

Reports current stats to Riemann

# File bin/riemann-riak, line 256
# Fetches the current Riak stats and reports them to Riemann.
#
# If fetching raises, a 'critical' event carrying the error message is
# reported for 'riak' and for every core and FSM service, and nothing else is
# reported. Otherwise it reports:
# * 'riak' as ok,
# * each core service as a per-second rate (the stat value divided by 60),
# * each FSM stat per percentile; 'time' stats are converted from
#   microseconds to milliseconds and graded by fsm_state, other properties
#   are always 'ok'.
def check_stats
  begin
    riak_stats = stats
  rescue => e
    failure = {
      :state => 'critical',
      :description => e.message,
      :host => opts[:riak_host]
    }
    # Mark every service this check would normally report as critical.
    report(failure.merge(:service => 'riak'))
    core_services.each do |s|
      report(failure.merge(:service => "riak #{s}"))
    end
    fsm_types.each do |typespec|
      typespec.each do |type, prop|
        fsm_percentiles.each do |percentile|
          report(failure.merge(:service => "riak #{type} #{prop} #{percentile}"))
        end
      end
    end
    return
  end

  # Riak itself
  report(
    :host => opts[:riak_host],
    :service => 'riak',
    :state => 'ok'
  )

  # Gets/puts/rr
  core_services.each do |s|
    rate = riak_stats[s].to_i / 60.0
    report(
      :host => opts[:riak_host],
      :service => "riak #{s}",
      :state => 'ok',
      :metric => rate,
      :description => "#{rate}/sec"
    )
  end

  # FSMs
  fsm_types.each do |typespec|
    typespec.each do |type, prop|
      timing = prop == 'time'
      fsm_percentiles.each do |percentile|
        # to_i maps a missing stat (nil) or a non-numeric value such as
        # 'undefined' to 0, so no separate fallback is needed.
        val = riak_stats[fsm_stat(type, prop, percentile)].to_i
        val /= 1000.0 if timing # Convert us to ms
        state = timing ? fsm_state(type, percentile, val) : 'ok'
        # Only timings are in milliseconds; other properties get no unit
        # rather than a misleading "ms".
        description = timing ? "#{val} ms" : val.to_s
        report(
          :host => opts[:riak_host],
          :service => "riak #{type} #{prop} #{percentile}",
          :state => state,
          :metric => val,
          :description => description
        )
      end
    end
  end
end
check_transfers() click to toggle source
# File bin/riemann-riak, line 117
# Reports pending handoffs for this node as the 'riak transfers' service,
# based on the output of `riak-admin transfers`.
#
# Does nothing when riak-admin was not detected. Reports 'critical' with the
# partition count when the output says opts[:node_name] is waiting to hand
# off partitions, and 'ok' with a metric of 0 otherwise.
def check_transfers
  return unless @riakadmin

  output = `riak-admin transfers`

  # Escape the node name: it normally contains dots (host names or IPs),
  # which would otherwise match any character and could pick up a different
  # node's line.
  node = Regexp.escape(opts[:node_name].to_s)
  waiting = /'#{node}' waiting to handoff (\d+) partitions/.match(output)

  if waiting
    report(
      :host => opts[:riak_host],
      :service => 'riak transfers',
      :state   => 'critical',
      :metric  => waiting[1].to_i,
      :description => "waiting to handoff #{waiting[1]} partitions"
    )
  else
    report(
      :host => opts[:riak_host],
      :service => 'riak transfers',
      :state   => 'ok',
      :metric  => 0,
      :description => "No pending transfers"
    )
  end
end
core_services() click to toggle source
# File bin/riemann-riak, line 236
# Names of the Riak stats that check_stats reports as per-second rates.
def core_services
  %w[
    vnode_gets
    vnode_puts
    node_gets
    node_puts
    node_gets_set
    node_puts_set
    read_repairs
  ]
end
detect_features() click to toggle source

Identifies whether escript and riak-admin are installed

# File bin/riemann-riak, line 56
# Records in @escript and @riakadmin whether `which` finds the escript and
# riak-admin executables. A tool counts as missing when `which` prints only
# whitespace (or nothing).
def detect_features
  # Assume both tools are present until `which` says otherwise.
  @escript, @riakadmin = true, true

  @escript = false if `which escript` =~ /^\s*$/
  @riakadmin = false if `which riak-admin` =~ /^\s*$/
end
fsm_percentiles() click to toggle source
# File bin/riemann-riak, line 251
# Percentiles reported for every FSM stat. fsm_stat maps 50 to the stat's
# 'median' name.
def fsm_percentiles
  median = 50
  [median, 95, 99]
end
fsm_stat(type, property, percentile) click to toggle source

Returns the name of the Riak stat for the given FSM type, property, and percentile.

# File bin/riemann-riak, line 157
# Builds the Riak stat name for an FSM type/property/percentile combination,
# e.g. ('get', 'time', 95) => "node_get_fsm_time_95". The 50th percentile
# uses the suffix 'median'.
def fsm_stat(type, property, percentile)
  suffix = percentile == 50 ? 'median' : percentile
  "node_#{type}_fsm_#{property}_#{suffix}"
end
fsm_state(type, percentile, val) click to toggle source

Returns the alert state for the given FSM type and percentile, based on the measured value.

# File bin/riemann-riak, line 162
# Grades an FSM value against the warning threshold configured in
# opts[:"<type>_<percentile>_warning"]:
# 'ok' from 0 up to the threshold, 'warning' up to twice the threshold, and
# 'critical' for anything else (including negative values).
def fsm_state(type, percentile, val)
  threshold = opts[:"#{type}_#{percentile}_warning"]

  if (0..threshold).cover?(val)
    'ok'
  elsif (threshold..threshold * 2).cover?(val)
    'warning'
  else
    'critical'
  end
end
fsm_types() click to toggle source
# File bin/riemann-riak, line 246
# FSM type/property pairs to report, each as a single-entry hash of
# type => property.
def fsm_types
  [
    %w[get time],
    %w[put time],
    %w[get set_objsize]
  ].map { |type, property| { type => property } }
end
stats() click to toggle source

Get current stats as a hash

# File bin/riemann-riak, line 220
# Returns the current stats as a hash, preferring HTTP and falling back to
# riak-admin.
#
# When neither mechanism is available, reports a 'critical' 'riak' event and
# raises a RuntimeError with the same message.
def stats
  return stats_http if @httpstatus
  return stats_riak_admin if @riakadmin

  message = "No mechanism for fetching Riak stats: neither HTTP nor riak-admin available."
  report(
    :host => opts[:riak_host],
    :service => 'riak',
    :state => 'critical',
    :description => message
  )
  raise message
end
stats_http() click to toggle source

Get current stats via HTTP

# File bin/riemann-riak, line 175
# Fetches the current stats over HTTP and returns the parsed JSON body.
#
# Connects to opts[:riak_host] on opts[:stats_port] and GETs
# opts[:stats_path]. TLS is used when riak_host is an https:// URL.
#
# On a connection or request error, reports a 'critical' 'riak' event and
# re-raises the original exception. On a non-200 response, reports a
# 'critical' event and raises a RuntimeError naming the HTTP status.
def stats_http
  begin
    uri = URI.parse(opts[:riak_host])
    # A bare hostname parses with no host component; use the raw option.
    uri.host = opts[:riak_host] if uri.host.nil?
    http = Net::HTTP.new(uri.host, opts[:stats_port])
    http.use_ssl = uri.scheme == 'https'
    # NOTE(review): certificate verification is switched off for https
    # endpoints - confirm this is intentional for the deployment.
    http.verify_mode = OpenSSL::SSL::VERIFY_NONE if http.use_ssl?
    res = http.start { |h| h.get opts[:stats_path] }
  rescue => e
    report(
      :host => opts[:riak_host],
      :service => 'riak',
      :state => 'critical',
      :description => "error fetching #{opts[:riak_host]}:#{opts[:stats_port]} #{e.class}, #{e.message}"
    )
    raise
  end

  return JSON.parse(res.body) if res.code.to_i == 200

  report(
    :host => opts[:riak_host],
    :service => 'riak',
    :state => 'critical',
    :description => "stats returned HTTP #{res.code}:\n\n#{res.body}"
  )
  # Uses res.code; the misspelling `res.core` raised NoMethodError here and
  # hid the real HTTP status from the caller.
  raise "Can't fetch stats via HTTP: #{res.code}:\n\n#{res.body}"
end
stats_riak_admin() click to toggle source

Get current stats via riak-admin

# File bin/riemann-riak, line 213
# Fetches the current stats by running `riak-admin status` and returns them
# as a hash of stat name => string value.
#
# Raises a RuntimeError when riak-admin does not exit successfully.
def stats_riak_admin
  str = `riak-admin status`
  status = $?
  raise "riak-admin failed" unless status && status.success?

  # Split on the first " : " only, so values that contain the separator stay
  # intact, and skip lines that are not "name : value" pairs (headers, rules,
  # blank lines), which Hash[] cannot accept.
  pairs = str.split(/\n/).map { |line| line.split(/ : /, 2) }
  Hash[pairs.select { |pair| pair.size == 2 }]
end
tick() click to toggle source
# File bin/riemann-riak, line 320
# Runs one round of checks, in order: stats, ring, disk, transfers.
# Returns the result of the last check.
def tick
  # check_keys is deliberately left out: it can utterly destroy a cluster.
  checks = [:check_stats, :check_ring, :check_disk, :check_transfers]
  checks.map { |check| send(check) }.last
end