#! /usr/bin/perl -wT
#
# Check_apc_ups - Check APC UPS status via SNMP
# Shamelessly copied from check_breeze.pl
#
# To do:
# - Send SNMP queries directly, instead of forking `snmpget`.
# - Make the status less verbose.  Maybe we can send an "onLine, time
#   remaining: hh:mm:ss" if all is well, and a list of specific problems
#   if something is broken.
|
|
use strict;
|
|
use Getopt::Long;
|
|
use vars qw($opt_V $opt_h $opt_H $opt_T $opt_t $opt_R $opt_r
|
|
$opt_L $opt_l $PROGNAME);
|
|
use lib "/usr/local/nagios/libexec";
|
|
use utils qw(%ERRORS &print_revision &support &usage);
|
|
|
|
# Forward declarations: these subs are defined at the bottom of the file but
# are called from the main body, so their prototypes must be known first.
sub print_help ();
sub print_usage ();
sub get_snmp_int_val ($);
sub escalate_exitval ($);

# Name printed by print_revision() and in the usage line.  It is declared
# via "use vars" but was never assigned, so it used to print as empty.
$PROGNAME = "check_apc_ups";

# The script runs in taint mode (-T) and later runs snmpget through
# backticks; clear the environment variables that influence the shell.
$ENV{'PATH'}     = '';
$ENV{'BASH_ENV'} = '';
$ENV{'ENV'}      = '';

# Command line options.  Every threshold option takes a value in both its
# short and its long form ("=s"); previously only the short forms did, so
# e.g. "--temp-critical 70" stored 1 instead of 70.
Getopt::Long::Configure('bundling');
GetOptions(
    "V|version"            => \$opt_V,
    "h|help"               => \$opt_h,
    "T|temp-critical=s"    => \$opt_T,
    "t|temp-warning=s"     => \$opt_t,
    "R|runtime-critical=s" => \$opt_R,
    "r|runtime-warning=s"  => \$opt_r,
    "L|load-critical=s"    => \$opt_L,
    "l|load-warning=s"     => \$opt_l,
    "H|hostname=s"         => \$opt_H,
);

if ($opt_V) {
    print_revision($PROGNAME, '$Revision: 1771 $');
    exit $ERRORS{'OK'};
}

if ($opt_h) {
    print_help();
    exit $ERRORS{'OK'};
}

# Fall back to the first remaining argument when -H was not given.
($opt_H) || ($opt_H = shift) || usage("Host name/address not specified\n");

# Untaint the host by capturing it from a fully anchored pattern.  The whole
# argument must be a plausible host name/address: an unanchored match would
# silently query a truncated name, and a leading "-" would be read by
# snmpget as an option.  $host is used by get_snmp_int_val().
my $host;
if ($opt_H =~ /\A([A-Za-z0-9][-.A-Za-z0-9]*)\z/) {
    $host = $1;
}
defined($host) || usage("Invalid host: $opt_H\n");
|
|
|
|
# Defaults and threshold validation
#
# Options are validated as given on the command line (whole value must be
# an unsigned integer) BEFORE any arithmetic is done on them, then the run
# time options are converted and the defaults are filled in.

if (defined $opt_T && $opt_T !~ /\A\d+\z/) {
    usage ("Invalid critical temperature threshold.\n");
}
if (defined $opt_t && $opt_t !~ /\A\d+\z/) {
    usage ("Invalid warning temperature threshold.\n");
}

if (defined $opt_R && $opt_R !~ /\A\d+\z/) {
    usage ("Invalid critical run time threshold.\n");
}
if (defined $opt_r && $opt_r !~ /\A\d+\z/) {
    usage ("Invalid warning run time threshold.\n");
}

if (defined $opt_L && ($opt_L !~ /\A\d+\z/ || $opt_L > 100)) {
    usage ("Invalid critical load threshold.\n");
}
if (defined $opt_l && ($opt_l !~ /\A\d+\z/ || $opt_l > 100)) {
    usage ("Invalid warning load threshold.\n");
}

# Convert minutes to secs/100.  Each option is tested on its own: the
# warning value used to be converted only when the CRITICAL option was set.
$opt_R *= 60 * 100 if (defined $opt_R);
$opt_r *= 60 * 100 if (defined $opt_r);

# Use defined-ness rather than truth so that an explicit 0 is honoured
# instead of being replaced by the default.
my $tempcrit    = defined $opt_T ? $opt_T : 60;
my $tempwarn    = defined $opt_t ? $opt_t : 40;
my $runtimecrit = defined $opt_R ? $opt_R : 30 * 60 * 100;    # Secs / 100
my $runtimewarn = defined $opt_r ? $opt_r : 60 * 60 * 100;    # Secs / 100
my $loadcrit    = defined $opt_L ? $opt_L : 85;               # Percent
my $loadwarn    = defined $opt_l ? $opt_l : 50;               # Percent
|
|
|
|
|
|
# APC UPS OIDs
# APC MIBs are available at ftp://ftp.apcftp.com/software/pnetmib/mib
my $upsBasicOutputStatus          = ".1.3.6.1.4.1.318.1.1.1.4.1.1.0";
my $upsBasicBatteryStatus         = ".1.3.6.1.4.1.318.1.1.1.2.1.1.0";
my $upsAdvInputLineFailCause      = ".1.3.6.1.4.1.318.1.1.1.3.2.5.0";
my $upsAdvBatteryTemperature      = ".1.3.6.1.4.1.318.1.1.1.2.2.2.0";
my $upsAdvBatteryRunTimeRemaining = ".1.3.6.1.4.1.318.1.1.1.2.2.3.0";
my $upsAdvBatteryReplaceIndicator = ".1.3.6.1.4.1.318.1.1.1.2.2.4.0";
my $upsAdvOutputLoad              = ".1.3.6.1.4.1.318.1.1.1.4.2.3.0";
my $upsAdvTestDiagnosticsResults  = ".1.3.6.1.4.1.318.1.1.1.7.2.3.0";

# Lookup tables, indexed by the integer returned for the matching OID.
# Except for @failCauseVals (plain labels), each entry is a
# [ label, exit status ] pair; entries with an undef label are padding for
# values that have no label here and are reported as "unknown".
my (@outputStatVals, @failCauseVals, @battStatVals, @battReplVals,
    @diagnosticsResultsVals);
{
    # Short aliases for the three exit statuses used in the tables.
    my ($ok, $warning, $critical) = @ERRORS{ qw(OK WARNING CRITICAL) };

    @outputStatVals = (
        [ undef,                      undef     ],    # pad 0
        [ undef,                      undef     ],    # pad 1
        [ "onLine",                   $ok       ],    # 2
        [ "onBattery",                $warning  ],    # 3
        [ "onSmartBoost",             $warning  ],    # 4
        [ "timedSleeping",            $warning  ],    # 5
        [ "softwareBypass",           $warning  ],    # 6
        [ "off",                      $critical ],    # 7
        [ "rebooting",                $warning  ],    # 8
        [ "switchedBypass",           $warning  ],    # 9
        [ "hardwareFailureBypass",    $critical ],    # 10
        [ "sleepingUntilPowerReturn", $critical ],    # 11
        [ "onSmartTrim",              $warning  ],    # 12
    );

    # NOTE(review): "rateOfVoltageChnage" looks like a typo for
    # "rateOfVoltageChange"; it is printed at runtime, so it is left as-is.
    @failCauseVals = (
        undef,                                        # pad 0
        qw(
            noTransfer
            highLineVoltage
            brownout
            blackout
            smallMomentarySag
            deepMomentarySag
            smallMomentarySpike
            largeMomentarySpike
            selfTest
            rateOfVoltageChnage
        ),
    );

    @battStatVals = (
        [ undef,           undef     ],               # pad 0
        [ undef,           undef     ],               # pad 1
        [ "batteryNormal", $ok       ],               # 2
        [ "batteryLow",    $critical ],               # 3
    );

    @battReplVals = (
        [ undef,                     undef     ],     # pad 0
        [ "noBatteryNeedsReplacing", $ok       ],     # 1
        [ "batteryNeedsReplacing",   $critical ],     # 2
    );

    @diagnosticsResultsVals = (
        [ undef,            undef     ],              # pad 0
        [ "OK",             $ok       ],              # 1
        [ "failed",         $critical ],              # 2
        [ "invalidTest",    $critical ],              # 3
        [ "testInProgress", $ok       ],              # 4
    );
}
|
|
|
|
# Shared state for the checks below:
#   $onbattery - the value 3 ("onBattery" in @outputStatVals)
#   $exitval   - overall plugin status, raised through escalate_exitval()
#   $data      - the most recent SNMP reply
my $onbattery = 3;
my $exitval   = $ERRORS{'UNKNOWN'};

# Output status: print the label and apply the status from @outputStatVals.
my $data = get_snmp_int_val($upsBasicOutputStatus);

if (defined $data && defined $outputStatVals[$data][0]) {
    my ($label, $status) = @{ $outputStatVals[$data] };
    print "Output status: $label | ";
    escalate_exitval($status);
}
else {
    print "Output status: unknown | ";
}
|
|
|
|
# Remaining battery run time.  The value is in hundredths of a second, the
# same unit as $runtimecrit and $runtimewarn.
$data = get_snmp_int_val( $upsAdvBatteryRunTimeRemaining );

print "Rem time: ";
# Anything that is not an unsigned integer (e.g. an SNMP error text) is
# reported as unknown instead of being compared as if it were 0.
if (defined ($data) && $data =~ /\A\d+\s*\z/) {
    my $hrs  = int($data / (60 * 60 * 100));
    my $mins = int($data / (60 * 100)) % 60;
    # Seconds within the current minute, including hundredths.  The modulus
    # must be a full minute (60 * 100); "% 100" kept only the fraction, so
    # 20 min 34.56 s was printed as 0:20:00.56.
    my $secs = ($data % (60 * 100)) / 100;
    printf "%d:%02d:%05.2f | ", $hrs, $mins, $secs;

    if ($data <= $runtimecrit) {
        escalate_exitval($ERRORS{'CRITICAL'});
    } elsif ($data <= $runtimewarn) {
        escalate_exitval($ERRORS{'WARNING'});
    } else {
        escalate_exitval($ERRORS{'OK'});
    }
} else {
    print "unknown | ";
}
|
|
|
|
# Battery status: print the label from @battStatVals and apply its status.
$data = get_snmp_int_val( $upsBasicBatteryStatus );

print "Battery status: ";
if (defined ($data) && defined ($battStatVals[$data][0])) {
    # NOTE(review): $onbattery (3) is the "onBattery" value of the OUTPUT
    # status table, but it is compared with the BATTERY status here, where
    # 3 means "batteryLow".  Behaviour is kept as-is; confirm the intent.
    if ($data == $onbattery) {
        # The line fail cause is only queried when it is going to be
        # printed, and only used as an index when the reply is an integer;
        # a failed query used to index the table with undef (a warning
        # under -w).
        my $failcause = "unknown";
        my $fc = get_snmp_int_val( $upsAdvInputLineFailCause );
        if (defined ($fc) && $fc =~ /\A\d+\s*\z/
            && defined ($failCauseVals[$fc])) {
            $failcause = $failCauseVals[$fc];
        }
        print "$battStatVals[$data][0] ($failcause) | ";
    } else {
        print "$battStatVals[$data][0] | ";
    }
    escalate_exitval($battStatVals[$data][1]);
} else {
    print "unknown | ";
}
|
|
|
|
# Battery temperature: CRITICAL at or above $tempcrit, WARNING at or above
# $tempwarn, OK otherwise.
$data = get_snmp_int_val($upsAdvBatteryTemperature);

if (!defined $data) {
    print "Battery temp(C): unknown | ";
}
else {
    print "Battery temp(C): $data | ";
    my $level = $data >= $tempcrit ? 'CRITICAL'
              : $data >= $tempwarn ? 'WARNING'
              :                      'OK';
    escalate_exitval($ERRORS{$level});
}
|
|
|
|
# Battery replacement indicator: print the label from @battReplVals and
# apply the status stored next to it.
$data = get_snmp_int_val($upsAdvBatteryReplaceIndicator);

if (defined $data && defined $battReplVals[$data][0]) {
    my ($label, $status) = @{ $battReplVals[$data] };
    print "Battery repl: $label | ";
    escalate_exitval($status);
}
else {
    print "Battery repl: unknown | ";
}
|
|
|
|
# Output load in percent: CRITICAL at or above $loadcrit, WARNING at or
# above $loadwarn, OK otherwise.
$data = get_snmp_int_val($upsAdvOutputLoad);

if (!defined $data) {
    print "Output load (%): unknown | ";
}
else {
    print "Output load (%): $data | ";
    my $level = $data >= $loadcrit ? 'CRITICAL'
              : $data >= $loadwarn ? 'WARNING'
              :                      'OK';
    escalate_exitval($ERRORS{$level});
}
|
|
|
|
# Result of the last diagnostics test.  This is the final field, so it ends
# the status line with a newline instead of the " | " separator.
$data = get_snmp_int_val($upsAdvTestDiagnosticsResults);

if (defined $data && defined $diagnosticsResultsVals[$data][0]) {
    my ($label, $status) = @{ $diagnosticsResultsVals[$data] };
    print "Diag result: $label\n";
    escalate_exitval($status);
}
else {
    print "Diag result: unknown\n";
}

# Report the status accumulated by the checks above.
exit $exitval;
|
|
|
|
|
|
# Print the two-line command synopsis to standard output.
sub print_usage () {
    my @synopsis = (
        "Usage: $PROGNAME -H <host> -T temp -t temp -R minutes -r minutes\n",
        " -L percent -l percent\n",
    );
    print @synopsis;
}
|
|
|
|
# Print the full help text: revision, copyright and description, the usage
# synopsis, the option list and the support notice.
sub print_help () {
    print_revision($PROGNAME, '$Revision: 1771 $');
    print "Copyright (c) 2001 Gerald Combs/Jeffrey Blank/Karl DeBisschop

This plugin reports the status of an APC UPS equipped with an SNMP management
module.

";
    print_usage();
    # The two load entries used to lack their closing parenthesis.
    print "
-H, --hostname=HOST
   Name or IP address of host to check
-T --temp-critical
   Battery degrees C above which a CRITICAL status will result (default: 60)
-t --temp-warning
   Battery degrees C above which a WARNING status will result (default: 40)
-R --runtime-critical
   Minutes remaining below which a CRITICAL status will result (default: 30)
-r --runtime-warning
   Minutes remaining below which a WARNING status will result (default: 60)
-L --load-critical
   Output load pct above which a CRITICAL status will result (default: 85)
-l --load-warning
   Output load pct above which a WARNING status will result (default: 50)

";
    support();
}
|
|
|
|
# Query one OID on $host with snmpget and return its integer value.
#
# Parameter: the OID to query.
# Returns:   the integer as a string of digits, or undef when the command
#            produced no usable output or the value is not an integer.
#
# The third space-separated field of the snmpget output (everything after
# "<oid> =") is examined for, in order:
#   - a number in parentheses, e.g. "onLine(2)"
#   - a number after ": ",     e.g. "INTEGER: 2"
#   - a bare integer
sub get_snmp_int_val ($) {
    my $oid = shift(@_);

    # $host is interpolated into a shell command line; it comes from the
    # host pattern capture in the main body, which limits its characters.
    my $output = `/usr/bin/snmpget $host public $oid 2> /dev/null`;
    return undef unless (defined ($output));

    my @fields = split(/ /, $output, 3);
    return undef unless (defined ($fields[2]));

    my $value = $fields[2];

    if ($value =~ /\((\d+)\)/) {    # Later versions of UCD SNMP
        return $1;
    }
    if ($value =~ /: (\d+)/) {
        return $1;
    }
    # Bare value.  Only an integer is accepted: the raw text (trailing
    # newline included, or an error message) used to be returned here and
    # was then printed and compared as if it were a number.
    if ($value =~ /\A\s*(-?\d+)\s*\z/) {
        return $1;
    }

    return undef;
}
|
|
|
|
# Raise the overall exit status $exitval to $newval if $newval is worse.
#
# Parameter: a value from %ERRORS.
#
# UNKNOWN is handled explicitly rather than by numeric order.  $exitval
# starts out as UNKNOWN; a plain "greater than" test only works when
# UNKNOWN is the numerically lowest status, and where it is the highest
# the first real result could never replace it.  Here any known status
# replaces UNKNOWN, and an UNKNOWN argument never overrides a known status.
sub escalate_exitval ($) {
    my $newval = shift(@_);

    return unless (defined ($newval));

    my $unknown = $ERRORS{'UNKNOWN'};
    return if ($newval == $unknown);

    if ($exitval == $unknown || $newval > $exitval) {
        $exitval = $newval;
    }
}
|