667 lines
18 KiB
Perl
667 lines
18 KiB
Perl
|
#!/usr/bin/perl -w
|
||
|
|
||
|
# check_status.pl Nagios Plugin - Version 1.3
|
||
|
# Last Updated: 1/9/2003
|
||
|
#
|
||
|
# Report any bugs/questions to Russell Scibetti at russell@quadrix.com
|
||
|
#
|
||
|
# check_status Change Log:
|
||
|
#
|
||
|
# To do for 1.4
|
||
|
# - Better help and documentation (separate doc?)
|
||
|
# - Take argument (patterns to match) from a separate spec file
|
||
|
#
|
||
|
# New Addition to 1.3
|
||
|
# - Added ChangeLog information and updated --help output
|
||
|
# - hostdown (hd) argument for how a service check should respond
|
||
|
# when its host is Down/Unreachable
|
||
|
# (--hostdown="ok|warning|critical|unknown")
|
||
|
# - Changed name from check_state to check_status
|
||
|
# - Set hostdown to default to OK when the argument isn't specified
|
||
|
# - Number of Hosts checked is now output in OK result
|
||
|
#
|
||
|
# Version 1.2 additions:
|
||
|
#
|
||
|
# - Added ability to handle ack'd and downtimed services differently
|
||
|
# depending on argument provided
|
||
|
# (--ack="ok|warning|critical|unknown|down|unreachable"
|
||
|
# --dt="ok|warning|critical|unknown|down|unreachable")
|
||
|
#
|
||
|
# Version 1.1 additions:
|
||
|
#
|
||
|
# - Added --host=<regex>, --servhost=<regex> to allow for specific field
|
||
|
# matching (host for matching hostname in host checks, servhost for
|
||
|
# matching the hostname in service checks, service for matching the
|
||
|
# service name in service checks)
|
||
|
# - Output the number of OK services for an OK output
|
||
|
#
|
||
|
# Version 1.0 features:
|
||
|
#
|
||
|
# - Freshness check of status.log (timestamp)
|
||
|
# - Match service or host checks
|
||
|
# - Can ignore acknowledged or downtimed services/hosts (--ack, --dt)
|
||
|
# - Can output different levels of detail dependent on # of problems
|
||
|
# - Can check for number of critical, warning, or unknowns
|
||
|
#
|
||
|
#############################################################
|
||
|
|
||
|
use Getopt::Long;
|
||
|
use File::stat;
|
||
|
|
||
|
# Turn on bundling of single-letter switches before parsing.
Getopt::Long::Configure('bundling');

# Command-line options.  "=s" takes a mandatory string value, ":s" an
# optional one, and a spec without a suffix is a plain flag.  Each short
# name and its long alias store into the same scalar.
GetOptions(
    "V|version"     => \$version,
    "h|help"        => \$help,
    "v|verbose"     => \$verbose,
    "w|warning=s"   => \$warning,
    "c|critical=s"  => \$critical,
    "u|unknown=s"   => \$unknown,
    "p|pattern=s"   => \$pattern,
    "S|service:s"   => \$service,
    "s|status=s"    => \$status,
    "d|dir=s"       => \$dir,
    "D|details=s"   => \$details,
    "H|host:s"      => \$host,
    "f|freshness=s" => \$freshness,
    "servhost=s"    => \$servhost,
    "a|ack:s"       => \$ack,
    "dt|downtime:s" => \$dt,
    "hd|hostdown:s" => \$hdown,
    "ok"            => \$ok,
);
|
||
|
|
||
|
# Exit codes handed to exitcheck(), one per plugin result state.
my ($OK, $WARNING, $CRITICAL, $UNKNOWN) = (0, 1, 2, 3);

# State names as they are matched in the status log; also used as hash keys
# for the problem counters and output buffers.
my ($crit, $warn, $unk, $down, $unreach) =
    ("CRITICAL", "WARNING", "UNKNOWN", "DOWN", "UNREACHABLE");
|
||
|
|
||
|
# --help: version banner followed by the full usage text.
if ($help) {
    printVersion();
    printHelp();
    exitcheck($UNKNOWN);
}

# --version: banner only.
if ($version) {
    printVersion();
    exitcheck($UNKNOWN);
}

# When no status file was named, build its path from the log directory;
# with neither argument there is nothing to check, so print the usage.
unless ($status) {
    unless ($dir) {
        print "Usage: $0 -s <status file> | -d <Nagios log dir>\n";
        print "Use the --help option for full list of arguments\n";
        exitcheck($UNKNOWN);
    }

    # Only insert a separator when the directory does not already end in one.
    my $separator = ($dir =~ m#[^/]/$#) ? "" : "/";
    $status = $dir . $separator . "status.log";
}
|
||
|
|
||
|
# A bare --host (given without a value) selects every host.
if (defined $host && !$host) {
    $host = "[^\\s]*";
}

# Service mode (no --host): fill in the service/servhost selectors.
if (!$host) {
    if ($pattern && !$servhost && !$service) {
        # --pattern is OR-ed with the servhost/service test in the parse
        # loop.  Defaulting those two to match-all would make the OR always
        # true and silently disable --pattern, so when --pattern is the only
        # selector give them a regex that can never match.
        $servhost = "(?!)";
        $service  = "(?!)";
    }
    else {
        $servhost = "[^\\s]*" if !$servhost;
        $service  = "[^\\s]*" if !$service;
    }
}

# --ack / --dt / --hostdown: an option given without a value means "ok";
# any other value must be exactly one of the known state names.  The match
# is anchored so that strings merely containing a state name are rejected.
my $valid_handling = qr/\A(?:ok|critical|warning|unknown|down|unreachable)\z/;
for my $option ([\$ack, "ack"], [\$dt, "dt"], [\$hdown, "hostdown"]) {
    my ($value_ref, $label) = @$option;
    next if !defined $$value_ref;

    if (!$$value_ref) {
        $$value_ref = "ok";
    }
    elsif ($$value_ref !~ $valid_handling) {
        print "Invalid value for $label\n";
        exitcheck($UNKNOWN);
    }
}

# The help text and the 1.3 changelog both state that services on a
# Down/Unreachable host count as OK when --hostdown is not given at all.
$hdown = "ok" if !defined $hdown;
|
||
|
|
||
|
# Upper bound on the problem count for the most detailed output level
# (first number of --details); stays 0 unless --details supplies it.
my $much_details = 0;

# Alternations used to recognise a non-OK state field in the status log.
my $ServiceNotOK = join "|", "CRITICAL", "WARNING", "UNKNOWN";
my $HostNotOK    = join "|", "DOWN", "UNREACHABLE";

# Problems counted so far, keyed by state name.
my %numprob = map { $_ => 0 } qw(WARNING CRITICAL UNKNOWN DOWN UNREACHABLE);

# Single-number thresholds (-c N, -w N, -u N); 0 means the multi-value
# form was given instead.
my ($CritOnly, $WarnOnly, $UnkOnly) = (0, 0, 0);

# Raw pieces of the comma-separated threshold arguments.
my (@wlev, @clev, @ulev);

# Per-state thresholds for the multi-value form of -w, -c and -u.
my %warnlevel = map { $_ => 0 } qw(WARNING CRITICAL UNKNOWN);
my %critlevel = map { $_ => 0 } qw(WARNING CRITICAL UNKNOWN);
my %unklevel  = map { $_ => 0 } qw(WARNING CRITICAL UNKNOWN);
my %hostlevel = map { $_ => 0 } qw(DOWN UNREACHABLE);

# Hosts found to be in scheduled downtime, and how many.
my @hostdowntime;
my $numdowntime = 0;

# Hosts found in a Down/Unreachable state, and how many.
my @hostdown;
my $numdown = 0;

# Per-host timestamp taken from the host line of every host that is up.
my %hostoktimes;

# Number of status-log entries that matched the selection criteria.
my $nummatch = 0;
|
||
|
|
||
|
# Interpret -w, -c and -u.  All three follow the same rules:
#   absent (or false)  -> single-number threshold of 1
#   "N"                -> single-number threshold of N
#   "W,C[,U]"          -> per-state thresholds; the single-number
#                         threshold stays 0
for my $threshold (
    [ $warning,  \@wlev, \%warnlevel, \$WarnOnly ],
    [ $critical, \@clev, \%critlevel, \$CritOnly ],
    [ $unknown,  \@ulev, \%unklevel,  \$UnkOnly  ],
) {
    my ($argument, $pieces, $levels, $single) = @$threshold;

    if (!$argument) {
        $$single = 1;
        next;
    }
    if ($argument !~ /,/) {
        $$single = $argument;
        next;
    }

    @$pieces = split /,/, $argument;
    $levels->{"WARNING"}  = $pieces->[0];
    $levels->{"CRITICAL"} = $pieces->[1];

    # The third (unknown) level is optional.
    $levels->{"UNKNOWN"} = $pieces->[2] if $pieces->[2];
}
|
||
|
|
||
|
|
||
|
# --freshness is given in minutes (30 when absent); keep it in seconds.
$freshness = $freshness ? $freshness * 60 : 30 * 60;

# Number of entries already appended to each output buffer, per state,
# for the brief (%ct) and the full-detail (%much_ct) listings.
my %ct      = map { $_ => 0 } qw(CRITICAL WARNING UNKNOWN DOWN UNREACHABLE);
my %much_ct = map { $_ => 0 } qw(CRITICAL WARNING UNKNOWN DOWN UNREACHABLE);

# Text collected per state for the brief and the full-detail listings.
my %output      = map { $_ => "" } qw(CRITICAL WARNING UNKNOWN DOWN UNREACHABLE);
my %much_output = map { $_ => "" } qw(CRITICAL WARNING UNKNOWN DOWN UNREACHABLE);

# "--details=A,B": A bounds the full-detail listing, B the brief one.
# A single number is left in $details unchanged.
if ($details && $details =~ /,/) {
    my @limits = split /,/, $details;
    $much_details = $limits[0];
    $details      = $limits[1];
}
|
||
|
|
||
|
# Open the status log.  Three-argument open keeps characters in the
# user-supplied file name from being read as an open mode.  A log that
# cannot be read is reported on STDOUT with the UNKNOWN exit code rather
# than through die (message on STDERR, exit status 255).  This deliberately
# bypasses exitcheck() so that --ok cannot hide an unreadable log.
my $sta;
if (!open($sta, '<', $status)) {
    print "State UNKNOWN - Cannot open status file $status: $!\n";
    exit $UNKNOWN;
}

# Freshness check: compare the log's modification time with the clock.
my $now       = time;
my $file_info = stat($status);
if (!defined $file_info) {
    print "State UNKNOWN - Cannot stat status file $status: $!\n";
    exit $UNKNOWN;
}
if ($now - $file_info->mtime > $freshness) {
    print "State CRITICAL - Status file is stale!!!\n";
    exitcheck($CRITICAL);
}

# Walk the log.  Entries are ';'-separated; the first field ends in the
# record type (HOST or SERVICE), the second field is the host name.
while (my $line = <$sta>) {
    chomp $line;

    if ($line =~ /^[^\s]+[\s]+HOST;/) {
        my @hdata = split /;/, $line;

        # Host mode: count hosts whose name matches --host.
        if ($host && $hdata[1] =~ /$host/) {
            $nummatch++;
            if ($hdata[2] =~ /$HostNotOK/) {
                addproblem($line, $hdata[2]);
            }
        }
        # Service mode: remember host facts for the service checks.
        else {
            if ($hdata[2] =~ /$HostNotOK/) {
                push @hostdown, $hdata[1];
                $numdown++;
            }
            else {
                # Compared by addproblem() with field 6 of the service line.
                $hostoktimes{$hdata[1]} = $hdata[4];
            }
            # Field 17 other than "0" is treated as "host is in downtime"
            # (presumably the scheduled-downtime depth -- confirm against
            # the status.log format).
            if ($hdata[17] ne "0") {
                push @hostdowntime, $hdata[1];
                $numdowntime++;
            }
        }
    }
    elsif (!$host && $line =~ /^[^\s]+[\s]+SERVICE;/) {
        my @servdata = split /;/, $line;

        # Selected either by --pattern on the whole line, or by the
        # servhost and service regexes on their respective fields.
        if (   ($pattern && ($line =~ /$pattern/))
            || (($servdata[1] =~ /$servhost/) && ($servdata[2] =~ /$service/)) ) {
            $nummatch++;

            # Only states whose field 5 reads HARD count as problems.
            if (($servdata[5] eq "HARD") && ($servdata[3] =~ /$ServiceNotOK/)) {
                addproblem($line, $servdata[3]);
            }
        }
    }
}

close($sta);
|
||
|
|
||
|
# Nothing in the log matched the selection: that is a configuration
# problem, not an OK result.
if ($nummatch==0) {
    print "Nothing Matches your criteria!\n";
    exitcheck($UNKNOWN);
}

# Count the number of problems (for reference):
if ($host) {
    $total = $numprob{"DOWN"} + $numprob{"UNREACHABLE"};
}
else {
    $total = $numprob{"WARNING"} + $numprob{"CRITICAL"} + $numprob{"UNKNOWN"};
}

my $numok = $nummatch - $total;

# True when any per-state threshold of a multi-value -w/-c/-u argument has
# been reached.  A level that is 0 or was not supplied is skipped:
# comparing "count >= 0" is always true, which made e.g. "-c 5,3" (no third
# number) report CRITICAL on every run.
my $level_reached = sub {
    my ($levels) = @_;
    for my $state ("WARNING", "CRITICAL", "UNKNOWN") {
        my $limit = $levels->{$state};
        return 1 if $limit && $numprob{$state} >= $limit;
    }
    return 0;
};

# If this is a host state check: any Down/Unreachable host is CRITICAL.
if ($host) {
    if ($numprob{"DOWN"}>0 || $numprob{"UNREACHABLE"}>0 ) {
        if ($details && ($total <= $details)) {
            print "State CRITICAL - $total Host Problems: $output{$down} $output{$unreach}\n";
        }
        else {
            print "State CRITICAL - $numprob{$down} Hosts Down, $numprob{$unreach} Hosts Unreachable\n";
        }
        exitcheck($CRITICAL);
    }
    else {
        print "State OK - $numok Hosts Up, $total Problems\n";
        exitcheck($OK);
    }
}

# Critical defined only in terms of the number of criticals...
elsif ($CritOnly && ($numprob{"CRITICAL"} >= $CritOnly)) {
    countAndPrint($crit,$numprob{$crit},0);
    exitcheck($CRITICAL);
}

# Critical in terms of the number of warnings, criticals and unknowns...
elsif (!$CritOnly && $level_reached->(\%critlevel)) {
    countAndPrint($crit,$total,1);
    exitcheck($CRITICAL);
}

# Warning in terms of the number of warnings only...
elsif ($WarnOnly && ($numprob{"WARNING"} >= $WarnOnly)) {
    countAndPrint($warn,$numprob{$warn},0);
    exitcheck($WARNING);
}

# Warning in terms of the number of warnings, criticals and unknowns...
elsif (!$WarnOnly && $level_reached->(\%warnlevel)) {
    countAndPrint($warn,$total,1);
    exitcheck($WARNING);
}

# Unknown in terms of the number of unknowns only...
elsif ($UnkOnly && ($numprob{"UNKNOWN"} >= $UnkOnly)) {
    countAndPrint($unk,$numprob{$unk},0);
    exitcheck($UNKNOWN);
}

# Unknown in terms of the number of warnings, criticals and unknowns...
elsif (!$UnkOnly && $level_reached->(\%unklevel)) {
    countAndPrint($unk,$total,1);
    exitcheck($UNKNOWN);
}

# Everything is OK!
else {
    print "State OK - $numok OK, $total problems\n";
    exitcheck($OK);
}
|
||
|
|
||
|
|
||
|
|
||
|
############################
|
||
|
# Subroutines
|
||
|
############################
|
||
|
|
||
|
# Terminate the plugin.  The first argument is the exit code for the
# detected state; when --ok was given the script exits 0 instead.
sub exitcheck {
    exit($ok ? 0 : $_[0]);
}
|
||
|
|
||
|
# Print the one-line result for a service check.
#   $_[0]  state name shown after "State" (CRITICAL, WARNING or UNKNOWN)
#   $_[1]  number of problems being reported
#   $_[2]  true when the line should cover all three problem states,
#          false when it only concerns the state in $_[0]
# How much is printed depends on --details: up to $much_details problems
# get the full-detail text, up to $details the brief listing, anything
# above that (or no --details at all) only gets counts.
sub countAndPrint {
    my ($state, $count, $alltypes) = @_;

    # Pick the text buffers matching the requested level of detail;
    # $listing stays undefined when only counts should be shown.
    my $listing;
    if ($details) {
        if ($count <= $much_details) {
            $listing = \%much_output;
        }
        elsif ($count <= $details) {
            $listing = \%output;
        }
    }

    my $summary;
    if (!$details) {
        $summary = "$count problems";
    }
    elsif ($listing) {
        $summary = $alltypes
            ? "$count problems: $listing->{$crit} $listing->{$warn} $listing->{$unk}"
            : "$count \L$state\E: $listing->{$state}";
    }
    else {
        $summary = $alltypes
            ? "$numprob{$crit} critical, $numprob{$warn} warning, $numprob{$unk} unknown"
            : "$count \L$state\E";
    }

    print "State $state - $summary\n";
}
|
||
|
|
||
|
|
||
|
# Record one non-OK entry from the status log.
#   $_[0]  the complete ';'-separated log line
#   $_[1]  the state read from that line (e.g. CRITICAL, DOWN)
# Depending on --ack, --dt and --hostdown the entry is either dropped
# (handling "ok"), counted under the state named by the option, or counted
# under its own state.  Counted entries increment %numprob; when --details
# is active their text is also appended to %output / %much_output.
# The meaning of the numeric field positions is taken as-is from the
# original code -- verify against the status.log format in use.
sub addproblem {
    my ($line, $state) = @_;

    my $test    = 1;       # cleared when the entry must not be counted
    my $type    = $state;  # state the entry is finally counted under
    my $diffout = "";      # replacement text for services on a down host

    my @values = split /;/, $line;

    if (!$host) {
        # Service entry.
        my $namehold = $values[1];

        # Field 13 equal to "1": handled as an acknowledged problem.
        if ($ack && ($values[13] eq "1")) {
            if ($ack =~ "ok") {
                $test = 0;
            }
            else {
                $type = "\U$ack";
            }
        }
        # The service's host is Down/Unreachable.  Host names are compared
        # for equality: a regex match would let "web1" hit "web10" and
        # would interpret metacharacters in the name.
        elsif ($hdown && grep { $_ eq $namehold } @hostdown) {
            if ($hdown =~ "ok") {
                $test = 0;
            }
            else {
                $type = "\U$hdown";
                $diffout = "$values[1] is down";
            }
        }
        # Field 27 other than "0", or the host being in the downtime list:
        # handled as scheduled downtime.
        elsif ($dt && (($values[27] ne "0") || (grep { $_ eq $namehold } @hostdowntime))) {
            if ($dt =~ "ok") {
                $test = 0;
            }
            else {
                $type = "\U$dt";
            }
        }
        elsif (exists $hostoktimes{$namehold}) {
            # If the state change time of the host is more recent than the
            # last service check, must wait until the next service check runs!
            if ($hostoktimes{$namehold} > $values[6]) {
                $test = 0;
            }
        }
    }
    else {
        # Host entry: a true field 5 is handled as acknowledged, field 17
        # other than "0" as scheduled downtime.
        if ($ack && $values[5]) {
            if ($ack =~ "ok") {
                $test = 0;
            }
            else {
                $type = "\U$ack";
            }
        }
        elsif ($dt && ($values[17] ne "0")) {
            if ($dt =~ "ok") {
                $test = 0;
            }
            else {
                $type = "\U$dt";
            }
        }
    }

    # Collect the text shown when --details is in effect.
    if ($details && $test) {
        if (!$host) {
            if ($diffout) {
                $much_output{$type} .= " $diffout;";
                $output{$type} .= "$diffout;";
                $much_ct{$type}++;
                $ct{$type}++;
            }
            else {
                # Each listing stops growing once it holds as many entries
                # as its --details limit allows.
                if ($much_details && $much_ct{$type}<$much_details) {
                    $much_output{$type} .= " $values[2] on $values[1] $values[31];";
                    $much_ct{$type}++;
                }
                if ($ct{$type} < $details) {
                    $output{$type} .= " $values[2] on $values[1];";
                    $ct{$type}++;
                }
            }
        }
        else {
            $much_output{$type} .= " $values[1] $state $values[20],";
            # Was $much_ct{type}: without the sigil it incremented a stray
            # "type" key instead of the counter for this state.
            $much_ct{$type}++;
            $output{$type} .= " $values[1] HOST $state,";
            $ct{$type}++;
        }
    }

    if ($test) {
        $numprob{$type}++;
    }
}
|
||
|
|
||
|
################################
|
||
|
#
|
||
|
# Version and Help Information
|
||
|
#
|
||
|
################################
|
||
|
|
||
|
# Print the program name, version and licence notice on STDOUT.
sub printVersion {
    # print, not printf: the text interpolates $0, and a '%' in the program
    # path would otherwise be interpreted as a format directive.
    print <<EndVersion;
$0 (nagios-plugins) 1.3
The nagios plugins come with ABSOLUTELY NO WARRANTY. You may redistribute
copies of the plugins under the terms of the GNU General Public License.
For more information about these matters, see the file named COPYING.
EndVersion
}
|
||
|
|
||
|
# Print the full usage and option description on STDOUT.
sub printHelp {
    # print, not printf: the text interpolates $0, and a '%' in the program
    # path would otherwise be interpreted as a format directive.  The short
    # switches listed for --service and --host are the ones the option
    # parser actually defines (-S and -H).
    print <<EOF;

This plugin parses through the Nagios status log and will return a
Critical, Warning, or Unknown state depending on the number of
Critical, Warning, and/or Unknown services found in the log
(or Down/Unreachable hosts when matching against hosts)

Usage: $0 -s <Status File> | -d <Nagios Log Directory>
[-w #[,#][,#]] [-c #[,#][,#]] [-u #[,#][,#]]
[--service=<RegEx> | --servhost=<RegEx> | --pattern=<RegEx> |
--host | --host=<RegEx>]
[--ack[=string]] [--dt[=string]] [--hostdown[=string]]
[-D #[,#]] [--ok] [-f <Log freshness in # minutes>]
$0 --help
$0 --version
NOTE: One of -s and -d must be specified

Options:
-s, --status=FILE_NAME
Location and name of status log (e.g. /usr/local/nagios/var/status.log)
-d, --dir=DIRECTORY_NAME
Directory that contains the nagios logs (e.g. /usr/local/nagios/var/)
-w, --warning=INTEGER[,INTEGER][,INTEGER]
#: Number of warnings to result in a WARNING state
OR
#,#: Warning,Criticals to result in a WARNING state
OR
#,#,#: Warning,Critical,Unknown to result in a WARNING state
Default: -w=1
-c, --critical=INTEGER[,INTEGER][,INTEGER]
#: Number of criticals to result in a CRITICAL state
OR
#,#: Warning,Criticals to result in a CRITICAL state
OR
#,#,#: Warning,Critical,Unknown to result in a CRITICAL state
Default: -c=1
-u, --unknown=INTEGER[,INTEGER][,INTEGER]
#: Number of unknowns to result in a UNKNOWN state
OR
#,#: Warning,Criticals to result in a UNKNOWN state
OR
#,#,#: Warning,Critical,Unknown to result in a UNKNOWN state
Default: -u=1
-S, --service[=REGEX]
Only match services [that match the RegEx]
(--service is default setting if no other matching arguments provided)
--servhost=REGEX
Only match services whose host match the RegEx
-p, --pattern=REGEX
Only parse for this regular expression (services only, not hosts)
-H, --host[=REGEX]
Report on the state of hosts (whose name matches the RegEx if provided)
-a, --ack[=ok|warning|critical|unknown|down|unreachable]
Handle Acknowledged problems [--ack defaults to ok]
--dt, --downtime[=ok|warning|critical|unknown|down|unreachable]
Handle problems in scheduled downtime [--dt defaults to ok]
--hd, --hostdown[=ok|warning|critical|unknown|down|unreachable]
Handle services whose Host is down [--hd defaults to ok]
-D, --details=INTEGER[,INTEGER]
Amount of verbosity to output
If # problems:
<= 1st integer, return full details (each plugin's output)
<= 2nd integer, return some details (list each service host pair)
> 2nd integer, return the # of problems
-f, --freshness=INTEGER
Number of minutes old the log can be to make sure Nagios is running
(Default = 30 minutes)
--ok
Return an OK exit code, regardless of number of problems found
-h, --help
Print detailed help screen
-V, --version
Print version information

For service checking (use --service and/or --servhost):
1. The values of warning, critical, and unknown default to 1, i.e.
$0 will return CRITICAL if there is at least 1 critical service,
WARNING if there is at least 1 warning service, and UNKNOWN if there is
at least one unknown service.

2. If a service's host is DOWN or UNREACHABLE, $0 will use the
value of --hostdown to determine how to treat the service. Without that
argument, $0 will count the service as OK.

3. If a service's host is OK, but the last host-state change occurred more
recently than the last service check, $0 will ignore that service
(want to wait until the service has been checked after a host has recovered
or you may get service alert for services that still need to be checked)

4. If the --dt, --ack, or --hd tags are used, $0 will use the value
of the arguments to determine how to handle services in downtime, acknowledged,
or with down hosts (default=OK). For service checks, --dt will also check
if the service's host is in a downtime.

For host checking (use --host):
1. Using the --host argument, $0 will look for DOWN and UNREACHABLE
hosts. If any are found, $0 will return a CRITICAL. You can provide
an REGEX for --host to only check hosts with matching host names.

2. If the --dt or --ack tags are used, $0 will use the value of the
--dt/--ack arguments to determine the state of the host (default is OK)

EOF
}
|