diff --git a/check_smart/check_smart b/check_smart/check_smart index 5eff58e..7a392d6 100644 --- a/check_smart/check_smart +++ b/check_smart/check_smart @@ -53,23 +53,25 @@ # Dec 10, 2021: Claudio Kuenzler - Sec fix in path for pseudo-devices, add Erase_Fail_Count_Total, fix NVMe perfdata (6.12.0) # Dec 10, 2021: Claudio Kuenzler - Bugfix in interface handling (6.12.1) # Dec 16, 2021: Lorenz Kaestle - Bugfix when interface parameter was missing in combination with -g (6.12.2) +# Apr 27, 2022: Claudio Kuenzler - Allow skip temperature check (--skip-temp-check) (6.13.0) +# Apr 27, 2022: Peter Newman - Better handling of missing or non-executable smartctl command (6.13.0) use strict; use Getopt::Long; use File::Basename qw(basename); my $basename = basename($0); -my $revision = '6.12.2'; +my $revision = '6.13.0'; # Standard Nagios return codes my %ERRORS=('OK'=>0,'WARNING'=>1,'CRITICAL'=>2,'UNKNOWN'=>3,'DEPENDENT'=>4); -$ENV{'PATH'}='/bin:/usr/bin:/sbin:/usr/sbin:/usr/local/bin:/usr/local/sbin'; +my @sys_path = qw(/usr/bin /bin /usr/sbin /sbin /usr/local/bin /usr/local/sbin); $ENV{'BASH_ENV'}=''; $ENV{'ENV'}=''; -use vars qw($opt_b $opt_d $opt_g $opt_debug $opt_h $opt_i $opt_e $opt_E $opt_r $opt_s $opt_v $opt_w $opt_q $opt_l $opt_skip_sa); +use vars qw($opt_b $opt_d $opt_g $opt_debug $opt_h $opt_i $opt_e $opt_E $opt_r $opt_s $opt_v $opt_w $opt_q $opt_l $opt_skip_sa $opt_skip_temp); Getopt::Long::Configure('bundling'); GetOptions( "debug" => \$opt_debug, @@ -87,6 +89,7 @@ GetOptions( "w=s" => \$opt_w, "warn=s" => \$opt_w, "l" => \$opt_l, "ssd-lifetime" => \$opt_l, "skip-self-assessment" => \$opt_skip_sa, + "skip-temp-check" => \$opt_skip_temp, ); if ($opt_v) { @@ -102,7 +105,7 @@ if ($opt_h) { my ($device, $interface) = qw// // ''; if ($opt_d || $opt_g ) { unless($opt_i){ - print "must specify an interface for $opt_d using -i/--interface!\n\n" if $opt_d; + print "must specify an interface for $opt_d using -i/--interface!\n\n" if $opt_d; print "must specify an interface for $opt_g using -i/--interface!\n\n" if $opt_g; print_help(); exit $ERRORS{'UNKNOWN'}; @@ -181,8 +184,19 @@ if ($device eq "") { exit $ERRORS{'UNKNOWN'}; } +my $smart_command = undef; +foreach my $path (@sys_path) { + if (-x "$path/smartctl") { + $smart_command = "sudo $path/smartctl"; + last; + } +} + +if (!defined($smart_command)) { + print "UNKNOWN - Could not find executable smartctl in " . join(", ", @sys_path) . "\n"; + exit $ERRORS{'UNKNOWN'}; +} -my $smart_command = 'sudo smartctl'; my $exit_status = 'OK'; my $exit_status_local = 'OK'; my $status_string = ''; @@ -670,10 +684,12 @@ foreach $device ( split("\\|",$device) ){ if($current_temperature){ if($max_temperature){ push (@perfdata, "temperature=$current_temperature;;$max_temperature") if $opt_d; - if($current_temperature > $max_temperature){ - warn "(debug) Disk temperature is greater than max ($current_temperature > $max_temperature)\n\n" if $opt_debug; - push(@error_messages, 'Disk temperature is higher than maximum'); - escalate_status('CRITICAL'); + unless($opt_skip_temp) { + if($current_temperature > $max_temperature){ + warn "(debug) Disk temperature is greater than max ($current_temperature > $max_temperature)\n\n" if $opt_debug; + push(@error_messages, 'Disk temperature is higher than maximum'); + escalate_status('CRITICAL'); + } } } else{ @@ -793,6 +809,7 @@ sub print_help { print " -d/--device: a physical block device to be SMART monitored, eg /dev/sda. Pseudo-device /dev/bus/N is allowed.\n"; print " -g/--global: a glob pattern name of physical devices to be SMART monitored\n"; print " Example: '/dev/sd[a-z]' will search for all /dev/sda until /dev/sdz devices and report errors globally.\n"; + print " Example: '/dev/sd*[a-z]' will search for all /dev/sda until /dev/sdzzzz etc devices and report errors globally.\n"; print " It is also possible to use -g in conjunction with megaraid devices. Example: -i 'megaraid,[0-3]'.\n"; print " Does not output performance data for historical value graphing.\n"; print "Note that -g only works with a fixed interface (e.g. scsi, ata) and megaraid,N.\n"; @@ -810,6 +827,7 @@ sub print_help { print " -s/--selftest: Enable self-test log check\n"; print " -l/--ssd-lifetime: Check attribute 'Percent_Lifetime_Remain' available on some SSD drives\n"; print " --skip-self-assessment: Skip SMART self-assessment health status check\n"; + print " --skip-temp-check: Skip temperature comparison current vs. drive max temperature\n"; print " -h/--help: this help\n"; print " -q/--quiet: When faults detected, only show faulted drive(s) (only affects output when used with -g parameter)\n"; print " --debug: show debugging information\n"; diff --git a/check_smart/control b/check_smart/control index b48f60a..ef7686d 100644 --- a/check_smart/control +++ b/check_smart/control @@ -1,6 +1,6 @@ Homepage: https://github.com/Napsty/check_smart Uploaders: Jan Wagner Watch: https://raw.githubusercontent.com/Napsty/check_smart/master/check_smart.pl \$revision\ =\ '([0-9.]+)' -Version: 6.12.2 +Version: 6.13.0 Description: plugin to check SMART status of ATA/SCSI/NVMe drives, returning any usable metrics as perfdata. Recommends: perl-modules, monitoring-plugins-common | nagios-plugins-common