#!/usr/bin/perl -w
############################## check_snmp_process ##############
# Version : 1.4
# Date : March 12 2007
# Author  : Patrick Proy (patrick at proy.org)
# Help : http://nagios.manubulon.com
# Licence : GPL - http://www.fsf.org/licenses/gpl.txt
# Contrib : Makina Corpus
# TODO : put $o_delta as an option
# Contrib :
###############################################################
#
# help : ./check_snmp_process -h

############### BASE DIRECTORY FOR TEMP FILE ########
# Prefix of the history files kept between two runs of the plugin.
my $o_base_dir = "/tmp/tmp_Nagios_proc.";
# Number of data rows to keep in a history file.
my $file_history = 200;
# Time span (seconds) used to compute averages: 5 minutes by default.
my $delta_of_time_to_make_average = 300;

use strict;
use Net::SNMP;
use Getopt::Long;

# Nagios specific
use lib "/usr/local/nagios/libexec";
use utils qw(%ERRORS $TIMEOUT);
#my $TIMEOUT = 5;
#my %ERRORS=('OK'=>0,'WARNING'=>1,'CRITICAL'=>2,'UNKNOWN'=>3,'DEPENDENT'=>4);

# SNMP Datas : OIDs queried by the plugin
my $process_table  = '1.3.6.1.2.1.25.4.2.1';
my $index_table    = '1.3.6.1.2.1.25.4.2.1.1';
my $run_name_table = '1.3.6.1.2.1.25.4.2.1.2';
my $run_path_table = '1.3.6.1.2.1.25.4.2.1.4';
my $proc_mem_table = '1.3.6.1.2.1.25.5.1.1.2';    # Kbytes
my $proc_cpu_table = '1.3.6.1.2.1.25.5.1.1.1';    # Centi sec of CPU
my $proc_run_state = '1.3.6.1.2.1.25.4.2.1.7';

# Globals
my $Version = '1.4';

# Command line options. A scalar declared without a value starts as undef.
my $o_host;         # hostname
my $o_community;    # community
my $o_port = 161;   # port
my $o_version2;     # use snmp v2c
my $o_descr;        # description filter
my $o_warn = 0;     # warning limit
# NOTE(review): the original wrote "= undef", which gives an array holding
# one undef element rather than an empty array. Kept equivalent here;
# confirm that no later code relies on the element count before changing it.
my @o_warnL = (undef);    # warning limits (min,max)
my $o_crit = 0;           # critical limit
my @o_critL = (undef);    # critical limits (min,max)
my $o_help;               # want some help?
my $o_verb= undef;          # verbose mode
my $o_version= undef;       # print version
my $o_noreg= undef;         # Do not use Regexp for name
my $o_path= undef;          # check path instead of name
my $o_inverse= undef;       # checks max instead of min number of process
my $o_get_all= undef;       # get all tables at once
my $o_timeout= 5;           # Default 5s Timeout
# SNMP V3 specific
my $o_login= undef;         # snmp v3 login
my $o_passwd= undef;        # snmp v3 passwd
my $v3protocols=undef;      # V3 protocol list.
my $o_authproto='md5';      # Auth protocol
my $o_privproto='des';      # Priv protocol
my $o_privpass= undef;      # priv password
# SNMP Message size parameter (Makina Corpus contrib)
my $o_octetlength=undef;
# Memory & CPU
# NOTE(review): "my @array = undef" yields a one-element array (undef), not
# an empty one. Left unchanged because the code consuming these arrays is
# not visible here; confirm before switching to "()".
my $o_mem= undef;           # checks memory (max)
my @o_memL= undef;          # warn and crit level for mem
my $o_mem_avg= undef;       # checks memory average
my $o_cpu= undef;           # checks CPU usage
my @o_cpuL= undef;          # warn and crit level for cpu
my $o_delta= $delta_of_time_to_make_average; # delta time for CPU check

# functions

# Print the plugin name and version on STDOUT.
sub p_version { print "check_snmp_process version : $Version\n"; }

# Print the one-line usage summary on STDOUT.
# NOTE(review): the option placeholders in this string look like they were
# lost (e.g. "-H -C", "[-p ]"); the text is reproduced as found - verify
# against the upstream script before editing it.
sub print_usage {
    print "Usage: $0 [-v] -H -C [-2] | (-l login -x passwd) [-p ] -n [-w [,] -c [,max_proc] ] [-m, -a -u, ] [-t ] [-o ] [-f ] [-r] [-V] [-g]\n";
}

# Return true (1) if the argument is NOT a plain decimal number, else 0.
# Accepted forms: optional minus sign followed by digits with an optional
# fractional part ("12", "-3", "4.", "1.5"), or a bare fraction (".5").
# An undefined argument is reported as "not a number".
sub isnotnum {
    my $num = shift;
    return 1 unless defined $num;
    # Both alternatives sit inside one group so that the start and end
    # anchors apply to each of them; the previous pattern anchored only half
    # of each alternative and accepted strings such as "12abc".
    return 0 if $num =~ /\A(?:-?\d+\.?\d*|\.\d+)\z/;
    return 1;
}

# Get the alarm signal (just in case snmp timeout screws up)
$SIG{'ALRM'} = sub {
    print ("ERROR: Alarm signal (Nagios time-out)\n");
    exit $ERRORS{"UNKNOWN"};
};

# Read a history file made of ":"-separated lines.
# Input   : file name, number of items expected per line
# Returns : (0, number_of_rows, rows...) on success, where every row is an
#           array reference holding the first <items_number> fields;
#           (1, 0, 0) if the file cannot be opened or holds no usable line.
sub read_file {
    my ($traffic_file, $items_number) = @_;
    my $n_rows = 0;
    my @last_values;

    # Three-argument open: the file name is never parsed for a mode.
    open(my $fh, '<', $traffic_file) or return (1, 0, 0);
    while (my $ligne = <$fh>) {
        chomp($ligne);
        my @file_values = split(":", $ligne);
        # Not enough data on this line : ignore it.
        next if $#file_values < ($items_number - 1);
        $last_values[$n_rows][$_] = $file_values[$_] for 0 .. $items_number - 1;
        $n_rows++;
    }
    close $fh;

    return (1, 0, 0) if $n_rows == 0;
    return (0, $n_rows, @last_values);
}

# Write history rows to a file, one ":"-separated line per row.
# Input   : file name, number of rows, number of items per row,
#           array of rows : [line][item]
# Only the last $file_history rows are written.
# Returns : 0 / OK, 1 / error (file cannot be opened, or the final close
#           reports a failed write).
sub write_file {
    my ($file_out, $rows, $item, @file_values) = @_;
    my $start_line = ($rows > $file_history) ? $rows - $file_history : 0;

    open(my $fh, '>', $file_out) or return 1;
    for my $i ($start_line .. $rows - 1) {
        my @fields;
        push @fields, $file_values[$i][$_] for 0 .. $item - 1;
        print {$fh} join(":", @fields), "\n";
    }
    # Buffered write errors only surface when the handle is closed.
    close($fh) or return 1;
    return 0;
}

# help() : this definition continues beyond this section and the opener of its here-document is damaged in this copy, so the fragment below is reproduced untouched.
sub help { print "\nSNMP Process Monitor for Nagios version ",$Version,"\n"; print "GPL licence, (c)2004-2006 Patrick Proy\n\n"; print_usage(); print <, : Authentication protocol (md5|sha : default md5) : Priv protocole (des|aes : default des) -p, --port=PORT SNMP port (Default 161) -n, --name=NAME Name of the process (regexp) No trailing slash ! -r, --noregexp Do not use regexp to match NAME in description OID -f, --fullpath Use full path name instead of process name (Windows doesn't provide full path name) -w, --warn=MIN[,MAX] Number of process that will cause a warning -1 for no warning, MAX must be >0. Ex : -w-1,50 -c, --critical=MIN[,MAX] number of process that will cause an error ( -1 for no critical, MAX must be >0. Ex : -c-1,50 Notes on warning and critical : with the following options : -w m1,x1 -c m2,x2 you must have : m2 <= m1 < x1 <= x2 you can omit x1 or x2 or both -m, --memory=WARN,CRIT checks memory usage (default max of all process) values are warning and critical values in Mb -a, --average makes an average of memory used by process instead of max -u, --cpu=WARN,CRIT checks cpu usage of all process values are warning and critical values in % of CPU usage if more than one CPU, value can be > 100% : 100%=1 CPU -g, --getall In some cases, it is necessary to get all data at once because process die very frequently. 
This option eats bandwidth an cpu (for remote host) at breakfast. -o, --octetlength=INTEGER max-size of the SNMP message, usefull in case of Too Long responses. Be carefull with network filters. Range 484 - 65535, default are usually 1472,1452,1460 or 1440. -t, --timeout=INTEGER timeout for SNMP in seconds (Default: 5) -V, --version prints version number Note : CPU usage is in % of one cpu, so maximum can be 100% * number of CPU example : Browse process list :