diff --git a/check_iostat/Makefile b/check_iostat/Makefile
new file mode 100644
index 0000000..cf9673d
--- /dev/null
+++ b/check_iostat/Makefile
@@ -0,0 +1,3 @@
+#/usr/bin/make -f
+
+include ../common.mk
diff --git a/check_iostat/check_iostat b/check_iostat/check_iostat
new file mode 100644
index 0000000..69aeb6a
--- /dev/null
+++ b/check_iostat/check_iostat
@@ -0,0 +1,378 @@
+#!/bin/bash
+#----------check_iostat.sh-----------
+#
+# Nagios plugin reporting I/O, queue or wait time statistics of one block
+# device, as sampled by iostat (sysstat package).
+#
+# Exit status: 0 = OK, 1 = WARNING, 2 = CRITICAL, 3 = UNKNOWN (usage error,
+# missing tool or unusable iostat output).
+#
+# Version 0.0.2 - Jan/2009
+# Changes: added device verification
+#
+# by Thiago Varela - thiago@iplenix.com
+#
+# Version 0.0.3 - Dec/2011
+# Changes:
+# - changed values from bytes to mbytes
+# - fixed bug to get traffic data without comma but point
+# - current values are displayed now, not average values (first run of iostat)
+#
+# by Philipp Niedziela - pn@pn-it.com
+#
+# Version 0.0.4 - April/2014
+# Changes:
+# - Allow Empty warn/crit levels
+# - Can check I/O, WAIT Time, or Queue
+#
+# by Warren Turner
+#
+# Version 0.0.5 - Jun/2014
+# Changes:
+# - removed -y flag from call since iostat doesn't know about it any more (June 2014)
+# - only needed executions of iostat are done now (save cpu time whenever you can)
+# - fixed the obvious problems of missing input values (probably because of the now unimplemented "-y") with -x values
+# - made performance data optional (I like to have choice in the matter)
+#
+# by Frederic Krueger / fkrueger-dev-checkiostat@holics.at
+#
+# Version 0.0.6 - Jul/2014
+# Changes:
+# - Cleaned up argument checking, removed excess iostat calls, streamlined if statements and renamed variables to fit current use
+# - Fixed all inputs to match current iostat output (Ubuntu 12.04)
+# - Changed to take last ten seconds as default (more useful for nagios usage). Will go to "since last reboot" (previous behaviour) on -g flag.
+# - added extra comments/whitespace etc to aid readability
+#
+# by Ben Field / ben.field@concreteplatform.com
+#
+# Version 0.0.7 - Sep/2014
+# Changes:
+# - Fixed performance data for Wait check
+#
+# by Christian Westergard / christian.westergard@gmail.com
+#
+# Version 0.0.8 - Jan/2019
+# Changes:
+# - Added Warn/Crit thresholds to performance output
+#
+# by Danny van Zunderd / danny_vz@live.nl
+#
+# Version 0.0.9 - Jun/2020
+# Changes:
+# - Updated to use bash 4.4 mechanisms
+#
+# by Joseph Waggy / joseph.waggy@gmail.com
+
+iostat=$(command -v iostat 2>/dev/null)
+bc=$(command -v bc 2>/dev/null)
+
+# Nagios exit status for usage errors and for results that cannot be trusted.
+readonly STATE_UNKNOWN=3
+
+# Spelling accepted for thresholds and metrics: a non-negative decimal number.
+readonly NUMBER_RE='^([0-9]+([.][0-9]*)?|[.][0-9]+)$'
+
+# Print the usage text on stdout.
+help()
+{
+  cat <<'EOF'
+
+Usage:
+
+-d =
+--Device to be checked. Example: "-d sda"
+
+Run only one of i, q, W:
+
+-i = IO Check Mode
+--Checks Total Transfers/sec, Read IO/Sec, Write IO/Sec, Bytes Read/Sec, Bytes Written/Sec
+--warning/critical = Total Transfers/sec,Read IO/Sec,Write IO/Sec,Bytes Read/Sec,Bytes Written/Sec
+
+-q = Queue Mode
+--Checks Disk Queue Lengths
+--warning/critial = Average size of requests, Queue length of requests
+
+-W = Wait Time Mode
+--Check the time for I/O requests issued to the device to be served. This includes the time spent by the requests in queue and the time spent servicing them.
+--warning/critical = Avg I/O Wait Time (ms), Avg Read Wait Time (ms), Avg Write Wait Time (ms), Avg Service Wait Time (ms), Avg CPU Utilization
+
+-w,-c = pass warning and critical levels respectively. These are not required, but with out them, all queries will return as OK.
+
+-p = Provide performance data for later graphing
+
+-g = Since last reboot for system (more for debugging that nagios use!)
+
+-h = This help
+
+EOF
+}
+
+# Print an error message followed by the usage text, then exit with UNKNOWN.
+# Arguments: $1 - message
+usage_error()
+{
+  echo "$1"
+  help
+  exit "$STATE_UNKNOWN"
+}
+
+# Succeed when the decimal comparison "$1 $2 $3" (e.g. 1.5 ">=" 1) holds.
+# bc does the comparison because the operands are usually not integers.
+compare()
+{
+  [[ "$(echo "$1 $2 $3" | bc)" == "1" ]]
+}
+
+# Split a comma separated threshold list into the array named by $1.
+# Arguments: $1 - name of the destination array, $2 - value given to -w or -c
+# A value without any comma applies to every metric of the selected mode (the
+# former "cut -d, -fN" parsing behaved that way). An empty field means that
+# the metric is not checked. Exits with UNKNOWN on a non-numeric field.
+parse_thresholds()
+{
+  local -n levels_ref=$1
+  local spec=$2 i
+  local -a fields=()
+
+  if [[ "$spec" == *,* ]]; then
+    IFS=, read -r -a fields <<< "$spec"
+  elif [[ -n "$spec" ]]; then
+    for ((i = 0; i < metric_count; i++)); do
+      fields[i]=$spec
+    done
+  fi
+  for i in "${!fields[@]}"; do
+    fields[i]=${fields[i]//[[:space:]]/}
+    if [[ -n "${fields[i]}" && ! "${fields[i]}" =~ $NUMBER_RE ]]; then
+      usage_error "ERROR: threshold '${fields[i]}' is not a number"
+    fi
+  done
+  levels_ref=("${fields[@]}")
+}
+
+# Run iostat for $disk and print the last report line of that device.
+# Arguments: additional iostat options (e.g. -x)
+sample_device()
+{
+  "$iostat" "$disk" "$@" -k -d 10 "$samples" \
+    | awk -v dev="$disk" '$1 == dev { line = $0 } END { print line }'
+}
+
+# Print field $2 of report line $1 with a decimal comma turned into a point.
+field()
+{
+  local value
+  value=$(awk -v n="$2" '{ print $n }' <<< "$1")
+  echo "${value/,/.}"
+}
+
+# Compare the metric values with the thresholds and raise STATE and status.
+# Globals: warn, crit, disk (read); STATE, status (written)
+# Arguments: the metric values, in the order the thresholds are given
+# Exits with UNKNOWN when a value is not a number.
+evaluate()
+{
+  local -a values=("$@")
+  local i
+
+  for i in "${!values[@]}"; do
+    if [[ ! "${values[i]}" =~ $NUMBER_RE ]]; then
+      echo "UNKNOWN - unexpected iostat output for device $disk"
+      exit "$STATE_UNKNOWN"
+    fi
+  done
+  for i in "${!values[@]}"; do
+    if [[ -n "${crit[i]:-}" ]] && compare "${values[i]}" ">=" "${crit[i]}"; then
+      STATE="CRITICAL"
+      status=2
+      return 0
+    fi
+    if [[ -n "${warn[i]:-}" ]] && compare "${values[i]}" ">=" "${warn[i]}"; then
+      STATE="WARNING"
+      status=1
+    fi
+  done
+  return 0
+}
+
+# Ensuring we have the needed tools:
+if [[ ! -x "$iostat" || ! -x "$bc" ]]; then
+  echo -e "ERROR: You must have iostat and bc installed in order to run this plugin\n\tuse: apt-get install sysstat bc\n"
+  exit "$STATE_UNKNOWN"
+fi
+
+io=0
+queue=0
+waittime=0
+printperfdata=0
+STATE="OK"
+# Number of iostat reports: the first one shows statistics since last reboot
+# (all that -g asks for), the second one the 10 seconds that follow.
+samples=2
+status=0
+# Start empty so that nothing is picked up from the environment.
+disk=""
+warning=""
+critical=""
+
+MSG=""
+PERFDATA=""
+
+#------------Argument Set-------------
+
+# The leading ":" selects getopts' silent mode, in which OPTARG carries the
+# offending option letter for the ":" and "?" cases.
+while getopts ":d:w:c:ipqWhg" OPT; do
+  case "$OPT" in
+    d) disk=$OPTARG ;;
+    w) warning=$OPTARG ;;
+    c) critical=$OPTARG ;;
+    i) io=1 ;;
+    p) printperfdata=1 ;;
+    q) queue=1 ;;
+    W) waittime=1 ;;
+    g) samples=1 ;;
+    h)
+      echo "help:"
+      help
+      exit 0
+      ;;
+    :)
+      echo "Option -$OPTARG requires an argument" >&2
+      help
+      exit "$STATE_UNKNOWN"
+      ;;
+    \?)
+      echo "Invalid option: -$OPTARG" >&2
+      help
+      exit "$STATE_UNKNOWN"
+      ;;
+  esac
+done
+
+# Autofill if parameters are empty
+if [[ -z "$disk" ]]; then
+  disk=sda
+fi
+
+#Checks that only one query type is run
+if ((io + queue + waittime != 1)); then
+  usage_error "ERROR: select one and only one run mode"
+fi
+
+# Queue mode evaluates two metrics, the other modes five.
+if ((queue == 1)); then
+  metric_count=2
+else
+  metric_count=5
+fi
+
+# warn[n] and crit[n] hold the thresholds of the n-th metric of the selected
+# mode; an unset or empty entry means that the metric is not checked.
+declare -a warn=() crit=()
+parse_thresholds warn "$warning"
+parse_thresholds crit "$critical"
+
+#------------Argument Set End-------------
+
+#------------Parameter Check-------------
+
+#Checks for sane Disk name:
+if [[ ! -b "/dev/$disk" ]]; then
+  usage_error "ERROR: Device incorrectly specified"
+fi
+
+#Checks for sane warning/critical levels
+for ((i = 0; i < metric_count; i++)); do
+  if [[ -n "${warn[i]:-}" && -n "${crit[i]:-}" ]] \
+    && compare "${warn[i]}" ">" "${crit[i]}"; then
+    usage_error "ERROR: critical levels must be higher than warning levels"
+  fi
+done
+
+#------------Parameter Check End-------------
+
+# iostat parameters:
+# -k: kilobytes
+# -d: device utilization report
+# -x: extended statistics
+# 10 $samples: interval in seconds and number of reports; the first report
+# shows statistics since last reboot, the second one shows current values
+#
+# NOTE(review): the fields below are read by position and presumably match the
+# column layout named in the 0.0.6 changelog entry (Ubuntu 12.04); confirm
+# against the installed sysstat version.
+
+TMPX=$(sample_device -x)
+if [[ -z "$TMPX" ]]; then
+  echo "UNKNOWN - iostat returned no statistics for device $disk"
+  exit "$STATE_UNKNOWN"
+fi
+
+#------------IO Test-------------
+
+if [[ "$io" == "1" ]]; then
+  # The transfers per second value is read from the non-extended report.
+  TMPD=$(sample_device)
+  #Requests per second:
+  tps=$(field "$TMPD" 2)
+  read_sec=$(field "$TMPX" 4)
+  written_sec=$(field "$TMPX" 5)
+
+  #Kb per second:
+  kbytes_read_sec=$(field "$TMPX" 6)
+  kbytes_written_sec=$(field "$TMPX" 7)
+
+  # Comparing the result and setting the correct level:
+  evaluate "$tps" "$read_sec" "$written_sec" "$kbytes_read_sec" "$kbytes_written_sec"
+
+  # Printing the results:
+  MSG="$STATE - I/O stats: Transfers/Sec=$tps Read Requests/Sec=$read_sec Write Requests/Sec=$written_sec KBytes Read/Sec=$kbytes_read_sec KBytes_Written/Sec=$kbytes_written_sec"
+  PERFDATA=" | total_io_sec=$tps;${warn[0]:-};${crit[0]:-}; read_io_sec=$read_sec;${warn[1]:-};${crit[1]:-}; write_io_sec=$written_sec;${warn[2]:-};${crit[2]:-}; kbytes_read_sec=$kbytes_read_sec;${warn[3]:-};${crit[3]:-}; kbytes_written_sec=$kbytes_written_sec;${warn[4]:-};${crit[4]:-};"
+fi
+
+#------------IO Test End-------------
+
+#------------Queue Test-------------
+if [[ "$queue" == "1" ]]; then
+  qsize=$(field "$TMPX" 8)
+  qlength=$(field "$TMPX" 9)
+
+  # Comparing the result and setting the correct level:
+  evaluate "$qsize" "$qlength"
+
+  # Printing the results:
+  MSG="$STATE - Disk Queue Stats: Average Request Size=$qsize Average Queue Length=$qlength"
+  PERFDATA=" | qsize=$qsize;${warn[0]:-};${crit[0]:-}; queue_length=$qlength;${warn[1]:-};${crit[1]:-};"
+fi
+
+#------------Queue Test End-------------
+
+#------------Wait Time Test-------------
+
+#Parse values. Warning - svc time will soon be deprecated and these will need to be changed. Future parser could look at first line (labels) to suggest correct column to return
+if [[ "$waittime" == "1" ]]; then
+  avgwait=$(field "$TMPX" 10)
+  avgrwait=$(field "$TMPX" 11)
+  avgwwait=$(field "$TMPX" 12)
+  avgsvctime=$(field "$TMPX" 13)
+  avgcpuutil=$(field "$TMPX" 14)
+
+  # Comparing the result and setting the correct level:
+  evaluate "$avgwait" "$avgrwait" "$avgwwait" "$avgsvctime" "$avgcpuutil"
+
+  # Printing the results:
+  MSG="$STATE - Wait Time Stats: Avg I/O Wait Time (ms)=$avgwait Avg Read Wait Time (ms)=$avgrwait Avg Write Wait Time (ms)=$avgwwait Avg Service Wait Time (ms)=$avgsvctime Avg CPU Utilization=$avgcpuutil"
+  PERFDATA=" | avg_io_waittime_ms=$avgwait;${warn[0]:-};${crit[0]:-}; avg_r_waittime_ms=$avgrwait;${warn[1]:-};${crit[1]:-}; avg_w_waittime_ms=$avgwwait;${warn[2]:-};${crit[2]:-}; avg_service_waittime_ms=$avgsvctime;${warn[3]:-};${crit[3]:-}; avg_cpu_utilization=$avgcpuutil;${warn[4]:-};${crit[4]:-};"
+fi
+
+#------------Wait Time End-------------
+
+# now output the official result
+echo -n "$MSG"
+if [[ "$printperfdata" == "1" ]]; then
+  echo -n "$PERFDATA"
+fi
+echo ""
+exit "$status"
+#----------/check_iostat.sh-----------
diff --git a/check_iostat/control b/check_iostat/control
new file mode 100644
index 0000000..513f24a
--- /dev/null
+++ b/check_iostat/control
@@ -0,0 +1,5 @@
+Homepage: https://exchange.nagios.org/directory/Plugins/Operating-Systems/Linux/check_iostat--2D-I-2FO-statistics/details
+Uploaders: Jan Wagner
+Description: plugin shows the I/O usage of the specified disk.
+Recommends: sysstat, bc
+Version: 0.0.9