From 9df31e7682999bb92ca4c6b5b0005fcbc7900af5 Mon Sep 17 00:00:00 2001
From: Jan Wagner
Date: Wed, 21 Jul 2021 13:11:36 +0200
Subject: [PATCH] Adding check_keepalived

---
 check_keepalived/Makefile         |   3 +
 check_keepalived/check_keepalived | 281 ++++++++++++++++++++++++++++++
 check_keepalived/control          |   4 +
 check_keepalived/copyright        |   7 +
 4 files changed, 295 insertions(+)
 create mode 100644 check_keepalived/Makefile
 create mode 100644 check_keepalived/check_keepalived
 create mode 100644 check_keepalived/control
 create mode 100644 check_keepalived/copyright

diff --git a/check_keepalived/Makefile b/check_keepalived/Makefile
new file mode 100644
index 0000000..cf9673d
--- /dev/null
+++ b/check_keepalived/Makefile
@@ -0,0 +1,3 @@
+#/usr/bin/make -f
+
+include ../common.mk
diff --git a/check_keepalived/check_keepalived b/check_keepalived/check_keepalived
new file mode 100644
index 0000000..c0e14f0
--- /dev/null
+++ b/check_keepalived/check_keepalived
@@ -0,0 +1,281 @@
+#!/bin/bash
+
+# Monitoring plugin to check the keepalived status.
+#
+# Compares the expected VRRP state (MASTER or BACKUP) with the state found in
+# the status file and with the presence of the HA IP on the given interface.
+# A second VRRP instance (interface2 / ha-ip2) can be checked in the same run.
+#
+# The status file is only read here; it has to contain exactly one of
+# MASTER, BACKUP or FAULT.
+#
+# Exit codes: 0 OK, 1 WARNING, 2 CRITICAL, 3 UNKNOWN. The exit code is the
+# most severe state of all messages printed.
+usage(){
+    echo "Check: Is the keepalived service operate as it should.
+    --status     | -s   ) Target state of the system (MASTER, BACKUP)
+    --interface  | -i   ) Interface for vrrp instance
+    --ha-ip      | -ip  ) Vrrp-ip
+    --interface2 | -i2  ) Interface for 2nd vrrp instance
+    --ha-ip2     | -ip2 ) 2nd vrrp-ip
+    --help       | -h   ) Usage
+    "
+}
+
+# Exit Codes
+OK=0
+WARNING=1
+CRITICAL=2
+UNKNOWN=3
+
+# Command definitions
+# A missing tool is reported as UNKNOWN right away; an empty command variable
+# would otherwise make the checks below fail silently.
+for CMD in cat grep ip pgrep
+do
+    if ! command -v "${CMD}" >/dev/null 2>&1
+    then
+        echo "UNKNOWN: Required command '${CMD}' not found."
+        exit "${UNKNOWN}"
+    fi
+done
+
+CAT="$(command -v cat)"
+GREP="$(command -v grep)"
+IP="$(command -v ip)"
+PG="$(command -v pgrep)"
+
+if [ "$#" -eq 0 ]
+then
+    echo "CRITICAL: No arguments given. Take a look at the usage:"
+    usage
+    exit "${CRITICAL}"
+fi
+
+# shifting through our command line arguments and setting our values
+while [ "$#" -gt 0 ]; do
+    case $1 in
+        --status | -s )         shift
+                                TARGET_STATE="$1"
+                                ;;
+        --interface | -i )      shift
+                                IFACE="$1"
+                                ;;
+        --ha-ip | -ip )         shift
+                                HAIP="$1"
+                                ;;
+        --interface2 | -i2 )    shift
+                                IFACE2="$1"
+                                SEC_IP=true
+                                ;;
+        --ha-ip2 | -ip2 )       shift
+                                HAIP2="$1"
+                                ;;
+        --help | -h )           usage
+                                exit
+                                ;;
+        * )                     usage
+                                echo "CRITICAL: No valid arguments given. Take a look at the usage."
+                                exit "${CRITICAL}"
+                                ;;
+    esac
+    shift
+done
+
+# Config and commands
+STAT_FILE='/tmp/keepalived.status'
+PID_FILE='/run/keepalived.pid'
+STATUS=(MASTER BACKUP FAULT)
+EXIT_CODE="${OK}"
+
+# Abort with CRITICAL and the usage text if parameter $1 has no value ($2).
+require_param(){
+    if [ -z "$2" ]
+    then
+        echo "CRITICAL: Parameter '$1' not given. Check usage:"
+        usage
+        exit "${CRITICAL}"
+    fi
+}
+
+# Print 1 if address $2 is configured on interface $1, otherwise 0.
+# -F takes the address literally (a dot is no wildcard) and -w keeps
+# 10.0.0.1 from matching 10.0.0.10. The result is never larger than 1,
+# which the comparisons below rely on.
+ha_ip_up(){
+    if "${IP}" 2>/dev/null addr sh "$1" | "${GREP}" -qwF -- "$2"
+    then
+        echo 1
+    else
+        echo 0
+    fi
+}
+
+# Remember the most severe state reported so far.
+raise(){
+    if [ "$1" -gt "${EXIT_CODE}" ]
+    then
+        EXIT_CODE="$1"
+    fi
+}
+
+# Check variables exists
+# Done first, so a usage error is reported as such even if a file is missing.
+require_param status "${TARGET_STATE}"
+require_param interface "${IFACE}"
+require_param ha-ip "${HAIP}"
+
+if [ "${SEC_IP}" == "true" ] # 2nd ha instance needs interface and address
+then
+    require_param interface2 "${IFACE2}"
+    require_param ha-ip2 "${HAIP2}"
+fi
+
+# Check files are valid
+if [ ! -e "${STAT_FILE}" ]
+then
+    echo "CRITICAL: Generated status file is missing. State could not be determined."
+    exit "${CRITICAL}"
+fi
+
+FILE_CONT=$("${CAT}" "${STAT_FILE}" 2>/dev/null)
+
+if [ ! -e "${PID_FILE}" ]
+then
+    echo "CRITICAL: PID file is missing, keepalived is not running."
+    exit "${CRITICAL}"
+fi
+
+PID=$("${CAT}" "${PID_FILE}" 2>/dev/null)
+SERVICE=$("${PG}" keepalived)
+
+# Check service is running
+# The PID has to be one complete line of the pgrep output. An empty PID or a
+# PID that is only a part of another one (12 in 3123) does not count.
+if [ -z "${PID}" ] || ! printf '%s\n' "${SERVICE}" | "${GREP}" -qxF -- "${PID}"
+then
+    echo "CRITICAL: keepalived is not running."
+    exit "${CRITICAL}"
+fi
+
+# Confirm valid STATUS
+# Only an exact match is accepted; an empty file is rejected here as well.
+case "${FILE_CONT}" in
+    MASTER | BACKUP | FAULT )
+        ;;
+    * )
+        echo "CRITICAL: Status file contains unknown status or is empty. Take a look at ${STAT_FILE}."
+        exit "${CRITICAL}"
+        ;;
+esac
+
+CHECK_HAIP=$(ha_ip_up "${IFACE}" "${HAIP}")
+
+if [ "${SEC_IP}" == "true" ]
+then
+    CHECK_HAIP2=$(ha_ip_up "${IFACE2}" "${HAIP2}")
+fi
+
+# Check ha ip and status
+if [ "${TARGET_STATE}" = "${STATUS[0]}" ] # Machine is defined as MASTER
+then
+    case "${FILE_CONT}" in
+        MASTER )    STAT=ok
+                    ;;
+        BACKUP )    STAT=fail
+                    ;;
+        FAULT )     echo "CRITICAL: Machine status is FAULT."
+                    exit "${CRITICAL}"
+                    ;;
+    esac
+
+    if [ "${SEC_IP}" == "true" ] # 2nd ha instance
+    then
+        if [ "${CHECK_HAIP2}" == 0 ] && [ "${STAT}" == ok ]
+        then
+            echo "CRITICAL: 2nd HA IP ${HAIP2} is not up but machine is MASTER according to ${STAT_FILE}."
+            raise "${CRITICAL}"
+        elif [ "${CHECK_HAIP2}" == 0 ] && [ "${STAT}" == fail ]
+        then
+            echo "CRITICAL: 2nd HA IP ${HAIP2} is not up, machine is BACKUP. Should be MASTER."
+            raise "${CRITICAL}"
+        elif [ "${CHECK_HAIP2}" == 1 ] && [ "${STAT}" == fail ]
+        then
+            echo "CRITICAL: 2nd HA IP ${HAIP2} is up, but machine is BACKUP according to ${STAT_FILE}. Should be MASTER."
+            raise "${CRITICAL}"
+        else
+            echo "OK: 2nd HA IP ${HAIP2} is up and machine is MASTER."
+        fi
+    fi
+
+    if [ "${CHECK_HAIP}" == 0 ] && [ "${STAT}" == ok ] # Target-actual comparison machine state; IP should be up
+    then
+        echo "CRITICAL: HA IP ${HAIP} is not up but machine is MASTER according to ${STAT_FILE}."
+        raise "${CRITICAL}"
+    elif [ "${CHECK_HAIP}" == 0 ] && [ "${STAT}" == fail ]
+    then
+        echo "CRITICAL: HA IP ${HAIP} is not up, machine is BACKUP. Should be MASTER."
+        raise "${CRITICAL}"
+    elif [ "${CHECK_HAIP}" == 1 ] && [ "${STAT}" == fail ]
+    then
+        echo "CRITICAL: HA IP ${HAIP} is up, but machine is BACKUP according to ${STAT_FILE}. Should be MASTER."
+        raise "${CRITICAL}"
+    else
+        echo "OK: HA IP ${HAIP} is up and machine is MASTER."
+    fi
+
+    exit "${EXIT_CODE}"
+
+elif [ "${TARGET_STATE}" = "${STATUS[1]}" ] # Machine is defined as BACKUP
+then
+    case "${FILE_CONT}" in
+        MASTER )    STAT=fail
+                    ;;
+        BACKUP )    STAT=ok
+                    ;;
+        FAULT )     echo "CRITICAL: Machine status is FAULT."
+                    exit "${CRITICAL}"
+                    ;;
+    esac
+
+    if [ "${SEC_IP}" == "true" ] # 2nd ha instance
+    then
+        if [ "${CHECK_HAIP2}" == 1 ] && [ "${STAT}" == ok ]
+        then
+            echo "CRITICAL: 2nd HA IP ${HAIP2} is up but machine is BACKUP according to ${STAT_FILE}."
+            raise "${CRITICAL}"
+        elif [ "${CHECK_HAIP2}" == 1 ] && [ "${STAT}" == fail ]
+        then
+            echo "WARNING: 2nd HA IP ${HAIP2} is up, machine is MASTER. Should be BACKUP."
+            raise "${WARNING}"
+        elif [ "${CHECK_HAIP2}" == 0 ] && [ "${STAT}" == fail ]
+        then
+            echo "CRITICAL: 2nd HA IP ${HAIP2} is not up but machine is MASTER according to ${STAT_FILE}. Should be BACKUP."
+            raise "${CRITICAL}"
+        else
+            echo "OK: 2nd HA IP ${HAIP2} is not up and machine is BACKUP."
+        fi
+    fi
+
+    if [ "${CHECK_HAIP}" == 1 ] && [ "${STAT}" == ok ] # Target-actual comparison machine state; IP should be down
+    then
+        echo "CRITICAL: HA IP ${HAIP} is up but machine is BACKUP according to ${STAT_FILE}."
+        raise "${CRITICAL}"
+    elif [ "${CHECK_HAIP}" == 1 ] && [ "${STAT}" == fail ]
+    then
+        echo "WARNING: HA IP ${HAIP} is up, machine is MASTER. Should be BACKUP."
+        raise "${WARNING}"
+    elif [ "${CHECK_HAIP}" == 0 ] && [ "${STAT}" == fail ]
+    then
+        echo "CRITICAL: HA IP ${HAIP} is not up but machine is MASTER according to ${STAT_FILE}. Should be BACKUP."
+        raise "${CRITICAL}"
+    else
+        echo "OK: HA IP ${HAIP} is not up and machine is BACKUP."
+    fi
+
+    exit "${EXIT_CODE}"
+
+else
+    echo "Unknown: Unknown status given. Check ${STAT_FILE} and script usage."
+    exit "${UNKNOWN}"
+fi
diff --git a/check_keepalived/control b/check_keepalived/control
new file mode 100644
index 0000000..31af5c4
--- /dev/null
+++ b/check_keepalived/control
@@ -0,0 +1,4 @@
+Uploaders: Jan Wagner
+Description: plugin checking keepalived status
+Recommends: procps
+Version: 0.0.1
diff --git a/check_keepalived/copyright b/check_keepalived/copyright
new file mode 100644
index 0000000..56a9345
--- /dev/null
+++ b/check_keepalived/copyright
@@ -0,0 +1,7 @@
+Copyright (C) 2021 Stella Sieber / TMT GmbH & Co. KG
+
+License: GPL+
+
+ On Debian systems, the complete text of the GNU General
+ Public License can be found in "/usr/share/common-licenses/GPL".
+