Current File : //var/dcc/libexec/dcc-nagios
#! /bin/sh

# This is a simple Nagios plug-in to monitor a DCC client or server.
#   See `dcc-nagios -h` for some documentation.

# Copyright (c) 2012 by Rhyolite Software, LLC
#
# This agreement is not applicable to any entity which sells anti-spam
# solutions to others or provides an anti-spam solution as part of a
# security solution sold to other entities, or to a private network
# which employs the DCC or uses data provided by operation of the DCC
# but does not provide corresponding data to other users.
#
# Permission to use, copy, modify, and distribute this software without
# changes for any purpose with or without fee is hereby granted, provided
# that the above copyright notice and this permission notice appear in all
# copies and any distributed versions or copies are either unchanged
# or not called anything similar to "DCC" or "Distributed Checksum
# Clearinghouse".
#
# Parties not eligible to receive a license under this agreement can
# obtain a commercial license to use DCC by contacting Rhyolite Software
# at sales@rhyolite.com.
#
# A commercial license would be for Distributed Checksum and Reputation
# Clearinghouse software.  That software includes additional features.  This
# free license for Distributed ChecksumClearinghouse Software does not in any
# way grant permision to use Distributed Checksum and Reputation Clearinghouse
# software
#
# THE SOFTWARE IS PROVIDED "AS IS" AND RHYOLITE SOFTWARE, LLC DISCLAIMS ALL
# WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES
# OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL RHYOLITE SOFTWARE, LLC
# BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES
# OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
# WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
# ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
#
#	Rhyolite Software DCC 1.3.152-1.5 $Revision$
#	Generated automatically from dcc-nagios.in by configure.


# Print the full help text on stdout: the one-line usage summary in $USAGE
# followed by a description of the two modes and of every option.
# Reads the globals $USAGE and $ME, which are set before this is called.
help () {
cat <<EOF
    $USAGE

    This is a simple Nagios plug-in to monitor a DCC client or server.

    Use "$ME" to check whether a DCC client can reach a DCC server
    by parsing the output of "cdcc info".  In this mode $ME must
    run on the DCC client being monitored, because "cdcc info" shows
    information about known DCC servers in the local /var/dcc/map file.
    NRPE can be useful if the system running Nagios is not the DCC client to
    be monitored.

    Use "$ME -s server" to check the health of a DCC server.
    In this mode $ME uses cdcc commands including "cdcc stats"
    to send UDP packets asking the DCC server how it feels.  NRPE can be
    useful if the DCC software is not installed on the Nagios system.

    The health of a DCC client is determined by the average delay a client
    has seen to the current fastest server.  The health of a single server
    is also determined by the speed of its answers.

    -t msec can be used to change the default healthy delay threshold.
    A DCC server that has no working DCC flooding peers delays its answers
    by an extra 400 milliseconds.  A DCC client that should be using
    a local DCC server might see delays increase from less than 100 ms.
    to several 100 ms.

    -x		debug script
    -h		this message
    -v		increase verbosity
    -s srvr	name or IP address of DCC server
    -T /tmp	directory to keep file for this script
    -t msec	tolerable DCC server queuing delay in milliseconds
    -C cdcc	alternative to /usr/local/bin/cdcc
    -m map	alternative to /var/dcc/map
    -i ID	client- or server-ID
    -p passwd	password for client- or server-ID
    -G on|off	check greylist server
EOF
}

# Name this script was invoked as, used as a prefix in messages.
# "$0" is quoted so that a path containing blanks is not split into a
# name and a suffix argument for basename.
ME=`basename "$0"`
# One-line (wrapped) option summary shown by usage() and help().
USAGE="$ME: [-xhv] [-s server[,port]] [-T /tmp] [-t msec] [-C cdcc]
	[-m map-file] [-i ID] [-p passwd] [-G on|off]"

# Complain on stderr about how the script was called: the complete help
# text when -h has already selected help mode, otherwise only the short
# usage summary.  Exiting is left to the caller.
usage () {
    case "$MODE" in
	help) help 1>&2;;
	*) echo "$USAGE" 1>&2;;
    esac
}

# Nagios plug-in exit codes.
EXIT_OK=0
EXIT_WARN=1
EXIT_CRIT=2
EXIT_UNK=3

# Defaults, overridden by the options parsed below.
CDCC=/usr/local/bin/cdcc
VERBOSE=0
SRVR_PARMS=		# cdcc commands accumulated from -i and -p
CLNT_PARMS=
MODE=client		# client, srvr (after -s), or help (after -h)
TMPDIR=/tmp
MAP=
GREY=			# non-empty after "-G on"
SRVR=
OK_DELAY=400		# delay in ms at or above which a warning is raised
while getopts "xhvs:T:t:C:m:i:p:G:" c; do
    case $c in
	x) set -x;;
	h) MODE=help;;
	v) VERBOSE=`expr $VERBOSE + 1`;;
	s) MODE=srvr; SRVR="$OPTARG";;
	# a bad -T or -t value is reported but the default is kept
	T) if test -d "$OPTARG" -a -w "$OPTARG"; then
		TMPDIR=$OPTARG
	    else
		echo "invalid temporary directory \"$OPTARG\"" 1>&2
	    fi
	    ;;
	t) if expr "$OPTARG" : '[0-9][0-9]*$' >/dev/null; then
		OK_DELAY=$OPTARG
	    else
		echo "invalid delay in -t $OPTARG" 1>&2
	    fi
	    ;;
	C) CDCC="$OPTARG";;
	m) MAP="$OPTARG";;
	i) SRVR_PARMS="$SRVR_PARMS id $OPTARG;";;
	p) SRVR_PARMS="$SRVR_PARMS password $OPTARG;";;
	G) case "$OPTARG" in
	    [oO][nN]) GREY='1,/^#.* greylist /d';;
	    [oO][fF][fF]) GREY= ;;
	    # anything but on or off is a usage error like any other;
	    # do not go on to run a check the caller did not ask for
	    *) usage; exit $EXIT_UNK;;
	    esac
	    ;;
	*) usage; exit $EXIT_UNK;;
    esac
done
shift `expr $OPTIND - 1 || true`
# no positional arguments are accepted
if test "$#" -ne 0; then
    usage; exit $EXIT_UNK
fi

# -vvv (or more) turns on shell tracing instead of verbose output
if test $VERBOSE -ge 3; then
    set -x
    VERBOSE=0
fi


# sed pattern to find server delay from `cdcc info` output
#   It is meant for `sed -n`.  Only the lines from one starting with "# *"
#   through the next one containing "requests ok" are examined.  In that
#   range a line of the form "...ok  NNN<more digits, '.', '+' or '-'> ms..."
#   is replaced by the leading integer NNN and printed.
#   NOTE(review): "# *" presumably marks the server cdcc currently prefers;
#   confirm against real `cdcc info` output.
DELAY_PAT='/^# \*/,/requests ok/s/.*ok  *\([0-9]\{1,\}\)[-+.0-9]* ms.*/\1/p'


# Do the work selected by the options.  Every arm prints a one-line status
# (preceded by cdcc output when verbose) and ends the script with one of
# $EXIT_OK, $EXIT_WARN, $EXIT_CRIT, or $EXIT_UNK.
case $MODE in
help)
    help
    exit $EXIT_OK
    ;;



client)
    # Things are OK for a DCC client if there is at least one working server
    # and its average delay is less than the 400 ms that results from
    # having no working flood peers.
    # Things are critical if there is no working server.
    # Only warn if the best working server has long delays.

    # $GREY becomes a sed command that removes the part of the `cdcc info`
    # output that is not wanted: everything from the greylist header on
    # when checking DCC servers, everything up to it for greylist servers.
    if test -z "$GREY"; then
	GREY='/^# [0-9/]* [0-9:]* .* greylist /,$d'
	GREYLABEL="servers"
    else
	GREY='1,/^# [0-9/]* [0-9:]* .* greylist /d'
	GREYLABEL="greylist servers"
	CLNT_PARMS="$CLNT_PARMS grey on;"
    fi
    if test $VERBOSE -gt 0 -a -n "$SRVR_PARMS"; then
	echo "$ME: client mode does not use -i or -p"
    fi
    # ${MAP:=map} falls back to the file name "map" when -m was not given
    INFO=`$CDCC -q "$CLNT_PARMS quiet off; file ${MAP:=map}; info" 2>&1`
    # number of working servers announced in the summary line
    SRVRS=`echo "$INFO"							\
	| sed -n -e "$GREY"						\
	    -e 's/.* total, \([0-9][0-9]*\) working servers.*$/\1/p'`
    if test -z "$SRVRS"; then
	echo "$ME: 'cdcc$CLNT_PARMS info' failed"
	if test $VERBOSE -ge 1; then
	    echo "$INFO"
	fi
	exit $EXIT_UNK
    fi
    if test $VERBOSE -ge 2; then
	echo "$INFO"
    fi
    if test "$SRVRS" -eq 0; then
	echo "DCC client CRITICAL: $SRVRS working $GREYLABEL"
	exit $EXIT_CRIT
    fi
    DELAY=`echo "$INFO" | sed -n -e "$GREY" -e "$DELAY_PAT"`
    if test -z "$DELAY"; then
	echo "$ME: failed to compute delay"
	exit $EXIT_UNK
    fi
    if test "$DELAY" -ge "$OK_DELAY"; then
	echo "DCC client WARNING: $SRVRS working $GREYLABEL; $DELAY ms delay"
	exit $EXIT_WARN
    fi
    echo "DCC client OK: $SRVRS working $GREYLABEL; $DELAY ms delay"
    exit $EXIT_OK
    ;;



srvr)
    # A DCC server is OK if it answers and its announced delay is less than
    # the 400 ms that results from having no working flood peers.
    # Warn about its status if it answers but with long delays.
    # Its status is critical if it does not answer.

    # This file is created when flooding is first seen to be broken and
    # removed when it works again, so its age says how long it has been bad.
    FFILE="$TMPDIR/.dcc-nagios-$SRVR-flood"

    if test $VERBOSE -gt 0 -a -n "$MAP$CLNT_PARMS"; then
	echo "$ME: -s or server mode does not use -m"
    fi
    if test -z "$GREY"; then
	GREYLABEL="DCC server $SRVR"
    else
	GREYLABEL="DCC greylist server $SRVR"
	SRVR_PARMS="grey on; $SRVR_PARMS"
    fi

    # see what the server says
    SOUT=`$CDCC -q "$SRVR_PARMS quiet off; host $SRVR; stats; info; flood list; clock check" 2>&1`
    if test $VERBOSE -ge 2; then
	echo "$SOUT"
    fi
    DELAY=`echo "$SOUT" | sed -n -e "$DELAY_PAT"`

    # critical problem if the server did not answer
    if test -z "$DELAY"; then
	/bin/rm -f "$FFILE"
	echo "$GREYLABEL CRITICAL: not answering"
	exit $EXIT_CRIT
    fi

    # "@1@" is a placeholder for the first separator in $STATE; it is
    # turned into " and " or ", " once it is known whether a clock
    # complaint will be appended.
    STATE="$DELAY ms delay"
    WARN=
    if test "$DELAY" -ge "$OK_DELAY"; then
	# possible problem if the server is slow
	WARN=yes
    fi

    # check flooding
    # NOTE(review): if no line starting with "flood" is found, FTOTAL, FOUT
    # and FIN are empty and the numeric tests below fail; confirm what
    # should be reported in that case.
    FLINE=`echo "$SOUT" | sed -n -e '/^ *flood/p'`
    FTOTAL=`expr "$FLINE" : '.* \([0-9][0-9]*\) streams .*'`
    FOUT=`expr "$FLINE" : '.* \([0-9][0-9]*\) out .*'`
    FIN=`expr "$FLINE" : '.* \([0-9][0-9]*\) in .*'`
    # F is the smaller of the incoming and outgoing stream counts
    if test "$FIN" -le "$FOUT"; then
	F="$FIN"
    else
	F="$FOUT"
    fi
    FPASSIVE=`echo "$SOUT" | sed -n -e '/forced passive/p' | wc -l | tr -d ' '`
    ANAT=`echo "$SOUT" | sed -n -e '/auto-NAT/p' | wc -l | tr -d ' '`
    # "$FPASSIVE$ANAT" is the two counts pasted together; it is numerically
    # zero only when both counts are zero
    if test "$F" -ge "$FTOTAL" -a "$FPASSIVE$ANAT" -eq 0; then
	# do not mention missing peers of an isolated greylist server
	if test "$FTOTAL" -ne 0 -o -z "$GREY"; then
	    STATE="$STATE@1@$FTOTAL working flood peers"
	fi
	/bin/rm -f "$FFILE"
    else
	if test "$F" -eq 0; then
	    FMSG="flooding not working"
	else
	    if test "$F" -lt "$FTOTAL"; then
		FMSG="only $F of $FTOTAL flood peers working"
	    else
		if test "$FPASSIVE" -ne 0; then
		    if test "$FPASSIVE" -ne 1; then
			PLURAL=s
		    else
			PLURAL=
		    fi
		    FMSG="$FPASSIVE peer$PLURAL forcing passive flooding"
		else
		    if test "$ANAT" -ne 1; then
			PLURAL=s
		    else
			PLURAL=
		    fi
		    FMSG="using auto-NAT flooding with $ANAT peer$PLURAL"
		fi
	    fi
	fi
	# keep the oldest complaint so the file's age marks when it started
	if test ! -s "$FFILE"; then
	    echo "$FMSG" >"$FFILE"
	fi
	STATE="$STATE@1@$FMSG"
    fi

    # problem if flooding has been broken for at least 2 hours,
    OLDFILE=`find "$FFILE" -mtime +2h 2>/dev/null`
    if test -z "$OLDFILE"; then
	# deal with old version of `find` by waiting a day or perhaps 2
	OLDFILE=`find "$FFILE" -mtime +1 2>/dev/null`
    fi
    if test -n "$OLDFILE"; then
	WARN=yes
    fi

    # check the clock, while ignoring "invalid ADMN UNKNOWN" from old servers
    CLOCK_DELTA=`echo "$SOUT"						\
		| sed -n -e 's/.*clocks differ by about -*\([0-9]*\) .*/\1/p'`
    # No reported difference leaves CLOCK_DELTA empty.  That is not a clock
    # problem and must not be handed to a numeric test, which would fail
    # and make the script announce a clock difference of "" seconds.
    if test -z "$CLOCK_DELTA" || test "$CLOCK_DELTA" -lt 5; then
	STATE=`echo "$STATE" | sed -e 's/@1@/ and /'`
    else
	STATE=`echo "$STATE" | sed -e 's/@1@/, /'`
	STATE="$STATE, and server clock differs by about $CLOCK_DELTA seconds"
	# only warn when cdcc itself says the difference is too large
	CLOCK_BAD=`echo "$SOUT"						\
		    | sed -n -e 's/.*which is more than .* allowed.*/yes/p'`
	if test -n "$CLOCK_BAD"; then
	    WARN=yes
	fi
    fi

    # announce a problem
    if test -n "$WARN"; then
	echo "$GREYLABEL WARNING: $STATE"
	exit $EXIT_WARN
    fi

    echo "$GREYLABEL OK: $STATE"
    exit $EXIT_OK
    ;;
esac