#! /bin/bash
###############################################################################
# Copyright (c) Perforce Software, Inc., 2007-2016. All rights reserved
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# 1. Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#
# 2. Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
# FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL PERFORCE
# SOFTWARE, INC. BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
# ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR
# TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF
# THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
# DAMAGE.
###############################################################################
#
# Last Modified: $Date$
# Submitted by: $Author$
# Revision: $Revision$
#
# Usage:
#
# USAGE is printed verbatim by the usage/help output. The default thresholds
# quoted in the text mirror the "Defaults" section of this script; keep the two
# in step when editing either.
USAGE="
Usage: check_helix_p4d_health -p <p4port> [--all] [-u <p4user>]
       check_helix_p4d_health -p <p4port> [--licensecheck [-l <days>] ]
       check_helix_p4d_health -p <p4port> [--pidcheck [-c <count>] ]
       check_helix_p4d_health -p <p4port> [--p4monitorcheck [-m <count>] ]
       check_helix_p4d_health -p <p4port> [--p4diskcheck [-d <percent>] ]
       check_helix_p4d_health -p <p4port> [--p4repcheck [-r <bytes>] ]
       check_helix_p4d_health -p <p4port> [--version] [--help]

The '--all' flag runs all tests.

The '-u' flag specifies the P4D user name. This user must be an 'operator'
user or must have 'super' access to the P4D server.

The '-p' flag specifies the P4D hostname and port.

The '--licensecheck' flag tests if the license file is nearing its expiry
date. By default it checks for expiry within '30' days but this can be
overridden with the '-l' flag.

The '--p4diskcheck' flag checks for free disk space on the P4D drives using
the Perforce command 'p4 diskspace' and warns if the disks are 95% or more
used. This value can be overridden by specifying a value between 0 and 99
with the '-d' flag.

The '--pidcheck' flag counts the number of connected p4d processes using
'netstat' and warns if there are over 100 processes running. This value can
be overridden with the '-c' flag.

The '--p4monitorcheck' flag counts the number of commands in the 'p4 monitor'
table and warns if there are over 100 running. This value can be overridden
with the '-m' flag.

The '--p4repcheck' flag (REPLICA ONLY) checks the current replication status
for this replica and warns if there is a difference of over 100,000 bytes
between master and replica. This value can be overridden with the '-r' flag.
"

# Description:
#
DESCRIPTION="Helix P4D health checker - Example Nagios monitoring script"
#
# This plugin will run the following checks against your Helix P4D server:
#   - Online?
#   - Licensed and not expiring soon
#   - P4D process count in acceptable range
#   - P4D monitor count in acceptable range
#   - Disk space available on P4D volumes
#   - Replication OK?
#
# Output:
#
#   Each failing check prints a line starting with "WARNING:" or "CRITICAL:".
#   The exit status is the highest state raised by any check
#   (0 = OK, 1 = WARNING, 2 = CRITICAL, 3 = UNKNOWN).
#
# Notes:
#
#   The license check uses 'date -d' and the pid check uses 'netstat -anp'.
#
# Examples:
#
#   Run all checks against server on localhost:1666
#
#     check_helix_p4d_health -p localhost:1666 --all
#
#   Check if license will expire in next 45 days
#
#     check_helix_p4d_health -p localhost:1666 --licensecheck -l 45
#
#   Multiple arguments can be combined to run multiple tests
#
#     check_helix_p4d_health -p localhost:1666 --licensecheck -l 45 --p4diskcheck
#
###############################################################################

# Defaults - Can edit or override at command invocation
LICEXPIRE=30
PIDCOUNT=100
MONCOUNT=100
LOWDISKSPACE=95
REPDIFFERENCE=100000
PROGNAME=$0
P4BIN=/usr/local/bin/p4

# Connection settings live in script-private names: assigning to USER would
# overwrite the login environment variable that child processes inherit.
P4D_PORT=${P4PORT:-}
P4D_USER=${P4USER:-}

STATE_OK=0
STATE_WARNING=1
STATE_CRITICAL=2
STATE_UNKNOWN=3

exitstatus=$STATE_OK

# Command prefix used to run p4; filled in by main() once options are parsed.
P4=("$P4BIN")

#######################################
# Raise the overall exit status if the given state is more severe.
# Globals:   exitstatus (read, written)
# Arguments: $1 - numeric state to record
#######################################
set_status() {
  if (( $1 > exitstatus )); then
    exitstatus=$1
  fi
}

#######################################
# Print one whitespace-separated word of a line.
# Arguments: $1 - the line, $2 - 1-based word index
# Outputs:   the word (an empty line when the index is out of range)
#######################################
get_word() {
  local -a words=()
  read -r -a words <<< "$1"
  printf '%s\n' "${words[$2 - 1]:-}"
}

#######################################
# Warn when the server is unlicensed or the license date is within
# LICEXPIRE days. Reads the 'p4 info' output that server_up stored in RESULT.
# Globals:   RESULT, LICEXPIRE, P4D_PORT (read)
#######################################
license_check() {
  local tip="Tip: Please contact sales@perforce.com to request a new license file."
  local license_line expires expires_sec now days_left

  license_line=$(grep 'Server license:' <<< "$RESULT")
  if [[ "$license_line" == *': none'* ]]; then
    echo "WARNING: P4D server on $P4D_PORT is unlicensed."
    set_status "$STATE_WARNING"
    return
  fi

  # The date is taken as the last word of the last parenthesised group on the
  # line, so parentheses earlier in the line (e.g. in a licensee name) do not
  # confuse the extraction.
  expires=${license_line##*\(}
  expires=${expires%%\)*}
  expires=${expires##* }

  if ! expires_sec=$(date -ud "$expires" +'%s' 2>/dev/null); then
    echo "WARNING: Unable to determine license expiry date from: $license_line"
    set_status "$STATE_WARNING"
    return
  fi

  now=$(date +'%s')
  days_left=$(( (expires_sec - now) / 60 / 60 / 24 ))
  if (( days_left < LICEXPIRE )); then
    echo "WARNING: License expires in $days_left days."
    echo "$tip"
    set_status "$STATE_WARNING"
  fi
}

#######################################
# Run 'p4 info' and flag CRITICAL unless the reply mentions "Server license".
# Globals:   P4 (read), RESULT (written - reused by license_check)
#######################################
server_up() {
  local tip="Tip: Check if the 'p4d' process is running on the box.
     Check the P4D log file for errors if it unexpectedly stopped."

  RESULT=$("${P4[@]}" info 2>&1)
  if [[ "$RESULT" != *"Server license"* ]]; then
    echo "CRITICAL: P4D server not responding!"
    echo "$RESULT"
    echo "$tip"
    set_status "$STATE_CRITICAL"
  fi
}

#######################################
# Count netstat entries for the server port and warn above PIDCOUNT.
# Globals:   P4D_PORT, PIDCOUNT, P4 (read)
#######################################
pid_check() {
  local tip="Tip: This may be caused by a performance problem or by a script that has gone wild.
     Use 'netstat -anp' and '${P4[*]} monitor show -ael' to find the culprits."
  # 'netstat -n' prints numeric addresses, so only the port number that
  # follows the last ':' of the P4PORT value can be matched reliably.
  local port_number=${P4D_PORT##*:}
  local connections

  if [[ ! "$port_number" =~ ^[0-9]+$ ]]; then
    echo "WARNING: pid check failed! No port number found in '$P4D_PORT'."
    set_status "$STATE_WARNING"
    return
  fi

  connections=$(netstat -anp 2>&1 | grep -c -- ":${port_number}[[:space:]]")
  if (( connections > PIDCOUNT )); then
    echo "WARNING: $connections running p4d pids exceeded threshold $PIDCOUNT."
    echo "$tip"
    set_status "$STATE_WARNING"
  fi
}

#######################################
# Count the lines printed by 'p4 monitor show -ael' and warn above MONCOUNT.
# Globals:   P4, MONCOUNT (read), RESULT (written)
#######################################
p4monitor_check() {
  local tip="Tip: This may be caused by a performance problem or by a script that has gone wild.
     Run 'p4 monitor show -ael' and look if most of the commands are run by the same
     user or if there is one very long running command. If you need assistance
     collect the 'p4 monitor show -ael' output and the P4D log file and send them to
     'support@perforce.com'."
  local running

  if ! RESULT=$("${P4[@]}" monitor show -ael 2>&1); then
    echo "WARNING: 'p4 monitor' check failed!"
    echo "$RESULT"
    set_status "$STATE_WARNING"
    return
  fi

  running=$(wc -l <<< "$RESULT")
  if (( running > MONCOUNT )); then
    echo "WARNING: ${running} running p4d commands exceeded threshold $MONCOUNT."
    echo "$tip"
    set_status "$STATE_WARNING"
  fi
}

#######################################
# Compare the two journal positions (words 5 and 6, each "journal/offset")
# on every line of 'p4 servers --replication-status' and warn when the second
# trails the first by more than REPDIFFERENCE bytes, or by a whole journal.
# NOTE(review): the meaning of words 5 and 6 is taken from the original
# variable names (source vs. replica position) - confirm against the p4 docs.
# Globals:   P4, REPDIFFERENCE (read), RESULT (written)
#######################################
p4replication_check() {
  local tip="Tip: There may be a problem with the journal file on the master or the replica
     may have been switched off. Check if the P4D is running on the replica, that
     the replica has network connectivity to the master server and that there are
     no errors in the replica log file."
  local position_re='^([0-9]+)/([0-9]+)$'
  local warning_needed=0
  local line rep_name src_pos rep_pos src_jnl src_off rep_jnl rep_off behind

  if ! RESULT=$("${P4[@]}" servers --replication-status 2>&1); then
    echo "WARNING: 'p4 servers --replication-status' check failed!"
    echo "$RESULT"
    set_status "$STATE_WARNING"
    return
  fi

  while IFS= read -r line; do
    rep_name=$(get_word "$line" 1)
    src_pos=$(get_word "$line" 5)
    rep_pos=$(get_word "$line" 6)

    # Lines without two well-formed positions (blank lines etc.) are skipped
    # instead of being fed to the arithmetic below.
    [[ "$src_pos" =~ $position_re ]] || continue
    src_jnl=${BASH_REMATCH[1]}
    src_off=${BASH_REMATCH[2]}
    [[ "$rep_pos" =~ $position_re ]] || continue
    rep_jnl=${BASH_REMATCH[1]}
    rep_off=${BASH_REMATCH[2]}

    # 10# forces base 10 so a leading zero is not read as octal.
    behind=0
    if (( 10#$src_jnl > 10#$rep_jnl )); then
      # A whole journal rotation behind: the byte lag cannot be bounded.
      behind=1
    elif (( 10#$src_jnl == 10#$rep_jnl \
            && 10#$src_off - 10#$rep_off > REPDIFFERENCE )); then
      behind=1
    fi

    if (( behind )); then
      echo "WARNING: ${rep_name} replica may not be in sync with master."
      echo "         ${line}"
      set_status "$STATE_WARNING"
      warning_needed=1
    fi
  done <<< "$RESULT"

  if (( warning_needed )); then
    echo "$tip"
  fi
}

#######################################
# Warn for every 'p4 diskspace' line whose "(NN%" figure is at or above
# LOWDISKSPACE.
# Globals:   P4, LOWDISKSPACE (read), RESULT (written)
#######################################
p4disk_check() {
  local tip="Tip: Failure to provide enough disk space for temporary actions may cause corruption of the Helix system or backups."
  # Match the percentage wherever it sits on the line rather than relying on
  # a fixed word position.
  local percent_re='\(([0-9]+)%'
  local warning_needed=0
  local line volume used

  if ! RESULT=$("${P4[@]}" diskspace 2>&1); then
    echo "WARNING: Disk space check failed!"
    echo "$RESULT"
    set_status "$STATE_WARNING"
    return
  fi

  while IFS= read -r line; do
    [[ "$line" =~ $percent_re ]] || continue
    used=${BASH_REMATCH[1]}
    volume=$(get_word "$line" 1)
    if (( 10#$used >= LOWDISKSPACE )); then
      echo "WARNING: ${used}% disk space used on ${volume} volume."
      set_status "$STATE_WARNING"
      warning_needed=1
    fi
  done <<< "$RESULT"

  if (( warning_needed )); then
    echo "$tip"
  fi
}

# Print the usage text held in USAGE.
print_usage() {
  echo "$USAGE"
}

# Print program name, revision keyword, description and usage.
print_help() {
  # Single quotes keep the revision keyword literal; unquoted, the shell
  # would expand it as a variable reference.
  printf '%s %s\n' "$PROGNAME" '$Revision$'
  echo ""
  echo " Description: $DESCRIPTION"
  echo ""
  print_usage
  echo ""
}

# Report a command-line error ($1), show the usage text and exit UNKNOWN.
usage_error() {
  echo "$1"
  print_usage
  exit "$STATE_UNKNOWN"
}

#######################################
# Parse the command line, verify the p4 binary and run the selected checks.
# Arguments: the script's command-line arguments
# Returns:   never returns; exits with the highest state raised
#######################################
main() {
  exitstatus=$STATE_OK #default

  while (( $# > 0 )); do
    # Options that take a value must be followed by one ...
    case "$1" in
      -u|-p|-c|-d|-l|-m|-r|--exitstatus)
        if (( $# < 2 )); then
          usage_error "Missing value for argument: $1"
        fi
        ;;
    esac
    # ... and thresholds must be plain non-negative integers.
    case "$1" in
      -c|-d|-l|-m|-r|--exitstatus)
        if [[ ! "$2" =~ ^[0-9]+$ ]]; then
          usage_error "Argument $1 needs a non-negative integer, got: $2"
        fi
        ;;
    esac

    case "$1" in
      --help|--version|-h|-V)
        print_help
        exit "$STATE_OK"
        ;;
      -u)
        P4D_USER=$2
        shift
        ;;
      -p)
        P4D_PORT=$2
        shift
        ;;
      -C|--pidcheck)
        PIDCHECK=true
        ;;
      -c)
        PIDCOUNT=$(( 10#$2 ))
        shift
        ;;
      -D|--p4diskcheck)
        P4DISKCHECK=true
        ;;
      -d)
        LOWDISKSPACE=$(( 10#$2 ))
        shift
        ;;
      -L|--licensecheck)
        LICCHECK=true
        ;;
      -l)
        LICEXPIRE=$(( 10#$2 ))
        shift
        ;;
      -M|--p4monitorcheck)
        P4MONCHECK=true
        ;;
      -m)
        MONCOUNT=$(( 10#$2 ))
        shift
        ;;
      -R|--p4repcheck)
        P4REPCHECK=true
        ;;
      -r)
        REPDIFFERENCE=$(( 10#$2 ))
        shift
        ;;
      --all)
        LICCHECK=true
        PIDCHECK=true
        P4MONCHECK=true
        P4DISKCHECK=true
        P4REPCHECK=true
        ;;
      --exitstatus)
        # Exits immediately with the given status, before any check runs.
        exitstatus=$2
        exit "$exitstatus"
        ;;
      --debug)
        set -x
        ;;
      *)
        usage_error "Unknown argument: $1"
        ;;
    esac
    shift
  done

  # Setup p4
  if [[ ! -f "$P4BIN" || ! -x "$P4BIN" ]]; then
    echo "WARNING: ${P4BIN} is not executable. No checks run."
    exit "$STATE_WARNING"
  fi

  # Pass -p/-u only when a value is known; an empty value would make the flag
  # consume the next word (the p4 sub-command) as its argument.
  P4=("$P4BIN")
  if [[ -n "$P4D_PORT" ]]; then
    P4+=(-p "$P4D_PORT")
  fi
  if [[ -n "$P4D_USER" ]]; then
    P4+=(-u "$P4D_USER")
  fi

  # Run tests
  server_up
  if (( exitstatus < STATE_CRITICAL )); then
    if [[ "${LICCHECK:-}" == "true" ]]; then license_check; fi
    if [[ "${PIDCHECK:-}" == "true" ]]; then pid_check; fi
    if [[ "${P4MONCHECK:-}" == "true" ]]; then p4monitor_check; fi
    if [[ "${P4DISKCHECK:-}" == "true" ]]; then p4disk_check; fi
    if [[ "${P4REPCHECK:-}" == "true" ]]; then p4replication_check; fi
  fi

  exit "$exitstatus"
}

main "$@"