#!/bin/bash
#==============================================================================
# Copyright and license info is available in the LICENSE file included with
# the Server Deployment Package (SDP), and also available online:
# https://swarm.workshop.perforce.com/projects/perforce-software-sdp/view/main/LICENSE
#------------------------------------------------------------------------------

#------------------------------------------------------------------------------
# sdp_health_check.sh
#
# Checks the SDP environment and generates report.
#
# Goals:
# * Do only fast checks, and avoid going interactive.
# * Do only checks that can be done regardless of whether p4d is running.
# * Require no command line arguments.
# * Work for any SDP version since 2007.
# * Check for risk of impact due to SDP-341.
# * Reliably determine the SDP version range, at least enough to determine
#   a range of revisions, i.e. pre-2016.2, 2016.2->2018.1, 2018.2+
# * Be run as the user account under which 'p4d' runs.
#
# Assumptions:
# * The SDP has always used /p4/common/bin/p4_vars as the shell environment
#   file. This is consistent across all SDP versions.
# * The SDP has always included a script named /p4/common/bin/p4master_run
# * Calling '/p4/common/bin/p4master_run N' (where N is an instance name)
#   will set the shell environment for that instance.
# * All versions of P4D will have a $P4ROOT/db.domain file.
# * Essential utilities are in the PATH, e.g.: grep, awk, id
#
# Caveats:
# This should NOT be run when a live checkpoint is running.

#==============================================================================
# Declarations and Environment

# Prefix global vars with HC_ to avoid name collisions.
export HC_SDP_ENV=/p4/common/bin/p4_vars
export HC_SDP_MRUN=/p4/common/bin/p4master_run
export HC_SDP_P4CBIN=/p4/common/bin
export HC_SDP_INSTANCE_LIST=
export HC_SDP_INSTALLED_VERSION=Unknown
export HC_THISSCRIPT=${0##*/}

declare -i ServerOnline=0
declare -i WarningCount=0
declare -i ErrorCount=0
declare -i CheckCount=0
declare -i ExitCode=0
declare -i OverallExitCode=0
declare Version=1.0.1
declare H1="=============================================================================="
declare H2="------------------------------------------------------------------------------"

# Use a timestamped log name when 'date' is available. Note the seconds field
# is %S (00-59); %s would be seconds since the epoch.
if command -v date > /dev/null 2>&1; then
   LOGFILE="/tmp/sdp_health_check.$(date +'%Y%m%d-%H%M%S').log"
else
   LOGFILE=/tmp/sdp_health_check.log
fi
export LOGFILE

#==============================================================================
# Local Functions

# Note: This script does not use SDP library files, as its purpose is to
# verify the integrity of an SDP installation. Thus, it has its own
# self-contained versions of some functions that would normally be
# sourced in from files like /p4/common/lib/libcore.sh.

# Micro-functions, one-liners used to avoid external dependencies.
# All messages are appended to $LOGFILE; the log is displayed on stdout by
# terminate() when the script exits.
function msg () { echo -e "$*" >> "$LOGFILE"; }
function errmsg () { msg "\nError: ${1:-Unknown Error}\n"; }
# bail: log an error and exit. Once the exit trap is installed, terminate()
# exits with $OverallExitCode (set to 1 here) rather than with $2.
function bail () { errmsg "${1:-Unknown Error}"; OverallExitCode=1; exit "${2:-1}"; }

#------------------------------------------------------------------------------
# Function: run
#
# Run a command, capturing its stdout and stderr, and optionally append the
# captured output (plus an EXIT_CODE line) to the log.
#
# Inputs:
# $1 - Command line to run, as a single string. Default is "echo".
# $2 - Optional description, logged before the command is executed.
# $3 - Show output flag: 1 (default) appends the command output to the log,
#      0 discards it.
#
# Return Codes:
# The exit code of the command, or 1 if a temp file could not be created.
#------------------------------------------------------------------------------
function run () {
   local cmd="${1:-echo}"
   local desc="${2:-}"
   local -i showOutput="${3:-1}"
   local -i exitCode=0
   local log=

   log=$(mktemp) || { errmsg "Could not create a temp file to run: $cmd"; return 1; }

   [[ -n "$desc" ]] && msg "$desc"
   msg "Executing: $cmd"

   # Intentionally unquoted: the command is passed in as one string and must
   # be split into words to be executed.
   # shellcheck disable=SC2086
   $cmd > "$log" 2>&1
   exitCode=$?

   # Arithmetic test, so that a value of 0 really does suppress the output.
   if (( showOutput )); then
      echo "EXIT_CODE: $exitCode" >> "$log"
      cat "$log" >> "$LOGFILE"
   fi

   /bin/rm -f "$log"
   return "$exitCode"
}

#------------------------------------------------------------------------------
# Function: usage (required function)
#
# Input:
# $1 - style, either -h (for short form) or -man (for man-page like format).
# The default is -h.
#
# $2 - error message (optional). Specify this if usage() is called due to
# user error, in which case the given message is displayed first, followed by
# the standard usage message (short or long depending on $1). If displaying an
# error, usually $1 should be -h so that the longer usage message doesn't
# obscure the error message.
#
# This function always exits with status 1.
#
# Sample Usage:
# usage
# usage -man
# usage -h "Incorrect command line usage."
#
# This last example generates a usage error message followed by the short
# '-h' usage summary.
#------------------------------------------------------------------------------
function usage
{
   declare style=${1:--h}
   declare errorMessage=${2:-Unset}

   if [[ $errorMessage != Unset ]]; then
      echo -e "\n\nUsage Error:\n\n$errorMessage\n\n" >&2
   fi

   # The synopsis reflects what the command line parser accepts: no instance
   # argument is supported; all detected instances are checked.
   echo "USAGE for sdp_health_check.sh v$Version:

sdp_health_check.sh [-D]

   or

sdp_health_check.sh -h|-man
"
   if [[ $style == -man ]]; then
      echo -e "DESCRIPTION:
	This script checks the current SDP setup for all SDP instances
	detected on this machine.

	Useful if you change anything, particularly after an SDP upgrade.

OPTIONS:
 -D	Set extreme debugging verbosity.

HELP OPTIONS:
 -h	Display short help message
 -man	Display man-style help message

EXAMPLES:
	This script is typically called with no arguments.

LOGGING:
	This script generates a log file and also displays it to stdout at the
	end of processing. By default, the log is:
	/tmp/sdp_health_check.<datestamp>.log

	or

	/tmp/sdp_health_check.log

	The exception is usage errors, which result in an error being sent to
	stderr followed by usage info on stdout, followed by an immediate exit.

EXIT CODES:
	An exit code of 0 indicates no errors or warnings were encountered.
"
   fi

   exit 1
}

#------------------------------------------------------------------------------
# Function: check_file
#
# Check that a file exists, and give an error if it does not. The error,
# describing the missing file, is configurable.
#
# Inputs:
# $1 - File path. Required.
# $2 - Error message to display if the file is missing. Optional; default
# is "Missing file"
#
# Return Codes:
# 1 - File is missing (ErrorCount is incremented).
# 0 - File exists.
#------------------------------------------------------------------------------
function check_file () {
   local file="$1"
   local err_msg="${2:-Missing file}"

   CheckCount+=1
   msg "Checking existence of file: $file"

   # Success is 0, consistent with the other check_* functions.
   [[ -f "$file" ]] && return 0

   msg "$err_msg [$file]."
   ErrorCount+=1
   return 1
}

#------------------------------------------------------------------------------
# Function: check_configurable
#
# Check that a configurable is set, and optionally check that it is set to
# an expected value. The value is read with '$P4DBIN -r $P4ROOT -cshow', so
# the SDP shell environment for the instance must already be loaded.
#
# Inputs:
# $1 - SDP Instance. Required.
# $2 - Configurable name. Required.
# $3 - Configurable scope. Default is "any"
# $4 - Expected value of variable. Optional. If defined, an additional check is
# done, checking the current value against the expected value.
# $5 - Optional error message to display if no value is defined. See code
# below for the default message.
# $6 - Optional error message to display if a value is defined but does not match
# the expected value. See code below for the default message.
#
# Return Codes:
# 1 - Verifications failed.
# 0 - Verifications passed.
#
# Sample Usage:
# check_configurable $SDP_INSTANCE journalPrefix
# check_configurable $SDP_INSTANCE journalPrefix any "$CHECKPOINTS/$P4SERVER"
#
# check_configurable $SDP_INSTANCE journalPrefix any "$CHECKPOINTS/$P4SERVER" ||\
# bail "Yikes, journalPrefix is not set, all bets are off. Aborting."
#------------------------------------------------------------------------------
function check_configurable () {
   # shellcheck disable=SC2034 # instance is accepted for interface symmetry; not used here.
   local instance="$1"
   local configurable="$2"
   local scope="${3:-any}"
   local expectedValue="${4:-NoExpectedValue}"
   local errMsgMissing="${5:-No value defined}"
   local errMsgBadValue="${6:-Value does not match what is expected}"
   local value=

   CheckCount+=1
   value=$("$P4DBIN" -r "$P4ROOT" -cshow | grep "^${scope}: ${configurable} = ")

   if [[ -n "$value" ]]; then
      # Strip only through the first ' = ', so that values which themselves
      # contain ' = ' are preserved intact.
      value=${value#* = }
      msg "Verified: Configurable ${scope}:${configurable} is defined."
   else
      errmsg "$errMsgMissing for configurable [${scope}:${configurable}]."
      ErrorCount+=1
      return 1
   fi

   [[ "$expectedValue" == "NoExpectedValue" ]] && return 0

   CheckCount+=1
   if [[ "$value" == "$expectedValue" ]]; then
      msg "Verified: Configurable ${scope}:${configurable} has expected value [$value]."
   else
      errmsg "$errMsgBadValue for variable [${scope}:${configurable}]\n\tExpected value: [$expectedValue]\n\tActual value: [$value]"
      ErrorCount+=1
      return 1
   fi

   return 0
}

#------------------------------------------------------------------------------
# Function: check_env_var
#
# Check that a shell environment variable is set when sourcing the SDP
# environment. Optionally checks that variables are set to expected values.
#
# Inputs:
# $1 - SDP Instance. Required.
# $2 - Variable name. Required.
# $3 - Expected value of variable. Optional. If defined, an additional check is
# done, checking the current value against the expected value.
# $4 - Optional error message to display if no value is defined. See code
# below for the default message.
# $5 - Optional error message to display if a value is defined but does not match
# the expected value. See code below for the default message.
#
# Return Codes:
# 1 - Verifications failed.
# 0 - Verifications passed.
#
# Sample Usage:
# check_env_var $SDP_INSTANCE P4JOURNAL "/p4/$SDP_INSTANCE/logs/journal"
#
# check_env_var $SDP_INSTANCE P4JOURNAL "/p4/$SDP_INSTANCE/logs/journal" ||\
# bail "Yikes, P4JOURNAL is not set, all bets are off. Aborting."
#------------------------------------------------------------------------------
function check_env_var () {
   local instance="$1"
   local var="$2"
   local expectedValue="${3:-NoExpectedValue}"
   local errMsgMissing="${4:-No value defined}"
   local errMsgBadValue="${5:-Value does not match what is expected}"
   local value=

   CheckCount+=1

   # Clear the variable, then reload the SDP environment and read the value
   # back by indirect expansion. Unset-variable checking is suspended only
   # while sourcing and reading, and is always restored afterward.
   unset -v "$var"
   set +u
   # shellcheck disable=SC1090
   source "$HC_SDP_ENV" "$instance"
   value="${!var}"
   set -u

   if [[ -n "$value" ]]; then
      msg "Verified: Variable ${var} is defined."
   else
      errmsg "$errMsgMissing for variable [$var]."
      ErrorCount+=1
      return 1
   fi

   [[ "$expectedValue" == "NoExpectedValue" ]] && return 0

   CheckCount+=1
   if [[ "$value" == "$expectedValue" ]]; then
      msg "Verified: Variable ${var} has expected value [$value]."
   else
      errmsg "$errMsgBadValue for variable [$var]\n\tExpected value: [$expectedValue]\n\tActual value: [$value]"
      ErrorCount+=1
      return 1
   fi

   return 0
}

#------------------------------------------------------------------------------
# Function: get_sdp_installed_version ()
#
# Set HC_SDP_INSTALLED_VERSION based on the '# Version ' line (if any) found
# in /p4/common/bin/triggers/CaseCheckTrigger.py:
# * File unreadable, or no '# Version ' line: Pre-2016.2.21193
# * '# Version 2.0.*':                        2016.2.21193
# * Any other '# Version ' value:             2018.1
function get_sdp_installed_version () {
   export HC_SDP_INSTALLED_VERSION=Unknown
   declare VCheckFile=/p4/common/bin/triggers/CaseCheckTrigger.py
   declare VCheckVer=

   if [[ -r "$VCheckFile" ]]; then
      VCheckVer=$(grep '# Version ' "$VCheckFile")
      if [[ -n "$VCheckVer" ]]; then
         # The grep output includes the '# Version ' text itself, so match
         # the version number following it rather than the start of the line.
         if [[ "$VCheckVer" == *"# Version 2.0."* ]]; then
            export HC_SDP_INSTALLED_VERSION=2016.2.21193
         else
            export HC_SDP_INSTALLED_VERSION=2018.1
         fi
      else
         export HC_SDP_INSTALLED_VERSION=Pre-2016.2.21193
      fi
   else
      export HC_SDP_INSTALLED_VERSION=Pre-2016.2.21193
   fi
}

#------------------------------------------------------------------------------
# Function: do_preflight_checks ()
#
# Verify that we can cd to $HC_SDP_P4CBIN (bails if not), and that the current
# user owns that directory. Exports HC_SDP_OWNER and ThisUser.
#
# Return Codes:
# 1 - Current user does not own $HC_SDP_P4CBIN.
# 0 - Preflight checks passed.
function do_preflight_checks () {
   ExitCode=0

   msg "$H2\nDoing preflight sanity checks."
   msg "Preflight Check 1: cd $HC_SDP_P4CBIN"
   CheckCount+=1
   cd "$HC_SDP_P4CBIN" >> "$LOGFILE" 2>&1 ||\
      bail "Could not cd to: $HC_SDP_P4CBIN"
   msg "Verified: cd works to: $HC_SDP_P4CBIN"

   msg "Preflight Check 2: Checking current user owns $HC_SDP_P4CBIN"
   # The owner is taken from the current directory (we just cd'd into it).
   HC_SDP_OWNER=$(ls -ld . | awk '{print $3}')
   ThisUser=$(id -n -u)
   export HC_SDP_OWNER ThisUser

   CheckCount+=1
   if [[ "$ThisUser" == "$HC_SDP_OWNER" ]]; then
      msg "Verified: Current user [$ThisUser] owns $HC_SDP_P4CBIN"
   else
      ErrorCount+=1
      errmsg "Current user [$ThisUser] does not own $HC_SDP_P4CBIN."
      ExitCode=1
      OverallExitCode=1
   fi

   return "$ExitCode"
}

#------------------------------------------------------------------------------
# Function: get_sdp_instances ()
#
# Get the list of SDP instances. An instance N is detected when the file
# /p4/N/root/db.counters is readable. The space-delimited result is exported
# as HC_SDP_INSTANCE_LIST (empty if no instances are found).
function get_sdp_instances () {
   local dbFile=
   local instance=

   HC_SDP_INSTANCE_LIST=

   # Glob rather than parse 'ls' output. If nothing matches, the pattern is
   # left as a literal and is skipped by the readability test.
   for dbFile in /p4/*/root/db.counters; do
      [[ -r "$dbFile" ]] || continue
      instance=${dbFile#/p4/}
      instance=${instance%%/*}
      HC_SDP_INSTANCE_LIST+=" $instance"
   done

   # Trim leading space.
   export HC_SDP_INSTANCE_LIST="${HC_SDP_INSTANCE_LIST# }"
}

#------------------------------------------------------------------------------
# Function: check_sdp_instance ()
#
# Check a single SDP instance by loading its SDP shell environment, then log
# a summary of checks and errors for that instance only.
#
# Inputs:
# $1 - SDP Instance. Required.
#
# Return Codes:
# 1 - No instance given, or the SDP environment could not be loaded.
# 0 - Checks were executed (errors, if any, are reflected in ErrorCount and
#     OverallExitCode).
function check_sdp_instance () {
   export SDP_INSTANCE=${1:-UnsetSDPInstance}

   # Remember the global counters on entry so the summary reports only this
   # instance, and an error in one instance is not carried over to the next.
   local -i errorsAtStart=$ErrorCount
   local -i checksAtStart=$CheckCount
   local -i instanceErrors=0
   local -i instanceChecks=0
   local -i envLoadStatus=0

   if [[ "$SDP_INSTANCE" == UnsetSDPInstance ]]; then
      errmsg "Invalid call to check_sdp_instance(), no instance parameter. Skipping further checks for this instance."
      ErrorCount+=1
      OverallExitCode=1
      return 1
   fi

   msg "$H1\nChecking SDP instance $SDP_INSTANCE."

   # Loading the environment counts as a check. Unset-variable checking is
   # suspended while sourcing, as the environment file is outside our control.
   CheckCount+=1
   set +u
   # shellcheck disable=SC1090
   source "$HC_SDP_ENV" "$SDP_INSTANCE"
   envLoadStatus=$?
   set -u

   if [[ $envLoadStatus -ne 0 ]]; then
      errmsg "Failed to load SDP environment for instance $SDP_INSTANCE. Skipping further checks for this instance."
      ErrorCount+=1
      OverallExitCode=1
      return 1
   fi

   instanceErrors=$((ErrorCount - errorsAtStart))
   instanceChecks=$((CheckCount - checksAtStart))

   if [[ $instanceErrors -eq 0 ]]; then
      msg "\n${H1}\nALL CLEAN: $instanceChecks verifications completed OK for instance $SDP_INSTANCE."
   else
      msg "\n${H1}\nVerifications completed, with $instanceErrors errors detected in $instanceChecks checks for instance $SDP_INSTANCE."
      OverallExitCode=1
   fi

   return 0
}

#------------------------------------------------------------------------------
# Function: terminate
#
# Trap handler for EXIT, SIGINT and SIGTERM: logs the overall exit code,
# displays the log file on stdout, and exits with $OverallExitCode.
function terminate
{
   # Disable signal trapping.
   trap - EXIT SIGINT SIGTERM

   msg "$HC_THISSCRIPT: EXITCODE: $OverallExitCode"
   cat "$LOGFILE"

   exit "$OverallExitCode"
}

#==============================================================================
# Command Line Processing

declare -i shiftArgs=0

set +u
while [[ $# -gt 0 ]]; do
   case $1 in
      (-h) usage -h;;
      (-man) usage -man;;
      (-D) set -x;; # Debug; use 'set -x' mode.
      (-*) usage -h "Unknown command line flag ($1).";;
      (*) usage -h "Unknown command line fragment ($1).";;
   esac

   # Shift (modify $#) the appropriate number of times.
   shift
   while [[ $shiftArgs -gt 0 ]]; do
      [[ $# -eq 0 ]] && usage -h "Incorrect number of arguments."
      shiftArgs=$shiftArgs-1
      shift
   done
done
set -u

#==============================================================================
# Main Program

trap terminate EXIT SIGINT SIGTERM

msg "$HC_THISSCRIPT v$Version Starting validation at $(date +'%a %Y-%m-%d %H:%M:%S %Z')."
msg "This log file is: $LOGFILE"

do_preflight_checks ||\
   bail "Preflight checks failed. Aborting."

get_sdp_instances

if [[ -n "$HC_SDP_INSTANCE_LIST" ]]; then
   msg "List of SDP Instances to verify: $HC_SDP_INSTANCE_LIST"
else
   bail "No SDP instances detected."
fi

for i in $HC_SDP_INSTANCE_LIST; do
   check_sdp_instance "$i"
done

msg "If you have any questions about the output from this script, contact support@perforce.com"

exit "$OverallExitCode"
# | Change | User | Description | Committed | |
---|---|---|---|---|---|
#29 | 30827 | C. Thomas Tyler | Added check for LastSDPCheckpoint* counters | ||
#28 | 30825 | C. Thomas Tyler | Updated to 1.12.0 from SDP package. | ||
#27 | 30290 | C. Thomas Tyler | Updated to match SDP dev branch version. | ||
#26 | 29982 | C. Thomas Tyler |
Added support for SDP installations that define P4SUPER. Some SDP environments define the P4SUPER variable that, if set, is distinct from the P4USER (the default super user). If P4SUPER is defined, use it instead of P4USER to run commands that require super access, such as 'p4 configure' or 'p4 journals'. #review-29983 |
||
#25 | 29546 | C. Thomas Tyler | Just bumped Version for last change. | ||
#24 | 29545 | Robert Cowham | Check for -cshow output in case user can't login | ||
#23 | 29490 | Robert Cowham |
List linux distribution List contents of /p4/$instance/bin/ List root files in reverse size order |
||
#22 | 28616 | C. Thomas Tyler | Corrected issue with a call to older versions of the 'p4login' script. | ||
#21 | 27841 | C. Thomas Tyler |
Cosmetic/grammar and content corrections to sdp_health_check.sh docs. Non-functional change. |
||
#20 | 27839 | C. Thomas Tyler |
sdp_health_check.sh v1.7.1: * Moved 'p4 journals -m 100' call to be per-instance, fixing an error for environments where the SDP shell environment is not set and 'p4' cannot be found. This also makes it so the command is called for each instance in multi-instance environments. |
||
#19 | 27798 | C. Thomas Tyler |
sdp_health_check.sh v1.7.0: Scan the small log files that are captured, such as checkpoint.log, and detect if they report errors. Any such errors are now included in the Summary displayed at the end of processing. This will prevent the summary from reporting that no errors were detected when errors appear in the scanned SDP log files. Note that calling 'verify_sdp.sh -online' directly by this sdp_health_check.sh script is not required, as that is done by the daily_checkpoint.sh (or live_checkpoint.sh) and captured in checkpoint.log. With this change, that output (which was already being captured) is now scanned, with errors now referred to in the summary. #review-27799 @clouie rwillyoung |
||
#18 | 27778 | C. Thomas Tyler |
sdp_health_check.sh v1.6.0: * Added listing of checkpoints* directories. * Added clarity to error message re: switch_db_files(). * Fixed shellcheck compliance issues. * Removed bogus check for /p4/Version (that was never actually used). |
||
#17 | 27722 | C. Thomas Tyler |
Refinements to @27712: * Resolved one out-of-date file (verify_sdp.sh). * Added missing adoc file for which HTML file had a change (WorkflowEnforcementTriggers.adoc). * Updated revdate/revnumber in *.adoc files. * Additional content updates in Server/Unix/p4/common/etc/cron.d/ReadMe.md. * Bumped version numbers on scripts with Version= def'n. * Generated HTML, PDF, and doc/gen files: - Most HTML and all PDF are generated using Makefiles that call an AsciiDoc utility. - HTML for Perl scripts is generated with pod2html. - doc/gen/*.man.txt files are generated with .../tools/gen_script_man_pages.sh. #review-27712 |
||
#16 | 27182 | C. Thomas Tyler |
Fixed typo in output message. No functional impact. |
||
#15 | 27180 | C. Thomas Tyler | Captured basic systemd or SysV init info. | ||
#14 | 27069 | C. Thomas Tyler | Removed 'aws' as a required command line utility. | ||
#13 | 27047 | C. Thomas Tyler |
sdp_health_check.sh v1.4.4: * Adds capture of certain small log files, e.g. checkpoint.log. * Fixed error in error message about missing file. * General enhancements to output format. |
||
#12 | 27046 | C. Thomas Tyler | sdp_health_check.sh v1.4.3: Added ServerID (server.id) file check. | ||
#11 | 27045 | C. Thomas Tyler |
sdp_health_check.sh v1.4.2: * Minor doc refinements. * Added 'p4login' for each instance. * Updated location of "Version" file for coming-soon SDP r20.1. * Fixed "carry over" error where one instance reporting bad would make all instances checked after report as bad. |
||
#10 | 26813 | C. Thomas Tyler | Added a few more checks per coordination with Support. | ||
#9 | 26812 | C. Thomas Tyler | Added a few more bits to capture. | ||
#8 | 26804 | C. Thomas Tyler | Added 'crontab -l' and 'df -h' capture. | ||
#7 | 26802 | C. Thomas Tyler |
Major upgrades to sdp_health_check.sh: * SDP version checks incorporated. * Various bits of info that might be useful for Support to have are gathered, including contents of key SDP files. * If verify_sdp.sh exists, it is called. #review-26803 @robert_cowham @clouie @amo @amoriss @vkanczes @josh |
||
#6 | 25373 | C. Thomas Tyler |
Added check for broken version of backup_functions.sh, with additional info to check. |
||
#5 | 24526 | C. Thomas Tyler |
Incorporated review comments. Thanks, Robert! To Do: Add test script. Submitting so I can send the next batch of updates. |
||
#4 | 24516 | C. Thomas Tyler | Fixed doc typo. | ||
#3 | 24514 | C. Thomas Tyler | Enhanced health check. | ||
#2 | 24513 | C. Thomas Tyler | Removed excess code cruft. | ||
#1 | 24512 | C. Thomas Tyler | Added sdp_health_check.sh script. |