verify_sdp.sh #6

  • //
  • guest/
  • perforce_software/
  • sdp/
  • main/
  • Server/
  • Unix/
  • p4/
  • common/
  • bin/
  • verify_sdp.sh
  • View
  • Commits
  • Open Download .zip Download (40 KB)
#!/bin/bash
#==============================================================================
# Copyright and license info is available in the LICENSE file included with
# the Server Deployment Package (SDP), and also available online:
# https://swarm.workshop.perforce.com/projects/perforce-software-sdp/view/main/LICENSE
#------------------------------------------------------------------------------

# verify_sdp.sh
# Verifies SDP structure and environment.

#==============================================================================
# Declarations and Environment

# SDP locations and instance identity; exported so child processes see them.
# VS_SDP_OWNER is only marked for export here; it is assigned later.
VS_SDP_P4CBIN="/p4/common/bin"
VS_SDP_ENV="$VS_SDP_P4CBIN/p4_vars"
SDP_INSTANCE="${SDP_INSTANCE:-UnsetSDPInstance}"
export VS_SDP_P4CBIN VS_SDP_ENV SDP_INSTANCE VS_SDP_OWNER

# Script identity and the command line as invoked.  CmdLine is built from
# CmdArgs, so these must remain separate statements in this order.
declare Version="5.12.0"
declare SDPVersionA="" SDPVersionB="" SDPVersionC=""
declare ThisScript="${0##*/}"
declare CmdArgs="$*"
declare CmdLine="$0 $CmdArgs"

# Integer counters and mode flags (0 = off, 1 = on).
declare -i ServerOnline=0 SilentMode=0
declare -i ErrorCount=0 WarningCount=0 CheckCount=0
declare -i DoCrontabTest=1 DoLicenseTest=1 DoVersionTest=1
declare -i DoExcessBinaryTest=1 DoMasterIDTest=1
declare -i ExcessServerBinariesFound=0
declare -i P4DServer=0 P4BrokerServer=0 P4ProxyServer=0

# String work variables, initially empty.
declare SkipTestList="" BadLog="" ThisUser=""
declare LicenseInfo="" LicenseExpiration=""
declare CurrentTime="" ExpirationTime="" TimeDiff="" DaysDiff=""
declare LicenseDaysExpireWarning=14
declare LinkP4ROOT="" LinkOfflineDB=""

# Separator lines used in report output.
declare H1="=============================================================================="
declare H2="------------------------------------------------------------------------------"

# "Unset" is a sentinel that later code tests for explicitly.
declare Log="Unset"
export P4TMP="Unset"
declare TmpFile=""

#==============================================================================
# Local Functions

# Note: This script does not use SDP library files, as its purpose is to
# verify the integrity of an SDP installation.  Thus, it has its own
# self-contained versions of some functions for which similar versions
# would normally be sourced from files in /p4/common/lib, like libbcore.sh.

# Micro-functions, small or one-liners used to avoid external dependencies,
# which is critical for this particular verify_sdp.sh script.
# msg: print all arguments as one line, interpreting backslash escapes.
function msg () {
   echo -e "$*"
}

# errmsg: print an error message (default text if none given) and bump the
# global ErrorCount.
function errmsg () {
   local text="${1:-Unknown Error}"
   msg "\\nError: ${text}\\n"
   ErrorCount+=1
}

# warnmsg: print a warning message (default text if none given) and bump the
# global WarningCount.
function warnmsg () {
   local text="${1:-Unknown Warning}"
   msg "\\nWarning: ${text}\\n"
   WarningCount+=1
}

# bail: report an error via errmsg, then exit with status $2 (default 1).
function bail () {
   local text="${1:-Unknown Error}"
   local status="${2:-1}"
   errmsg "$text"
   exit "$status"
}

#------------------------------------------------------------------------------
# This function takes as input an SDP version string, and returns a version
# id of the form YYYY.N.CL, where YYYY is the year, N is an incrementing
# release ID with a given year, and CL is a changelist identifier. The
# YYYY.N together comprise the major version, often shortened to YY.N, e.g.
# r20.1 for the 2020.1 release.
#
# The full SDP Version string looks something like this:
# Rev. SDP/MultiArch/2019.3/26494 (2020/04/23).
#
# This function parses that full string and returns a value like: 2019.3.26494
function get_sdp_version_from_string () {
   local versionString="${1:-}"

   # Match the first "/YYYY.N/CL" component explicitly.  Stripping everything
   # through the last "/20" (as was done previously) picks the wrong spot when
   # the changelist starts with "20" or the release date falls on the 20th.
   local versionPattern='/(20[0-9]{2}\.[0-9]+)/([^[:space:]/]+)'

   if [[ "$versionString" =~ $versionPattern ]]; then
      echo "${BASH_REMATCH[1]}.${BASH_REMATCH[2]}"
   else
      # Anything that does not carry a recognizable version is reported
      # with the "Unknown" sentinel rather than a partial string.
      echo "Unknown"
   fi
}

#------------------------------------------------------------------------------
# Function: run ($cmd, $desc, $showOutput)
#
# Runs a command, with optional description, showing command line to execute
# and optionally also the output, and capturing and returning the exit code.
#
# Input:
# $1 - Command and arguments to execute. Defaults to 'echo'.
# $2 - Optional message to display describing what the command is doing.
# $3 - Numeric flag to show output; '1' indicates to show output, 0 to
#      suppress it.
#------------------------------------------------------------------------------
function run () {
   local cmd="${1:-echo}"
   local desc="${2:-}"
   local -i showOutput="${3:-1}"
   local -i rc=0
   local outputFile

   # Command output is captured in a scratch file under P4TMP.
   outputFile="$(mktemp "$P4TMP/run.XXXXXXXXXXX")"

   if [[ -n "$desc" ]]; then
      msg "$desc"
   fi
   msg "Executing: $cmd"

   # The command string is deliberately left unquoted so that it is split
   # into the command name and its arguments.
   # shellcheck disable=SC2086
   $cmd > "$outputFile" 2>&1
   rc=$?

   if (( showOutput == 1 )); then
      echo "EXIT_CODE: $rc" >> "$outputFile"
      cat "$outputFile"
   fi

   /bin/rm -f "$outputFile"
   return "$rc"
}

#------------------------------------------------------------------------------
# Function: usage (required function)
#
# Input:
# $1 - style, either -h (for short form) or -man (for man-page like format).
# The default is -h.
#
# $2 - error message (optional).  Specify this if usage() is called due to
# user error, in which case the given message is displayed first, followed by
# the standard usage message (short or long depending on $1).  If displaying
# an error, usually $1 should be -h so that the longer usage message doesn't
# obscure the error message.
#
# Sample Usage:
# usage 
# usage -man
# usage -h "Incorrect command line usage."
#
# This last example generates a usage error message followed by the short
# '-h' usage summary.
#------------------------------------------------------------------------------
function usage {
   # Prints the short synopsis, plus the long man-style text when the style
   # is -man.  An optional error message ($2) is written to stderr first.
   # This function never returns: it always ends with 'exit 1'.
   #
   # NOTE(review): the 'tag::includeManual[]' / 'end::includeManual[]'
   # comments below look like AsciiDoc include markers, presumably consumed
   # by documentation tooling -- confirm before adding or changing any text
   # between them.
   declare style=${1:--h}
   declare errorMessage=${2:-Unset}

   # 'Unset' is the sentinel meaning no error message was supplied.
   if [[ $errorMessage != Unset ]]; then
      echo -e "\\n\\nUsage Error:\\n\\n$errorMessage\\n\\n" >&2
   fi

# tag::includeManual[]
   echo "USAGE for verify_sdp.sh v$Version:

verify_sdp.sh [<instance>] [-online] [-skip <test>[,<test2>,...]] [-si] [-L <log>|off ] [-D]

   or

verify_sdp.sh -h|-man
"
# end::includeManual[]
   if [[ $style == -man ]]; then
      # tag::includeManual[]
      echo -e "DESCRIPTION:

	This script verifies the current SDP setup for the specified instance,
	and also performs basic health checks of configured servers.

	This uses the SDP instance bin directory /p4/N/bin to determine
	what server binaries (p4d, p4broker, p4p) are expected to be configured
	on this machine.

	Existence of the '*_init' script indicates the given binary is
	expected. For example, for instance 1, if /p4/1/bin/p4d_1_init
	exists, a p4d server is expected to run on this machine.

	Checks may be executed or skipped depending on what servers are
	configured. For example, if a p4d is configured, the \$P4ROOT/server.id
	file should exist. If p4p is configured, the 'cache' directory
	should exist.

OPTIONS:
 <instance>
	Specify the SDP instances.  If not specified, the SDP_INSTANCE
	environment variable is used instead.  If the instance is not
	defined by a parameter and SDP_INSTANCE is not defined,
	exits immediately with an error message.

 -online
	Online mode.  Does additional checks that require P4D to be online.
 
 -skip <test>[,<test2>,...]

	Specify a comma-delimited list of test names to skip.
	
	Valid test names:

	* crontab: Skip crontab check. Use this if you do not expect crontab to
	be configured, perhaps if you use a different scheduler.
	* license: Skip license related checks.
	* version: Skip version checks.
	* excess: Skip checks for excess copies of p4d/p4p/p4broker in PATH.
	* masterid: Skip check to ensure ServerID of master starts with 'master'.

	As an alternative to using the '-skip' command, the shell environment
	variable VERIFY_SDP_SKIP_TEST_LIST can be set to a comma-separated
	list of test names to skip.  Using the command line parameter is the
	best choice for temporarily skipping tests, while specifying the
	environment variable is better for making permanent exceptions (e.g.
	always excluding the crontab check if crontabs are not used at this
	site).  The variable should be set in /p4/common/config/p4_N.vars.

	If the '-skip' option is provided, the VERIFY_SDP_SKIP_TEST_LIST
	variable is ignored (not appended to). So it may make sense to
	reference the variable on the command line. For example, if the
	value of the variable is 'crontab', to skip crontab and license
	checks, you could specify:

	-skip \$VERIFY_SDP_SKIP_TEST_LIST,license

 -si	Silent mode, useful for cron operation.  Both stdout and stderr
	are still captured in the log.  The '-si' option cannot be used
	with '-L off'.

 -L <log>
	Specify the log file to use.  The default is /p4/N/logs/verify_sdp.log
	The special value 'off' disables logging to a file.

	Note that '-L off' and '-si' are mutually exclusive.

 -D	Set extreme debugging verbosity.

HELP OPTIONS:
 -h	Display short help message
 -man	Display man-style help message

EXAMPLES:
	Example 1: Typical usage:

	This script is typically called after SDP update with only the instance
	name or number as an argument, e.g.:

	verify_sdp.sh 1

	Example 2: Skipping some checks.

	verify_sdp.sh 1 -skip crontab

	Example 3: Automation Usage

	If used from automation already doing its own logging, use -L off:

	verify_sdp.sh 1 -L off

LOGGING:
	This script generates a log file and also displays it to stdout at the
	end of processing.  By default, the log is:
	/p4/N/logs/verify_sdp.log.

	The exception is usage errors, which result an error being sent to
	stderr followed usage info on stdout, followed by an immediate exit.

	If the '-si' (silent) flag is used, the log is generated, but its
	contents are not displayed to stdout at the end of processing.

EXIT CODES:
	An exit code of 0 indicates no errors were encounted attempting to
	perform verifications, and that all checks verified cleanly.
"
   # end::includeManual[]

   fi

   # The exit status is 1 in every case, including plain -h and -man requests.
   exit 1
}

#------------------------------------------------------------------------------
# Function: terminate
function terminate
{
   # Exit handler: reports the log location (unless logging is off) and exits
   # with a status reflecting the number of errors counted in ErrorCount.
   local -i exitCode="$ErrorCount"

   # Disable signal trapping.
   trap - EXIT SIGINT SIGTERM

   [[ "$Log" == "off" ]] || msg "\\nLog is: $Log\\n${H1}\\n"

   # Exit statuses are truncated to 8 bits, so an error count of 256 (or any
   # multiple of it) would otherwise be reported as success.  Cap at 255.
   if (( exitCode > 255 )); then
      exitCode=255
   fi

   # With the trap removed, exit.
   exit "$exitCode"
}

#------------------------------------------------------------------------------
# Function: do_preflight_checks ($instance)
#
# If preflight checks fail, further tests are aborted. Failure of the very
# basic preflight checks is an indication that the SDP structure is in
# need of repair.
#
# Sample Usage:
# do_preflight_checks "$SDP_INSTANCE" ||\
#    bail "Preflight checks failed. Aborting further checks."
#------------------------------------------------------------------------------
function do_preflight_checks () {

   local instance="${1:-}"
   local toolsList="awk date grep head id ls tail tee which"
   local tool=

   # Remember the error count on entry so that errors recorded before this
   # function was called (e.g. an invalid '-skip' test name) are not mistaken
   # for missing tools.
   local -i errorsOnEntry="$ErrorCount"

   msg "$H2\\nDoing preflight sanity checks."
   msg "Preflight Check: Ensuring these utils are in PATH: $toolsList"

   for tool in $toolsList; do
      CheckCount+=1
      if ! command -v "$tool" > /dev/null; then
         errmsg "Tool '$tool' not in PATH."
      fi
   done

   [[ "$ErrorCount" -eq "$errorsOnEntry" ]] || return 1

   msg "Verified: Essential tools are in the PATH."

   msg "Preflight Check: cd $VS_SDP_P4CBIN"

   # Confirm the directory can be entered, then return to where we started.
   CheckCount+=1
   if cd "$VS_SDP_P4CBIN"; then
      cd "$OLDPWD" || bail "Failed to cd to $OLDPWD. Aborting."
   else
      errmsg "Could not cd to: $VS_SDP_P4CBIN"
      return 1
   fi

   msg "Verified: cd works to: $VS_SDP_P4CBIN"

   msg "Preflight Check: Checking current user owns $VS_SDP_P4CBIN"

   # Inspect the SDP bin directory itself.  The working directory has already
   # been restored above, so '.' would be whatever directory the caller was
   # in.  The trailing '/.' makes ls report the directory even if the last
   # path component is a symlink.
   # shellcheck disable=SC2012
   VS_SDP_OWNER="$(ls -ld "$VS_SDP_P4CBIN/." | awk '{print $3}')"
   ThisUser="$(id -n -u)"

   CheckCount+=1
   if [[ "$ThisUser" == "$VS_SDP_OWNER" ]]; then
      msg "Verified: Current user [$ThisUser] owns $VS_SDP_P4CBIN"
   else
      errmsg "Current user [$ThisUser] does not own $VS_SDP_P4CBIN. This most likely means this script is running as the wrong user.  It could also mean the $VS_SDP_P4CBIN directory is not owned by by the correct owner, which should be the OS account under which the p4d process runs."
      return 1
   fi

   msg "Preflight Check: Checking /p4 and /p4/<instance> are local dirs."
   if ! check_local_instance_home_dir "$instance"; then
      errmsg "The SDP /p4 and /p4/<instance> dirs are NOT local."
      return 1
   fi

   return 0
}

#------------------------------------------------------------------------------
# Function: check_file ($file, $errMsg, $warningOnly)
#
# Checks for existence of a file. Returns 0 if it exists, 1 otherwise.
#
# Allows optional custom error message describing the file, to be displayed if
# the file is missing.  Default error message is "Missing file [FILE]."
#------------------------------------------------------------------------------
function check_file () {
   local path=$1
   local problem=${2:-Missing file}
   local softFail=${3:-0}
   local reporter=errmsg

   CheckCount+=1
   msg "Checking existence of file $path"

   # A non-zero third argument downgrades a failure to a warning.
   [[ "$softFail" -eq 0 ]] || reporter=warnmsg

   if [[ -f "$path" ]]; then
      return 0
   fi

   "$reporter" "$problem: [$path]."
   return 1
}

#------------------------------------------------------------------------------
# Function: check_file_x ($file, $errMsg, $warningOnly)
#
# Checks existence of a file with executable bit set. Returns 0 if it exists
# and is executable, 1 otherwise.
#
# Allows optional custom error message describing the file, to be displayed if
# the file is missing.  Default error message is "File not executable [FILE]."
#------------------------------------------------------------------------------
function check_file_x () {
   local path=$1
   local problem=${2:-File not executable}
   local softFail=${3:-0}
   local reporter=errmsg

   CheckCount+=1
   msg "Checking executable file $path"

   # A non-zero third argument downgrades a failure to a warning.
   [[ "$softFail" -eq 0 ]] || reporter=warnmsg

   if [[ -x "$path" ]]; then
      return 0
   fi

   "$reporter" "$problem: [$path]."
   return 1
}

#------------------------------------------------------------------------------
# Function: check_file_dne ($file, $errMsg, $warningOnly)
#
# Confirm that a specific file does not exist, e.g. a semaphore file.  If the
# specified file does not exist, return 0, or 1 if it exists. This is the
# opposite of check_file().
#
# Allows optional custom error message describing the file, to be displayed if
# the file is found.  Default error message is "This file should not exist: [FILE]."
#------------------------------------------------------------------------------
function check_file_dne () {
   local path=$1
   local problem=${2:-This file should not exist}
   local softFail=${3:-0}
   local reporter=errmsg

   CheckCount+=1
   msg "Confirming this file does not exist: $path"

   # A non-zero third argument downgrades a failure to a warning.
   [[ "$softFail" -eq 0 ]] || reporter=warnmsg

   if [[ ! -f "$path" ]]; then
      return 0
   fi

   "$reporter" "$problem: [$path]."
   return 1
}

#------------------------------------------------------------------------------
# Function: check_configurable ($instance, $configurable, $scope, $expectedVal, $errMsg1, $errMsg2)
#
# Check that a configurable is set, and optionally check that it is set to
# an expected value.
#
# Inputs:
# $1 - SDP Instance. Required.
# $2 - Configurable name. Required.
# $3 - Configurable scope/ServerID, as per 'p4 help configure'.  The default
#      is "any", meaning what it means with 'p4 configure set', i.e. that the
#      configurable is a global default.  The special value 'ALL' can
#      also be supplied, which has the special meaning of checking
#      if the configurable is defined for any ServerID, including the 'any'
#      value.  The value returned is that of the first setting encountered.
# $4 - Expected value of configurable. Optional. If defined, an additional check is
#      done, checking the current value against the expected value.  Optionally,
#      the special value UNDEF can be used, which reverses the exit code, such
#      that a happy zero is returned only if the value is not set.
# $5 - Optional error message to display if no value is defined.  See code
#      below for the default message.
# $6 - Optional error message to display if a value is defined but does not
#      match the expected value.  See code below for the default message.
#
# Return Codes:
# 1 - Verifications failed.
# 0 - Verifications passed.
# 
# Sample Usage: 
# check_configurable "$SDP_INSTANCE" journalPrefix
#
# check_configurable "$SDP_INSTANCE" journalPrefix any "$CHECKPOINTS/$P4SERVER"
#
# check_configurable "$SDP_INSTANCE" journalPrefix any "$CHECKPOINTS/$P4SERVER" ||\
#   bail "Yikes, journalPrefix is not set, all bets are off. Aborting."
#------------------------------------------------------------------------------
function check_configurable () {
   local instance="$1"
   local configurable="$2"
   local scope="${3:-any}"
   local expectedValue="${4:-NoExpectedValue}"
   local errMsgMissing="${5:-No value defined}"
   local errMsgBadValue="${6:-Value does not match what is expected}"
   local detectedScope=
   local configLine=
   local value=

   CheckCount+=1

   # Configurables are read offline from db.config via 'p4d -cshow'.
   if [[ ! -r "$P4ROOT"/db.config ]]; then
      warnmsg "Skipping check for configurable $configurable; no db.config."
      return 1
   fi

   # Each line of '-cshow' output has the form: <scope>: <name> = <value>
   if [[ "$scope" != "ALL" ]]; then
      configLine=$($P4DBIN -r "$P4ROOT" -cshow | grep "^${scope}: ${configurable} = ")
   else
      # 'ALL' takes the first setting found for any scope/ServerID.
      configLine=$($P4DBIN -r "$P4ROOT" -cshow | grep ": ${configurable} = " | head -1)
      detectedScope="${configLine%%:*}"
   fi

   # Strip the shortest '<scope>: <name> = ' prefix so that a value which
   # itself contains ' = ' is kept intact.  Doing this once, up front, also
   # ensures every message below reports the value rather than the raw line.
   value="${configLine#* = }"

   # Whether the configurable is set is decided from the matched line, since
   # a configurable can be set to an empty value.
   if [[ "$expectedValue" != "UNDEF" ]]; then
      if [[ -n "$configLine" ]]; then
         if [[ "$scope" != "ALL" ]]; then
            msg "Verified: Configurable ${scope}:${configurable} is defined."
         else
            msg "Verified: Configurable ${configurable} is defined for at least once."
         fi
      else
         errmsg "$errMsgMissing for configurable [${scope}:${configurable}]."
         return 1
      fi
   else
      # UNDEF reverses the check: success means the configurable is not set.
      if [[ -n "$configLine" ]]; then
         if [[ "$scope" != "ALL" ]]; then
            errmsg "Configurable ${configurable} should not be set with 'p4 configure set' but has a value for ServerID ${scope} of: ${value}"
            return 1
         else
            errmsg "Configurable ${configurable} should not be set with 'p4 configure set' but has a value for ServerID ${detectedScope} of: ${value} (and possibly for other ServerIDs)."
            return 1
         fi
      else
         if [[ "$scope" != "ALL" ]]; then
            msg "Verified: Configurable ${scope}:${configurable} is undefined."
         else
            msg "Verified: Configurable ${configurable} is undefined."
         fi
      fi
   fi

   [[ "$expectedValue" == "NoExpectedValue" ]] && return 0

   # Comparing against an expected value counts as a second check.
   CheckCount+=1

   if [[ "$expectedValue" != "UNDEF" ]]; then
      if [[ "$value" == "$expectedValue" ]]; then
         msg "Verified: Configurable ${scope}:${configurable} has expected value [$value]."
      else
         errmsg "$errMsgBadValue for variable [${scope}:${configurable}]\\n\\tExpected value: [$expectedValue]\\n\\tActual value:   [$value]"
         return 1
      fi
   fi

   return 0
}

#------------------------------------------------------------------------------
# Function: check_env_var ($instance, $var, $expectedval, $msg1, $msg2)
#
# Check that a shell environment variable is set when sourcing the SDP
# environment. Optionally checks that variables are set to expected values.
#
# Inputs:
# $1 - SDP Instance. Required.
# $2 - Variable name. Required.
# $3 - Expected value of variable. Optional. If defined, an additional check is
#      done, checking the current value against the expected value.
# $4 - Optional error message to display if no value is defined.  See code
#      below for the default message.
# $5 - Optional error message to display if a value is defined but does not match
#      the expected value.  See code below for the default message.
# 
# Return Codes:
# 1 - Verifications failed.
# 0 - Verifications passed.
# Sample Usage: 
# check_env_var $SDP_INSTANCE P4JOURNAL "/p4/$SDP_INSTANCE/logs/journal"
#
# check_env_var $SDP_INSTANCE P4JOURNAL "/p4/$SDP_INSTANCE/logs/journal" ||\
#   bail "Yikes, P4JOURNAL is not set, all bets are off. Aborting."
#------------------------------------------------------------------------------
function check_env_var () {
   local instance="$1"
   local var="$2"
   local expectedValue="${3:-NoExpectedValue}"
   local errMsgMissing="${4:-No value defined}"
   local errMsgBadValue="${5:-Value does not match what is expected}"
   local value=
   CheckCount+=1

   # Clear any inherited value so that only what the SDP environment file
   # defines is verified.
   unset "${var}"
   # shellcheck disable=SC1090
   source "$VS_SDP_ENV" "$instance"

   # Read the variable through indirect expansion.  Unlike 'eval echo', this
   # preserves the value exactly: no word splitting, no pathname expansion,
   # and no misinterpretation of values that look like echo options.
   # 'set -u' is relaxed only while reading, because the variable may be
   # unset; it is then (re-)enabled, as callers run with 'set -u' in effect.
   set +u
   value="${!var}"
   set -u

   if [[ -n "$value" ]]; then
      msg "Verified: Variable ${var} is defined."
   else
      errmsg "$errMsgMissing for variable [$var]."
      return 1
   fi

   [[ "$expectedValue" == "NoExpectedValue" ]] && return 0

   # Comparing against an expected value counts as a second check.
   CheckCount+=1

   if [[ "$value" == "$expectedValue" ]]; then
      msg "Verified: Variable ${var} has expected value [$value]."
   else
      errmsg "$errMsgBadValue for variable [$var]\\n\\tExpected value: [$expectedValue]\\n\\tActual value:   [$value]"
      return 1
   fi

   return 0
}

#------------------------------------------------------------------------------
# Function: check_local_instance_home_dir ($instance)
#
# Check that the '/p4' directory and the instance home directory '/p4/N' are
# local directories on the root volume, per SDP structural intent.
#
# Inputs:
# $1 - SDP Instance. Required.
#
# Return Codes:
# 0 - Verifications were able to at least run; ErrorCount is incremented
#     if tests fail.
# 1 - Verifications could not even complete. This is a pre-flight failure.
#
# This increments globals CheckCount and possibly ErrorCount.
#
# Sample Usage: 
# check_local_instance_home_dir "$SDP_INSTANCE" ||\
#    bail "Error checking p4dir and/or local instance home dir."
#------------------------------------------------------------------------------
function check_local_instance_home_dir () {
   local instance="$1"
   local p4Dir="/p4"
   local p4HomeDir="$p4Dir/$instance"

   # P4HOME is read with a default so that an incomplete environment is
   # reported as a failed check rather than aborting under 'set -u'.
   local actualHome="${P4HOME:-}"

   CheckCount+=1
   if [[ "$actualHome" == "$p4HomeDir" ]]; then
      msg "Verified: P4HOME has expected value: $p4HomeDir"
   else
      # Report the value actually found, along with what was expected.
      errmsg "P4HOME has unexpected value: $actualHome (expected: $p4HomeDir)"
   fi

   CheckCount+=1
   if [[ -L "$p4HomeDir" ]]; then
      errmsg "This is a symlink; it should be a local directory: $p4HomeDir"
   else
      msg "Verified: This P4HOME path is not a symlink: $p4HomeDir"
   fi

   # A directory is considered local when its physical path (pwd -P, with
   # symlinks resolved) is the same as the path used to reach it.
   CheckCount+=1
   if ! cd "$p4Dir"; then
      errmsg "Could not cd to $p4Dir."
      return 1
   fi

   msg "Verified: cd to $p4Dir OK."
   CheckCount+=1
   if [[ "$(pwd -P)" == "$p4Dir" ]]; then
      msg "Verified: Dir $p4Dir is a local dir."
   else
      errmsg "Dir $p4Dir is NOT a local dir."
   fi
   cd - > /dev/null || bail "Failed to cd to $OLDPWD. Aborting."

   CheckCount+=1
   if ! cd "$p4HomeDir"; then
      errmsg "Could not cd to $p4HomeDir."
      return 1
   fi

   msg "Verified: cd to $p4HomeDir OK."
   CheckCount+=1
   if [[ "$(pwd -P)" == "$p4HomeDir" ]]; then
      msg "Verified: P4HOME dir $p4HomeDir is a local dir."
   else
      errmsg "P4HOME dir $p4HomeDir is NOT a local dir."
   fi
   cd - > /dev/null || bail "Failed to cd to $OLDPWD. Aborting."

   return 0
}

#==============================================================================
# Command Line Processing

# Number of extra arguments (option values) to consume after the current one.
declare -i shiftArgs=0

# Option values are read from $2, which may be absent; relax 'set -u' while
# parsing and enable it once parsing is complete.
set +u
while (( $# > 0 )); do
   case "$1" in
      -h)
         usage -h
      ;;
      -man)
         usage -man
      ;;
      -online)
         ServerOnline=1
      ;;
      -skip)
         SkipTestList="$2"
         shiftArgs=1
      ;;
      -si)
         SilentMode=1
      ;;
      -L)
         Log="$2"
         shiftArgs=1
      ;;
      -D)
         # Debug; use 'set -x' mode.
         set -x
      ;;
      -*)
         usage -h "Unknown command line option ($1)."
      ;;
      *)
         export SDP_INSTANCE=$1
      ;;
   esac

   # Drop the argument just handled, then any option values it consumed.
   # Running out of arguments here means an option was missing its value.
   shift
   while (( shiftArgs > 0 )); do
      if (( $# == 0 )); then
         usage -h "Incorrect number of arguments."
      fi
      shiftArgs=$(( shiftArgs - 1 ))
      shift
   done
done
set -u

#==============================================================================
# Command Line Verification

# An instance is mandatory, from either the command line or the environment.
if [[ "$SDP_INSTANCE" == "UnsetSDPInstance" ]]; then
   usage -h "Missing <instance> parameter. The <instance> must be given as a parameter to this script, or else the \$SDP_INSTANCE environment variable defined.  It can be set by doing:\\n\\n\\tsource $VS_SDP_ENV <instance>\\n\\nor by passing in the instance name as a parameter to this script.\\n"
fi

# Silent mode with logging disabled would leave the output nowhere to go.
if [[ "$SilentMode" -eq 1 && "$Log" == "off" ]]; then
   usage -h "Cannot use '-si' with '-L off'."
fi

#==============================================================================
# Main Program

# Load the SDP shell environment for the requested instance.
# shellcheck disable=SC1090
if ! source "$VS_SDP_ENV" "$SDP_INSTANCE"; then
   bail "Failed to load SDP environment for instance $SDP_INSTANCE."
fi

# shellcheck disable=SC1090
if ! source "$P4CBIN/backup_functions.sh"; then
   bail "Failed to load backup_functions.sh."
fi

if [[ "${OSUSER:-Unset}" == "Unset" ]]; then
   bail "The critical OSUSER setting is not defined in $VS_SDP_ENV. Aborting."
fi

# If this verify_sdp.sh script is called by root, change user to OSUSER.
# Any other user than OSUSER is rejected.
if (( $(id -u) == 0 )); then
   exec su - "$OSUSER" -c "$VS_SDP_P4CBIN/${0##*/} $CmdArgs"
elif [[ "$(id -u -n)" != "${OSUSER:-UnknownOSUSER}" ]]; then
   bail "${0##*/} can only be run by root or $OSUSER"
fi

# From here on, every exit path goes through terminate().
trap terminate EXIT SIGINT SIGTERM

#------------------------------------------------------------------------------
# Function: apply_skip_test_list
#
# Turns off the Do*Test flags for each test named in SkipTestList.  If
# SkipTestList is empty, it is first taken from VERIFY_SDP_SKIP_TEST_LIST
# (when that is set).  Names may be separated by commas and/or whitespace.
# Unknown names are reported with errmsg.
#------------------------------------------------------------------------------
function apply_skip_test_list () {
   local -a skipTests=()
   local testName=

   if [[ -z "$SkipTestList" && -n "${VERIFY_SDP_SKIP_TEST_LIST:-}" ]]; then
      SkipTestList="$VERIFY_SDP_SKIP_TEST_LIST"
   fi

   [[ -n "$SkipTestList" ]] || return 0

   # Split into an array with 'read' rather than an unquoted expansion, so
   # that a name such as '*' is reported as invalid instead of being expanded
   # to the files in the current directory.  With an empty delimiter 'read'
   # consumes the whole string and returns non-zero at end of input; that
   # status is expected and ignored.
   IFS=$' \t\n' read -r -d '' -a skipTests <<< "${SkipTestList//,/ }" || true

   # The ${array[@]+...} form keeps an empty array safe under 'set -u'.
   for testName in ${skipTests[@]+"${skipTests[@]}"}; do
      case "$testName" in
         (cron|crontab) DoCrontabTest=0;;
         (license) DoLicenseTest=0;;
         (version) DoVersionTest=0;;
         (excess) DoExcessBinaryTest=0;;
         (masterid) DoMasterIDTest=0;;
         (*) errmsg "Invalid test name specified with '-skip': $testName";;
      esac
   done

   return 0
}

apply_skip_test_list

# Logs should be defined to /p4/N/logs after sourcing the environment
# file above; default to /tmp for cases of incomplete environment where
# LOGS is not defined.
export LOGS="${LOGS:-/tmp}"

# 'Unset' means no '-L' option was given; use the default log location.
[[ "$Log" == "Unset" ]] && Log="${LOGS}/verify_sdp.log"

# Everything below is skipped when logging was disabled with '-L off'.
if [[ "$Log" != "off" ]]; then
   if [[ -f "$Log" ]]; then
      if [[ ! -w "$Log" ]]; then
         # Log is set to "off" before bailing so that terminate(), which runs
         # on exit, does not print a "Log is:" line for an unusable log.
         BadLog="$Log"
         Log="off"
         bail "Existing log file [$BadLog] is not writable. Aborting."
      fi
      # rotate_log_file is not defined in this file; presumably it comes from
      # the backup_functions.sh sourced earlier -- confirm.
      rotate_log_file "$Log" ".gz"
   else
      # NOTE(review): this tests only that $LOGS exists as a directory, while
      # the message says "not writable"; it also checks $LOGS even when '-L'
      # points the log somewhere else.  The touch below is the actual
      # writability test.
      if [[ ! -d "${LOGS}" ]]; then
         Log="off"
         bail "Logs directory [$LOGS] is not writable. Aborting."
      fi
   fi

   # Create the log file (or confirm it can be updated) before redirecting.
   if ! touch "$Log"; then
      BadLog="$Log"
      Log="off"
      bail "Couldn't touch log file [$BadLog]. Aborting."
   fi

   # Redirect stdout and stderr to a log file.
   # In normal mode output goes through tee, so it is both written to the log
   # and still shown on the original stdout; in silent mode it goes to the
   # log only.  In both cases stderr is pointed at stdout afterwards.
   if [[ "$SilentMode" -eq 0 ]]; then
      exec > >(tee "$Log")
      exec 2>&1
   else
      exec >"$Log"
      exec 2>&1
   fi

   msg "${H1}\\nLog is: $Log"
fi

# P4TMP must have been set by the SDP environment to a writable directory.
if [[ "$P4TMP" == "Unset" || ! -d "$P4TMP" || ! -w "$P4TMP" ]]; then
   bail "SDP environment must define required P4TMP variable. Value must be a directory that is writable; value is: $P4TMP"
fi

# Record what is running, where, when, and how it was invoked.
msg "$ThisScript v$Version Starting SDP verification on host ${HOSTNAME%%.*} at $(date +'%a %Y-%m-%d %H:%M:%S %Z') with this command line:\\n$CmdLine"

msg "\\nIf you have any questions about the output from this script, contact support@perforce.com."

# Nothing further is checked unless the preflight checks pass.
if ! do_preflight_checks "$SDP_INSTANCE"; then
   bail "Preflight checks failed. Aborting further checks."
fi

msg "${H2}\\nChecking environment variables."

# Variables checked together with an expected value.
check_env_var "$SDP_INSTANCE" SDP_INSTANCE
check_env_var "$SDP_INSTANCE" P4ROOT "/p4/$SDP_INSTANCE/root"
check_env_var "$SDP_INSTANCE" P4JOURNAL "/p4/$SDP_INSTANCE/logs/journal"

# Variables checked without an expected value.
for envVar in P4MASTER_ID P4MASTERHOST P4MASTERPORT SERVERID; do
   check_env_var "$SDP_INSTANCE" "$envVar"
done

# NOTE(review): the password file is checked only when check_env_var
# returns non-zero; confirm this polarity against check_env_var's return
# convention, which is not visible here.
check_env_var "$SDP_INSTANCE" SDP_ADMIN_PASSWORD_FILE "$P4CCFG/.p4passwd.$P4SERVER.admin" ||
   check_file "$SDP_ADMIN_PASSWORD_FILE" "SDP admin password file doesn't exist"

msg "${H2}\\nRunning standard checks typically called within SDP scripts."
check_vars
CheckCount+=1
# set_vars presumably defines the *InitScript variables tested below --
# confirm in backup_functions.sh.
set_vars

msg "${H2}\\nChecking *_init scripts in instance bin dir [/p4/$SDP_INSTANCE/bin] to see what servers are configured on this machine."

# Each init script found marks that server type as configured here, and the
# script is then checked with check_file_x.

# shellcheck disable=SC2154
if [[ -e "$P4DInitScript" ]]; then
   msg "A p4d server is here."
   P4DServer=1
   check_file_x "$P4DInitScript"
fi

# shellcheck disable=SC2154
if [[ -e "$P4BrokerInitScript" ]]; then
   msg "A p4broker server is here."
   P4BrokerServer=1
   check_file_x "$P4BrokerInitScript"
fi

# shellcheck disable=SC2154
if [[ -e "$P4ProxyInitScript" ]]; then
   msg "A p4p server is here."
   P4ProxyServer=1
   check_file_x "$P4ProxyInitScript"
fi

# A machine with none of the three server types is reported as an error.
if [[ $((P4DServer+P4BrokerServer+P4ProxyServer)) -eq 0 ]]; then
   CheckCount+=1
   errmsg "No servers (p4d, p4p, p4broker) are configured."
fi

# The directory check is only run for p4d servers. It must come after the
# init script detection above, because that is where P4DServer is set;
# testing the flag any earlier always sees its initial value of 0.
if [[ "$P4DServer" -eq 1 ]]; then
   check_dirs
   CheckCount+=1
fi

# The systemd checks run only where the /etc/systemd/system directory
# exists. For each service file present, 'systemctl cat' on the service
# (named after the server binary) must succeed for the current user.
if [[ -d /etc/systemd/system ]]; then
   msg "${H2}\\nChecking that systemd service files are readable."

   # shellcheck disable=SC2154
   if [[ -e "$P4DSystemdServiceFile" ]]; then
      CheckCount+=1
      if systemctl cat "${P4DBIN##*/}" > /dev/null 2>&1; then
         msg "Verified: Permissions OK to status w/o sudo for p4d service."
      else
         errmsg "Could not run 'systemctl cat ${P4DBIN##*/}'."
      fi
   fi

   # shellcheck disable=SC2154
   if [[ -e "$P4BrokerSystemdServiceFile" ]]; then
      CheckCount+=1
      if systemctl cat "${P4BROKERBIN##*/}" > /dev/null 2>&1; then
         msg "Verified: Permissions OK to status w/o sudo for p4broker service."
      else
         errmsg "Could not run 'systemctl cat ${P4BROKERBIN##*/}'."
      fi
   fi

   # shellcheck disable=SC2154
   if [[ -e "$P4ProxySystemdServiceFile" ]]; then
      CheckCount+=1
      if systemctl cat "${P4PBIN##*/}" > /dev/null 2>&1; then
         msg "Verified: Permissions OK to status w/o sudo for p4p service."
      else
         errmsg "Could not run 'systemctl cat ${P4PBIN##*/}'."
      fi
   fi
fi

# On p4d servers, a few representative database files must be present in
# both the live root and the offline database.
if [[ "$P4DServer" -eq 1 ]]; then
   msg "${H2}\\nChecking for a few database files."
   for file in db.counters db.domain db.config; do
      check_file "$P4ROOT/$file" "Expected database file doesn't exist."
      check_file "$OFFLINE_DB/$file" "Expected database file doesn't exist."
   done
fi

msg "${H2}\\nChecking for existence of key files."
check_file "$P4BIN" "The p4 binary (or symlink) doesn't exist."
check_file "$P4TICKETS" "The P4TICKETS file doesn't exist."

# The remaining file checks and the license checks apply only when a p4d
# server is configured on this machine.
if [[ "$P4DServer" -eq 1 ]]; then
   check_file "$P4ROOT/server.id" "The required $P4ROOT/server.id file is missing."
   check_file "$OFFLINE_DB/offline_db_usable.txt" "Offline database not in a usable state."
   check_file_dne "$P4ROOT/P4ROOT_not_usable.txt" "P4ROOT is not in a usable state."
   check_file_dne "$OFFLINE_DB/P4ROOT_not_usable.txt" "Offline database has P4ROOT_not_usable.txt file."

   if [[ "$DoLicenseTest" -eq 1 ]]; then
      msg "${H2}\\nLicense Checks."

      #--------------------------------------------------------------------------
      # A sample license line looks like this:
      # License: Perforce Battle School 28 users (support ends 2020/06/01) (expires 2020/06/01)
      # Existence of 'support ends' indicates the license is valid.
      # Existence of 'expires' indicates a temp or subscription license.
      #--------------------------------------------------------------------------
      # The third argument to check_file is defined by that function, which
      # is not visible here.
      if check_file "$P4ROOT/license" "The \$P4ROOT/license file doesn't exist" 1; then
         # The binary path is quoted so it is never word-split or globbed.
         LicenseInfo=$("$P4DBIN" -V | grep '^License:')
         CheckCount+=1
         if [[ -n "$LicenseInfo" ]]; then
            if [[ "$LicenseInfo" == *" expired "* ]]; then
               errmsg "The license is expired."
            elif [[ "$LicenseInfo" == *"expires"* ]]; then
               # Extract the date between '(expires ' and the closing ')'.
               LicenseExpiration=${LicenseInfo##*(expires }
               LicenseExpiration=${LicenseExpiration%%)*}

               # Check License
               CheckCount+=1
               # If 'date' rejects these options its error is discarded and
               # the value is left empty, which skips the comparison below.
               CurrentTime=$(date +%s 2>/dev/null)
               ExpirationTime=$(date +%s --date "$LicenseExpiration" 2>/dev/null)
               if [[ -n "$CurrentTime" && -n "$ExpirationTime" ]]; then
                  TimeDiff=$((ExpirationTime-CurrentTime))
                  DaysDiff=$((TimeDiff/(3600*24)))

                  msg "Info: License expires on $LicenseExpiration (in $DaysDiff days)."
                  if [[ "$DaysDiff" -le "$LicenseDaysExpireWarning" ]]; then
                     errmsg "License will expire within $LicenseDaysExpireWarning days."
                  fi
               else
                  msg "Skipping license check due to incompatible 'date' utility on this OS."
               fi
            elif [[ "$LicenseInfo" == *"support ends"* ]]; then
               msg "Info: License is perpetual."
            fi
         else
            errmsg "Could not determine license info from license file."
         fi
      fi
   else
      msg "Skipping license check per '-skip'."
   fi
fi

# Configurable and symlink checks, both applicable only to p4d servers.
if [[ "$P4DServer" -eq 1 ]]; then
   msg "${H2}\\nChecking configurables values."

   check_configurable "$SDP_INSTANCE" P4JOURNAL ALL UNDEF
   msg "Note: P4JOURNAL must be defined as a shell environment variable and must not set in db.config.\\n"
   check_configurable "$SDP_INSTANCE" journalPrefix any "$CHECKPOINTS/$P4SERVER"
   check_configurable "$SDP_INSTANCE" server.depot.root any "$DEPOTS"

   msg "${H2}\\nChecking P4ROOT and offline_db syminks."

   # The P4ROOT symlink target must end in /db1 or /db2.
   CheckCount+=1
   LinkP4ROOT="$(readlink "$P4ROOT")"
   case "$LinkP4ROOT" in
      */db1|*/db2)
         msg "Verified: Symlink for P4ROOT points to a db1 or db2 path."
         ;;
      *)
         errmsg "Symlink for P4ROOT does not point to a db1 or db2 path."
         ;;
   esac

   # The same rule applies to the offline_db symlink target.
   CheckCount+=1
   LinkOfflineDB="$(readlink "$OFFLINE_DB")"
   case "$LinkOfflineDB" in
      */db1|*/db2)
         msg "Verified: Symlink for offline_db points to a db1 or db2 path."
         ;;
      *)
         errmsg "Symlink for offline_db does not point to a db1 or db2 path."
         ;;
   esac

   # The two symlinks must not resolve to one and the same target.
   CheckCount+=1
   if [[ "$LinkP4ROOT" == "$LinkOfflineDB" ]]; then
      errmsg "Symlinks for P4ROOT and offline_db point to the same target."
   else
      msg "Verified: Symlinks for P4ROOT and offline_db do not point to the same target."
   fi
fi

msg "${H2}\\nChecking for standard symlink/dir structure."

# All instance-relative checks run from inside P4HOME; failing to cd there
# is itself reported as an error.
CheckCount+=1
if cd "$P4HOME"; then
   CheckCount+=1
   # -d alone follows symlinks, so a symlink to a directory must be
   # excluded explicitly for the "regular directory" claim to hold.
   if [[ -d "$PWD/bin" && ! -L "$PWD/bin" ]]; then
      msg "Verified: $PWD/bin is a regular directory."
   else
      warnmsg "$PWD/bin is not a regular directory."
   fi

   # Entries expected to be symlinks; the list depends on which server
   # types were detected on this machine.
   linkList="logs tmp"
   [[ "$P4DServer" -eq 1 ]] && linkList+=" checkpoints depots root offline_db"
   [[ "$P4ProxyServer" -eq 1 ]] && linkList+=" cache"

   for link in $linkList; do
      CheckCount+=1
      if [[ -L "$PWD/$link" ]]; then
         msg "Verified: $PWD/$link is a symlink."
      else
         errmsg "$PWD/$link is not a symlink directory."
      fi
   done

   CheckCount+=1
   if [[ -L "/p4/common" ]]; then
      msg "Verified: /p4/common is a symlink."
   else
      errmsg "Path /p4/common is not a symlink."
   fi

   CheckCount+=1
   if [[ -L "/p4/sdp" ]]; then
      msg "Verified: /p4/sdp is a symlink."
   else
      errmsg "Path /p4/sdp is not a symlink."
   fi

   # Return to the directory the script was in before the cd above.
   cd - > /dev/null || bail "Failed to cd to $OLDPWD. Aborting."
else
   errmsg "Could not cd to $P4HOME."
fi

# These checks run only when ServerOnline is set: server reachability and
# login, each against both the default port and P4MASTERPORT.
if [[ "$ServerOnline" -eq 1 ]]; then
   msg "${H2}\\nDoing online checks."

   CheckCount+=1
   if ! run "$P4BIN -s info -s" "Checking 'p4 -s info -s'" 1; then
      errmsg "Could not verify the p4d server is online (P4PORT=$P4PORT)."
   else
      msg "Verified: 'p4 -s info -s' output is nominal."
   fi

   CheckCount+=1
   if ! run "$P4BIN -s -p $P4MASTERPORT info -s" "Checking 'p4 -s info -s'" 1; then
      errmsg "Could not verify the p4d server is online (P4MASTERPORT=$P4MASTERPORT)."
   else
      msg "Verified: 'p4 -s -p $P4MASTERPORT info -s' output is nominal."
   fi

   CheckCount+=1
   if ! run "$P4CBIN/p4login" "Login check." 0; then
      errmsg "Login as P4USER $P4USER to P4PORT $P4PORT could not be verified."
   else
      msg "Verified: Login OK."
   fi

   CheckCount+=1
   if ! run "$P4CBIN/p4login -p $P4MASTERPORT" "Login check using P4MASTERPORT." 0; then
      errmsg "Login as P4USER $P4USER to P4MASTERPORT $P4MASTERPORT could not be verified."
   else
      msg "Verified: Login OK using P4MASTERPORT."
   fi
fi

# Basic crontab sanity check: the non-comment lines of the current user's
# crontab must call daily_checkpoint.sh or replica_status.sh.
if [[ "$DoCrontabTest" -ne 1 ]]; then
   msg "Skipping crontab check per '-skip'."
else
   msg "${H2}\\nChecking crontab for user $USER."
   CheckCount+=1
   TmpFile="$(mktemp "$P4TMP/crontab.XXXXXXXXXXX")"

   # The pipeline's status is that of grep, so it is also non-zero when no
   # non-comment lines were produced.
   if ! crontab -l | grep -v '^#' > "$TmpFile"; then
      errmsg "Could not capture crontab."
   elif grep -q -E '/(daily_checkpoint.sh|replica_status.sh) ' "$TmpFile"; then
      msg "Verified: Crontab for user $USER passed basic sanity check."
   else
      errmsg "Crontab for user $USER did not pass basic sanity check; missing call to daily_checkpoint.sh or replica_status.sh"
   fi

   rm -f "$TmpFile"
fi

# Compare the SDP version recorded in up to three places: the
# /p4/sdp/Version file (A, the reference), the SDP_VERSION shell variable
# (B), and, when the server is online, the SDP_VERSION counter (C).
if [[ "$DoVersionTest" -eq 1 ]]; then
   msg "${H2}\\nChecking SDP Version Id (current and legacy methods)."

   CheckCount+=1
   if [[ -r /p4/sdp/Version ]]; then
      SDPVersionA=$(get_sdp_version_from_string "$(cat /p4/sdp/Version)")
   else
      SDPVersionA="Unknown"
      errmsg "Missing SDP Version file: /p4/sdp/Version"
   fi

   CheckCount+=1
   SDPVersionB=$(get_sdp_version_from_string "$SDP_VERSION")
   if [[ "$SDPVersionA" == "$SDPVersionB" ]]; then
      msg "SDP Version from /p4/common/bin/p4_vars matches."
   else
      errmsg "SDP Version from /p4/common/bin/p4_vars isn't set or doesn't match /p4/sdp/Version:\\n[$SDPVersionB] vs. [$SDPVersionA]"
   fi

   if [[ "$ServerOnline" -eq 1 ]]; then
      CheckCount+=1
      SDPVersionC=$(get_sdp_version_from_string "$("$P4BIN" counter SDP_VERSION)")

      # A counter value of "Unknown" is only a warning; any other mismatch
      # is an error. The messages name the counter actually queried.
      if [[ "$SDPVersionA" == "$SDPVersionC" ]]; then
         msg "SDP Version from 'p4 counter SDP_VERSION' matches."
      elif [[ "$SDPVersionC" == "Unknown" ]]; then
         warnmsg "SDP Version from 'p4 counter SDP_VERSION' is not set."
      else
         errmsg "SDP Version from 'p4 counter SDP_VERSION' doesn't match /p4/sdp/Version:\\n[$SDPVersionC] vs. [$SDPVersionA]"
      fi
   fi
else
   msg "Skipping version checks per '-skip'."
fi

# Look for Helix executables that appear in PATH from any directory other
# than the SDP common bin directory. Extra server binaries count as errors;
# an extra 'p4' client binary counts only as a warning.
if [[ "$DoExcessBinaryTest" -eq 1 ]]; then
   msg "${H2}\\nChecking for Helix executables outside /p4/common/bin in PATH."
   CheckCount+=1

   # 'type -aP' is the Bash builtin that lists every PATH match for a name,
   # one per line. Reading line by line keeps paths containing whitespace
   # intact, and no external 'which' utility is required.

   # Check for excess Helix server binaries in PATH, and classify that as an error.
   for exe in p4d p4broker p4p; do
      while IFS= read -r exeInPath; do
         [[ -n "$exeInPath" ]] || continue
         if [[ "${exeInPath%/*}" != *"$P4CBIN" ]]; then
            errmsg "Executable $exe found outside /p4/common/bin in PATH: ${exeInPath%/*}"
            ExcessServerBinariesFound=1
         fi
      done < <(type -aP "$exe" 2>/dev/null)
   done

   # Check for excess Helix client binary in PATH, and classify that as a warning.
   while IFS= read -r exeInPath; do
      [[ -n "$exeInPath" ]] || continue
      if [[ "${exeInPath%/*}" != *"$P4CBIN" ]]; then
         warnmsg "Executable 'p4' found outside /p4/common/bin in PATH: ${exeInPath%/*}"
      fi
   done < <(type -aP p4 2>/dev/null)

   if [[ "$ExcessServerBinariesFound" -eq 0 ]]; then
      msg "Verified: No excess Helix server binaries found outside /p4/common/bin."
   fi
else
   msg "Skipping excess binary checks per '-skip'."
fi

# Check that the P4MASTER_ID value begins with 'master'. As with the other
# skippable tests, a skipped check is reported rather than silently omitted.
if [[ "$DoMasterIDTest" -eq 1 ]]; then
   msg "${H2}\\nChecking that P4MASTER_ID value starts with 'master'."
   CheckCount+=1
   if [[ "$P4MASTER_ID" == "master"* ]]; then
      msg "Verified: The P4MASTER_ID value starts with 'master'."
   else
      errmsg "The P4MASTER_ID value ($P4MASTER_ID) does not start with 'master'."
   fi
else
   msg "Skipping P4MASTER_ID check per '-skip'."
fi

# TODO:
# - Ensure checkpoints dir contains a checkpoint or two.
# - service password verified as set for a replica (value doesn't matter, it just needs to be set).
# - Add flag to check less-critical SDP configurables, and generate
#   warnings (rather than errors) if they are not set as expected,
#   using SDP configure_new_servers.sh script as a guide.

# Final summary: errors take precedence over warnings, and a run with
# neither is reported as all clean.
if [[ "$ErrorCount" -ne 0 ]]; then
   msg "\\n${H1}\\n\\nVerifications completed, with $ErrorCount errors and $WarningCount warnings detected in $CheckCount checks."
elif [[ "$WarningCount" -ne 0 ]]; then
   msg "\\n${H1}\\n\\nNO ERRORS: $CheckCount verifications completed, with $WarningCount warnings detected."
else
   msg "\\n${H1}\\n\\nALL CLEAN: $CheckCount verifications completed OK."
fi

# The EXIT trap runs terminate(), which is really where this script exits.
exit 0
# Change User Description Committed
#24 31566 C. Thomas Tyler Released SDP 2024.2.31564 (2025/05/14).
Copy Up using 'p4 copy -r -b perforce_software-sdp-dev'.
#23 31369 C. Thomas Tyler Released SDP 2024.2.31367 (2025/03/23).
Copy Up using 'p4 copy -r -b perforce_software-sdp-dev'.
#22 31204 Will Kreitzmann Released SDP 2024.2.31193 (2025/01/17).
Copy Up using 'p4 copy -r -b perforce_software-sdp-dev'.
#21 31077 C. Thomas Tyler Released SDP 2024.2.31075 (2024/12/20).
Copy Up using 'p4 copy -r -b perforce_software-sdp-dev'.
#20 30915 C. Thomas Tyler Released SDP 2024.1.30913 (2024/11/20).
Copy Up using 'p4 copy -r -b perforce_software-sdp-dev'.
#19 30297 C. Thomas Tyler Released SDP 2023.2.30295 (2024/05/08).
Copy Up using 'p4 copy -r -b perforce_software-sdp-dev'.
#18 30043 C. Thomas Tyler Released SDP 2023.2.30041 (2023/12/22).
Copy Up using 'p4 copy -r -b perforce_software-sdp-dev'.
#17 29143 C. Thomas Tyler Released SDP 2022.1.29141 (2022/10/29).
Copy Up using 'p4 copy -r -b perforce_software-sdp-dev'.
#16 28858 C. Thomas Tyler Released SDP 2022.1.28855 (2022/05/27).
Copy Up using 'p4 copy -r -b perforce_software-sdp-dev'.
#15 28651 C. Thomas Tyler Released SDP 2021.2.28649 (2022/03/03).
Copy Up using 'p4 copy -r -b perforce_software-sdp-dev'.
#14 28259 C. Thomas Tyler Released SDP 2021.1.28253 (2021/11/13).
Copy Up using 'p4 copy -r -b perforce_software-sdp-dev'.
#13 28240 C. Thomas Tyler Released SDP 2021.1.28238 (2021/11/12).
Copy Up using 'p4 copy -r -b perforce_software-sdp-dev'.
#12 27921 C. Thomas Tyler Released SDP 2020.1.27919 (2021/07/19).
Copy Up using 'p4 copy -r -b perforce_software-sdp-dev'.
#11 27761 C. Thomas Tyler Released SDP 2020.1.27759 (2021/05/07).
Copy Up using 'p4 copy -r -b perforce_software-sdp-dev'.
#10 27416 C. Thomas Tyler Released SDP 2020.1.27414 (2021/02/07).
Copy Up using 'p4 copy -r -b perforce_software-sdp-dev'.
#9 27407 C. Thomas Tyler Released SDP 2020.1.27403 (2021/02/06).
Copy Up using 'p4 copy -r -b perforce_software-sdp-dev'.
#8 27400 C. Thomas Tyler Released SDP 2020.1.27398 (2021/02/06).
Copy Up using 'p4 copy -r -b perforce_software-sdp-dev'.
#7 27354 C. Thomas Tyler Released SDP 2020.1.27351 (2021/01/31).
Copy Up using 'p4 copy -r -b perforce_software-sdp-dev'.
#6 27331 C. Thomas Tyler Released SDP 2020.1.27325 (2021/01/29).
Copy Up using 'p4 copy -r -b perforce_software-sdp-dev'.
#5 26470 C. Thomas Tyler Released SDP 2019.3.26468 (2020/04/10).
Copy Up using 'p4 copy -r -b perforce_software-sdp-dev'.
#4 26403 C. Thomas Tyler Released SDP 2019.3.26400 (2020/03/28).
Copy Up using 'p4 copy -r -b perforce_software-sdp-dev'.
#3 26161 C. Thomas Tyler Released SDP 2019.3.26159 (2019/11/06).
Copy Up using 'p4 copy -r -b perforce_software-sdp-dev'.
#2 25380 C. Thomas Tyler Released SDP 2019.1.25374 (2019/03/21).
Copy Up using 'p4 copy -r -b perforce_software-sdp-dev'.
#1 25245 C. Thomas Tyler Released SDP 2019.1.25238 (2019/03/02).
Copy Up using 'p4 copy -r -b perforce_software-sdp-dev'.
//guest/perforce_software/sdp/dev/Server/Unix/p4/common/bin/verify_sdp.sh
#2 25206 C. Thomas Tyler Removed logic that uses 'p4d -cset' to force the value for
P4JOURNAL, and also automatic journal rotation on server
startup.

Added related logic to verify_sdp.sh to ensure there is
one source of truth for the P4JOURNAL definition.

=== On Journal Rotation at Server Startup ===

The goal with journal rotation on server startup is noble, to
make it so any potential journal corruption *always* appears
at the end of a numbered journal file, rather than being in
the middle of the active journal.  This can make it easier and
faster to recover from journal corruption caused by sudden power
loss, kernel panic, a p4d bug/crash, etc.

However, the implementation causes problems (noted below).

=== On Forcing P4JOURNAL ===

The goal of forcing the value of P4JOURNAL via db.config
is also noble, in that having a value anything other than
the SDP standard can really wreak havoc with things.  This
is generally not an issue in a 'fresh' SDP install, but can
be an issue (wreak havoc!) in cases where 'p4 configure' was
used to set a value for P4JOURNAL that conflicts with the
value defined by the SDP environment mechanism, which is in
turn passed to 'p4d' on the command line.  Even if the value
is defined differently, it should be set to exactly one value,
by exactly one mechanism.

The current implementation causes problems (noted below).

== Problems with setting P4JOURNAL in db.config ==

1. Things  Break

The forced P4JOURNAL set via 'p4d -cset' causes a mild form of
journal corruption that breaks 'standby' replicas using
journalcopy, as this type of replica is extremely sensitive to
the contents of every byte in the journal file, and doesn't
allow for use of 'p4d -cset' to modify the P4JOURNAL file.

While it does not cause any actual loss of data, it does require
manual reset to fix things.  In the case of a site-wide topology
with a mandatory standby replica, it causes global replication to
stall.

2. Not our Place (not the place of SDP scripts)

Based on the above and taking a step back, I think this
script behavior of forcing a back-door journal rotation is simply
too intrusive for what SDP scripts should be allowed to do.
They live to have some understanding of p4d workings, but shouldn't
pretend to have too much insight into the inner workings of p4d.

== Problem with Always-On Journal Rotation on Start ==

1. What the wah?

This confuses admins by incrementing the journal counter
unexpectedly.  In Battle School training classes, for example,
students (Perforce admins) are confused by seemingly random
journal incrementing.  While this could be documented and trained
for, it violates the principle of least surprise, and is not
typical 'p4d' behavior.

2. Always vs. Rare

It rotates the journal even when there is no corruption,
which is of course 99.99999% or more of the time at any given site.
Anyone who has been through a corruption scenario is happy to
have the corruption at the end rather than in the middle of a
journal file -- as noted, the intent here is noble.  But before
we do any journal rotations, we should detect whether there is
corruption.  Turns out we have a means to detect journal corruption
at the end of the current/active journal file, and should employ
such detection and handle it in some appropriate manner, e.g.
by expanding the 'force_start' logic in this p4d_base init
script.

Journal corruption detection and preliminary handling may be added
in a future SDP release.  When the journal is truly corrupted,
global replication will stall in any case, so measures like journal
file rotation may be called for in that scenario.

3. Accelerated Deletion of Backups

Increased journal counter rotations result in unexpectedly
fast removal of backups.  Admins are used to thinking that
roughly, "one journal rotation is roughly one day."  Settings
like KEEPLOGS, KEEPCKPS, and KEEPJNLS trigger off the number
of journal rotations, not the number of actual calendar days.

Now, I think it's OK that journal rotations and days don't
match precisely.  In a typical "big deal" maintenance window,
for example, there might be an additional 1-3 journal rotations
induced by extra checkpoints or journals being created over the
course of a maintenance activity.  But there may be a dozen more
'p4d' restarts during sanity testing and playing around with things.

With the current logic, each restart causes another journal rotation.
By the end of the weekend, your next call to daily_checkpoint might
remove more of your recent backups than you'd like or expect. (A
long standing safety feature always preserves the last few, but still
we don't want to delete more than desired.)

=== Food for Thought: KEEP* = number of days? ===

Making it so KEEPLOGS/KEEPJNLS/KEEPCKPS mean literally number
of days rather than journal rotations is worthy of consideration.
That's beyond the scope of this change though.

#review @robert_cowham @josh
#1 24804 C. Thomas Tyler Terminology tweak, 'validate' -> 'verify'.

#review @robert_cowham
//guest/perforce_software/sdp/dev/Server/Unix/p4/common/bin/validate_sdp.sh
#3 24534 C. Thomas Tyler Various enhancements and internal refactoring for validate_sdp.sh.

Added important check that /p4/N is a dir, not a symlink.

#review-24532 @robert_cowham
#2 24356 C. Thomas Tyler Enhancements to validate_sdp.sh:
* Added simple bold ALL CLEAN message to look for.
* Added check_env_var() function to check shell environment
  variables, with some calls to it.
* Added check_configurable() to check for configurables.  This
  is implemented using back-door check methodology (using
  'p4d_N -cshow') so values can be checked with P4D offline.
  This replaced stub function check_var().
* Removed stub function check_configurables().  It's easier
  to understand if all checks in the Main section of the code.
* Changed so checks requiring p4d to be online are not done
  by default; added '-online' flag to run those tests.  This
  is because I anticipate typical usage of the validator to
  be requiring it to report ALL CLEAN before starting
  P4D after a server upgrade.
* Added check for new $SDP_ADMIN_PASSWORD_FILE variable.
* Added check admin password file pointed to by $SDP_ADMIN_PASSWORD_FILE.
* Added errmsg() function, with corresponding tweak to bail().
* Consolidated Log and LOGFILE to just LOGFILE.
* Removed a few items from TODO comments that got done.
* Made a few tweaks for style normalization:
  - Functions are lowercase with underscore separators.
  - Functions vars are lowercase-initiated camelCase.
  - Indentation: 3 spaces for functions/loops/etc.
* Added run() function replacing cmd() stub function.
       * Enhanced p4login check.
* Added comment noting why this script uses self-contained
  copies of functions defined in other SDP files in /p4/common/lib.
* And other things.

Warning: In the short run, this may fail tests as the new
SDP_ADMIN_PASSWORD_FILE variable is also pending review.

#review @robert_cowham
#1 23640 Robert Cowham Super basic validation - placeholder for many more tests to come!