backup_functions.sh #57

#!/bin/bash
#==============================================================================
# Copyright and license info is available in the LICENSE file included with
# the Server Deployment Package (SDP), and also available online:
# https://swarm.workshop.perforce.com/projects/perforce-software-sdp/view/main/LICENSE
#------------------------------------------------------------------------------

set -u

# Global Variables.
export P4DInitScript=
export P4DSystemdServiceFile=
export P4BrokerInitScript=
export P4BrokerSystemdServiceFile=
export P4ProxyInitScript=
export P4ProxySystemdServiceFile=
export CKPTMPDIR=
export OFFLINE_DB=
export EDGESERVER=
export STANDBYSERVER=

# Common functions used in various SDP scripts.

#------------------------------------------------------------------------------
# Verify key variables in the shell environment exist, or else abort.
#
# If checks in this function fail, this function an 'echo' and 'exit 1'
# rather than calling 'log' or 'die', as this function is generally called
# early in processing, before the log is initialized.
#------------------------------------------------------------------------------
check_vars () {

   local CheckVarsPreflightOK=1
   CommonVars="SDP_INSTANCE P4HOME P4PORT P4ROOT P4JOURNAL P4BIN P4DBIN P4TICKETS P4TRUST KEEPCKPS KEEPJNLS KEEPLOGS CHECKPOINTS LOGS OSUSER"
   InstanceVars="P4MASTER_ID P4MASTERPORT"

   # First, check vars that should be set in /p4/common/bin/p4_vars.
   for var in $CommonVars; do
      # Detect unset variables, using ':-' to avoid 'unbound variable' errors.
      # shellcheck disable=SC1083
      if [[ -z "$(eval echo \${"$var":-})" ]]; then
         echo "Error: Required variable \$$var is NOT set. It should be set in /p4/common/bin/p4_vars."
         CheckVarsPreflightOK=0
      fi
   done

   # Next, check vars that should be set in /p4/common/config/p4_N.instance.
   # For some variables, provide additional details that help help users correct
   # the problem.
   for var in $InstanceVars; do
      # shellcheck disable=SC1083
      if [[ -z "$(eval echo \${"$var":-})" ]]; then
         echo "Error: Required variable \$$var is NOT set. It should be set in /p4/common/config/p4_N.vars, where N is the SDP instance name."

         if [[ "$var" == "P4MASTER_ID" ]]; then
            echo "The value for P4MASTER_ID should be the name of the ServerID of the master server."
         fi

         CheckVarsPreflightOK=0
      fi
   done

   if [[ "$CheckVarsPreflightOK" -eq 0 ]]; then
      echo "Use p4master_run or source p4_vars before calling this script."
      echo "Aborting to to errors in shell environment preflight checks."
      exit 1
   fi
}

#------------------------------------------------------------------------------
#  is_edge ($ServerID, $RootDir)
#
# Determine if a given ServerID is an edge server or not, checking a given
# database root dir (e.g. $P4ROOT or $OFFLINE_DB).
#
# Input:
# $1 - ServerID (required)
# $2 - RootDir (optional, defaults to $P4ROOT)
#
# Output YES if an edge server, NO otherwise.
#
#------------------------------------------------------------------------------
is_edge () {
   local ServerID=${1:-Unset}
   local RootDir=${2:-$P4ROOT}
   local ServicesData=
   local EdgeCheck=

   # Extract a slice of db.server referencing the given ServerID,
   # and then grab the field containing Services data.
   ServicesData=$("$P4DBIN" -r "$RootDir" -J off -L /dev/null -k db.server -jd - 2>&1 |\
      $GREP "@db.server@ @${ServerID}@" | "$CUT" -d '@' -f 13)

   # Do a bitwise operation to determine if the ServicesData value indicates
   # this is an edge server.
   if [[ -n "$ServicesData" ]]; then
      EdgeCheck=$((ServicesData & 4096))

      if [[ "$EdgeCheck" -gt 0 ]]; then
         echo YES
      else
         echo NO
      fi
   else
      echo NO
   fi
}

#------------------------------------------------------------------------------
# is_standby ($ServerID, $RootDir)
#
# Determine if a given ServerID is an standby server or not, checking a given
# database root dir (e.g. $P4ROOT or $OFFLINE_DB).
#
# Input:
# $1 - ServerID (required)
# $2 - RootDir (optional, defaults to $P4ROOT)
#
# Output YES if an standby server, NO otherwise.
#
#------------------------------------------------------------------------------
is_standby () {
   local ServerID="${1:-Unset}"
   local RootDir="${2:-$P4ROOT}"
   local ServicesData=

   # Extract a slice of db.server referencing the given ServerID,
   # and then grab the field containing Services data.
   ServicesData=$("$P4DBIN" -r "$RootDir" -J off -L /dev/null -k db.server -jd - 2>&1 |\
      "$GREP" "@db.server@ @${ServerID}@" | "$CUT" -d '@' -f 13 | tr -d ' ')

   # Do a check to see if the ServicesData value indicates
   # this is an standby server.
   if [[ -n "$ServicesData" ]]; then
      if [[ "$ServicesData" -eq '35141' ]]; then
         echo YES
      else
         echo NO
      fi
   else
      echo NO
   fi
}

#------------------------------------------------------------------------------
# Set variables for use in various scripts:
# OFFLINE_DB=path to offline db directory
# EDGESERVER=1 if this is an edge server, 0 otherwise.
# STANDBYSERVER=1 if this is a standby server, 0 otherwise.
#
# This must be called after loading the standard shell environment by
# doing:
# source /p4/common/bin/p4_vars N
#
# This sets P4HOME, SERVERID, etc. needed by this function.
#------------------------------------------------------------------------------
set_vars () {

   P4DInitScript="$P4HOME/bin/p4d_${SDP_INSTANCE}_init"
   P4DSystemdServiceFile="/etc/systemd/system/p4d_${SDP_INSTANCE}.service"
   P4BrokerInitScript="$P4HOME/bin/p4broker_${SDP_INSTANCE}_init"
   P4BrokerSystemdServiceFile="/etc/systemd/system/p4broker_${SDP_INSTANCE}.service"
   P4ProxyInitScript="$P4HOME/bin/p4p_${SDP_INSTANCE}_init"
   P4ProxySystemdServiceFile="/etc/systemd/system/p4p_${SDP_INSTANCE}.service"

   OFFLINE_DB="${P4HOME}/offline_db"
   CKPTMPDIR="${CHECKPOINTS}/ckp_tmp"

   # shellcheck disable=SC2153
   if [[ -n "$SERVERID" ]]; then
      if [[ "$(is_edge "$SERVERID")" == YES ]]; then
         export EDGESERVER=1
      else
         export EDGESERVER=0
      fi
   else
      export EDGESERVER=0
   fi

   if [[ -n "$SERVERID" ]]; then
      if [[ "$(is_standby "$SERVERID")" == YES ]]; then
         export STANDBYSERVER=1
         # Get commit server from P4TARGET setting in database
      else
         export STANDBYSERVER=0
      fi
   else
      export STANDBYSERVER=0
   fi

   # Ensure that SDP_ADMIN_PASSWORD_FILE is set, using existing value if set (e.g.
   # in p4_vars), otherise set it to the SDP standard value.
   export SDP_ADMIN_PASSWORD_FILE="${SDP_ADMIN_PASSWORD_FILE:-Unset}"
   if [[ "$SDP_ADMIN_PASSWORD_FILE" == Unset ]]; then
      export SDP_ADMIN_PASSWORD_FILE="$P4CCFG/.p4passwd.${P4SERVER}.admin"
   fi
}

#------------------------------------------------------------------------------
# Check if user is running as required OS user.
#------------------------------------------------------------------------------
check_uid () {
   user=$(id -un)
   if [[ "${user}" != "${OSUSER}" ]]; then
      die "Must be run by user: ${OSUSER}. Abort!"
   fi
}

#------------------------------------------------------------------------------
# Function log() - echo message to logfile or stdout.
#
# If $LOGFILE is defined, write message to the log file only; nothing goes to
# stdout.  Prepend a datestamp.
# If $LOGFILE isn't defined, just echo to stdout, w/o timestamp or.
# In all cases, support '-e' formatting.
# Input:
# $1 - message to log (must be quoted).
#------------------------------------------------------------------------------
log () {
   if [[ "${LOGFILE:-Unset}" != Unset ]]; then
      echo -n "$(date)" >> "$LOGFILE" 2>&1
      echo -e " $0: $*" >> "$LOGFILE" 2>&1
   else
      echo -e "$@"
   fi
}

#------------------------------------------------------------------------------
# Decide depending on our mail utility, how to specify sender (if we need to).
# Mail on some platforms sets sender by default.
# If the mail utility returns what looks like a version identifier
# when given the '-V' flag, use a '-S' flag.  If it does not return a
# verision identifier, don't set a mail sender option.
# Allow GNU Mailutils alternative flag instead.
#------------------------------------------------------------------------------
get_mail_sender_opt () {
   local mail_sender_opt=
   local mail_ver=
   if [[ -n "$MAILFROM" ]]; then
      mail_ver=$($SDPMAIL -V 2>&1)
      if [[ "$mail_ver" =~ "GNU Mailutils" ]]; then
         mail_sender_opt="-aFrom:$MAILFROM"
      elif  [[ "$mail_ver" =~ ^[0-9]+\.[0-9] ]]; then
         mail_sender_opt="-S from=$MAILFROM"
      fi
   fi
   echo "$mail_sender_opt"
}

#------------------------------------------------------------------------------
# Email the log file by $LOGFILE.
#------------------------------------------------------------------------------
mail_log_file () {
   local subject=$1
   local mail_sender_opt
   mail_sender_opt=$(get_mail_sender_opt)
   $SDPMAIL -s "$subject" "$mail_sender_opt" "$MAILTO" < "$LOGFILE"
}

#------------------------------------------------------------------------------
# Function die() - log message, send email, and exit.
# If $LOGFILE is defined, write message to the log file, email log,
# and exit.
# If $LOGFILE is not defined, write message to the stdout, and skip
# email.
# If in terminal session, display message to stderr as well.
#------------------------------------------------------------------------------
die () {
   # mail the error (with more helpful subject line than cron)
   log "ERROR!!! - $HOSTNAME $P4SERVER $0: $*"

   if [[ "${LOGFILE:-Unset}" != Unset ]]; then
      mail_log_file "ERROR!!! - $HOSTNAME $P4SERVER $0: $*"
   fi

   # if running from terminal, also send to stderr
   if tty >/dev/null; then
      echo -e "$@" >&2
   fi

   rm -f "${LOGS}/ckp_running.txt"

   exit 1
}

#------------------------------------------------------------------------------
# Convert various byte values (K,M,G,%) to bytes
# Pass in values such as 1024K, 512M, 1G or 10%
#------------------------------------------------------------------------------
convert_to_bytes () {
   local value=$1
   local totalsize=${2:-Undefined}
   local size=
   local unit=

   # Break up value into size (numeric) and unit (K,M,G)
   size=$("$GREP" -Eo '[[:alpha:]%]+|[0-9]+' <<< "$value" | head -1)
   unit=$("$GREP" -Eo '[[:alpha:]%]+|[0-9]+' <<< "$value" | tail -1)

   # Based on unit, convert to bytes
   case "$unit" in
      K)
         echo $((size * 1024))
         ;;
      M)
         echo $((size * 1024**2))
         ;;
      G)
         echo $((size * 1024**3))
         ;;
      %)
         echo $((totalsize * size / 100))
         ;;
   esac
}

#------------------------------------------------------------------------------
# Write a semaphore file, $LOGS/ckp_running.txt.  This file is written at
# the start of processing, and removed upon successful completion.  It
# prevents multiple concurrent operations from being launched accidentally
# e.g. by multiple human admins, or a human inadvertantly competing with a
# cron job.
#
# It is also intended to get human admins to determine the root cause of
# checkpoint failues.
#------------------------------------------------------------------------------
ckp_running() {
   if [[ -f "${LOGS}/ckp_running.txt" ]]; then
      die "Last checkpoint not complete. Check the backup process or contact support."
   fi
   echo "Checkpoint running." > "${LOGS}/ckp_running.txt"
}

#------------------------------------------------------------------------------
# Remove the ckp_running.txt semaphore file when checkpoint processing is
# complete.
#------------------------------------------------------------------------------
ckp_complete() {
   rm -f "${LOGS}/ckp_running.txt"
}

#------------------------------------------------------------------------------
# Ensure key directories are writable. Abort if they are not.
#------------------------------------------------------------------------------
check_dirs () {
   # Check that key dirs are writable
   declare -i dirsOK=1
   dirList="$OFFLINE_DB $CHECKPOINTS $LOGS"
   [[ "$EDGESERVER" -eq 1 ]] && dirList+=" ${CHECKPOINTS}.${SERVERID#p4d_}"
   for dir in $dirList; do
      if [[ ! -d "$dir" || ! -w "$dir" ]]; then
         log "Error: Dir $dir does not exist or is not writable."
         dirsOK=0
      fi
   done
   [[ "$dirsOK" -eq 1 ]] || die "Some expected dirs are missing or not writable. Aborting."
}

#------------------------------------------------------------------------------
# Add the results of df -h or df -m to the log file.
#------------------------------------------------------------------------------
check_disk_space () {
   log "Checking disk space..."
   $P4BIN diskspace >> "$LOGFILE" 2>&1
}

#------------------------------------------------------------------------------
# Check value of journal; ensure it is an integer.
#------------------------------------------------------------------------------
check_journalnum () {
   local JNLNUM=${1:-Unset}
   re='^[0-9]+$'
   if ! [[ $JNLNUM =~ $re ]] ; then
      die "The journal counter value [$JNLNUM] is invalid. It must be numeric."
   fi
}

#------------------------------------------------------------------------------
# Check the checkpoints directory for the oldest checkpoint
#------------------------------------------------------------------------------
get_ckpnum () {
   if [[ "$EDGESERVER" -eq 0 ]]; then
      # shellcheck disable=SC2034 disable=SC2012 disable=SC2016
      OLDESTCHECKPOINT=$(ls -1tr "${CHECKPOINTS}/" | "$GREP" ckp | "$GREP" -v md5 | head -n 1 | "$AWK" -F '.ckp.' '{ print $(2) }' | tr -d '.gz')
   else
      # shellcheck disable=SC2034 disable=SC2012 disable=SC2016
      OLDESTCHECKPOINT=$(ls -1tr "${CHECKPOINTS}.${SERVERID#p4d_}/" | "$GREP" ckp | "$GREP" -v md5 | head -n 1 | "$AWK" -F '.ckp.' '{ print $(2) }' | tr -d '.gz')
   fi
}

#------------------------------------------------------------------------------
# Determine journal counter by checking counter in db.counters.
#------------------------------------------------------------------------------
get_journalnum () {
   # get the current journal and checkpoint serial numbers.
   local nextCheckpointNum
   if [[ -r "$P4ROOT/db.counters" ]]; then
      nextCheckpointNum=$("$P4DBIN" -r "$P4ROOT" -k db.counters -jd - 2>&1 | grep @journal@ | cut -d '@' -f 8)

      if [[ -n "$nextCheckpointNum" ]]; then
         check_journalnum "$nextCheckpointNum"
         JOURNALNUM="$nextCheckpointNum"
      else
         # Special case: If db.counters is empty, then we have a new/empty data
         # set, so just set the value to 0.
         JOURNALNUM=0
      fi
   else
      # Special case: If db.counters doesn't exist, then we have a new/empty
      # data set, so just set the value to 0.
      JOURNALNUM=0
   fi

   # If we are on an edge server, the journal has already rotated, so we have to decrement the value
   # so that we replay the correct journal file and create the correct checkpoint number on the
   # edge server.
   #
   # In the case of a standby server, the journal rotation occurs on the master server,
   # so we don't need to increment the journal number again, so we decrement by 1.
   # Also, when replaying the journals to the offline db, we don't want to play to the live journal
   # because it is still being replicated.
   if [[ "$EDGESERVER" -eq 1 || "$STANDBYSERVER" -eq 1 ]]; then
      JOURNALNUM=$((JOURNALNUM - 1))
   fi
   CHECKPOINTNUM=$((JOURNALNUM + 1))
}

#------------------------------------------------------------------------------
# Determine journal space usage and minimum disk space requirement
#------------------------------------------------------------------------------
get_journal_stats () {
   # Get minimum disk space required on server journal filesystem before server rejects commands
   # This will return the configured and default value, but grab the configured value which shows first
   # If a configured value is not present, it will use the default value
   # shellcheck disable=SC2034 disable=SC2016
   P4JOURNALMIN=$("$P4BIN" configure show filesys.P4JOURNAL.min | "$AWK" '{ print $1 }' | $CUT -d'=' -f2 | head -1)
   # Get current journal free disk space
   # shellcheck disable=SC2034
   P4JOURNALFREE=$("$P4BIN" -ztag -F "%freeBytes%" diskspace P4JOURNAL)
   # Get total available disk space for journal
   # shellcheck disable=SC2034
   P4JOURNALTOTAL=$("$P4BIN" -ztag -F "%totalBytes%" diskspace P4JOURNAL)
}

#------------------------------------------------------------------------------
# Verify that the offline databases are usable by checking the existence
# of a 'offline_db_usable.txt' file that is written only when databases
# are in a known-good state, following successful recovery from a checkpoint.
#------------------------------------------------------------------------------
check_offline_db_usable () {
   # Check it is OK
   if [[ ! -f "$OFFLINE_DB/offline_db_usable.txt" ]]; then
      die "Offline database not in a usable state. Check the backup process."
   fi

   if [[ ! -f "$OFFLINE_DB/db.counters" ]]; then
      die "Offline database not found. Consider creating it with live_checkpoint.sh. Be aware that it locks the live system and can take a long time! Abort!"
   fi
}

#------------------------------------------------------------------------------
# Determine journal counter in offline databases.
#------------------------------------------------------------------------------
get_offline_journal_num () {
   # Get the journal number of the offline database
   check_offline_db_usable
   OFFLINEJNLNUM=$("$P4DBIN" -r "$OFFLINE_DB" -jd - db.counters 2>&1 | grep '@journal@' | cut -d "@" -f 8 2>> "$LOGFILE") || die "Cannot get the offline journal number. Abort!"
   check_journalnum "$OFFLINEJNLNUM"
   log "Offline journal number is: $OFFLINEJNLNUM"
}

#------------------------------------------------------------------------------
# Cleanup old log files.
#------------------------------------------------------------------------------
remove_old_checkpoints_and_journals () {
   declare CheckpointsDir=
   declare StandbyReplicaJournalsDir=
   declare FilePrefix=
   declare JournalPrefix=

   if [[ "$KEEPCKPS" -eq 0 ]]; then
      log "Skipping cleanup of old checkpoints because KEEPCKPS is set to 0."
   else
      log "Deleting obsolete checkpoints and journals. Keeping latest $KEEPCKPS  per KEEPCKPS setting in p4_vars."
      # For the master server, we can safely rely on the SDP standard that the journalPrefix
      # is of the form '/p4/N/checkpoints/p4_N'. For replicas and edge servers, determine
      # the values dynamically based on the current journalPrefix value for the given ServerID.
      if [[ "$SERVERID" == "$P4MASTER_ID" ]]; then
         # Refer to ckp/jnl files starting like: /p4/N/checkpoints/p4_N
         CheckpointsDir="${CHECKPOINTS}"
         FilePrefix="${P4SERVER}"
      else
         JournalPrefix="$("$P4DBIN" -r "$P4ROOT" -k db.config -jd - | grep "@${SERVERID}@ @journalPrefix@" | cut -d '@' -f 10)"
         [[ -n "$JournalPrefix" ]] || die "Could not determine journalPrefix for ServerID $SERVERID."
         CheckpointsDir="${JournalPrefix%/*}"
         FilePrefix="${JournalPrefix##*/}"
      fi

      if [[ -d "$CheckpointsDir" ]]; then
         # Remove selected checkpoint and journal files based on the KEEPCKPS
         # setting regardless of whether compressed or not.
         # We multiply KEEPCKP by 2 for the ckp files because of the md5 files.
         # shellcheck disable=SC2012
         for I_LOGFILE in $(ls -t "${CheckpointsDir}/${FilePrefix}".ckp.* 2>/dev/null | "$AWK" "NR > ($KEEPCKPS * 2)"); do
            log "rm -f $I_LOGFILE"
            rm -f "$I_LOGFILE"
         done

         # Use KEEPJNLS to allow for separate journal rotation at a higher
         # frequency.
         # shellcheck disable=SC2012
         for I_LOGFILE in $(ls -t "${CheckpointsDir}/${FilePrefix}".jnl.* 2>/dev/null | "$AWK" "NR > $KEEPJNLS"); do
            log "rm -f $I_LOGFILE"
            rm -f "$I_LOGFILE"
         done
      fi

      StandbyReplicaJournalsDir="${P4HOME}/journals.rep"
      if [[ -d "$StandbyReplicaJournalsDir" ]]; then
         # shellcheck disable=SC2012
         for I_LOGFILE in $(ls -t "$StandbyReplicaJournalsDir/${FilePrefix}".ckp.* 2>/dev/null | "$AWK" "NR > ($KEEPCKPS * 2)"); do
            log "rm -f $I_LOGFILE"
            rm -f "$I_LOGFILE"
         done

         # shellcheck disable=SC2012
         for I_LOGFILE in $(ls -t "${StandbyReplicaJournalsDir}/${FilePrefix}".jnl.* 2>/dev/null | "$AWK" "NR > $KEEPJNLS"); do
            log "rm -f $I_LOGFILE"
            rm -f "$I_LOGFILE"
         done
      fi
   fi
}

#------------------------------------------------------------------------------
# Shutdown p4d using systemd if configured for systemd. Otherwise call the
# underlying init script directly.
#
# Log the shutdown activity.
#
#------------------------------------------------------------------------------
stop_p4d () {
   log "Shutting down the ${P4DBIN##*/} server."

   if [[ -r "$P4DSystemdServiceFile" ]]; then
      { sudo systemctl stop "${P4DBIN##*/}"; } >> "$LOGFILE" 2>&1 ||\
         die "Failed to execute: sudo systemctl stop ${P4DBIN##*/}"

      # systemd will not reliably shutdown a service if it wasn't started
      # with systemd. So for good measure, follow up with a call to the
      # SysV init script.
      "$P4DInitScript" stop >> "$LOGFILE" 2>&1
   else
      "$P4DInitScript" stop >> "$LOGFILE" 2>&1
   fi

   log "Stopped ${P4DBIN##*/} server."
}

#------------------------------------------------------------------------------
# Shutdown p4broker using systemd if configured for systemd. Otherwise call the
# underlying init script directly.
#
# Log the shutdown activity.
#
#------------------------------------------------------------------------------
stop_p4broker () {
   log "Shutting down the ${P4BROKERBIN##*/} server."

   if [[ -r "$P4BrokerSystemdServiceFile" ]]; then
      { sudo systemctl stop "${P4BROKERBIN##*/}"; } >> "$LOGFILE" 2>&1 ||\
         die "Failed to execute: sudo systemctl stop ${P4BROKERBIN##*/}"

      "$P4BrokerInitScript" stop >> "$LOGFILE" 2>&1
   else
      "$P4BrokerInitScript" stop >> "$LOGFILE" 2>&1
   fi

   log "Stopped ${P4BROKERBIN##*/} server."
}

#------------------------------------------------------------------------------
# Shutdown p4p using systemd if configured for systemd. Otherwise call the
# underlying init script directly.
#
# Log the shutdown activity.
#
#------------------------------------------------------------------------------
stop_p4p () {
   log "Shutting down the ${P4PBIN##*/} server."

   if [[ -r "$P4ProxySystemdServiceFile" ]]; then
      { sudo systemctl stop "${P4PBIN##*/}"; } >> "$LOGFILE" 2>&1 ||\
         die "Failed to execute: sudo systemctl stop ${P4PBIN##*/}"

      "$P4ProxyInitScript" stop >> "$LOGFILE" 2>&1
   else
      "$P4ProxyInitScript" stop >> "$LOGFILE" 2>&1
   fi

   log "Stopped ${P4PBIN##*/} server."
}

#------------------------------------------------------------------------------
# Start p4d using systemd if configured for systemd. Otherwise call the
# underlying init script directly.
#
# Log the startup activity.
#
# Return status indicates whether the server started successfully or not.
#------------------------------------------------------------------------------
start_p4d () {

   log "Starting the ${P4DBIN##*/} server."

   if [[ -r "$P4DSystemdServiceFile" ]]; then
      { sudo systemctl start "${P4DBIN##*/}"; } ||\
         die "Failed to execute: sudo systemctl start ${P4DBIN##*/}"
   else
      "$P4DInitScript" start >> "$LOGFILE" 2>&1
   fi

   # Confirm that it started.  Success below means it did.
   if "$P4BIN" -u "$P4USER" -p "$P4PORT" info -s >/dev/null 2>&1 ; then
      log "Server $P4SERVER started successfully."
      return 0
   else
      log "Error: Server ${P4DBIN##*/} does not appear to have started. Tailing $P4LOG:"
      tail "$P4LOG" >> "$LOGFILE" 2>&1
      return 1
   fi
}

#------------------------------------------------------------------------------
# Start p4broker using systemd if configured for systemd. Otherwise call the
# underlying init script directly.
#
# Log the startup activity.
#------------------------------------------------------------------------------
start_p4broker () {

   log "Starting the ${P4BROKERBIN##*/} server."

   if [[ -r "$P4BrokerSystemdServiceFile" ]]; then
      { sudo systemctl start "${P4BROKERBIN##*/}"; } ||\
         die "Failed to execute: sudo systemctl start ${P4BROKERBIN##*/}"
   else
      "$P4BrokerInitScript" start >> "$LOGFILE" 2>&1
   fi
}

#------------------------------------------------------------------------------
# Start p4p using systemd if configured for systemd. Otherwise call the
# underlying init script directly.
#
# Log the startup activity.
#------------------------------------------------------------------------------
start_p4p () {

   log "Starting the ${P4PBIN##*/} server."

   if [[ -r "$P4ProxySystemdServiceFile" ]]; then
      { sudo systemctl start "${P4PBIN##*/}"; } ||\
         die "Failed to execute: sudo systemctl start ${P4PBIN##*/}"
   else
      "$P4ProxyInitScript" start >> "$LOGFILE" 2>&1
   fi
}

#------------------------------------------------------------------------------
# Do a front-door 'p4d admin journal' command to rotate the current/active
# journal file on the master server, starting a fresh new P4JOURNAL file.
#
# In a distributed topology with replicas/edge servers, this function must
# be called only on the master/commit server.
#------------------------------------------------------------------------------
truncate_journal () {
   declare CheckpointFile="${CHECKPOINTS}/${P4SERVER}.ckp.${CHECKPOINTNUM}.gz"
   declare JournalFile="${CHECKPOINTS}/${P4SERVER}.jnl.${JOURNALNUM}"

   if [[ "$SERVERID" == "$P4MASTER_ID" ]]; then
      [[ -f "$CheckpointFile" ]] && \
         die "Checkpoint $CheckpointFile already exists, check the backup process."
      [[ -f "$JournalFile" ]] && \
         die "Journal $JournalFile already exists, check the backup process."

      log "Truncating journal..."
      # During journal rotation, either by a front-door 'p4 admin journal' or a
      # back-door 'p4d -jj', p4d does a copy-then-delete rather than an mv at
      # the OS level.  During rotation, the perforce server will pause
      # responses to clients (just as with a checkpoint), but this should be
      # for a short period of time even for large data sets, as the journal
      # typically represents a single day of metadata.
      # Curly braces capture output of 'time'.
      "$P4CBIN"/p4login -p "$P4MASTERPORT"
      { time "$P4BIN" -p "$P4MASTERPORT" admin journal; } >> "$LOGFILE" 2>&1 || { die "Journal rotation failed. Abort!"; }
      # The test below waits until the journal file exists in the checkpoints directory before proceeding.
      test=1
      while [[ $test != 0 ]]; do
         sleep 5
         if [[ -f "$JournalFile" ]]; then
            test=0
         fi
      done
      "$P4CBIN"/p4login
   else
      log "Warning: The truncate_journal () function should only be called on the master server. It is ignored on edge and replica replica servers."
   fi
}

#------------------------------------------------------------------------------
# Call 'p4d -jj' to rotate the current/active journal file on the master
# server from an edge server, starting a fresh new P4JOURNAL file.
#
# In a distributed topology with edge and standby servers, this function can be
# used to trigger a journal rotation on master/commit server. It's not meant to
# be used from the master server itself.
#------------------------------------------------------------------------------
truncate_journal_on_master () {
   # Increment Edge journal number since the journal will increment on the master after calling journal rotation
   local EdgeJournalNum=$((JOURNALNUM + 1))
   local StandbyJournalNum=$((JOURNALNUM + 2)) # If using journalcopy, have to add 2 since live journal is in checkpoints folder
   local JournalFile=

   if [[ "$EDGESERVER" -eq 1 ]]; then
      # Refer to ckp/jnl files starting like (example ServerID=p4d_edge_nyc):
      # /p4/N/checkpoints.edge_nyc/p4_N.edge_nyc
      JournalFile="${CHECKPOINTS}.${SERVERID#p4d_}/${P4SERVER}.${SERVERID#p4d_}.jnl.${EdgeJournalNum}"
   elif [[ "$STANDBYSERVER" -eq 1 ]]; then
      JournalFile="${CHECKPOINTS}/${P4SERVER}.jnl.${StandbyJournalNum}"
   fi

   if [[ "$SERVERID" != "$P4MASTER_ID" ]]; then
      [[ -f "$JournalFile" ]] && \
         die "Journal $JournalFile already exists, check the backup process."

      log "Truncating journal on ${P4MASTERPORT}."
      # 'p4d -jj' does a copy-then-delete, instead of a simple mv.
      # During 'p4d -jj' the perforce server will hang the responses to clients,
      # this should be for a very short period of time even for large data
      # sets, as the journal represents a single day of metadata.
      # Curly braces capture output of 'time'.
      "$P4CBIN"/p4login -p "$P4MASTERPORT"
      { time "$P4BIN" -p "$P4MASTERPORT" admin journal; } >> "$LOGFILE" 2>&1 || { die "Journal rotation failed. Abort!"; }
      # The test below waits until the journal file exists in the checkpoints directory before proceeding.
      test=1
      while [[ $test != 0 ]]; do
         sleep 5
         if [[ -f "$JournalFile" ]]; then
            test=0
         fi
      done
      "$P4CBIN"/p4login -service
   else
      log "Warning: truncate_journal_on_master () function should not be called on a master server. Ignoring."
   fi
}

#------------------------------------------------------------------------------
# Similar to truncate_journal() above, p4d_truncate_journal() is intended to be
# usable form the p4d_base init script, to allow journal rotation on p4d
# start.  As it may be called from the init script, it may be called on the
# master, a replica, or the edge. However, it should will only do the journal
# rotation if called on the master.
#------------------------------------------------------------------------------
p4d_truncate_journal () {
   declare JournalFile="${CHECKPOINTS}/${P4SERVER}.jnl.${JOURNALNUM}"

   if [[ "$SERVERID" == "$P4MASTER_ID" ]]; then
      [[ -f "$JournalFile" ]] && \
         die "Journal $JournalFile already exists, check the backup process."
      log "Rotating journal prior to starting p4d."
      "$P4DBIN" -r "$P4ROOT" -J "$P4JOURNAL" -jj >> "$LOGFILE" 2>&1 ||\
         die "Failed to rotate journal. Aborting p4d server start."
   else
      log "Warning: The p4d_truncate_journal() function has no effect if called on a server other than the master. Ignoring."
   fi
}

#------------------------------------------------------------------------------
# Replay any and all numbered journal files into the offline databases.
#------------------------------------------------------------------------------
replay_journals_to_offline_db () {
   local CheckpointsDir=
   local FilePrefix=
   local NumberedJournal=
   local JournalPrefix=

   log "Replay any unreplayed journals to the offline database."

   # For the master server, we can safely rely on the SDP standard that the journalPrefix
   # is of the form '/p4/N/checkpoints/p4_N'. For replicas and edge servers, determine
   # the values dynamically based on the current journalPrefix value for the given ServerID.
   if [[ "$SERVERID" == "$P4MASTER_ID" ]]; then
      # Refer to ckp/jnl files starting like: /p4/N/checkpoints/p4_N
      CheckpointsDir="${CHECKPOINTS}"
      FilePrefix="${P4SERVER}"
   else
      JournalPrefix="$("$P4DBIN" -r "$P4ROOT" -k db.config -jd - | grep "@${SERVERID}@ @journalPrefix@" | cut -d '@' -f 10)"
      [[ -n "$JournalPrefix" ]] || die "Could not determine journalPrefix for ServerID $SERVERID."
      CheckpointsDir="${JournalPrefix%/*}"
      FilePrefix="${JournalPrefix##*/}"
   fi

   for (( j=OFFLINEJNLNUM; j <= JOURNALNUM; j++ )); do
      NumberedJournal="${CheckpointsDir}/${FilePrefix}.jnl.${j}"
      log "Replay journal $NumberedJournal to offline db."
      # Curly braces capture output of 'time'.
      { time "$P4DBIN" -r "$OFFLINE_DB" -jr -f "$NumberedJournal"; } >> "$LOGFILE" 2>&1 || { die "Offline journal replay failed. Abort!"; }
   done
}

#------------------------------------------------------------------------------
# Replay the live, active P4JOURNAL file into the offline databaes.
#------------------------------------------------------------------------------
replay_active_journal_to_offline_db () {
   log "Replay active journal to offline db."

   declare ActiveJournal=

   # On a standby server, the current/active journal is named /p4/N/logs/journal.<jnlNum>.
   # On the master and other server types, the active journal is $P4JOURNAL.
   if [[ "$STANDBYSERVER" -eq 1 ]]; then
      local _JNLNUM
      _JNLNUM=$("$P4DBIN" -r "$P4ROOT" -jd - db.counters 2>&1 | grep '@journal@' | cut -d "@" -f 8 2>> "$LOGFILE") || die "Cannot get $P4ROOT journal number. Abort!"
      ActiveJournal="$LOGS/journal.$_JNLNUM"
   else
      ActiveJournal="$P4JOURNAL"
   fi

   # Curly braces capture output of 'time'.
   { time "$P4DBIN" -r "$OFFLINE_DB" -jr -f "${ActiveJournal}"; } >> "$LOGFILE" 2>&1 || { die "Active Journal replay failed. Abort!"; }

}

#------------------------------------------------------------------------------
# Recreate offline databases from the latest checkpoint.
#------------------------------------------------------------------------------
recreate_offline_db_files () {
   local CheckpointsDir=
   local FilePrefix=
   local LastCheckpointMD5=
   local LastCheckpoint=

   # For the master server, we can safely rely on the SDP standard that the journalPrefix
   # is of the form '/p4/N/checkpoints/p4_N'. For replicas and edge servers, determine
   # the values dynamically based on the current journalPrefix value for the given ServerID.
   if [[ "$SERVERID" == "$P4MASTER_ID" ]]; then
      # Refer to ckp/jnl files starting like: /p4/N/checkpoints/p4_N
      CheckpointsDir="${CHECKPOINTS}"
      FilePrefix="${P4SERVER}"
   else
      JournalPrefix="$("$P4DBIN" -r "$P4ROOT" -k db.config -jd - | grep "@${SERVERID}@ @journalPrefix@" | cut -d '@' -f 10)"
      [[ -n "$JournalPrefix" ]] || die "Could not determine journalPrefix for ServerID $SERVERID."
      CheckpointsDir="${JournalPrefix%/*}"
      FilePrefix="${JournalPrefix##*/}"
   fi

   if [[ -z "$(find "${CheckpointsDir}/" -maxdepth 1 -type f -name "${FilePrefix}.ckp.*.gz" -printf 1 -quit)" ]]; then
      ckp_complete

      if [[ "$SERVERID" == "$P4MASTER_ID" ]]; then
         die "No checkpoints found in $CheckpointsDir with prefix $FilePrefix.  Consider running 'live_checkpoint.sh $SDP_INSTANCE'."
      else
         die "No checkpoints found in $CheckpointsDir with prefix $FilePrefix."
      fi
   fi

   # shellcheck disable=SC2012
   LastCheckpointMD5=$(ls -t "${CheckpointsDir}/${FilePrefix}".ckp.*.md5 | head -1)
   [[ -n "$LastCheckpointMD5" ]] || \
      die "Could not find *.md5 file for latest checkpoint. Abort!"

   # shellcheck disable=SC2129
   rm -f "${OFFLINE_DB}"/offline_db_usable.txt >> "$LOGFILE" 2>&1
   rm -f "${OFFLINE_DB}"/db.* >> "$LOGFILE" 2>&1
   rm -f "${OFFLINE_DB}"/save/db.* >> "$LOGFILE" 2>&1

   # Account for the idiosyncracy that MD5 files for checkpoints may look
   # like p4_N.ckp.gz.md5 or p4_N.ckp.md5.
   if [[ "$LastCheckpointMD5" == *".gz.md5" ]]; then
      LastCheckpoint="${LastCheckpointMD5%.md5}"
   else
      LastCheckpoint="${LastCheckpointMD5%.md5}.gz"
   fi

   [[ -r "$LastCheckpoint" ]] || \
      die "Missing last checkpoint file: $LastCheckpoint. Abort!"

   log "Recovering from last full checkpoint, $LastCheckpoint."
   # Curly braces capture output of 'time'.
   { time "$P4DBIN" -r "$OFFLINE_DB" -jr -z "${LastCheckpoint}"; } >> "$LOGFILE" 2>&1 || { die "Restore of checkpoint to $OFFLINE_DB failed!"; }
   echo "Offline db file restored successfully." > "${OFFLINE_DB}/offline_db_usable.txt"
}

#------------------------------------------------------------------------------
# Take a live checkpoint from db.* files in P4ROOT.
#------------------------------------------------------------------------------
checkpoint () {
   local CheckpointsDir=
   local FilePrefix=

   log "Create a new checkpoint from live db files in $P4ROOT."

   if [[ "$SERVERID" == "$P4MASTER_ID" ]]; then
      # Refer to ckp/jnl files starting like: /p4/N/checkpoints/p4_N
      CheckpointsDir="${CHECKPOINTS}"
      FilePrefix="${P4SERVER}"
   else
      die "Live checkpoints can only be run on the master server."
   fi

   # Curly braces capture output of 'time'.
   { time "$P4DBIN" -r "$P4ROOT" -jc -Z "${CheckpointsDir}/${FilePrefix}"; } >> "$LOGFILE" 2>&1 || { die "ERROR - New live checkpoint failed!"; }
}

#------------------------------------------------------------------------------A
# Take a checkpoint from the ROOTDIR, typically either /p4/N/root or
# /p4/N/offline_db.
#------------------------------------------------------------------------------
dump_checkpoint () {
   declare CheckpointsDir=
   declare NewCheckpoint=
   declare NewCheckpointMD5=
   declare FilePrefix=
   declare JournalPrefix=
   declare -i DoSnapshot=0
   declare -i SnapshotOK=1
   declare -i CheckpointOK=1

   # shellcheck disable=SC2153
   log "Dump out new checkpoint from db files in $ROOTDIR."

   # For the master server, we can safely rely on the SDP standard that the journalPrefix
   # is of the form '/p4/N/checkpoints/p4_N'. For replicas and edge servers, determine
   # the values dynamically based on the current journalPrefix value for the given ServerID.
   if [[ "$SERVERID" == "$P4MASTER_ID" ]]; then
      # Refer to ckp/jnl files starting like: /p4/N/checkpoints/p4_N
      CheckpointsDir="${CHECKPOINTS}"
      FilePrefix="${P4SERVER}"
   else
      JournalPrefix="$("$P4DBIN" -r "$P4ROOT" -k db.config -jd - | grep "@${SERVERID}@ @journalPrefix@" | cut -d '@' -f 10)"
      [[ -n "$JournalPrefix" ]] || die "Could not determine journalPrefix for ServerID $SERVERID."
      CheckpointsDir="${JournalPrefix%/*}"
      FilePrefix="${JournalPrefix##*/}"
   fi

   NewCheckpoint="${CheckpointsDir}/${FilePrefix}.ckp.${CHECKPOINTNUM}.gz"
   NewCheckpointMD5="${NewCheckpoint}.md5"

   if [[ -r "$NewCheckpoint" && -r "$NewCheckpointMD5" ]]; then
      log "Warning: Skipping generation of existing checkpoint $NewCheckpoint.\nVerified MD5 file exists: $NewCheckpointMD5."
      return
   fi

   # Curly braces capture output of 'time'.
   if { time "$P4DBIN" -r "$ROOTDIR" -jd -z "${CheckpointsDir}/${FilePrefix}.ckp.${CHECKPOINTNUM}.gz"; } >> "$LOGFILE" 2>&1; then
      CheckpointOK=1
   else
      CheckpointOK=0
   fi

   if [[ -n "${SNAPSHOT_SCRIPT:-}" ]]; then
      DoSnapshot=1
      log "Calling site-specific snapshot script: $SNAPSHOT_SCRIPT"
      if "$SNAPSHOT_SCRIPT" >> "$LOGFILE" 2>&1; then
         SnapshotOK=1
      else
         SnapshotOK=0
      fi
   fi

   if [[ "$DoSnapshot" -eq 0 ]]; then
      if [[ "$CheckpointOK" -eq 1 ]]; then
         log "New checkpoint dump succeeded."
      else
         die "New checkpoint dump FAILED."
      fi
   else
      if [[ "$CheckpointOK" -eq 0 && "$SnapshotOK" -eq 0 ]]; then
         die "Both checkpoint dump and snapshot FAILED."
      elif [[ "$CheckpointOK" -eq 1 && "$SnapshotOK" -eq 0 ]]; then
         die "New checkpoint dump succeeded, but snapshot FAILED."
      elif [[ "$CheckpointOK" -eq 0 && "$SnapshotOK" -eq 1 ]]; then
         die "New checkpoint dump FAILED, but snapshot succeeded."
      else
         log "New checkpoint dump and snapshot succeeded."
      fi
   fi
}

#------------------------------------------------------------------------------
# Compare journal numbers between live and offline databases, to ensure
# they can be safely swapped out.
#------------------------------------------------------------------------------
compare_journal_numbers () {
   # Get the journal number of the offline database
   if [[ ! -f "$OFFLINE_DB/offline_db_usable.txt" ]]; then
      die "Offline database not in a usable state. Check the backup process."
   fi

   if [[ ! -f "$OFFLINE_DB/db.counters" ]]; then
      die "Offline database not found. Consider creating it with live_checkpoint.sh. Be aware that it locks the live system and can take a long time! Abort!"
   fi

   local _OFFLINEJNLNUM
   _OFFLINEJNLNUM=$("$P4DBIN" -r "$OFFLINE_DB" -jd - db.counters 2>&1 | grep '@journal@' | cut -d "@" -f 8 2>> "$LOGFILE") || die "Cannot get $OFFLINE_DB journal number. Abort!"
   check_journalnum "$_OFFLINEJNLNUM"

   # Get the journal number of the root database
   if [[ ! -f "$P4ROOT/db.counters" ]]; then
      die "$P4ROOT database not found. Something is seriously wrong since the server was just running a minute ago! Contact support@perforce.com"
   fi
   local _JNLNUM
   _JNLNUM=$("$P4DBIN" -r "$P4ROOT" -jd - db.counters 2>&1 | grep '@journal@' | cut -d "@" -f 8 2>> "$LOGFILE") || die "Cannot get $P4ROOT journal number. Abort!"
   check_journalnum "$_JNLNUM"

   if [[ "$_JNLNUM" -gt "$_OFFLINEJNLNUM" ]]; then
      log "$P4ROOT journal number is: $_JNLNUM"
      log "$OFFLINE_DB journal number is: $_OFFLINEJNLNUM"
      die "$OFFLINE_DB journal number is less than $P4ROOT, cannot switch."
   fi
}

#------------------------------------------------------------------------------
# Swap out live db.* database files in P4ROOT with those in offline_db.
#------------------------------------------------------------------------------
switch_db_files () {
   # Compare the Offline and Master journal numbers before switching to make
   # sure they match.
   compare_journal_numbers

   log "Calling 'verify_sdp.sh -L off' before swapping db.* files."
   "$P4CBIN"/verify_sdp.sh -L off >> "$LOGFILE" 2>&1 ||\
      die "Error: Cannot confirm all with $P4CBIN/verify_sdp.sh. Aborting"

   log "Switching root and offline_db links."
   [[ -d "${P4ROOT}"/save ]] || mkdir -p "${P4ROOT}"/save

   # shellcheck disable=SC2129
   rm -f "${P4ROOT}"/save/db.* >> "$LOGFILE" 2>&1
   rm -rf "${P4ROOT}"/server.locks >> "$LOGFILE" 2>&1
   mv "${P4ROOT}"/db.* "${P4ROOT}"/save/. >> "$LOGFILE" 2>&1

   if [[ -r "$P4ROOT"/license ]]; then
      mv "${P4ROOT}"/license "${OFFLINE_DB}/." >> "$LOGFILE" 2>&1
   fi

   if [[ -n "$(ls "$P4ROOT"/license* 2>/dev/null)" ]]; then
      mv "${P4ROOT}"/license* "${OFFLINE_DB}/." >> "$LOGFILE" 2>&1
   fi

   if [[ -r "${P4ROOT}"/rdb.lbr ]]; then
      mv "${P4ROOT}"/rdb.lbr "${OFFLINE_DB}/." >> "$LOGFILE" 2>&1
   fi

   if [[ -n "$(ls "$P4ROOT"/state* 2>/dev/null)" ]]; then
      mv "${P4ROOT}"/state* "${OFFLINE_DB}/." >> "$LOGFILE" 2>&1
   fi

   if [[ -r "${P4ROOT}"/server.id ]]; then
      mv "${P4ROOT}"/server.id "${OFFLINE_DB}/." >> "$LOGFILE" 2>&1
   fi

   if [[ -n "$(ls "$P4ROOT"/server.id* 2>/dev/null)" ]]; then
      mv "${P4ROOT}"/server.id* "${OFFLINE_DB}/." >> "$LOGFILE" 2>&1
   fi

   rm -f "${OFFLINE_DB}/offline_db_usable.txt"
   LinkOfflineDB="$(readlink "$OFFLINE_DB")"
   LinkP4ROOT="$(readlink "$P4ROOT")"
   unlink "$OFFLINE_DB"
   unlink "$P4ROOT"

   ln -s "$LinkOfflineDB" "$P4ROOT" >> "$LOGFILE" 2>&1 ||\
      die "Link of $LinkOfflineDB to $P4ROOT failed."

   ln -s "$LinkP4ROOT" "$OFFLINE_DB" >> "$LOGFILE" 2>&1 ||\
      die "Link of $LinkP4ROOT to $OFFLINE_DB failed."
}

#------------------------------------------------------------------------------
# Rotate specified log files, and compress with gzip.
#------------------------------------------------------------------------------
rotate_log_file () {
   cd "$LOGS" || die "Could not cd to: $LOGS"
   ROTATE_LOGNAME=$1
   GZ_EXT=${2:-}
   LOGID=$(date +'%Y-%m-%d_%H-%M-%S')

   if [[ -f "${ROTATE_LOGNAME}" ]]; then
      if [[ -n "${LOGFILE:-}" ]]; then
         mv -f "${ROTATE_LOGNAME}" "${ROTATE_LOGNAME}.${LOGID}" >> "$LOGFILE" 2>&1
         [[ -n "$GZ_EXT" ]] && gzip "${ROTATE_LOGNAME}.${LOGID}" >> "$LOGFILE" 2>&1
      else
         mv -f "${ROTATE_LOGNAME}" "${ROTATE_LOGNAME}.${LOGID}"
         [[ -n "$GZ_EXT" ]] && gzip "${ROTATE_LOGNAME}.${LOGID}"
      fi
   fi
   cd - > /dev/null || die "Could not cd to: $OLDPWD"
}

#------------------------------------------------------------------------------
# At the start of each run for live_checkpoint.sh, daily_checkpoint.sh, and
# recreate_db_checkpoint.sh, before *any* logging activity occurs, rotate the
# logs from the most recent prior run, always named "checkpoint.log" or "log".
#------------------------------------------------------------------------------
rotate_last_run_logs () {
   # Rotate prior log file for the current script.
   rotate_log_file "$LOGFILE"

   # Rotate prior server log.
   rotate_log_file "log" ".gz"

   # Rotate prior broker log.
   rotate_log_file "p4broker.log" ".gz"

   # Rotate prior audit log.
   rotate_log_file "audit.log" ".gz"
}

#------------------------------------------------------------------------------
# Remove log files matching a specified name prefix, preserving a specified
# number of the recent logs.
#------------------------------------------------------------------------------
remove_log_files () {
   REMOVE_LOGNAME=$1
   KEEPNUM=$2

   # shellcheck disable=SC2012
   for I_LOGFILE in $(ls -t "${REMOVE_LOGNAME:?}"* 2>/dev/null | $AWK "NR > $KEEPNUM"); do
      log "rm -f $I_LOGFILE"
      rm -f "$I_LOGFILE"
   done
}

#------------------------------------------------------------------------------
# Remove old logs.
#------------------------------------------------------------------------------
remove_old_logs () {
   # Remove old Checkpoint Logs
   # Use KEEPJNLS rather than KEEPLOGS, so we keep the same number
   # of checkpoint logs as we keep checkpoints.
   cd "$LOGS" || die "Could not cd to: $LOGS"

   if [[ "$KEEPJNLS" -eq 0 ]]; then
      log "Skipping cleanup of old checkpoint logs because KEEPJNLS is set to 0."
   else
      log "Deleting old checkpoint logs.  Keeping latest $KEEPJNLS, per KEEPJNLS setting in p4_vars."
      remove_log_files "checkpoint.log" "$KEEPJNLS"
   fi

   if [[ "$KEEPLOGS" -eq 0 ]]; then
      log "Skipping cleanup of old server logs because KEEPLOGS is set to 0."
   else
      log "Deleting old server logs.  Keeping latest $KEEPLOGS, per KEEPLOGS setting in p4_vars."
      remove_log_files "log" "$KEEPLOGS"
      remove_log_files "p4broker.log" "$KEEPLOGS"
      remove_log_files "audit.log" "$KEEPLOGS"
      remove_log_files "sync_replica.log" "$KEEPLOGS"
      remove_log_files "recreate_offline_db.log" "$KEEPLOGS"
      remove_log_files "upgrade.log" "$KEEPLOGS"
      remove_log_files "p4login" "$KEEPLOGS"
      remove_log_files "p4verify.log" "$KEEPLOGS"
      remove_log_files "journal_watch.log" "$KEEPLOGS"
      remove_log_files "purge_revisions.log" "$KEEPLOGS"
   fi
   cd - > /dev/null || die "Could not cd to: $OLDPWD"
}

#------------------------------------------------------------------------------
# Set the SDP Checkpoint counter to indicate last successful SDP checkpoint
# operation. For standby servers, set the SDP Checkpoint counter on the master.
#------------------------------------------------------------------------------
set_counter() {
   "$P4CBIN/p4login"

   if [[ "$EDGESERVER" -eq 1 || "$STANDBYSERVER" -eq 1 ]]; then
      "$P4BIN" -u "$P4USER" -p "$P4MASTERPORT" counter "LastSDPCheckpoint.$SERVERID" "$(date +'%s (%Y/%m/%d %H:%M:%S %z %Z)')" > /dev/null
   else
      "$P4BIN" -u "$P4USER" -p "$P4PORT" counter "LastSDPCheckpoint.$SERVERID" "$(date +'%s (%Y/%m/%d %H:%M:%S %z %Z)')" > /dev/null
   fi
}

#------------------------------------------------------------------------------
# This is the function that is called to run the individual checkpoint
# dump or restores during a parallel run.
#------------------------------------------------------------------------------
function parallel_checkpoint_cmd ()
{
  echo "=== Running $* on $(date)." >> "$cmd_log"
  "$@" >> "$cmd_log" 2>&1
  status=$?
  if [[ "$status" -ne 0 ]]; then
    CkpFailed=1
  fi
  echo "=== $* completed on $(date)." >> "$cmd_log"
}

#------------------------------------------------------------------------------
# This function checks for running processes as part of the parallel dump and restore
#------------------------------------------------------------------------------
function check_running ()
{
   sleep 30
   #loop thread process id's and see if any have finished.
   spot=0
   run=()
   for p in "${ids[@]}"; do
      if [[ -n "$p" ]]; then
        # shellcheck disable=SC2009
        running=$(ps cax | grep "$p")
      fi
      if [[ -n "$running" ]]; then
         run[$spot]=$p
         spot=$((spot+1))
      else
         thread=$((thread-1))
      fi
   done

   if [[ "$spot" -ne 0 ]]; then
      ids=("${run[@]}")
   else
      ids=()
   fi
}

#------------------------------------------------------------------------------
# Dump db files in parallel from offline_db
#------------------------------------------------------------------------------
dump_parallel_ckp ()
{
   db_files=() # Clear array
   thread=0  # Set current threads to 0

   cd "$OFFLINE_DB" || die "Could not cd to: $OFFLINE_DB"

   [[ -d "${CKPTMPDIR}" ]] || mkdir "${CKPTMPDIR}"
   rm -f "${CKPTMPDIR:?}"/*

   # Build array of db_files in offline_db
   for f in db.*; do
      db_files+=( "$f" ) # Append db file to the array
   done

   # loop db_files running the number of them in parallel that is specified by the command line
   for f in "${db_files[@]}"; do
      # Loop to see if we are over our thread count.  If so wait until we drop below it again
      # shellcheck disable=SC2154
      while [[ $thread -ge "$Threads" ]]; do
         check_running
      done

      CkpCmd="${P4DBIN} -r ${OFFLINE_DB} -jd ${CKPTMPDIR}/$P4SERVER.ckp.${f} $f"
      echo "$CkpCmd" > greppattern.txt
      cmd_log="${LOG}-${f}.log"
      parallel_checkpoint_cmd "${P4DBIN}" -r "${OFFLINE_DB}" -jd "${CKPTMPDIR}/$P4SERVER.ckp.${f}" "$f" &
      sleep 1
      # shellcheck disable=SC2009
      pid=$(ps -ef | grep -F -f greppattern.txt | awk '{print $1;}')
      if [[ -n "$pid" ]]; then
         ids[$thread]=$pid # add the process ID into the array of running processes
         thread=$((thread+1))  #  add one to the thread count and start a new verify
      fi
   done

   rm greppattern.txt

   # now that we have started all of them wait until all of our processes have finished before continuing.
   while [[ $thread -gt 0 ]]; do
      check_running
   done

   cd "$CKPTMPDIR" || die "Could not cd to: $CKPTMPDIR"
   rm -f ./*.md5

   # now that the processes have finished combine all of the log file together
   for f in "${db_files[@]}"; do
      if [[ -f "${LOG}-${f}.log" ]]; then
         cat "${LOG}-${f}.log" >> "$LOGFILE"
         rm -f "${LOG}-${f}.log"
      fi
   done

   if [[ "$CkpFailed" -ne 0 ]]; then
      # shellcheck disable=SC2034
      StatusMessage="Error: Checkpoint failed.  Review the log [$LOGFILE]."
      ExitCode=1
   fi

   if [[ "$ExitCode" -ne 0 ]]; then
      die "New checkpoint dump failed!"
   fi

   msg "Completed parallel checkpoint at $(date)."
}

#------------------------------------------------------------------------------
# Restore from db files that have been extracted from a parallel checkpoint tgz file.
#------------------------------------------------------------------------------
restore_parallel_ckp ()
{
   db_files=() # Clear array
   thread=0  # Set current threads to 0

   [[ -d "${CKPTMPDIR}" ]] || die "$CKPTMPDIR doesn't exist! Restore failed."
   cd "$CKPTMPDIR" || die "Could not cd to: $CKPTMPDIR"

   rm -f "${OFFLINE_DB}"/offline_db_usable.txt >> "$LOGFILE" 2>&1
   rm -f "${OFFLINE_DB}"/db.* >> "$LOGFILE" 2>&1 

   # Build array of db_files in checkpoint temp dir
   for f in *; do
      db_files+=( "$f" ) # Append db file to the array
   done

   # loop db_files running the number of them in parallel that is specified by the command line
   for f in "${db_files[@]}"; do
      # Loop to see if we are over our thread count.  If so wait until we drop below it again
      while [[ $thread -ge "$Threads" ]]; do
         check_running
      done

      CkpCmd="${P4DBIN} -r ${OFFLINE_DB} -jr ${CKPTMPDIR}/${f}"
      echo "$CkpCmd" > greppattern.txt
      cmd_log="${LOG}-${f}.log"
      parallel_checkpoint_cmd "${P4DBIN}" -r "${OFFLINE_DB}" -jr "${CKPTMPDIR}/${f}" &
      sleep 1
      # shellcheck disable=SC2009
      pid=$(ps -ef | grep -F -f greppattern.txt | awk '{print $1;}')
      if [[ -n "$pid" ]]; then
         ids[$thread]=$pid # add the process ID into the array of running processes
         thread=$((thread+1))  #  add one to the thread count and start a new verify
      fi
   done

   rm greppattern.txt

   # now that we have started all of them wait until all of our processes have finished before continuing.
   while [[ $thread -gt 0 ]]; do
      check_running
   done

   # now that the processes have finished combine all of the log file together
   for f in "${db_files[@]}"; do
      if [[ -f "${LOG}-${f}.log" ]]; then
         cat "${LOG}-${f}.log" >> "$LOGFILE"
         rm -f "${LOG}-${f}.log"
      fi
   done

   if [[ "$CkpFailed" -ne 0 ]]; then
      # shellcheck disable=SC2034
      StatusMessage="Error: Checkpoint Restore failed.  Review the log [$LOGFILE]."
      ExitCode=1
   fi

   if [[ "$ExitCode" -ne 0 ]]; then
      die "Restore of checkpoint dump failed!"
   fi

   echo "Offline db file restored successfully." > "${OFFLINE_DB}/offline_db_usable.txt"
   msg "Completed parallel checkpoint restore at $(date)."
}

#------------------------------------------------------------------------------
# Create a tgz of the temporary checkpoint folder that contains individually dumped database checkpoints.
#------------------------------------------------------------------------------
create_tar_ckp ()
{
   cd "$CHECKPOINTS" || die "Could not cd to: $CHECKPOINTS"
   Ckptgz=${P4SERVER}.ckp.parallel.${CHECKPOINTNUM}.tgz
   [[ -f "$Ckptgz" ]] && die "$Ckptgz file already exists. Check the backup process!"
   { time tar cvzf "$Ckptgz" "${CKPTMPDIR}"; } >> "$LOGFILE" 2>&1 || { die "Failed to create tgz checkpoint file!"; }
   rm -rf "${CKPTMPDIR:?}"/*
}

#------------------------------------------------------------------------------
# Extract a tgz of the temporary checkpoint folder that contains individually dumped database checkpoints.
#------------------------------------------------------------------------------
extract_tar_ckp ()
{
   cd "$CHECKPOINTS" || die "Could not cd to: $CHECKPOINTS"
   Ckptgz=$1
   [[ -f $Ckptgz ]] || die "$Ckptgz doesn't exist!"
   { time tar xvzf "$Ckptgz"; } >> "$LOGFILE" 2>&1 || { die "Failed to extract $Ckptgz checkpoint file!"; }
}

#	Change	User	Description
#76	30388	C. Thomas Tyler	Released SDP 2024.1.30385 (2024/06/11). Copy Up using 'p4 copy -r -b perforce_software-sdp-dev'.
#75	30297	C. Thomas Tyler	Released SDP 2023.2.30295 (2024/05/08). Copy Up using 'p4 copy -r -b perforce_software-sdp-dev'.
#74	30043	C. Thomas Tyler	Released SDP 2023.2.30041 (2023/12/22). Copy Up using 'p4 copy -r -b perforce_software-sdp-dev'.
#73	29954	C. Thomas Tyler	Released SDP 2023.1.29949 (2023/12/01). Copy Up using 'p4 copy -r -b perforce_software-sdp-dev'.
#72	29891	C. Thomas Tyler	Released SDP 2023.1.29699 (2023/07/11). Copy Up using 'p4 copy -r -b perforce_software-sdp-dev'.
#71	29612	C. Thomas Tyler	Released SDP 2023.1.29610 (2023/05/25). Copy Up using 'p4 copy -r -b perforce_software-sdp-dev'.
#70	29443	C. Thomas Tyler	Released SDP 2022.2.29441 (2023/02/27). Copy Up using 'p4 copy -r -b perforce_software-sdp-dev'.
#69	29401	C. Thomas Tyler	Released SDP 2022.2.29399 (2023/02/06). Copy Up using 'p4 copy -r -b perforce_software-sdp-dev'.
#68	29252	C. Thomas Tyler	Released SDP 2022.2.29250 (2022/12/08). Copy Up using 'p4 copy -r -b perforce_software-sdp-dev'.
#67	29143	C. Thomas Tyler	Released SDP 2022.1.29141 (2022/10/29). Copy Up using 'p4 copy -r -b perforce_software-sdp-dev'.
#66	28858	C. Thomas Tyler	Released SDP 2022.1.28855 (2022/05/27). Copy Up using 'p4 copy -r -b perforce_software-sdp-dev'.
#65	28651	C. Thomas Tyler	Released SDP 2021.2.28649 (2022/03/03). Copy Up using 'p4 copy -r -b perforce_software-sdp-dev'.
#64	28412	C. Thomas Tyler	Released SDP 2021.2.28410 (2021/11/24). Copy Up using 'p4 copy -r -b perforce_software-sdp-dev'.
#63	28240	C. Thomas Tyler	Released SDP 2021.1.28238 (2021/11/12). Copy Up using 'p4 copy -r -b perforce_software-sdp-dev'.
#62	27761	C. Thomas Tyler	Released SDP 2020.1.27759 (2021/05/07). Copy Up using 'p4 copy -r -b perforce_software-sdp-dev'.
#61	27463	C. Thomas Tyler	Released SDP 2020.1.27457 (2021/02/17). Copy Up using 'p4 copy -r -b perforce_software-sdp-dev'.
#60	27331	C. Thomas Tyler	Released SDP 2020.1.27325 (2021/01/29). Copy Up using 'p4 copy -r -b perforce_software-sdp-dev'.
#59	26496	C. Thomas Tyler	Released SDP 2019.3.26494 (2020/04/23). Copy Up using 'p4 copy -r -b perforce_software-sdp-dev'.
#58	26480	C. Thomas Tyler	Released SDP 2019.3.26478 (2020/04/12). Copy Up using 'p4 copy -r -b perforce_software-sdp-dev'.
#57	26475	C. Thomas Tyler	Released SDP 2019.3.26472 (2020/04/10). Copy Up using 'p4 copy -r -b perforce_software-sdp-dev'.
#56	26470	C. Thomas Tyler	Released SDP 2019.3.26468 (2020/04/10). Copy Up using 'p4 copy -r -b perforce_software-sdp-dev'.
#55	26411	C. Thomas Tyler	Released SDP 2019.3.26407 (2020/03/28). Copy Up using 'p4 copy -r -b perforce_software-sdp-dev'.
#54	26403	C. Thomas Tyler	Released SDP 2019.3.26400 (2020/03/28). Copy Up using 'p4 copy -r -b perforce_software-sdp-dev'.
#53	26246	C. Thomas Tyler	Released SDP 2019.3.26239 (2020/01/08). Copy Up using 'p4 copy -r -b perforce_software-sdp-dev'.
#52	26161	C. Thomas Tyler	Released SDP 2019.3.26159 (2019/11/06). Copy Up using 'p4 copy -r -b perforce_software-sdp-dev'.
#51	25596	C. Thomas Tyler	Released SDP 2019.2.25594 (2019/05/02). Copy Up using 'p4 copy -r -b perforce_software-sdp-dev'.
#50	25380	C. Thomas Tyler	Released SDP 2019.1.25374 (2019/03/21). Copy Up using 'p4 copy -r -b perforce_software-sdp-dev'.
#49	25279	C. Thomas Tyler	Released SDP 2019.1.25276 (2019/03/06). Copy Up using 'p4 copy -r -b perforce_software-sdp-dev'.
#48	25245	C. Thomas Tyler	Released SDP 2019.1.25238 (2019/03/02). Copy Up using 'p4 copy -r -b perforce_software-sdp-dev'.
#47	23331	C. Thomas Tyler	Released SDP 2017.4.23329 (2017/12/05). Copy Up using 'p4 copy -r -b perforce_software-sdp-dev'.
#46	23044	C. Thomas Tyler	Released SDP 2017.3.23041 (2017/10/24). Copy Up using 'p4 copy -r -b perforce_software-sdp-dev'.
#45	23006	C. Thomas Tyler	Released SDP 2017.3.23003 (2017/10/19). Copy Up using 'p4 copy -r -b perforce_software-sdp-dev'.
#44	22685	Russell C. Jackson (Rusty)	Update main with current changes from dev.
#43	22185	C. Thomas Tyler	Released SDP 2017.2.22177 (2017/05/17). Copy Up using 'p4 copy -r -b perforce_software-sdp-dev'.
#42	21723	C. Thomas Tyler	Released SDP 2017.1.21720 (2017/02/17). Copy Up using 'p4 copy -r -b perforce_software-sdp-dev'.
#41	21338	C. Thomas Tyler	Released SDP 2016.2.21328 (2016/12/16). Copy Up using 'p4 copy -r -b perforce_software-sdp-dev'.
#40	21193	Russell C. Jackson (Rusty)	Update main from dev.
#39	20974	C. Thomas Tyler	Released SDP 2016.2.20972 (2016/11/01). Copy Up using 'p4 copy -r -b perforce_software-sdp-dev'.
#38	20858	C. Thomas Tyler	Released SDP 2016.2.20856 (2016/10/04). Copy Up using 'p4 copy -r -b perforce_software-sdp-dev'.
#37	20767	C. Thomas Tyler	Released SDP 2016.2.20755 (2016/09/29). Copy Up using 'p4 copy -r -b perforce_software-sdp-dev'.
#36	20353	C. Thomas Tyler	Released SDP 2016.1.20348. Copy Up using 'p4 copy -r -b perforce_software-sdp-dev', with selective removal of changes related to work-in-progress changes.
#35	19898	C. Thomas Tyler	Released SDP/MultiArch/2016.1/19888 (2016/07/07). Copy Up using 'p4 copy -r -b perforce_software-sdp-dev'.
#34	19835	C. Thomas Tyler	Released Rev. SDP/MultiArch/2016.1/19768 (2016/06/24). Copy Up using 'p4 copy -r -b perforce_software-sdp-dev'.
#33	19694	C. Thomas Tyler	Released SDP/MultiArch/2016.1/19661 (2016/06/08). Copy Up using 'p4 copy -r -b perforce_software-sdp-dev'.
#32	19414	C. Thomas Tyler	Released SDP/MultiArch/2016.1/19410 (2016/05/17).
#31	18961	C. Thomas Tyler	Released: SDP/MultiArch/2016.1/18958 (2016/04/08).
#30	18619	Russell C. Jackson (Rusty)	Updating main with current changes.
#29	18530	Russell C. Jackson (Rusty)	Update main from dev.
#28	16155	Russell C. Jackson (Rusty)	Removed check code that probably hasn't ever worked. Deleted mirror_ldap* since that functionality is built into the server now.
#27	15856	C. Thomas Tyler	Replaced the big license comment block with a shortened form referencing the LICENSE file included with the SDP package, and also by the URL for the license file in The Workshop.
#26	15784	Russell C. Jackson (Rusty)	Added missing bracket.
#25	15780	C. Thomas Tyler	Per Robert: Style police causing problems :) Fixed!
#24	15777	C. Thomas Tyler	No functional changes. Style Policing only on bash scripts only. Normalized indentation and line breaks, removed offending tabs, and general whitespace usage.
#23	15609	C. Thomas Tyler	Pushing SDP 2015.1.15607 (2015/09/02).
#22	15197	Russell C. Jackson (Rusty)	Corrected versions from testing.
#21	15193	Russell C. Jackson (Rusty)	Added semaphore file to indicate state of the offline database and added check into the backup process to fail if the state of the offline db is not good.
#20	15190	Russell C. Jackson (Rusty)	Added a semaphore file to prevent the checkpoint process from running if another one hasn't finished. Added a check to make sure the journal number is numeric.
#19	13928	dsp	Set lastSDPCounter after a successfull SDP checkpoint p4 admin checkpoint sets lastCheckpointAction, which is useful for monitoring, in particular when checkpoint age should be observed from the outside through p4. However the SDP is using p4d directly to create checkpoints and will not set checkpoints. In order to distinguish human actions from the SDP cronjobs, set a new counter lastSDPCounter in a similar format.
#18	13908	C. Thomas Tyler	Pushing SDP 2015.1.13906.
#17	12171	Russell C. Jackson (Rusty)	Merge in changes to remove the need for p4master_run.
#16	11950	Russell C. Jackson (Rusty)	Made die function record ERROR!!! $HOSTNAME and $P4SERVER in subject. Cleaned up message passed to die command and corrected a typo.
#15	11929	Russell C. Jackson (Rusty)	Updated die function to just pass parameter to mail_log_file instead of echo.
#14	11919	Russell C. Jackson (Rusty)	Added a SERVERID variable to p4_vars and updated backup_functions to use it. Changed the location and the names of the config files so that they could live in /p4/common/config (You're welcome Tom). The files names are: p4_$INSTANCE.vars p4_$INSTANCE.p4review.cfg p4_$INSTANCE.vars will now set P4REPLICA to FALSE if SERVERID matches MASTERNAME, otherwise it is TRUE. This change means that a user must change server.id now in order to change the role of the server rather than changing the instance vars file. This makes more sense to a user that is reading the admin guide about server.id rather than overwriting the file based on a setting that isn't in the admin guide. Change mkdirs to reflect all of the above changes.
#13	11908	adrian_waters	Use set -u to trap unbounded variables
#12	11886	Russell C. Jackson (Rusty)	Changed $prog to $0 so that we don't have to set prog in the calling functions.
#11	11766	Robert Cowham	Missed a reference to $MAIL in @11758 Tweaked run_tests.sh to output more error messages on failure. Though this still doesn't show output of individual failed commands.
#10	11758	Russell C. Jackson (Rusty)	Change MAIL variable to SDPMAIL to avoid conflicts with customer variables. Changed sdp_sync.sh to use get_mail_opts from backup_functions to avoid duplicate functions.
#9	11730	Russell C. Jackson (Rusty)	Moved P4SERVER variable to p4_vars so that all scripts can use it properly. replica_status.sh referenced it, but it wasn't working since it was only in backup_functions.sh
#8	11710	Russell C. Jackson (Rusty)	Changed die function to call new email function. Added su to OSUSER functionality to p4master_run to avoid problems with people running scripts manually as root by mistake.
#7	11707	Robert Cowham	Refactored sending of mail to a common function. Make the setting of "MAILFROM" work for Ubuntu (GNU Mailutils) as well as CentOS
#6	11570	Russell C. Jackson (Rusty)	Brought in changes from Mark Foundry to add -S $MAILFROM to mail commands. Changed sync_replica.sh and weekly_sync_replica.sh to use $LOGFILE for consistency. Added mail command to both files as well.
#5	11540	Russell C. Jackson (Rusty)	Converted to unix format.
#4	11534	Russell C. Jackson (Rusty)	Added -f to -jr to cover offline obliterates where the entries are already removed from the offline database.
#3	11524	Russell C. Jackson (Rusty)	Released updated version of the SDP from Dev.
#2	11130	Robert Cowham	Check for the existence of offline database and log error message if not found.
#1	10148	C. Thomas Tyler	Promoted the Perforce Server Deployment Package to The Workshop.