#!/bin/bash
#==============================================================================
# Copyright and license info is available in the LICENSE file included with
# the Server Deployment Package (SDP), and also available online:
# https://swarm.workshop.perforce.com/projects/perforce-software-sdp/view/main/LICENSE
#------------------------------------------------------------------------------
set -u

#==============================================================================
# Global Variables.
# Paths to the p4d/p4broker/p4p init scripts, their '_init'-stripped
# reference names, and systemd service files; populated by set_vars().
export P4DInitScript=
export P4DRef=
export P4DSystemdServiceFile=
export P4BrokerInitScript=
export P4BrokerRef=
export P4BrokerSystemdServiceFile=
export P4ProxyInitScript=
export P4ProxyRef=
export P4ProxySystemdServiceFile=
# Offline database path and server-role flags (1/0); also set by set_vars().
export OFFLINE_DB=
export EDGESERVER=
export STANDBYSERVER=
# 1 if the p4d version supports parallel checkpoints (see set_vars()).
# Note: assigning an empty string to a 'declare -i' variable yields 0.
declare -i DoParallelCheckpoints=

#==============================================================================
# Common functions used in various SDP scripts.

#------------------------------------------------------------------------------
# Verify key variables in the shell environment exist, or else abort.
#
# If checks in this function fail, this function does an 'echo' and 'exit 1'
# rather than calling 'log' or 'die', as this function is generally called
# early in processing, before the log is initialized.
#------------------------------------------------------------------------------
function check_vars () {
   # Verify key variables in the shell environment exist, or else abort.
   # Uses 'echo' and 'exit 1' (not log/die for messaging) because this runs
   # before the log is initialized.

   local CheckVarsPreflightOK=1
   CommonVars="SDP_INSTANCE P4HOME P4PORT P4ROOT P4JOURNAL P4BIN P4DBIN P4TICKETS P4TRUST KEEPCKPS KEEPJNLS KEEPLOGS CHECKPOINTS JOURNALS LOGS OSUSER SERVERID"
   InstanceVars="P4MASTER_ID P4MASTERPORT"

   # First, check vars that should be set in /p4/common/bin/p4_vars.
   for var in $CommonVars; do
      # Detect unset variables, using ':-' to avoid 'unbound variable' errors.
      # shellcheck disable=SC1083
      if [[ -z "$(eval echo \${"$var":-})" ]]; then
         echo "Error: Required variable \$$var is NOT set. It should be set in /p4/common/bin/p4_vars."
         CheckVarsPreflightOK=0
      fi
   done

   # Next, check vars that should be set in /p4/common/config/p4_N.instance.
   # For some variables, provide additional details that help users correct
   # the problem.
   for var in $InstanceVars; do
      # shellcheck disable=SC1083
      if [[ -z "$(eval echo \${"$var":-})" ]]; then
         echo "Error: Required variable \$$var is NOT set. It should be set in /p4/common/config/$SERVERID.vars, where N is the SDP instance name."

         if [[ "$var" == "P4MASTER_ID" ]]; then
            echo "The value for P4MASTER_ID should be the name of the ServerID of the master server."
         fi

         CheckVarsPreflightOK=0
      fi
   done

   if [[ "$CheckVarsPreflightOK" -eq 0 ]]; then
      echo "Use p4master_run or source p4_vars before calling this script."
      echo "Aborting due to errors in shell environment preflight checks."
      exit 1
   fi

   # Check available space on the offline_db volume.
   # NOTE(review): 'df --output' is GNU coreutils-specific — confirm this
   # function is only run on Linux hosts.
   AVAIL=$(df --output=avail "/p4/${SDP_INSTANCE}/offline_db" | tail -n 1)

   # REQAVAIL is the minimum required free space in 1K blocks.
   # NOTE(review): 209715 1K blocks is ~205 MB, not ~2GB as the original
   # comment claimed (~2GB would be 2097152). Value left unchanged — confirm
   # the intended threshold.
   REQAVAIL=209715

   if [[ "$AVAIL" -lt "$REQAVAIL" ]]; then
      # die() exits 1, so no explicit 'exit' is needed after it.
      die "Available space is less than the required free space of: $REQAVAIL 1K blocks."
   fi
}

#------------------------------------------------------------------------------
#  is_edge ($ServerID, $RootDir)
#
# Determine if a given ServerID is an edge server or not, checking a given
# database root dir (e.g. $P4ROOT or $OFFLINE_DB).
#
# Input:
# $1 - ServerID (required)
# $2 - RootDir (optional, defaults to $P4ROOT)
#
# Output YES if an edge server, NO otherwise.
#
#------------------------------------------------------------------------------
function is_edge () {
   # Output YES if the given ServerID is an edge server, NO otherwise.
   #
   # Input:
   # $1 - ServerID (required)
   # $2 - database root dir (optional, defaults to $P4ROOT)
   local ServerID=${1:-Unset}
   local RootDir=${2:-$P4ROOT}
   local ServicesData=
   local EdgeCheck=

   # Extract a slice of db.server referencing the given ServerID,
   # and then grab the field containing Services data.
   # Consistency fix: quote "$GREP" as is_replica()/is_standby() already do.
   ServicesData=$("$P4DBIN" -r "$RootDir" -J off -L /dev/null -k db.server -jd - 2>&1 |\
      "$GREP" "@db.server@ @${ServerID}@" | "$CUT" -d '@' -f 13)

   # Do a bitwise operation to determine if the ServicesData value indicates
   # this is an edge server (bit 4096 set).
   if [[ -n "$ServicesData" ]]; then
      EdgeCheck=$((ServicesData & 4096))

      if [[ "$EdgeCheck" -gt 0 ]]; then
         echo YES
      else
         echo NO
      fi
   else
      echo NO
   fi
}

#------------------------------------------------------------------------------
# is_replica ($ServerID, $RootDir)
#
# Determine if a given ServerID is a replica server or not, checking a given
# database root dir (e.g. $P4ROOT or $OFFLINE_DB).
#
# Input:
# $1 - ServerID (required)
# $2 - RootDir (optional, defaults to $P4ROOT)
#
# Output YES if a replica server, NO otherwise.
#
#------------------------------------------------------------------------------
is_replica () {
   # Output YES if the given ServerID is a replica server, NO otherwise.
   #
   # Input:
   # $1 - ServerID (required)
   # $2 - database root dir (optional, defaults to $P4ROOT)
   local ServerID="${1:-Unset}"
   local RootDir="${2:-$P4ROOT}"
   local ServicesData=

   # Extract a slice of db.server referencing the given ServerID,
   # and then grab the field containing Services data.
   ServicesData=$("$P4DBIN" -r "$RootDir" -J off -L /dev/null -k db.server -jd - 2>&1 |\
      "$GREP" "@db.server@ @${ServerID}@" | "$CUT" -d '@' -f 13 | tr -d ' ')

   # Check whether the Services value indicates this is a replica server.
   # NOTE(review): 2533 is treated as the exact Services value of a replica —
   # confirm this matches the p4d versions in use.
   if [[ -n "$ServicesData" ]]; then
      if [[ "$ServicesData" -eq '2533' ]]; then
         echo YES
      else
         echo NO
      fi
   else
      echo NO
   fi
}

#------------------------------------------------------------------------------
# is_standby ($ServerID, $RootDir)
#
# Determine if a given ServerID is a standby server or not, checking a given
# database root dir (e.g. $P4ROOT or $OFFLINE_DB).
#
# Input:
# $1 - ServerID (required)
# $2 - RootDir (optional, defaults to $P4ROOT)
#
# Output YES if a standby server, NO otherwise.
#
#------------------------------------------------------------------------------
function is_standby () {
   # Output YES if the given ServerID is a standby server, NO otherwise.
   #
   # Input:
   # $1 - ServerID (required)
   # $2 - database root dir (optional, defaults to $P4ROOT)
   local serverId="${1:-Unset}"
   local rootDir="${2:-$P4ROOT}"
   local servicesData=

   # Pull the db.server record for this ServerID out of a journal dump and
   # isolate the Services field (field 13 when split on '@'), minus spaces.
   servicesData=$("$P4DBIN" -r "$rootDir" -J off -L /dev/null -k db.server -jd - 2>&1 |\
      "$GREP" "@db.server@ @${serverId}@" | "$CUT" -d '@' -f 13 | tr -d ' ')

   # Services values 35141 and 35301 identify standby configurations.
   if [[ -z "$servicesData" ]]; then
      echo NO
   elif [[ "$servicesData" -eq '35141' || "$servicesData" -eq '35301' ]]; then
      echo YES
   else
      echo NO
   fi
}

#------------------------------------------------------------------------------
# Set variables for use in various scripts:
# OFFLINE_DB=path to offline db directory
# EDGESERVER=1 if this is an edge server, 0 otherwise.
# STANDBYSERVER=1 if this is a standby server, 0 otherwise.
#
# This must be called after loading the standard shell environment by
# doing:
# source /p4/common/bin/p4_vars N
#
# This sets P4HOME, SERVERID, etc. needed by this function.
#------------------------------------------------------------------------------
function set_vars () {

   # Derive per-instance init script paths, their '_init'-stripped reference
   # names, and systemd service file paths for p4d, p4broker, and p4p.
   P4DInitScript="$P4HOME/bin/p4d_${SDP_INSTANCE}_init"
   P4DRef="${P4DInitScript%_init}"
   P4DSystemdServiceFile="/etc/systemd/system/p4d_${SDP_INSTANCE}.service"
   P4BrokerInitScript="$P4HOME/bin/p4broker_${SDP_INSTANCE}_init"
   P4BrokerRef="${P4BrokerInitScript%_init}"
   P4BrokerSystemdServiceFile="/etc/systemd/system/p4broker_${SDP_INSTANCE}.service"
   P4ProxyInitScript="$P4HOME/bin/p4p_${SDP_INSTANCE}_init"
   P4ProxyRef="${P4ProxyInitScript%_init}"
   P4ProxySystemdServiceFile="/etc/systemd/system/p4p_${SDP_INSTANCE}.service"

   # Enable parallel checkpoints for p4d releases after 2022.1.
   # NOTE(review): this is a lexicographic string comparison (SC2072 is
   # intentionally disabled); it works for 'YYYY.N' version strings but would
   # misorder other formats — confirm P4D_VERSION's format.
   # shellcheck disable=SC2072
   if [[ "$P4D_VERSION" > "2022.1" ]]; then
      DoParallelCheckpoints=1
      NumCheckPointThreads=16
   else
      DoParallelCheckpoints=0
   fi

   OFFLINE_DB="${P4HOME}/offline_db"

   # EDGESERVER=1 if db.server marks this ServerID as an edge server.
   # shellcheck disable=SC2153
   if [[ -n "$SERVERID" ]]; then
      if [[ "$(is_edge "$SERVERID")" == YES ]]; then
         export EDGESERVER=1
      else
         export EDGESERVER=0
      fi
   else
      export EDGESERVER=0
   fi

   # REPLICASERVER=1 if db.server marks this ServerID as a replica.
   if [[ -n "$SERVERID" ]]; then
      if [[ "$(is_replica "$SERVERID")" == YES ]]; then
         export REPLICASERVER=1
         # Get commit server from P4TARGET setting in database
      else
         export REPLICASERVER=0
      fi
   else
      export REPLICASERVER=0
   fi

   # STANDBYSERVER=1 if db.server marks this ServerID as a standby.
   if [[ -n "$SERVERID" ]]; then
      if [[ "$(is_standby "$SERVERID")" == YES ]]; then
         export STANDBYSERVER=1
         # Get commit server from P4TARGET setting in database
      else
         export STANDBYSERVER=0
      fi
   else
      export STANDBYSERVER=0
   fi

   # Ensure that SDP_ADMIN_PASSWORD_FILE is set, using existing value if set (e.g.
   # in p4_vars), otherwise set it to the SDP standard value.
   export SDP_ADMIN_PASSWORD_FILE="${SDP_ADMIN_PASSWORD_FILE:-Unset}"
   if [[ "$SDP_ADMIN_PASSWORD_FILE" == Unset ]]; then
      export SDP_ADMIN_PASSWORD_FILE="$P4CCFG/.p4passwd.${P4SERVER}.admin"
   fi
}

#------------------------------------------------------------------------------
# Check if user is running as required OS user.
#------------------------------------------------------------------------------
function check_uid () {
   # Verify the current OS user is the configured $OSUSER; die otherwise.
   # Fix: declare 'user' local so it no longer leaks into the global scope.
   local user=
   user=$(id -un)
   if [[ "${user}" != "${OSUSER}" ]]; then
      die "Must be run by user: ${OSUSER}. Abort!"
   fi
}

#------------------------------------------------------------------------------
# Function log() - echo message to logfile or stdout.
#
# If $LOGFILE is defined, write message to the log file only; nothing goes to
# stdout.  Prepend a datestamp.
# If $LOGFILE isn't defined, just echo to stdout, w/o timestamp or script name.
# In all cases, support '-e' formatting.
# Input:
# $1 - message to log (must be quoted).
#------------------------------------------------------------------------------
function log () {
   # Append a datestamped message to $LOGFILE when it is defined; otherwise
   # echo the message to stdout without a timestamp. '-e' escape processing
   # applies in both cases.
   if [[ "${LOGFILE:-Unset}" == Unset ]]; then
      echo -e "$@"
   else
      echo -n "$(date)" >> "$LOGFILE" 2>&1
      echo -e " $0: $*" >> "$LOGFILE" 2>&1
   fi
}

#------------------------------------------------------------------------------
# Get old logfile timestamp.
function get_old_log_timestamp () {
   # Output the mtime of the given log file as YYYYMMDD-HHMMSS, falling back
   # to the current time if the file's timestamp cannot be read. Outputs
   # nothing if no file is given.
   local log=${1:-}
   local oldLogTimestamp=
   [[ -n "$log" ]] || return

   case "$(uname -s)" in
      (Darwin)
         # macOS stat supports custom time formats directly.
         # shellcheck disable=SC2012
         oldLogTimestamp="$(stat -l -t '%Y%m%d-%H%M%S' "$log" 2>&1 | awk '{print $6}')"
      ;;
      (*)
         # GNU ls supports --time-style on Linux.
         # shellcheck disable=SC2012
         oldLogTimestamp="$(ls -l --time-style +'%Y%m%d-%H%M%S' "$log" 2>&1 | awk '{print $6}')"
      ;;
   esac

   [[ -n "$oldLogTimestamp" ]] || oldLogTimestamp=$(date +'%Y%m%d-%H%M%S')

   echo "$oldLogTimestamp"
}

#------------------------------------------------------------------------------
# Decide depending on our mail utility, how to specify sender (if we need to).
# Mail on some platforms sets sender by default.
# If the mail utility returns what looks like a version identifier
# when given the '-V' flag, use a '-S' flag.  If it does not return a
# version identifier, don't set a mail sender option.
# Allow GNU Mailutils alternative flag instead.
#------------------------------------------------------------------------------
function get_mail_sender_opt () {
   # Emit the sender option appropriate for the configured mail utility, or
   # nothing if MAILFROM is empty or the utility is unrecognized.
   # GNU Mailutils takes '-aFrom:...'; a utility whose '-V' output looks like
   # a version number (N.N...) takes '-S from=...'.
   local senderOpt=
   local mailVersion=
   if [[ -n "$MAILFROM" ]]; then
      # $SDPMAIL is intentionally unquoted: it may include flags.
      mailVersion=$($SDPMAIL -V 2>&1)
      # shellcheck disable=SC2076
      if [[ "$mailVersion" =~ "GNU Mailutils" ]]; then
         senderOpt="-aFrom:$MAILFROM"
      elif [[ "$mailVersion" =~ ^[0-9]+\.[0-9] ]]; then
         senderOpt="-S from=$MAILFROM"
      fi
   fi
   echo "$senderOpt"
}

#------------------------------------------------------------------------------
# Email the log file by $LOGFILE.
#------------------------------------------------------------------------------
function mail_log_file () {
   # Email the contents of $LOGFILE to $MAILTO with the given subject ($1),
   # using $SDPMAIL and any sender option from get_mail_sender_opt().
   local subject=$1
   local mail_sender_opt
   mail_sender_opt=$(get_mail_sender_opt)
   # Fix: $mail_sender_opt must be unquoted so it expands to zero words when
   # empty and to two words for the '-S from=...' form. Previously it was
   # quoted, which passed a bogus empty-string argument (or a single fused
   # '-S from=...' argument) to the mail utility. $SDPMAIL is intentionally
   # unquoted as it may include flags.
   # shellcheck disable=SC2086
   $SDPMAIL -s "$subject" $mail_sender_opt "$MAILTO" < "$LOGFILE"
}

#------------------------------------------------------------------------------
# Deliver the $LOGFILE via AWS SNS.
#------------------------------------------------------------------------------
function sns_log_file () {
   # Deliver the contents of $LOGFILE via AWS SNS to $SNS_ALERT_TOPIC_ARN,
   # announcing the delivery on stderr.
   local subject="$1"
   local short_subject=
   # AWS SNS has a 100 character limit for the subject field, so truncate.
   # Fix: use the substring expansion directly instead of a useless
   # $(echo ...) subshell (SC2116).
   short_subject="${subject:0:100}"
   echo -e "Sending alert and log file contents to administrator via AWS SNS." >&2
   aws --region "$AWS_DEFAULT_REGION" sns publish --topic-arn "$SNS_ALERT_TOPIC_ARN" --subject "$short_subject" --message "$(cat "$LOGFILE")"
}

#------------------------------------------------------------------------------
# Function die() - log message, send email/SNS, and exit.
# If $LOGFILE is defined, write message to the log file, email/SNS log,
# and exit.
# If $LOGFILE is not defined, write message to the stdout, and skip
# email/SNS.
# If in terminal session, display message to stderr as well.
#------------------------------------------------------------------------------
function die () {
   # Log the error with a more helpful subject line than cron's default.
   log "ERROR!!! - $HOSTNAME $P4SERVER $0: $*"

   # If a log file is active, deliver it via SNS when configured, else email.
   if [[ "${LOGFILE:-Unset}" != Unset ]]; then
       if [[ "${SNS_ALERT_TOPIC_ARN:-Unset}" != Unset ]]; then
           log "Using SNS for log file delivery..."
           sns_log_file "ERROR!!! - $HOSTNAME $P4SERVER $0: $*"
       else
           log "Using email for log file delivery..."
           mail_log_file "ERROR!!! - $HOSTNAME $P4SERVER $0: $*"
       fi
   fi

   # if running from terminal, also send to stderr
   if tty >/dev/null; then
      echo -e "$@" >&2
   fi

   # Clear the checkpoint semaphore so later runs are not blocked by this
   # failed one (see ckp_running()).
   rm -f "${LOGS}/ckp_running.txt"

   exit 1
}

#------------------------------------------------------------------------------
# Convert various byte values (K,M,G,%) to bytes
# Pass in values such as 1024K, 512M, 1G or 10%
#------------------------------------------------------------------------------
function convert_to_bytes () {
   # Convert a size value with optional unit to bytes.
   #
   # Input:
   # $1 - value such as 1024K, 512M, 1G, 10%, or a plain byte count.
   # $2 - total size in bytes; required only for '%' values.
   local value=$1
   local totalsize=${2:-Undefined}
   local size=
   local unit=

   # Break up value into size (numeric) and unit (K,M,G,%)
   size=$("$GREP" -Eo '[[:alpha:]%]+|[0-9]+' <<< "$value" | head -1)
   unit=$("$GREP" -Eo '[[:alpha:]%]+|[0-9]+' <<< "$value" | tail -1)

   # Based on unit, convert to bytes
   case "$unit" in
      K)
         echo $((size * 1024))
         ;;
      M)
         echo $((size * 1024**2))
         ;;
      G)
         echo $((size * 1024**3))
         ;;
      %)
         echo $((totalsize * size / 100))
         ;;
      [0-9]*)
         # Generalization: a bare number has no unit (size and unit match the
         # same token) and is already in bytes. Previously this case fell
         # through the 'case' and produced no output at all.
         echo "$size"
         ;;
   esac
}

#------------------------------------------------------------------------------
# Write a semaphore file, $LOGS/ckp_running.txt.  This file is written at
# the start of processing, and removed upon successful completion.  It
# prevents multiple concurrent operations from being launched accidentally
# e.g. by multiple human admins, or a human inadvertently competing with a
# cron job.
#
# It is also intended to get human admins to determine the root cause of
# checkpoint failures.
#------------------------------------------------------------------------------
function ckp_running() {
   # Create the checkpoint semaphore file, dying first if one already exists
   # (indicating a prior checkpoint run has not completed).
   local semaphoreFile="${LOGS}/ckp_running.txt"
   if [[ -f "$semaphoreFile" ]]; then
      die "Last checkpoint not complete. Check the backup process or contact support."
   fi
   echo "Checkpoint running." > "$semaphoreFile"
}

#------------------------------------------------------------------------------
# Remove the ckp_running.txt semaphore file when checkpoint processing is
# complete.
#------------------------------------------------------------------------------
function ckp_complete() {
   # Remove the checkpoint semaphore file written by ckp_running().
   local semaphoreFile="${LOGS}/ckp_running.txt"
   rm -f "$semaphoreFile"
}

#------------------------------------------------------------------------------
# Ensure key directories are writable. Abort if they are not.
#------------------------------------------------------------------------------
function check_dirs () {
   # Ensure key dirs ($OFFLINE_DB, $CHECKPOINTS, $JOURNALS, $LOGS) exist and
   # are writable; die otherwise.
   # Fix: declare 'dirList' and 'dir' local so they no longer leak globals.
   local -i dirsOK=1
   local dirList="$OFFLINE_DB $CHECKPOINTS $JOURNALS $LOGS"
   local dir=
   for dir in $dirList; do
      if [[ ! -d "$dir" || ! -w "$dir" ]]; then
         log "Error: Dir $dir does not exist or is not writable."
         dirsOK=0
      fi
   done
   [[ "$dirsOK" -eq 1 ]] || die "Some expected dirs are missing or not writable. Aborting."
}

#------------------------------------------------------------------------------
# Add the results of df -h or df -m to the log file.
#------------------------------------------------------------------------------
function check_disk_space () {
   # Record current Perforce disk space usage in $LOGFILE via 'p4 diskspace'.
   log "Checking disk space..."
   { $P4BIN diskspace; } >> "$LOGFILE" 2>&1
}

#------------------------------------------------------------------------------
# Check value of journal; ensure it is an integer.
#------------------------------------------------------------------------------
function check_journalnum () {
   # Validate that the given journal counter ($1) is a non-negative integer;
   # die otherwise.
   # Fix: declare 're' local so it no longer leaks into the global scope.
   local JNLNUM=${1:-Unset}
   local re='^[0-9]+$'
   if ! [[ $JNLNUM =~ $re ]] ; then
      die "The journal counter value [$JNLNUM] is invalid. It must be numeric."
   fi
}

#------------------------------------------------------------------------------
# Check the checkpoints directory for the oldest checkpoint
#------------------------------------------------------------------------------
function get_ckpnum () {
   # Set OLDESTCHECKPOINT (SC2034: used by callers) to the checkpoint number
   # of the oldest checkpoint file (by mtime) in the checkpoints directory.
   # Edge servers use the per-ServerID '${CHECKPOINTS}.<suffix>' directory.
   # Note: tr -d '.gz' deletes the characters '.', 'g', and 'z' individually;
   # that is safe here because the remaining token is purely numeric.
   if [[ "$EDGESERVER" -eq 0 ]]; then
      # shellcheck disable=SC2034 disable=SC2012 disable=SC2016
      OLDESTCHECKPOINT=$(ls -1tr "${CHECKPOINTS}/" | "$GREP" ckp | "$GREP" -v md5 | head -n 1 | "$AWK" -F '.ckp.' '{ print $(2) }' | tr -d '.gz')
   else
      # shellcheck disable=SC2034 disable=SC2012 disable=SC2016
      OLDESTCHECKPOINT=$(ls -1tr "${CHECKPOINTS}.${SERVERID#p4d_}/" | "$GREP" ckp | "$GREP" -v md5 | head -n 1 | "$AWK" -F '.ckp.' '{ print $(2) }' | tr -d '.gz')
   fi
}

#------------------------------------------------------------------------------
# Determine journal counter by checking counter in db.counters.
#------------------------------------------------------------------------------
get_journalnum () {
   # Set JOURNALNUM from the @journal@ counter in $P4ROOT/db.counters, and
   # CHECKPOINTNUM to JOURNALNUM+1. On edge/replica/standby servers,
   # JOURNALNUM is decremented by 1 (rationale in comments below).
   # get the current journal and checkpoint serial numbers.
   local nextCheckpointNum
   if [[ -r "$P4ROOT/db.counters" ]]; then
      nextCheckpointNum=$("$P4DBIN" -r "$P4ROOT" -k db.counters -jd - 2>&1 | grep @journal@ | cut -d '@' -f 8)

      if [[ -n "$nextCheckpointNum" ]]; then
         # Dies if the extracted value is not numeric.
         check_journalnum "$nextCheckpointNum"
         JOURNALNUM="$nextCheckpointNum"
      else
         # Special case: If db.counters is empty, then we have a new/empty data
         # set, so just set the value to 0.
         JOURNALNUM=0
      fi
   else
      # Special case: If db.counters doesn't exist, then we have a new/empty
      # data set, so just set the value to 0.
      JOURNALNUM=0
   fi

   # If we are on an edge server, the journal has already rotated, so we have to decrement the value
   # so that we replay the correct journal file and create the correct checkpoint number on the
   # edge server.
   #
   # In the case of a standby server, the journal rotation occurs on the master server,
   # so we don't need to increment the journal number again, so we decrement by 1.
   # Also, when replaying the journals to the offline db, we don't want to play to the live journal
   # because it is still being replicated.
   if [[ "$EDGESERVER" -eq 1 || "$REPLICASERVER" -eq 1 || "$STANDBYSERVER" -eq 1 ]]; then
      JOURNALNUM=$((JOURNALNUM - 1))
   fi
   CHECKPOINTNUM=$((JOURNALNUM + 1))
}

#------------------------------------------------------------------------------
# Determine journal space usage and minimum disk space requirement
#------------------------------------------------------------------------------
get_journal_stats () {
   # Collect journal filesystem stats into globals (SC2034: used by callers):
   # P4JOURNALMIN   - filesys.P4JOURNAL.min threshold before p4d rejects commands
   # P4JOURNALFREE  - free bytes on the P4JOURNAL filesystem
   # P4JOURNALTOTAL - total bytes on the P4JOURNAL filesystem
   # Get minimum disk space required on server journal filesystem before server rejects commands
   # This will return the configured and default value, but grab the configured value which shows first
   # If a configured value is not present, it will use the default value
   # shellcheck disable=SC2034 disable=SC2016
   P4JOURNALMIN=$("$P4BIN" configure show filesys.P4JOURNAL.min | "$AWK" '{ print $1 }' | $CUT -d'=' -f2 | head -1)
   # Get current journal free disk space
   # shellcheck disable=SC2034
   P4JOURNALFREE=$("$P4BIN" -ztag -F "%freeBytes%" diskspace P4JOURNAL)
   # Get total available disk space for journal
   # shellcheck disable=SC2034
   P4JOURNALTOTAL=$("$P4BIN" -ztag -F "%totalBytes%" diskspace P4JOURNAL)
}

#------------------------------------------------------------------------------
# Verify that the offline databases are usable by checking the existence
# of a 'offline_db_usable.txt' file that is written only when databases
# are in a known-good state, following successful recovery from a checkpoint.
#------------------------------------------------------------------------------
check_offline_db_usable () {
   # Confirm the offline databases are in a known-good state: the
   # offline_db_usable.txt flag file is written only after successful
   # recovery from a checkpoint, and db.counters must also exist.
   local usableFlag="$OFFLINE_DB/offline_db_usable.txt"

   if [[ ! -f "$usableFlag" ]]; then
      die "Offline database not in a usable state. Check the backup process."
   fi

   if [[ ! -f "$OFFLINE_DB/db.counters" ]]; then
      die "Offline database not found. Consider creating it with live_checkpoint.sh. Be aware that live_checkpoint.sh locks the live system and may take a long time. Aborting."
   fi
}

#------------------------------------------------------------------------------
# Determine journal counter in offline databases.
#------------------------------------------------------------------------------
get_offline_journal_num () {
   # Set OFFLINEJNLNUM, OFFLINECHECKPOINTNUM, and CHECKPOINTNUM from the
   # @journal@ counter in the offline database's db.counters. Dies if the
   # offline db is unusable or the counter cannot be read.
   # Get the journal number of the offline database
   check_offline_db_usable
   OFFLINEJNLNUM=$("$P4DBIN" -r "$OFFLINE_DB" -jd - db.counters 2>&1 | grep '@journal@' | cut -d "@" -f 8 2>> "$LOGFILE") || die "Cannot get the offline journal number. Abort!"
   # Dies if the extracted value is not numeric.
   check_journalnum "$OFFLINEJNLNUM"
   log "Offline journal number is: $OFFLINEJNLNUM"
   OFFLINECHECKPOINTNUM=$(($OFFLINEJNLNUM))
   log "Offline checkpoint number is: $OFFLINECHECKPOINTNUM"
   # Reset checkpoint number here due to calling get_journalnum after rotating the journal. This makes sure that offline checkpoints
   # write the correct checkpoint number.
   CHECKPOINTNUM=$OFFLINECHECKPOINTNUM
}

#------------------------------------------------------------------------------
# Cleanup old checkpoint and numbered journal files.
#------------------------------------------------------------------------------
remove_old_checkpoints_and_journals () {
   # Delete checkpoints and numbered journals beyond the KEEPCKPS/KEEPJNLS
   # retention counts: parallel checkpoint dirs, checkpoint files (and their
   # md5 files via trailing-glob rm), numbered journals, standby replica
   # journals under $P4HOME/journals.rep, and optionally $LOGS/journal.NNN
   # files when SDP_REMOVE_STANDBY_JOURNALS=1.
   local CheckpointsDir=
   local StandbyReplicaJournalsDir=
   local FilePrefix=
   local File=

   if [[ "$KEEPCKPS" -eq 0 ]]; then
      log "Skipping cleanup of old checkpoints because KEEPCKPS is set to 0."
   else
      log "Deleting obsolete checkpoints and journals. Keeping latest $KEEPCKPS per KEEPCKPS setting in p4_vars."
      CheckpointsDir="${CHECKPOINTS}"
      FilePrefix="${P4SERVER}"

      if [[ -d "$CheckpointsDir" ]]; then
         # This section cleans up both parallel and non-parallel checkpoints to keep things clean.
         # Remove parallel checkpoint dirs.
         # Each pipeline extracts the numeric checkpoint component (field 3
         # when split on '.'), sorts numerically descending, and lets AWK
         # print only entries past the first $KEEPCKPS (those to delete).
         for Dir in $(find "${CHECKPOINTS}/" -type d -name "${P4SERVER}.ckp.*" | cut -d "." -f 3 | sort -nr | "$AWK" "NR > $KEEPCKPS"); do
            log "rm -rf ${CHECKPOINTS}/${P4SERVER}.ckp.$Dir*"
            rm -rf ${CHECKPOINTS}/${P4SERVER}.ckp.$Dir*
         done
         # Remove selected checkpoint files based on the KEEPCKPS
         # setting regardless of whether compressed or not.
         # We multiply KEEPCKP by 2 for the ckp files because of the md5 files.
         # shellcheck disable=SC2012
         for File in $(find "${CHECKPOINTS}/" -type f -name "${P4SERVER}.ckp.*.gz" | cut -d "." -f 3 | sort -nr | "$AWK" "NR > $KEEPCKPS"); do
            log "rm -f ${CHECKPOINTS}/${P4SERVER}.ckp.$File*"
            rm -f ${CHECKPOINTS}/${P4SERVER}.ckp.$File*
         done

         # Use KEEPJNLS to allow for separate journal rotation at a higher
         # frequency.
         # shellcheck disable=SC2012
         for File in $(find "${CHECKPOINTS}/" -type f -name "${P4SERVER}.jnl.*" | cut -d "." -f 3 | sort -nr | "$AWK" "NR > $KEEPJNLS"); do
            log "rm -f ${CHECKPOINTS}/${P4SERVER}.jnl.$File*"
            rm -f ${CHECKPOINTS}/${P4SERVER}.jnl.$File*
         done
      fi

      # Apply the same retention policy to journals replicated for standby
      # replicas, if that directory exists.
      StandbyReplicaJournalsDir="${P4HOME}/journals.rep"
      if [[ -d "$StandbyReplicaJournalsDir" ]]; then
         # shellcheck disable=SC2012
         for File in $(find "${StandbyReplicaJournalsDir}/" -type f -name "${FilePrefix}.ckp.*.gz" | cut -d "." -f 3 | sort -nr | "$AWK" "NR > $KEEPCKPS"); do
            log "rm -f ${StandbyReplicaJournalsDir}/${P4SERVER}.ckp.$File*"
            rm -f ${StandbyReplicaJournalsDir}/${P4SERVER}.ckp.$File*
         done

         # shellcheck disable=SC2012
         for File in $(find "${StandbyReplicaJournalsDir}/" -type f -name "${FilePrefix}.jnl.*" | cut -d "." -f 3 | sort -nr | "$AWK" "NR > $KEEPJNLS"); do
            log "rm -f ${StandbyReplicaJournalsDir}/${P4SERVER}.jnl.$File*"
            rm -f ${StandbyReplicaJournalsDir}/${P4SERVER}.jnl.$File*
         done
      fi

      # This is a workaround to cleanup $LOGS/journal.NNN files on standby replicas.
      # These files are normally removed by p4d during journal rotation on the standby
      # replica. Use only if standby journals are not removed due to a standby replica
      # sharing /hxdepots with its P4TARGET server. To use this workround, add this
      # line to the end of the /p4/common/config/p4_N.vars file:
      #
      # export SDP_REMOVE_STANDBY_JOURNALS=1
      #
      if [[ "${SDP_REMOVE_STANDBY_JOURNALS:-0}" == 1 && "$(is_standby "$SERVERID")" == YES ]]; then
         log "Removing excess journal.NNN files due to SDP_REMOVE_STANDBY_JOURNALS=1."
         # shellcheck disable=SC2012
         for File in $(ls -t "${LOGS}/journal."* 2>/dev/null | "$AWK" "NR > $KEEPJNLS"); do
            # Process only files named 'journal.NNN' in $LOGS.
            [[ "$File" =~ /journal.[0-9]+$ ]] || continue
            log "rm -f $File"
            rm -f "$File"
         done
      fi
   fi
}

#------------------------------------------------------------------------------
# Function: is_server_up ($server)
#
# Input:
# $1 - server, one of 'p4d', 'p4p', or 'p4broker'
#
# Output: None
#
# Return Codes:
# 0: Server is up.
# 1: Server is down.
# 2: Bad usage.
#
# Server up/down status is checked using the appropriate init script.
#------------------------------------------------------------------------------
function is_server_up () {
   # Check server up/down status using the appropriate init script.
   #
   # Input:
   # $1 - server, one of 'p4d', 'p4p', or 'p4broker'
   #
   # Return Codes:
   # 0: Server is up.
   # 1: Server is down.
   # 2: Bad usage.
   local server="${1:-Unset}"
   local initScript=

   case "$server" in
      (p4d) initScript="$P4DInitScript";;
      (p4broker) initScript="$P4BrokerInitScript";;
      (p4p) initScript="$P4ProxyInitScript";;
      (Unset)
         log "Internal Error: is_server_up(): No server type specified."
         return 2
      ;;
      (*)
         log "Internal Error: is_server_up(): Unknown server specified: $server"
         return 2
      ;;
   esac

   # Return the init script's status exit code directly.
   "$initScript" status > /dev/null 2>&1
}

#------------------------------------------------------------------------------
# Shutdown p4d using systemd if configured for systemd. Otherwise call the
# underlying init script directly.
#
# Log the shutdown activity.
#
#------------------------------------------------------------------------------
stop_p4d () {
   # Stop p4d via 'sudo systemctl' when a systemd unit is enabled for this
   # instance; otherwise call the init script directly. Do-or-die: returns 0
   # only after the stop is verified, else dies.
   log "Shutting down the ${P4DBIN##*/} server."
   # p4d may take a long time to stop; default allows up to 12 hours.
   local -i maxStopDelay=${SDP_MAX_STOP_DELAY_P4D:-43200}
   local -i stopVerified=0
   local -i i=0
   local -i useSystemd=0
   local serviceName=

   # Use systemd only if systemctl exists and reports the service enabled.
   if [[ -n "$(command -v systemctl)" ]]; then
      serviceName="${P4DBIN##*/}"
      if [[ -n "$(systemctl is-enabled "$serviceName" 2>/dev/null)" ]]; then
         useSystemd=1
      fi
   fi

   if [[ "$useSystemd" -eq 1 ]]; then
      { sudo systemctl stop "${P4DBIN##*/}"; } >> "$LOGFILE" 2>&1 ||\
         die "Failed to execute: sudo systemctl stop ${P4DBIN##*/}"

      # With systemd, we must independently confirm service stop,
      # waiting if needed.
      stopVerified=0
      i=0; while [[ "$i" -lt "$maxStopDelay" ]]; do
         if is_server_up p4d; then
            sleep 1
         else
            stopVerified=1
            break
         fi
         i+=1
      done
   else
      # The init-script path assumes a synchronous stop; stopVerified is set
      # unconditionally.
      "$P4DInitScript" stop >> "$LOGFILE" 2>&1
      stopVerified=1
   fi

   if [[ "$stopVerified" -eq 1 ]]; then
      log "Stopped ${P4DBIN##*/} server."
      return 0
   else
      log "Error: Server ${P4DBIN##*/} did not stop after $maxStopDelay seconds. Tailing $P4LOG:"
      tail "$P4LOG" >> "$LOGFILE" 2>&1
      die "Aborting due to failed p4d stop."
   fi
}

#------------------------------------------------------------------------------
# Shutdown p4broker using systemd if configured for systemd. Otherwise call the
# underlying init script directly.
#
# Log the shutdown activity.
#
#------------------------------------------------------------------------------
stop_p4broker () {
   # Stop p4broker via 'sudo systemctl' when a systemd unit is enabled for
   # this instance; otherwise call the init script directly. Do-or-die:
   # returns 0 only after the stop is verified, else dies.
   log "Shutting down the ${P4BROKERBIN##*/} server."
   local -i maxStopDelay=${SDP_MAX_STOP_DELAY_P4BROKER:-600}
   local -i stopVerified=0
   local -i i=0
   local -i useSystemd=0
   local serviceName=

   # Use systemd only if systemctl exists and reports the service enabled.
   if [[ -n "$(command -v systemctl)" ]]; then
      serviceName="${P4BROKERBIN##*/}"
      if [[ -n "$(systemctl is-enabled "$serviceName" 2>/dev/null)" ]]; then
         useSystemd=1
      fi
   fi

   if [[ "$useSystemd" -eq 1 ]]; then
      { sudo systemctl stop "${P4BROKERBIN##*/}"; } >> "$LOGFILE" 2>&1 ||\
         die "Failed to execute: sudo systemctl stop ${P4BROKERBIN##*/}"

      # With systemd, we must independently confirm service stop,
      # waiting if needed.
      stopVerified=0
      i=0; while [[ "$i" -lt "$maxStopDelay" ]]; do
         if is_server_up p4broker; then
            sleep 1
         else
            stopVerified=1
            break
         fi
         i+=1
      done
   else
      # The init-script path assumes a synchronous stop; stopVerified is set
      # unconditionally.
      "$P4BrokerInitScript" stop >> "$LOGFILE" 2>&1
      stopVerified=1
   fi

   if [[ "$stopVerified" -eq 1 ]]; then
      log "Stopped ${P4BROKERBIN##*/} server."
      return 0
   else
      # Bug fix: this message previously referenced undefined $maxStartDelay,
      # which under 'set -u' aborts with an 'unbound variable' error instead
      # of reporting the timeout.
      die "Server ${P4BROKERBIN##*/} did not stop after $maxStopDelay seconds."
   fi
}

#------------------------------------------------------------------------------
# Shutdown p4p using systemd if configured for systemd. Otherwise call the
# underlying init script directly.
#
# Log the shutdown activity.
#
#------------------------------------------------------------------------------
stop_p4p () {
   # Stop p4p via 'sudo systemctl' when a systemd unit is enabled for this
   # instance; otherwise call the init script directly. Do-or-die: returns 0
   # only after the stop is verified, else dies.
   log "Shutting down the ${P4PBIN##*/} server."
   local -i maxStopDelay=${SDP_MAX_STOP_DELAY_P4P:-600}
   local -i stopVerified=0
   local -i i=0
   local -i useSystemd=0
   local serviceName=

   # Use systemd only if systemctl exists and reports the service enabled.
   if [[ -n "$(command -v systemctl)" ]]; then
      serviceName="${P4PBIN##*/}"
      if [[ -n "$(systemctl is-enabled "$serviceName" 2>/dev/null)" ]]; then
         useSystemd=1
      fi
   fi

   if [[ "$useSystemd" -eq 1 ]]; then
      { sudo systemctl stop "${P4PBIN##*/}"; } >> "$LOGFILE" 2>&1 ||\
         die "Failed to execute: sudo systemctl stop ${P4PBIN##*/}"

      # With systemd, we must independently confirm service stop,
      # waiting if needed.
      stopVerified=0
      i=0; while [[ "$i" -lt "$maxStopDelay" ]]; do
         if is_server_up p4p; then
            sleep 1
         else
            stopVerified=1
            break
         fi
         i+=1
      done
   else
      # The init-script path assumes a synchronous stop; stopVerified is set
      # unconditionally.
      "$P4ProxyInitScript" stop >> "$LOGFILE" 2>&1
      stopVerified=1
   fi

   if [[ "$stopVerified" -eq 1 ]]; then
      log "Stopped ${P4PBIN##*/} server."
      return 0
   else
      die "Server ${P4PBIN##*/} did not stop after $maxStopDelay seconds."
   fi
}

#------------------------------------------------------------------------------
# Start p4d using systemd if configured for systemd. Otherwise call the
# underlying init script directly.
#
# Log the startup activity.
#
# This is a do-or-die function. It returns success upon successful server
# startup, or else dies.
#------------------------------------------------------------------------------
function start_p4d () {
   # Start p4d, preferring systemd when the service unit is enabled, else
   # calling the SDP init script directly. Do-or-die: returns 0 once the
   # server answers, or dies after SDP_MAX_START_DELAY_P4D (default 120)
   # seconds, tailing $P4LOG into the log for diagnosis first.
   log "Starting the ${P4DBIN##*/} server."
   local -i maxStartDelay=${SDP_MAX_START_DELAY_P4D:-120}
   local -i startVerified=0
   local -i elapsed=0
   local -i useSystemd=0
   local serviceName=

   # Manage via systemd only if systemctl exists and the unit is enabled.
   if command -v systemctl > /dev/null 2>&1; then
      serviceName="${P4DBIN##*/}"
      [[ -n "$(systemctl is-enabled "$serviceName" 2>/dev/null)" ]] && useSystemd=1
   fi

   if (( useSystemd )); then
      { sudo systemctl start "${P4DBIN##*/}"; } ||\
         die "Failed to execute: sudo systemctl start ${P4DBIN##*/}"
   else
      "$P4DInitScript" start >> "$LOGFILE" 2>&1
   fi

   # Poll until p4d responds, up to maxStartDelay seconds.
   while (( elapsed < maxStartDelay )); do
      if is_server_up p4d; then
         startVerified=1
         break
      fi
      sleep 1
      elapsed+=1
   done

   if (( startVerified )); then
      log "Server ${P4DBIN##*/} started successfully."
      return 0
   fi

   log "Error: Server ${P4DBIN##*/} did not start after $maxStartDelay seconds. Tailing $P4LOG:"
   tail "$P4LOG" >> "$LOGFILE" 2>&1
   die "Aborting due to failed p4d start."
}

#------------------------------------------------------------------------------
# Start p4broker using systemd if configured for systemd. Otherwise call the
# underlying init script directly.
#
# Log the startup activity.
#
# This is a do-or-die function. It returns success upon successful server
# startup, or else dies.
#------------------------------------------------------------------------------
function start_p4broker () {
   # Start p4broker, preferring systemd when the service unit is enabled,
   # else calling the SDP init script directly. Do-or-die: returns 0 once
   # the broker answers, or dies after SDP_MAX_START_DELAY_P4BROKER
   # (default 60) seconds.
   log "Starting the ${P4BROKERBIN##*/} server."
   local -i maxStartDelay=${SDP_MAX_START_DELAY_P4BROKER:-60}
   local -i startVerified=0
   local -i elapsed=0
   local -i useSystemd=0
   local serviceName=

   # Manage via systemd only if systemctl exists and the unit is enabled.
   if command -v systemctl > /dev/null 2>&1; then
      serviceName="${P4BROKERBIN##*/}"
      [[ -n "$(systemctl is-enabled "$serviceName" 2>/dev/null)" ]] && useSystemd=1
   fi

   if (( useSystemd )); then
      { sudo systemctl start "${P4BROKERBIN##*/}"; } ||\
         die "Failed to execute: sudo systemctl start ${P4BROKERBIN##*/}"
   else
      "$P4BrokerInitScript" start >> "$LOGFILE" 2>&1
   fi

   # Poll until the broker responds, up to maxStartDelay seconds.
   while (( elapsed < maxStartDelay )); do
      if is_server_up p4broker; then
         startVerified=1
         break
      fi
      sleep 1
      elapsed+=1
   done

   if (( startVerified )); then
      log "Server ${P4BROKERBIN##*/} started successfully."
      return 0
   fi

   die "Server ${P4BROKERBIN##*/} did not start after $maxStartDelay seconds."
}

#------------------------------------------------------------------------------
# Start p4p using systemd if configured for systemd. Otherwise call the
# underlying init script directly.
#
# Log the startup activity.
#
# This is a do-or-die function. It returns success upon successful server
# startup, or else dies.
#------------------------------------------------------------------------------
function start_p4p () {
   # Start p4p, preferring systemd when the service unit is enabled, else
   # calling the SDP init script directly. Do-or-die: returns 0 once the
   # proxy answers, or dies after SDP_MAX_START_DELAY_P4P (default 60)
   # seconds.
   log "Starting the ${P4PBIN##*/} server."
   local -i maxStartDelay=${SDP_MAX_START_DELAY_P4P:-60}
   local -i startVerified=0
   local -i elapsed=0
   local -i useSystemd=0
   local serviceName=

   # Manage via systemd only if systemctl exists and the unit is enabled.
   if command -v systemctl > /dev/null 2>&1; then
      serviceName="${P4PBIN##*/}"
      [[ -n "$(systemctl is-enabled "$serviceName" 2>/dev/null)" ]] && useSystemd=1
   fi

   if (( useSystemd )); then
      { sudo systemctl start "${P4PBIN##*/}"; } ||\
         die "Failed to execute: sudo systemctl start ${P4PBIN##*/}"
   else
      "$P4ProxyInitScript" start >> "$LOGFILE" 2>&1
   fi

   # Poll until the proxy responds, up to maxStartDelay seconds.
   while (( elapsed < maxStartDelay )); do
      if is_server_up p4p; then
         startVerified=1
         break
      fi
      sleep 1
      elapsed+=1
   done

   if (( startVerified )); then
      log "Server ${P4PBIN##*/} started successfully."
      return 0
   fi

   die "Server ${P4PBIN##*/} did not start after $maxStartDelay seconds."
}

#------------------------------------------------------------------------------
# Do a front-door 'p4d admin journal' command to rotate the current/active
# journal file on the master server, starting a fresh new P4JOURNAL file.
#
# In a distributed topology with replicas/edge servers, this function must
# be called only on the master/commit server.
#------------------------------------------------------------------------------
function truncate_journal () {
   # Rotate the current/active journal on the master via a front-door
   # 'p4 admin journal', starting a fresh P4JOURNAL. No-op on any server
   # type other than p4d_master. Dies if the expected rotated checkpoint
   # or journal file already exists, or if the rotation command fails.
   local CheckpointFile="${CHECKPOINTS}/${P4SERVER}.ckp.${CHECKPOINTNUM}.gz"
   local JournalFile="${JOURNALS}/${P4SERVER}.jnl.${JOURNALNUM}"

   if [[ "$SERVER_TYPE" == "p4d_master" ]]; then
      [[ -f "$CheckpointFile" ]] && \
         die "Checkpoint $CheckpointFile already exists, check the backup process."
      [[ -f "$JournalFile" ]] && \
         die "Journal $JournalFile already exists, check the backup process."

      log "Truncating journal..."
      # Journal rotation, whether front-door 'p4 admin journal' or
      # back-door 'p4d -jj', is a copy-then-delete rather than an OS-level
      # mv. Clients see a brief pause (as with a checkpoint), but it is
      # short even for large data sets since a journal is roughly one day
      # of metadata.
      # Curly braces capture output of 'time'.
      "$P4CBIN"/p4login -p "$P4MASTERPORT"
      { time "$P4BIN" -p "$P4MASTERPORT" admin journal; } >> "$LOGFILE" 2>&1 || { die "Journal rotation failed. Abort!"; }
      # Poll every 5 seconds until the rotated journal appears in the
      # checkpoints directory before proceeding.
      while :; do
         sleep 5
         [[ -f "$JournalFile" ]] && break
      done
      "$P4CBIN"/p4login
   fi
}

#------------------------------------------------------------------------------
# Rotate the current/active journal file on the master server (via a
# front-door 'p4 admin journal' command), starting a fresh new P4JOURNAL file.
#
# In a distributed topology with edge and standby servers, this function can be
# used to trigger a journal rotation on master/commit server. It's not meant to
# be used from the master server itself.
#------------------------------------------------------------------------------
function truncate_journal_on_master () {
   # Trigger a journal rotation against the master (P4MASTERPORT) via a
   # front-door 'p4 admin journal', then wait for the rotated journal file
   # to appear locally before re-establishing the service login.
   # NOTE(review): the header above this function says it is meant to be
   # run from an edge/standby server, not the master, yet the guard below
   # only executes when SERVER_TYPE is "p4d_master" -- confirm the
   # intended condition.
   # Increment Edge journal number since the journal will increment on the master after calling journal rotation
   local EdgeJournalNum=$((JOURNALNUM + 1)) # NOTE(review): computed but never used below.
   local StandbyJournalNum=$((JOURNALNUM + 2)) # If using journalcopy, have to add 2 since live journal is in checkpoints folder
   local JournalFile="${JOURNALS}/${P4SERVER}.jnl.${StandbyJournalNum}"


   if [[ "$SERVER_TYPE" == "p4d_master" ]]; then
      [[ -f "$JournalFile" ]] && \
         die "Journal $JournalFile already exists, check the backup process."

      log "Truncating journal on ${P4MASTERPORT}."
      # 'p4d -jj' does a copy-then-delete, instead of a simple mv.
      # NOTE(review): the command actually run below is the front-door
      # 'p4 admin journal', not 'p4d -jj' as the comments here suggest.
      # During 'p4d -jj' the perforce server will hang the responses to clients,
      # this should be for a very short period of time even for large data
      # sets, as the journal represents a single day of metadata.
      # Curly braces capture output of 'time'.
      "$P4CBIN"/p4login -p "$P4MASTERPORT"
      { time "$P4BIN" -p "$P4MASTERPORT" admin journal; } >> "$LOGFILE" 2>&1 || { die "Journal rotation failed. Abort!"; }
      # The test below waits until the journal file exists in the checkpoints directory before proceeding.
      test=1
      while [[ $test != 0 ]]; do
         sleep 5
         if [[ -f "$JournalFile" ]]; then
            test=0
         fi
      done
      "$P4CBIN"/p4login -service
   fi
}

#------------------------------------------------------------------------------
# Similar to truncate_journal() above, p4d_truncate_journal() is intended to be
# usable from the p4d_base init script, to allow journal rotation on p4d
# start.  As it may be called from the init script, it may be called on the
# master, a replica, or the edge. However, it will only do the journal
# rotation if called on the master.
#------------------------------------------------------------------------------
function p4d_truncate_journal () {
   # Rotate the live journal with a back-door 'p4d -jj' before p4d starts.
   # Only effective on the master; logs a warning and does nothing on any
   # other server type. Dies if the expected rotated journal file already
   # exists or the rotation fails.
   local JournalFile="${JOURNALS}/${P4SERVER}.jnl.${JOURNALNUM}"

   if [[ "$SERVER_TYPE" != "p4d_master" ]]; then
      log "Warning: The p4d_truncate_journal() function has no effect if called on a server other than the master. Ignoring."
      return
   fi

   [[ -f "$JournalFile" ]] && \
      die "Journal $JournalFile already exists, check the backup process."

   log "Rotating journal prior to starting p4d."
   "$P4DBIN" -r "$P4ROOT" -J "$P4JOURNAL" -jj >> "$LOGFILE" 2>&1 ||\
      die "Failed to rotate journal. Aborting p4d server start."
}

#------------------------------------------------------------------------------
# Replay any and all numbered journal files into the offline databases.
#------------------------------------------------------------------------------
function replay_journals_to_offline_db () {
   # Replay all numbered journal files not yet applied to the offline
   # database, scanning first the checkpoints directory and then the
   # journals directory. Dies if any replay fails. The marker file
   # offline_db_usable.txt is removed before each replay and rewritten
   # after a successful one.
   #
   # Fixes vs. prior version: corrected "LASTJOUNRALNUM" typo in two log
   # messages; 'cd' calls are now quoted and checked; 'touch' args quoted.
   local CheckpointsDir=
   local FilePrefix=
   local NumberedJournal=

   log "Replay any unreplayed journals to the offline database."
   check_offline_db_usable
   get_offline_journal_num

   # Refer to ckp/jnl files starting like: /p4/N/checkpoints/p4_N
   CheckpointsDir="${CHECKPOINTS}"
   FilePrefix="${P4SERVER}"

   cd "${CHECKPOINTS}" || die "Could not cd to: $CHECKPOINTS"

   # Guarantee at least one *.jnl.* file exists so the 'ls' parsing below
   # always yields a number.
   touch "${P4SERVER}.jnl.0"

   # Journal numbers are field 3 of names like p4_N.jnl.<num>.
   # shellcheck disable=SC2012
   FIRSTJOURNALNUM=$(ls -t *.jnl.* | cut -d "." -f 3 | sort -nr | tail -n 2 | head -n 1)
   # shellcheck disable=SC2012
   LASTJOURNALNUM=$(ls -t *.jnl.* | cut -d "." -f 3 | sort -nr | head -n 1)
   log "FIRSTJOURNALNUM=$FIRSTJOURNALNUM"
   log "LASTJOURNALNUM=$LASTJOURNALNUM"
   log "OFFLINEJNLNUM=$OFFLINEJNLNUM"
   if (( FIRSTJOURNALNUM <= OFFLINEJNLNUM )); then
      for (( j=OFFLINEJNLNUM; j <= LASTJOURNALNUM; j++ )); do
         NumberedJournal="${CheckpointsDir}/${FilePrefix}.jnl.${j}"
         log "Replay journal $NumberedJournal to offline db."
         rm -f "${OFFLINE_DB}/offline_db_usable.txt" >> "$LOGFILE" 2>&1
         # Prefer the gzipped journal when it exists.
         # Curly braces capture output of 'time'.
         if [[ -f "${NumberedJournal}.gz" ]]; then
            { time "$P4DBIN" -r "$OFFLINE_DB" -jr -f "${NumberedJournal}.gz"; } >> "$LOGFILE" 2>&1 || { die "Offline journal replay failed. Abort!"; }
         else
            if [[ -f "${NumberedJournal}" ]]; then
               { time "$P4DBIN" -r "$OFFLINE_DB" -jr -f "${NumberedJournal}"; } >> "$LOGFILE" 2>&1 || { die "Offline journal replay failed. Abort!"; }
            fi
         fi
         echo "Offline journal files restored successfully." > "${OFFLINE_DB}/offline_db_usable.txt"
      done
   fi

   get_offline_journal_num

   cd "${JOURNALS}" || die "Could not cd to: $JOURNALS"
   touch "${P4SERVER}.jnl.0"
   # shellcheck disable=SC2012
   FIRSTJOURNALNUM=$(ls -t *.jnl.* | cut -d "." -f 3 | sort -nr | tail -n 2 | head -n 1)
   # shellcheck disable=SC2012
   LASTJOURNALNUM=$(ls -t *.jnl.* | cut -d "." -f 3 | sort -nr | head -n 1)
   log "FIRSTJOURNALNUM=$FIRSTJOURNALNUM"
   log "LASTJOURNALNUM=$LASTJOURNALNUM"
   log "OFFLINEJNLNUM=$OFFLINEJNLNUM"

   if (( FIRSTJOURNALNUM <= OFFLINEJNLNUM )); then
      for (( j=OFFLINEJNLNUM; j <= LASTJOURNALNUM; j++ )); do
         NumberedJournal="${JOURNALS}/${FilePrefix}.jnl.${j}"
         log "Replay journal $NumberedJournal to offline db."
         rm -f "${OFFLINE_DB}/offline_db_usable.txt" >> "$LOGFILE" 2>&1
         # Curly braces capture output of 'time'.
         if [[ -f "${NumberedJournal}.gz" ]]; then
            { time "$P4DBIN" -r "$OFFLINE_DB" -jr -f "${NumberedJournal}.gz"; } >> "$LOGFILE" 2>&1 || { die "Offline journal replay failed. Abort!"; }
         else
            if [[ -f "${NumberedJournal}" ]]; then
               { time "$P4DBIN" -r "$OFFLINE_DB" -jr -f "${NumberedJournal}"; } >> "$LOGFILE" 2>&1 || { die "Offline journal replay failed. Abort!"; }
            fi
         fi
         echo "Offline journal files restored successfully." > "${OFFLINE_DB}/offline_db_usable.txt"
      done
   fi
}

#------------------------------------------------------------------------------
# Replay the live, active P4JOURNAL file into the offline database.
#------------------------------------------------------------------------------
function replay_active_journal_to_offline_db () {
   # Replay the live, active P4JOURNAL file into the offline database.
   # Do-or-die: aborts if the standby journal number cannot be determined
   # or if the replay fails.
   log "Replay active journal to offline db."

   local ActiveJournal=

   # On a standby server, the current/active journal is named /p4/N/logs/journal.<jnlNum>.
   # On the master and other server types, the active journal is $P4JOURNAL.
   if [[ "$STANDBYSERVER" -eq 1 ]]; then
      local _JNLNUM
      # Extract the current journal counter from db.counters: field 8 of
      # the '@journal@' record in a 'p4d -jd -' dump to stdout.
      _JNLNUM=$("$P4DBIN" -r "$P4ROOT" -jd - db.counters 2>&1 | grep '@journal@' | cut -d "@" -f 8 2>> "$LOGFILE") || die "Cannot get $P4ROOT journal number. Abort!"
      ActiveJournal="$LOGS/journal.$_JNLNUM"
   else
      ActiveJournal="$P4JOURNAL"
   fi

   # Curly braces capture output of 'time'.
   { time "$P4DBIN" -r "$OFFLINE_DB" -jr -f "${ActiveJournal}"; } >> "$LOGFILE" 2>&1 || { die "Active Journal replay failed. Abort!"; }

}

#------------------------------------------------------------------------------
# Recreate offline databases from the latest checkpoint.
#------------------------------------------------------------------------------
function recreate_offline_db_files () {
   # Rebuild the offline database from the most recent checkpoint in
   # $CHECKPOINTS. Supports both serial checkpoints (p4_N.ckp.*.gz with a
   # companion .md5 file) and parallel checkpoint directories (marked
   # complete by a *.OK file) per DoParallelCheckpoints. Do-or-die: on
   # success writes offline_db_usable.txt as the validity marker.
   local CheckpointCmd=
   local CheckpointsDir=
   local ParallelCheckpointDir=
   local ParallelCheckpointOKFile=
   local FilePrefix=
   local LastCheckpointMD5=
   local LastCheckpoint=

   CheckpointsDir="${CHECKPOINTS}"
   FilePrefix="${P4SERVER}"

   # Make sure we have at least one checkpoint available.
   if [[ "$DoParallelCheckpoints" -eq 1 ]]; then
      if [[ -z "$(ls "${CheckpointsDir}"/*.OK 2>/dev/null)" ]]; then
         # NOTE(review): ckp_complete is defined elsewhere in this file;
         # presumably it finalizes/flags checkpoint state before we die --
         # confirm.
         ckp_complete

         if [[ "$SERVER_TYPE" == "p4d_master" ]]; then
            die "No parallel checkpoint dirs found in $CheckpointsDir with prefix ${FilePrefix}.  Consider running 'live_checkpoint.sh $SDP_INSTANCE'."
         else
            die "No parallel checkpoint dirs found in $CheckpointsDir with prefix ${FilePrefix}."
         fi
      fi
   else
      if [[ -z "$(ls "${CheckpointsDir}/${FilePrefix}".ckp.*.md5 2>/dev/null)" ]]; then
         ckp_complete

         if [[ "$SERVER_TYPE" == "p4d_master" ]]; then
            die "No checkpoints found in $CheckpointsDir with prefix $FilePrefix.  Consider running 'live_checkpoint.sh $SDP_INSTANCE'."
         else
            die "No checkpoints found in $CheckpointsDir with prefix $FilePrefix."
         fi
      fi
   fi

   # Identify the newest checkpoint (by mtime) to recover from.
   # shellcheck disable=SC2012
   if [[ "$DoParallelCheckpoints" -eq 1 ]]; then
      ParallelCheckpointOKFile="$(ls -t "${CheckpointsDir}"/*.OK 2>/dev/null|head -1)"
      [[ -n "$ParallelCheckpointOKFile" ]] || \
         die "Could not find valid parallel checkpoint dir for latest checkpoint. Abort!"
   else
      LastCheckpointMD5=$(ls -t "${CheckpointsDir}/${FilePrefix}".ckp.*.md5 | head -1)
      [[ -n "$LastCheckpointMD5" ]] || \
         die "Could not find *.md5 file for latest checkpoint. Abort!"
   fi

   # Clear out the old offline db files and the usability marker before
   # starting the replay.
   # shellcheck disable=SC2129
   rm -f "${OFFLINE_DB}"/offline_db_usable.txt >> "$LOGFILE" 2>&1
   rm -f "${OFFLINE_DB}"/db.* >> "$LOGFILE" 2>&1
   rm -f "${OFFLINE_DB}"/save/db.* >> "$LOGFILE" 2>&1

   if [[ "$DoParallelCheckpoints" -eq 1 ]]; then
      # The checkpoint dir name is the OK file name minus the ".OK" suffix.
      ParallelCheckpointDir="${ParallelCheckpointOKFile%.OK}"
      log "Recovering from last parallel checkpoint dir, $ParallelCheckpointDir."
      CheckpointCmd="$P4DBIN -r $OFFLINE_DB -z -jrp -N ${NumCheckPointThreads} ${ParallelCheckpointDir}"
   else
      # Account for the idiosyncrasy that MD5 files for checkpoints may look
      # like p4_N.ckp.gz.md5 or p4_N.ckp.md5.
      if [[ "$LastCheckpointMD5" == *".gz.md5" ]]; then
         LastCheckpoint="${LastCheckpointMD5%.md5}"
      else
         LastCheckpoint="${LastCheckpointMD5%.md5}.gz"
      fi

      [[ -r "$LastCheckpoint" ]] || \
         die "Missing last checkpoint file: $LastCheckpoint. Abort!"

      log "Recovering from last full checkpoint, $LastCheckpoint."
      CheckpointCmd="$P4DBIN -r $OFFLINE_DB -jr -z ${LastCheckpoint}"
   fi

   log "Running: $CheckpointCmd"
   # Curly braces capture output of 'time'.
   { time $CheckpointCmd; } >> "$LOGFILE" 2>&1 || { die "Restore of checkpoint to $OFFLINE_DB failed!"; }

   echo "Offline db file restored successfully." > "${OFFLINE_DB}/offline_db_usable.txt"
}

#------------------------------------------------------------------------------
# Take a live checkpoint from db.* files in P4ROOT.
#------------------------------------------------------------------------------
function checkpoint () {
   # Take a live checkpoint from db.* files in P4ROOT. Master-only; dies
   # on any other server type. The checkpoint is written to the journals
   # directory (so the rotated journal lands where replication expects
   # it), then moved to the checkpoints directory. Do-or-die throughout.
   #
   # Fixes vs. prior version: corrected the mis-indented 'fi' closing the
   # serial-checkpoint branch, and the final 'mv' is now quoted, logged,
   # and checked instead of silently ignored on failure.
   local CheckpointCmd=
   local CheckpointsDir=
   local FilePrefix=
   local ParallelCheckpointDir=
   local ParallelCheckpointOKFile=

   log "Create a new checkpoint from live db files in $P4ROOT."

   if [[ "$SERVER_TYPE" == "p4d_master" ]]; then
      # Refer to ckp/jnl files starting like: /p4/N/checkpoints/p4_N
      CheckpointsDir="${CHECKPOINTS}"
      FilePrefix="${P4SERVER}"
   else
      die "Live checkpoints can only be run on the master server."
   fi

   # Checkpoint written to journals location so that the rotated journal will end up in the correct place
   # and not break replication. We move the checkpoint at the end of the function.
   if [[ "$DoParallelCheckpoints" -eq 1 ]]; then
      ParallelCheckpointDir="${CheckpointsDir}/${FilePrefix}.ckp.${CHECKPOINTNUM}"
      ParallelCheckpointOKFile="${ParallelCheckpointDir}.OK"
      CheckpointCmd="$P4DBIN -r $P4ROOT -Z -jcpm -N ${NumCheckPointThreads} ${JOURNALS}/${FilePrefix}"
   else
      CheckpointCmd="$P4DBIN -r $P4ROOT -jc -Z ${JOURNALS}/${FilePrefix}"
   fi

   log "Running: $CheckpointCmd"

   # Curly braces capture output of 'time'.
   { time $CheckpointCmd; } >> "$LOGFILE" 2>&1 || { die "ERROR - New live checkpoint failed!"; }

   if [[ "$DoParallelCheckpoints" -eq 1 ]]; then
      touch "$ParallelCheckpointOKFile"
      log "Live Parallel checkpoint completed OK. Writing: $ParallelCheckpointOKFile"
   else
      log "Live checkpoint completed OK."
   fi

   log "Move checkpoint from journals directory to the checkpoints directory."
   mv "${JOURNALS}"/*.ckp.* "${CheckpointsDir}"/ >> "$LOGFILE" 2>&1 ||\
      die "Failed to move checkpoint files from $JOURNALS to $CheckpointsDir."
}

#------------------------------------------------------------------------------
# Take a checkpoint from the ROOTDIR, typically either /p4/N/root or
# /p4/N/offline_db.
#------------------------------------------------------------------------------
function dump_checkpoint () {
   # Dump a new checkpoint from the db.* files in $ROOTDIR (typically
   # /p4/N/root or /p4/N/offline_db) into the checkpoints directory,
   # honoring DoParallelCheckpoints. Optionally runs a site-specific
   # snapshot hook ($SNAPSHOT_SCRIPT) afterward. Dies unless the
   # checkpoint (and snapshot, when configured) succeeds.
   local CheckpointCmd=
   local CheckpointsDir=
   local NewCheckpoint=
   local NewCheckpointMD5=
   local FilePrefix=
   local -i DoSnapshot=0
   local -i SnapshotOK=1
   local -i CheckpointOK=1
   local ParallelCheckpointDir=
   local ParallelCheckpointOKFile=

   # shellcheck disable=SC2153
   log "Dump out new checkpoint from db files in $ROOTDIR."

   CheckpointsDir="${CHECKPOINTS}"
   FilePrefix="${P4SERVER}"

   NewCheckpoint="${CheckpointsDir}/${FilePrefix}.ckp.${CHECKPOINTNUM}.gz"
   NewCheckpointMD5="${NewCheckpoint}.md5"

   # Skip regeneration if this checkpoint and its MD5 already exist.
   if [[ -r "$NewCheckpoint" && -r "$NewCheckpointMD5" ]]; then
      log "Warning: Skipping generation of existing checkpoint $NewCheckpoint.\\nVerified MD5 file exists: $NewCheckpointMD5."
      return
   fi

   if [[ "$DoParallelCheckpoints" -eq 1 ]]; then
      ParallelCheckpointDir="${CheckpointsDir}/${FilePrefix}.ckp.${CHECKPOINTNUM}"
      ParallelCheckpointOKFile="${ParallelCheckpointDir}.OK"
      CheckpointCmd="$P4DBIN -r $ROOTDIR -z -jdpm -N ${NumCheckPointThreads} $ParallelCheckpointDir"
   else
      CheckpointCmd="$P4DBIN -r $ROOTDIR -jd -z ${CheckpointsDir}/${FilePrefix}.ckp.${CHECKPOINTNUM}.gz"
   fi

   log "Running: $CheckpointCmd"

   # Curly braces capture output of 'time'. Failure is recorded rather
   # than aborting immediately so the snapshot hook still gets a chance
   # to run; the combined outcome is evaluated at the end.
   if { time $CheckpointCmd; } >> "$LOGFILE" 2>&1; then
      CheckpointOK=1
      if [[ "$DoParallelCheckpoints" -eq 1 ]]; then
         log "Parallel checkpoint dump completed OK. Writing: $ParallelCheckpointOKFile"
         touch "$ParallelCheckpointOKFile"
      else
         log "Checkpoint dump completed OK."
      fi
   else
      CheckpointOK=0
   fi

   # Run the optional site-specific snapshot script, if configured.
   if [[ -n "${SNAPSHOT_SCRIPT:-}" ]]; then
      DoSnapshot=1
      log "Calling site-specific snapshot script: $SNAPSHOT_SCRIPT"
      if "$SNAPSHOT_SCRIPT" >> "$LOGFILE" 2>&1; then
         SnapshotOK=1
      else
         SnapshotOK=0
      fi
   fi

   # Report the combined checkpoint/snapshot outcome; any failure dies.
   if [[ "$DoSnapshot" -eq 0 ]]; then
      if [[ "$CheckpointOK" -eq 1 ]]; then
         log "New checkpoint dump succeeded."
      else
         die "New checkpoint dump FAILED."
      fi
   else
      if [[ "$CheckpointOK" -eq 0 && "$SnapshotOK" -eq 0 ]]; then
         die "Both checkpoint dump and snapshot FAILED."
      elif [[ "$CheckpointOK" -eq 1 && "$SnapshotOK" -eq 0 ]]; then
         die "New checkpoint dump succeeded, but snapshot FAILED."
      elif [[ "$CheckpointOK" -eq 0 && "$SnapshotOK" -eq 1 ]]; then
         die "New checkpoint dump FAILED, but snapshot succeeded."
      else
         log "New checkpoint dump and snapshot succeeded."
      fi
   fi
}

#------------------------------------------------------------------------------
# Compare journal numbers between live and offline databases, to ensure
# they can be safely swapped out.
#------------------------------------------------------------------------------
function compare_journal_numbers () {
   # Verify that the offline_db journal counter is not behind P4ROOT's,
   # so the two database sets can be safely swapped. Dies on any mismatch
   # or if either journal number cannot be read.
   local _OFFLINEJNLNUM
   local _JNLNUM

   # Journal number of the offline database: field 8 of the '@journal@'
   # record in a 'p4d -jd -' dump of db.counters.
   _OFFLINEJNLNUM=$("$P4DBIN" -r "$OFFLINE_DB" -jd - db.counters 2>&1 | grep '@journal@' | cut -d "@" -f 8 2>> "$LOGFILE") || die "Cannot get $OFFLINE_DB journal number. Abort!"
   check_journalnum "$_OFFLINEJNLNUM"

   # Get the journal number of the root database.
   [[ -f "$P4ROOT/db.counters" ]] ||\
      die "$P4ROOT database not found. Something is seriously wrong since the server was just running a minute ago! Contact support-helix-core@perforce.com"

   _JNLNUM=$("$P4DBIN" -r "$P4ROOT" -jd - db.counters 2>&1 | grep '@journal@' | cut -d "@" -f 8 2>> "$LOGFILE") || die "Cannot get $P4ROOT journal number. Abort!"
   check_journalnum "$_JNLNUM"

   if (( _JNLNUM > _OFFLINEJNLNUM )); then
      log "$P4ROOT journal number is: $_JNLNUM"
      log "$OFFLINE_DB journal number is: $_OFFLINEJNLNUM"
      die "$OFFLINE_DB journal number is less than $P4ROOT, cannot switch."
   fi
}

#------------------------------------------------------------------------------
# Swap out live db.* database files in P4ROOT with those in offline_db.
#------------------------------------------------------------------------------
function switch_db_files () {
   # Swap the P4ROOT and OFFLINE_DB symlinks so the freshly-recovered
   # offline databases become live. Old live db.* files are parked in
   # P4ROOT/save, server identity/state files are moved to the tree that
   # will become the new root, and P4ROOT_not_usable.txt sentinel files
   # mark both trees unusable for the duration of the swap.
   # Compare the Offline and Master journal numbers before switching to make
   # sure they match.
   compare_journal_numbers

   log "Switching root and offline_db links."
   [[ -d "${P4ROOT}"/save ]] || mkdir -p "${P4ROOT}"/save >> "$LOGFILE" 2>&1

   # Mark both trees as not usable while the swap is in progress.
   # shellcheck disable=SC2129
   echo "P4ROOT is not available during switch_db_files() processing." > "$P4ROOT/P4ROOT_not_usable.txt" 2>> "$LOGFILE"
   echo "P4ROOT is not available during switch_db_files() processing." > "$OFFLINE_DB/P4ROOT_not_usable.txt" 2>> "$LOGFILE"

   # Park the current live db.* files in save/, clearing any prior set
   # and removing stale server.locks.
   # shellcheck disable=SC2129
   rm -f "${P4ROOT}"/save/db.* >> "$LOGFILE" 2>&1
   rm -rf "${P4ROOT}"/server.locks >> "$LOGFILE" 2>&1
   mv "${P4ROOT}"/db.* "${P4ROOT}"/save/. >> "$LOGFILE" 2>&1

   # Move identity/state files into the tree that will become the new
   # P4ROOT after the symlink swap below.
   if [[ -r "$P4ROOT"/license ]]; then
      mv "${P4ROOT}"/license "${OFFLINE_DB}/." >> "$LOGFILE" 2>&1
   fi

   # After the move above, this glob catches any remaining license-related
   # files (e.g. license.md5).
   if [[ -n "$(ls "$P4ROOT"/license* 2>/dev/null)" ]]; then
      mv "${P4ROOT}"/license* "${OFFLINE_DB}/." >> "$LOGFILE" 2>&1
   fi

   if [[ -r "${P4ROOT}"/rdb.lbr ]]; then
      mv "${P4ROOT}"/rdb.lbr "${OFFLINE_DB}/." >> "$LOGFILE" 2>&1
   fi

   if [[ -n "$(ls "$P4ROOT"/state* 2>/dev/null)" ]]; then
      mv "${P4ROOT}"/state* "${OFFLINE_DB}/." >> "$LOGFILE" 2>&1
   fi

   if [[ -r "${P4ROOT}"/server.id ]]; then
      mv "${P4ROOT}"/server.id "${OFFLINE_DB}/." >> "$LOGFILE" 2>&1
   fi

   if [[ -n "$(ls "$P4ROOT"/server.id* 2>/dev/null)" ]]; then
      mv "${P4ROOT}"/server.id* "${OFFLINE_DB}/." >> "$LOGFILE" 2>&1
   fi

   cp "${P4ROOT}"/sdp_server_type.txt "${OFFLINE_DB}/." >> "$LOGFILE" 2>&1

   # Swap the two symlinks: the old offline_db target becomes P4ROOT and
   # vice versa. The usability marker is removed first so the new
   # offline_db is not mistaken for a valid one until it is rebuilt.
   rm -f "${OFFLINE_DB}/offline_db_usable.txt" >> "$LOGFILE" 2>&1
   LinkOfflineDB="$(readlink "$OFFLINE_DB")"
   LinkP4ROOT="$(readlink "$P4ROOT")"
   unlink "$OFFLINE_DB"
   unlink "$P4ROOT"

   ln -s "$LinkOfflineDB" "$P4ROOT" >> "$LOGFILE" 2>&1 ||\
      die "Link of $LinkOfflineDB to $P4ROOT failed."

   ln -s "$LinkP4ROOT" "$OFFLINE_DB" >> "$LOGFILE" 2>&1 ||\
      die "Link of $LinkP4ROOT to $OFFLINE_DB failed."

   # Clear the sentinels now that both trees are in their final place.
   rm -f "$P4ROOT/P4ROOT_not_usable.txt" >> "$LOGFILE" 2>&1
   rm -f "$OFFLINE_DB/P4ROOT_not_usable.txt" >> "$LOGFILE" 2>&1
}

#------------------------------------------------------------------------------
# Rotate specified log files, and compress with gzip.
#------------------------------------------------------------------------------
function rotate_log_file () {
   # Rotate (and optionally gzip) one log file in $LOGS.
   #   $1 - log file name to rotate; no-op if empty or the file is missing.
   #   $2 - optional gzip extension (e.g. ".gz"); when given, the rotated
   #        log is compressed, and any pre-existing zipped log with the
   #        same target name is first moved aside with a numeric suffix.
   # The rotated name embeds the journal number and a timestamp:
   # <name>.<JOURNALNUM>.<YYYY-mm-dd_HH-MM-SS>.
   # The two top-level branches are identical except that all output is
   # appended to $LOGFILE when LOGFILE is set.
   local LogToRotate="${1:-}"
   local GzExt="${2:-}"
   local -i i=1
   local Datestamp=
   local RotatedLog=
   local RotatedZippedLog=

   [[ -n "$LogToRotate" ]] || return

   if [[ -n "${LOGFILE:-}" ]]; then
      pushd "$LOGS" > /dev/null 2>> "$LOGFILE" || die "Could not cd to: $LOGS"
   else
      pushd "$LOGS" > /dev/null || die "Could not cd to: $LOGS"
   fi

   Datestamp=$(date +'%Y-%m-%d_%H-%M-%S')
   RotatedLog="${LogToRotate}.${JOURNALNUM}.${Datestamp}"

   if [[ -f "${LogToRotate}" ]]; then
      if [[ -n "${LOGFILE:-}" ]]; then
         mv -f "${LogToRotate}" "${RotatedLog}" >> "$LOGFILE" 2>&1

         if [[ -n "$GzExt" ]]; then
            RotatedZippedLog="${RotatedLog}${GzExt}"

            # If needed, move existing zipped log aside.
            # NOTE(review): the aside name omits ${JOURNALNUM} while
            # RotatedLog includes it -- confirm that is intended.
            if [[ -e "$RotatedZippedLog" ]]; then
               while [[ -e "${LogToRotate}.${Datestamp}.${i}${GzExt}" ]]; do
                  i+=1
               done
               log "Moving pre-existing $RotatedZippedLog aside to ${LogToRotate}.${Datestamp}.${i}${GzExt}" >> "$LOGFILE" 2>&1
               mv -f "$RotatedZippedLog" "${LogToRotate}.${Datestamp}.${i}${GzExt}" >> "$LOGFILE" 2>&1
            fi

            gzip "$RotatedLog" >> "$LOGFILE" 2>&1
         fi
      else
         mv -f "${LogToRotate}" "${RotatedLog}"

         if [[ -n "$GzExt" ]]; then
            RotatedZippedLog="${RotatedLog}${GzExt}"

            # If needed, move existing zipped log aside.
            if [[ -e "$RotatedZippedLog" ]]; then
               while [[ -e "${LogToRotate}.${Datestamp}.${i}${GzExt}" ]]; do
                  i+=1
               done
               log "Moving pre-existing $RotatedZippedLog aside to ${LogToRotate}.${Datestamp}.${i}${GzExt}"
               mv -f "$RotatedZippedLog" "${LogToRotate}.${Datestamp}.${i}${GzExt}"
            fi

            gzip "$RotatedLog"
         fi
      fi
   fi

   if [[ -n "${LOGFILE:-}" ]]; then
      popd > /dev/null 2>> "$LOGFILE" || die "Could not cd to: $OLDPWD"
   else
      popd > /dev/null || die "Could not cd to: $OLDPWD"
   fi
}

#------------------------------------------------------------------------------
# At the start of each run for live_checkpoint.sh, daily_checkpoint.sh, and
# recreate_db_checkpoint.sh, before *any* logging activity occurs, rotate the
# logs from the most recent prior run, always named "checkpoint.log" or "log".
#------------------------------------------------------------------------------
function rotate_last_run_logs () {
   # Rotate logs left over from the most recent prior run, before any new
   # logging happens in the current run.
   local gzLog=

   # The current script's own log rotates without compression.
   rotate_log_file "$LOGFILE"

   # Server, broker, audit, replica-cleanup and monitor-metrics logs all
   # rotate with gzip compression.
   for gzLog in "log" "p4broker.log" "audit.log" "replica_cleanup.log" "monitor_metrics.log"; do
      rotate_log_file "$gzLog" ".gz"
   done
}

#------------------------------------------------------------------------------
# Remove log files matching a specified name prefix, preserving a specified
# number of the recent logs.
#------------------------------------------------------------------------------
function remove_log_files () {
   # Remove log files whose names start with prefix $1, preserving the $2
   # most recent by mtime. Each removal is logged.
   #
   # Fix vs. prior version: variables are now 'local' so they no longer
   # leak into (or clobber) the caller's global scope.
   local REMOVE_LOGNAME="$1"
   local KEEPNUM="$2"
   local I_LOGFILE=

   # The ':?' expansion aborts if the prefix is empty, guarding against an
   # unanchored 'ls -t *' (and subsequent rm) in the current directory.
   # shellcheck disable=SC2012
   for I_LOGFILE in $(ls -t "${REMOVE_LOGNAME:?}"* 2>/dev/null | $AWK "NR > $KEEPNUM"); do
      log "rm -f $I_LOGFILE"
      rm -f "$I_LOGFILE"
   done
}

#------------------------------------------------------------------------------
# Remove old logs.
#------------------------------------------------------------------------------
function remove_old_logs () {
   # Prune aged logs under $LOGS according to the retention settings in
   # p4_vars: checkpoint logs honor KEEPJNLS (so their count tracks the
   # retained journals), all other logs honor KEEPLOGS. A value of 0
   # disables cleanup for that category.
   local pruneLog=

   pushd "$LOGS" > /dev/null 2>> "$LOGFILE" || die "Could not cd to: $LOGS"

   if [[ "$KEEPJNLS" -eq 0 ]]; then
      log "Skipping cleanup of old checkpoint logs because KEEPJNLS is set to 0."
   else
      log "Deleting old checkpoint logs.  Keeping latest $KEEPJNLS, per KEEPJNLS setting in p4_vars."
      remove_log_files "checkpoint.log" "$KEEPJNLS"
   fi

   if [[ "$KEEPLOGS" -eq 0 ]]; then
      log "Skipping cleanup of old server logs because KEEPLOGS is set to 0."
   else
      log "Deleting old server logs.  Keeping latest $KEEPLOGS, per KEEPLOGS setting in p4_vars."
      for pruneLog in \
         "log" \
         "p4broker.log" \
         "broker_rotate.log" \
         "audit.log" \
         "sync_replica.log" \
         "replica_status.log" \
         "replica_cleanup.log" \
         "request_checkpoint.log" \
         "recreate_offline_db.log" \
         "edge_shelf_replicate.log" \
         "upgrade.log" \
         "p4login" \
         "p4verify.log" \
         "journal_watch.log" \
         "refresh_P4ROOT_from_offline_db.log" \
         "purge_revisions.log" \
         "monitor_metrics.log"
      do
         remove_log_files "$pruneLog" "$KEEPLOGS"
      done
   fi
   popd > /dev/null 2>>"$LOGFILE" || die "Could not cd to: $OLDPWD"
}

#------------------------------------------------------------------------------
# Gzip and move old rotated journals to the checkpoints folder.
#------------------------------------------------------------------------------
function gzip_mv_journals () {
   # Gzip rotated numbered journals in the journals directory and move the
   # compressed files to the checkpoints volume. Only acts when more than
   # two journal files are present. Do-or-die on gzip or move failure.
   #
   # Fixes vs. prior version: 'cd' is quoted and checked; the 'mv' of each
   # gzipped journal is quoted and checked; indentation made consistent.
   local CheckpointsDir=
   local FilePrefix=
   local NumberedJournal=

   log "gzip journals and move to checkpoints volume."

   CheckpointsDir="${CHECKPOINTS}"
   FilePrefix="${P4SERVER}"

   LASTJOURNALNUM=0
   FIRSTJOURNALNUM=0

   cd "${JOURNALS}" || die "Could not cd to: $JOURNALS"
   # shellcheck disable=SC2012
   NumJnls=$(ls *.jnl.* | wc -l)
   if (( NumJnls > 2 )); then
      # Journal numbers are field 3 of names like p4_N.jnl.<num>; files
      # already gzipped are excluded. The 'head -n 5 | tail -n 1' keeps the
      # few most recent journals uncompressed -- presumably so they remain
      # directly usable for replication; TODO confirm intent.
      # shellcheck disable=SC2012
      LASTJOURNALNUM=$(ls -t *.jnl.* | grep -i -v ".gz" | cut -d "." -f 3 | sort -nr | head -n 5 | tail -n 1)
      # shellcheck disable=SC2012
      FIRSTJOURNALNUM=$(ls -t *.jnl.* | grep -i -v ".gz" | cut -d "." -f 3 | sort -nr | tail -n 2 | head -n 1)
      log "LASTJOURNALNUM=$LASTJOURNALNUM"
      log "FIRSTJOURNALNUM=$FIRSTJOURNALNUM"
      for (( j=FIRSTJOURNALNUM; j <= LASTJOURNALNUM; j++ )); do
         NumberedJournal="${JOURNALS}/${FilePrefix}.jnl.${j}"
         log "gzipping ${NumberedJournal}"
         # Curly braces capture output of 'time'.
         { time gzip "${NumberedJournal}"; } >> "$LOGFILE" 2>&1 || { die "gzip of ${NumberedJournal} failed. Abort!"; }
         mv "${NumberedJournal}.gz" "${CheckpointsDir}"/ ||\
            die "Failed to move ${NumberedJournal}.gz to ${CheckpointsDir}."
      done
   fi
}


#------------------------------------------------------------------------------
# Set the SDP Checkpoint counter to indicate last successful SDP checkpoint
# operation. For standby servers, set the SDP Checkpoint counter on the master.
#------------------------------------------------------------------------------
function set_counter() {
   # Record the time of the last successful SDP checkpoint operation in a
   # LastSDPCheckpoint.<SERVERID> counter. Edge and standby servers write
   # the counter on the master via P4MASTERPORT; all others write locally
   # via P4PORT.
   local counterPort="$P4PORT"

   "$P4CBIN/p4login"

   if [[ "$EDGESERVER" -eq 1 || "$STANDBYSERVER" -eq 1 ]]; then
      counterPort="$P4MASTERPORT"
   fi

   "$P4BIN" -u "$P4USER" -p "$counterPort" counter "LastSDPCheckpoint.$SERVERID" "$(date +'%s (%Y/%m/%d %H:%M:%S %z %Z)')" > /dev/null
}

