#!/bin/bash
#==============================================================================
# Copyright and license info is available in the LICENSE file included with
# the Server Deployment Package (SDP), and also available online:
# https://swarm.workshop.perforce.com/projects/perforce-software-sdp/view/main/LICENSE
#------------------------------------------------------------------------------
set -u

#==============================================================================
# Global Variables.

export P4DInitScript=
export P4DRef=
export P4DSystemdServiceFile=
export P4BrokerInitScript=
export P4BrokerRef=
export P4BrokerSystemdServiceFile=
export P4ProxyInitScript=
export P4ProxyRef=
export P4ProxySystemdServiceFile=
export OFFLINE_DB=
export EDGESERVER=
export STANDBYSERVER=
export CHECKPOINT=

declare -i IsP4DCaseSensitive=
declare -i CreateParallelCheckpoint=
declare -i CreateMultifileParallelCheckpoint=
declare -i LoadParallelCheckpoint=
declare -i CheckpointDumpSkipped=0
declare -i UseGrepM=

#==============================================================================
# Common functions used in various SDP scripts.

#------------------------------------------------------------------------------
# Verify key variables in the shell environment exist, or else abort.
#
# If checks in this function fail, this function does an 'echo' and 'exit 1'
# rather than calling 'log' or 'die', as this function is generally called
# early in processing, before the log is initialized.
#------------------------------------------------------------------------------
function check_vars () {
   local CheckVarsPreflightOK=1
   local CommonVars="SDP_INSTANCE P4HOME P4PORT P4ROOT P4JOURNAL P4BIN P4DBIN P4TICKETS P4TRUST KEEPCKPS KEEPJNLS KEEPLOGS CHECKPOINTS LOGS OSUSER"
   local InstanceVars="P4MASTER_ID P4MASTERPORT"
   local var=

   # First, check vars that should be set in /p4/common/bin/p4_vars.
   for var in $CommonVars; do
      # Detect unset/empty variables by indirect expansion; the ':-' default
      # avoids 'unbound variable' errors under 'set -u'.
      if [[ -z "${!var:-}" ]]; then
         echo "Error: Required variable \$$var is NOT set. It should be set in /p4/common/bin/p4_vars."
         CheckVarsPreflightOK=0
      fi
   done

   # Next, check vars that should be set in /p4/common/config/p4_N.vars.
   # For some variables, provide additional details that help users correct
   # the problem.
   for var in $InstanceVars; do
      if [[ -z "${!var:-}" ]]; then
         echo "Error: Required variable \$$var is NOT set. It should be set in /p4/common/config/p4_N.vars, where N is the SDP instance name."

         if [[ "$var" == "P4MASTER_ID" ]]; then
            echo "The value for P4MASTER_ID should be the name of the ServerID of the master server."
         fi

         CheckVarsPreflightOK=0
      fi
   done

   if [[ "$CheckVarsPreflightOK" -eq 0 ]]; then
      echo "Use p4master_run or source p4_vars before calling this script."
      echo "Aborting to to errors in shell environment preflight checks."
      exit 1
   fi
}

#------------------------------------------------------------------------------
# Function: copy_jd_table ($TableName, $RootDir)
#
# Copies the specified table to a temp dir for dumping to avoid locks taken by
# p4d -jd if done against P4ROOT
#
# The caller must ensure the specified table exists in the specified root dir.
#
# The temp dir is created with 'mktemp -d'; if that fails, a second attempt is
# made under $P4TMP. If no temp dir can be created, this echoes an error and
# does 'exit 1'. If the copy fails, 'die' is called.
#
# Input:
# $1 - TableName (required)
# $2 - RootDir (required, root or offline_db)
#
# Exports JDTmpDir
#------------------------------------------------------------------------------
function copy_jd_table () {
   local TableName=${1:-Unset}
   local RootDir=${2:-Unset}

   JDTmpDir=$(mktemp -d 2>/dev/null)
   if [[ ! -d "$JDTmpDir" ]]; then
      JDTmpDir=$(mktemp -d -p "$P4TMP" -t 'tmp_jdtmpdir.XXXXXXXX')
   fi

   if [[ ! -d "$JDTmpDir" ]]; then
      echo -e "\\nError: Could not initialize JDTmpDir [$JDTmpDir]\\n"
      exit 1
   fi

   export JDTmpDir
   cp "$RootDir/$TableName" "$JDTmpDir/." ||
      die "Failed to copy $RootDir/$TableName to $JDTmpDir/."
}

#------------------------------------------------------------------------------
# Function: remove_jd_tables ()
#
# Cleanup the JDTmpDir, with extra precautions to avoid doing an 'rm -rf' on the
# wrong path. The directory is removed only if its path looks like one that
# copy_jd_table() would have created:
#   /tmp/tmp*                   (default 'mktemp -d' location)
#   $TMPDIR/tmp.*               ('mktemp -d' location when TMPDIR is set)
#   $P4TMP/tmp_jdtmpdir.*       (fallback location used by copy_jd_table)
#------------------------------------------------------------------------------
function remove_jd_tables () {
   local TmpBase="${TMPDIR:-/tmp}"

   # Tolerate a trailing slash on TMPDIR when building the match pattern.
   TmpBase="${TmpBase%/}"

   if [[ -n "${JDTmpDir:-}" && -d "$JDTmpDir" ]]; then
      case "$JDTmpDir" in
         /tmp/tmp?*|"$TmpBase"/tmp.*|"${P4TMP:-/tmp}"/tmp_jdtmpdir.*)
            rm -rf "${JDTmpDir:-/tmp/does_not_exist}/"
         ;;
      esac
   fi
}

#------------------------------------------------------------------------------
# _load_services_data ($ServerID, $RootDir)
#
# Internal helper for is_edge(), is_replica() and is_standby().
#
# Dumps a temporary copy of db.server from the given root dir with 'p4d -jd',
# selects the first record referencing the given ServerID, and stores the 13th
# '@'-delimited field (the Services data), with spaces removed, in the variable
# 'ServicesData'.
#
# The result is deliberately passed back through the CALLER's 'ServicesData'
# variable (bash dynamic scoping) rather than via stdout, so that this helper
# runs in the caller's shell; an 'exit' or 'die' inside copy_jd_table() then
# behaves exactly as if the code were inline in the caller. The caller must
# declare 'local ServicesData' before calling.
#
# ServicesData is left empty if db.server is not readable in RootDir or if no
# record matches.
#
# Input:
# $1 - ServerID (required)
# $2 - RootDir (optional, defaults to $P4ROOT)
#------------------------------------------------------------------------------
function _load_services_data () {
   local ServerID="${1:-Unset}"
   local RootDir="${2:-$P4ROOT}"
   local -a GrepCmd

   ServicesData=

   [[ -r "$RootDir/db.server" ]] || return 0

   copy_jd_table "db.server" "$RootDir"

   # Prepare grep to be case sensitive or insensitive based on SDP instance
   # configuration. IsP4DCaseSensitive is set in set_vars().
   # Sensitive data can have case-varied server specs, but exactly one will
   # match grep without the -i. Insensitive data can have only one server
   # spec.
   GrepCmd=("$GREP")
   if [[ "${IsP4DCaseSensitive:-0}" -eq 0 ]]; then
      GrepCmd+=(-i)
   fi

   # Use '-m 1' when grep supports it (see set_vars) so grep stops early; the
   # 'head -1' below keeps the result to one record either way.
   if [[ "${UseGrepM:-0}" -eq 1 ]]; then
      GrepCmd+=(-m 1)
   fi

   # Extract a slice of db.server referencing the given ServerID, and then grab
   # the field containing Services data.
   ServicesData=$("$P4DBIN" -r "$JDTmpDir" -J off -L /dev/null -k db.server -v track=-1 -jd - 2>&1 |
      "${GrepCmd[@]}" "@db.server@ @${ServerID}@" |
      head -1 |
      "$CUT" -d '@' -f 13 |
      tr -d ' ')

   remove_jd_tables
}

#------------------------------------------------------------------------------
# is_edge ($ServerID, $RootDir)
#
# Determine if a given ServerID is an edge server or not, checking a given
# database root dir (e.g. $P4ROOT or $OFFLINE_DB).
#
# Input:
# $1 - ServerID (required)
# $2 - RootDir (optional, defaults to $P4ROOT)
#
# Output YES if an edge server, NO otherwise.
#
#------------------------------------------------------------------------------
function is_edge () {
   local ServicesData=
   local EdgeCheck=

   _load_services_data "${1:-}" "${2:-}"

   # Do a bitwise operation to determine if the ServicesData value indicates
   # this is an edge server.
   if [[ -n "$ServicesData" ]]; then
      EdgeCheck=$((ServicesData & 4096))
      if [[ "$EdgeCheck" -gt 0 ]]; then
         echo YES
         return
      fi
   fi

   echo NO
}

#------------------------------------------------------------------------------
# is_replica ($ServerID, $RootDir)
#
# Determine if a given ServerID is a replica server or not, checking a given
# database root dir (e.g. $P4ROOT or $OFFLINE_DB).
#
# Input:
# $1 - ServerID (required)
# $2 - RootDir (optional, defaults to $P4ROOT)
#
# Output YES if a replica server, NO otherwise.
#
#------------------------------------------------------------------------------
function is_replica () {
   local ServicesData=

   _load_services_data "${1:-}" "${2:-}"

   # Do a check to see if the ServicesData value indicates
   # this is a replica server.
   if [[ -n "$ServicesData" && "$ServicesData" -eq '2533' ]]; then
      echo YES
   else
      echo NO
   fi
}

#------------------------------------------------------------------------------
# is_standby ($ServerID, $RootDir)
#
# Determine if a given ServerID is a standby server or not, checking a given
# database root dir (e.g. $P4ROOT or $OFFLINE_DB).
#
# Input:
# $1 - ServerID (required)
# $2 - RootDir (optional, defaults to $P4ROOT)
#
# Output YES if a standby server, NO otherwise.
#
#------------------------------------------------------------------------------
function is_standby () {
   local ServicesData=

   _load_services_data "${1:-}" "${2:-}"

   # Do a check to see if the ServicesData value indicates
   # this is a standby server.
   if [[ -n "$ServicesData" ]] &&
      [[ "$ServicesData" -eq '35141' || "$ServicesData" -eq '35301' ]]; then
      echo YES
   else
      echo NO
   fi
}

#------------------------------------------------------------------------------
# Set variables for use in various scripts:
# OFFLINE_DB=path to offline db directory
# EDGESERVER=1 if this is an edge server, 0 otherwise.
# REPLICASERVER=1 if this is a replica server, 0 otherwise.
# STANDBYSERVER=1 if this is a standby server, 0 otherwise.
#
# Also sets the init script / systemd service file paths, IsP4DCaseSensitive,
# UseGrepM, the parallel checkpoint flags (and Threads, when parallel
# checkpoints are enabled), and SDP_ADMIN_PASSWORD_FILE.
#
# This must be called after loading the standard shell environment by
# doing:
# source /p4/common/bin/p4_vars N
#
# This sets P4HOME, SERVERID, etc. needed by this function.
#------------------------------------------------------------------------------
function set_vars () {
   P4DInitScript="$P4HOME/bin/p4d_${SDP_INSTANCE}_init"
   P4DRef="${P4DInitScript%_init}"
   P4DSystemdServiceFile="/etc/systemd/system/p4d_${SDP_INSTANCE}.service"
   P4BrokerInitScript="$P4HOME/bin/p4broker_${SDP_INSTANCE}_init"
   P4BrokerRef="${P4BrokerInitScript%_init}"
   P4BrokerSystemdServiceFile="/etc/systemd/system/p4broker_${SDP_INSTANCE}.service"
   P4ProxyInitScript="$P4HOME/bin/p4p_${SDP_INSTANCE}_init"
   P4ProxyRef="${P4ProxyInitScript%_init}"
   P4ProxySystemdServiceFile="/etc/systemd/system/p4p_${SDP_INSTANCE}.service"

   # If /p4/N/bin/p4d_N is a symlink, the current instance is configured to be
   # case-sensitive, otherwise it is case-insensitive. (On a non-p4d server
   # machine, the IsP4DCaseSensitive value may be wrong, but this is of no
   # consequence, as it is only used on p4d server machines).
   if [[ -L "$P4DRef" ]]; then
      IsP4DCaseSensitive=1
   else
      IsP4DCaseSensitive=0
   fi

   # Determine if 'grep' supports the '-m' option. Use it when we can as it is
   # fast. If it's not supported, use other methods, like "|head -1", which are
   # more portable but less efficient.
   if echo "test" | "$GREP" -m 1 "test" >/dev/null 2>&1; then
      UseGrepM=1
   else
      UseGrepM=0
   fi

   # Parallel checkpoints require DO_PARALLEL_CHECKPOINTS to be set to something
   # other than "0" and a P4D_VERSION that compares (as a string) greater than
   # "2022.2".
   # shellcheck disable=SC2072
   if [[ -n "${DO_PARALLEL_CHECKPOINTS:-}" && "$DO_PARALLEL_CHECKPOINTS" != "0" && "$P4D_VERSION" > "2022.2" ]]; then
      CreateParallelCheckpoint=1

      # shellcheck disable=SC2072
      if [[ "$P4D_VERSION" > "2023.1" ]]; then
         CreateMultifileParallelCheckpoint=1
      fi

      # A value that is a positive integer other than 1 is used directly as the
      # thread count. The pattern already rejects leading zeros, so no further
      # normalization is needed. A value of 1, or any non-numeric value, selects
      # the default of 4 threads.
      if [[ "$DO_PARALLEL_CHECKPOINTS" =~ ^[1-9][0-9]*$ && "$DO_PARALLEL_CHECKPOINTS" != 1 ]]; then
         Threads="$DO_PARALLEL_CHECKPOINTS"
      else
         Threads=4
      fi
   else
      CreateParallelCheckpoint=0
   fi

   OFFLINE_DB="${P4HOME}/offline_db"

   # Classify this server from its db.server record. With no SERVERID, all
   # three flags stay 0.
   EDGESERVER=0
   REPLICASERVER=0
   STANDBYSERVER=0

   # shellcheck disable=SC2153
   if [[ -n "$SERVERID" ]]; then
      if [[ "$(is_edge "$SERVERID")" == YES ]]; then
         EDGESERVER=1
      fi

      if [[ "$(is_replica "$SERVERID")" == YES ]]; then
         REPLICASERVER=1
      fi

      if [[ "$(is_standby "$SERVERID")" == YES ]]; then
         STANDBYSERVER=1
      fi
   fi

   export EDGESERVER REPLICASERVER STANDBYSERVER

   # Ensure that SDP_ADMIN_PASSWORD_FILE is set, using existing value if set (e.g.
   # in p4_vars), otherwise set it to the SDP standard value.
   export SDP_ADMIN_PASSWORD_FILE="${SDP_ADMIN_PASSWORD_FILE:-Unset}"
   if [[ "$SDP_ADMIN_PASSWORD_FILE" == Unset ]]; then
      export SDP_ADMIN_PASSWORD_FILE="$P4CCFG/.p4passwd.${P4SERVER}.admin"
   fi
}

#------------------------------------------------------------------------------
# Check if user is running as required OS user.
#
# Calls 'die' if the current user (per 'id -un') differs from $OSUSER.
#------------------------------------------------------------------------------
function check_uid () {
   local user=

   user=$(id -un)
   if [[ "${user}" != "${OSUSER}" ]]; then
      die "Must be run by user: ${OSUSER}. Abort!"
   fi
}

#------------------------------------------------------------------------------
# Function log() - echo message to logfile or stdout.
#
# If $LOGFILE is defined, write message to the log file only; nothing goes to
# stdout. Prepend a datestamp and the script name ($0).
# If $LOGFILE isn't defined, just echo to stdout, w/o timestamp or script name.
# In all cases, support '-e' formatting.
# Input:
# $1 - message to log (must be quoted).
#------------------------------------------------------------------------------
function log () {
   if [[ "${LOGFILE:-Unset}" != Unset ]]; then
      # Emit the timestamp and message with a single append so that a log line
      # is not split in two by another process writing to the same file.
      echo -e "$(date +'%Y-%m-%d %H:%M:%S') $0: $*" >> "$LOGFILE" 2>&1
   else
      echo -e "$@"
   fi
}

#------------------------------------------------------------------------------
# Decide depending on our mail utility, how to specify sender (if we need to).
# Mail on some platforms sets sender by default.
# If the mail utility returns what looks like a version identifier
# when given the '-V' flag, use a '-r' flag. If it does not return a
# version identifier, don't set a mail sender option.
# Allow GNU Mailutils alternative flag instead. Also check for s-nail.
#------------------------------------------------------------------------------
function get_mail_sender_opt () {
   local senderOpt=
   local versionText=

   # No sender configured: emit an empty option string.
   if [[ -z "$MAILFROM" ]]; then
      echo "$senderOpt"
      return
   fi

   # Ask the mail program to identify itself; stderr is captured too.
   versionText=$($SDPMAIL -V 2>&1)

   if [[ "$versionText" == *"GNU Mailutils"* ]]; then
      senderOpt="-aFrom:$MAILFROM"
   elif [[ "$versionText" =~ ^[0-9]+\.[0-9] ]]; then
      senderOpt="-r $MAILFROM"
   elif [[ "$versionText" == *s-nail* ]]; then
      senderOpt="-Sttycharset=latin1 -r $MAILFROM"
   fi

   echo "$senderOpt"
}

#------------------------------------------------------------------------------
# Email the log file named by $LOGFILE.
#
# Input:
# $1 - subject line for the email.
#
# If $SDPMAIL is not found in the PATH, SDPMAIL is redefined as an 'echo'
# so the send is simulated on stdout. If SNS_ALL_ALERT_TOPIC_ARN is set, the
# log is also published with sns_log_file_all().
#------------------------------------------------------------------------------
function mail_log_file () {
   local subject=$1
   local senderOpt
   senderOpt=$(get_mail_sender_opt)

   # If the configured mail program is not in the PATH, just do an echo rather
   # than emailing.
   if [[ -z "$(command -v "$SDPMAIL")" ]]; then
      SDPMAIL="echo Simulated $SDPMAIL"
   fi

   # Send email. SDPMAIL and senderOpt are deliberately unquoted: both may
   # hold several words that must be split into separate arguments.
   # shellcheck disable=SC2086
   $SDPMAIL -s "$subject" $senderOpt "$MAILTO" < "$LOGFILE"

   # Check for SNS_ALL_ALERT_TOPIC_ARN to send SNS notifications on both success and failure
   if [[ -n "${SNS_ALL_ALERT_TOPIC_ARN:-}" ]]; then
      sns_log_file_all "$subject"
   fi
}

#------------------------------------------------------------------------------
# Deliver the $LOGFILE via AWS SNS.
#------------------------------------------------------------------------------
# Publish the contents of $LOGFILE to the SNS topic $SNS_ALERT_TOPIC_ARN.
#
# Input:
# $1 - subject; truncated to the first 100 characters.
#------------------------------------------------------------------------------
function sns_log_file () {
   # AWS SNS has a 100 character limit for subject field
   local subject="$1"
   local short_subject="${subject:0:100}"

   aws --region "$AWS_DEFAULT_REGION" sns publish --topic-arn "$SNS_ALERT_TOPIC_ARN" --subject "$short_subject" --message "$(cat "$LOGFILE")"
}

#------------------------------------------------------------------------------
# Same as sns_log_file(), but publishes to $SNS_ALL_ALERT_TOPIC_ARN.
#
# Input:
# $1 - subject; truncated to the first 100 characters.
#------------------------------------------------------------------------------
function sns_log_file_all () {
   local subject="$1"
   local short_subject="${subject:0:100}"

   aws --region "$AWS_DEFAULT_REGION" sns publish --topic-arn "$SNS_ALL_ALERT_TOPIC_ARN" --subject "$short_subject" --message "$(cat "$LOGFILE")"
}

#------------------------------------------------------------------------------
# Deliver the $LOGFILE via PagerDuty.
# See SDP documentation on how to configure:
# https://swarm.workshop.perforce.com/projects/perforce-software-sdp/view/main/doc/SDP_Guide.Unix.html#_configuring_pagerduty_for_notifications
#
# Required environment variables
#  PAGERDUTY_ROUTING_KEY
#
# Arguments passed by the caller are not used by this function.
#------------------------------------------------------------------------------
function pd_log_file () {
   echo -e "Sending alert and log file contents to administrator via PagerDuty." >&2

   # Give the custom field a default in case the user has not defined it in
   # their local p4 vars file.
   # NOTE(review): the default is the literal two-character string '' (single
   # quotes are not special inside double quotes), not an empty string.
   # Confirm this is what the pd CLI expects before changing it.
   PAGERDUTY_CUSTOM_FIELD="${PAGERDUTY_CUSTOM_FIELD:-''}"

   /usr/local/bin/pd event alert \
      --routing_key="$PAGERDUTY_ROUTING_KEY" \
      --source="$(hostname)" \
      --summary="$(basename "$0") has failed in the SDP!" \
      --keys="hostname" --keys="P4INSTANCE" --keys="component" --keys="log_file" \
      --keys="script_name" --keys="log_contents" --keys="custom_field" \
      --values="$(hostname)" --values="$P4INSTANCE" --values="SDP" --values="$LOGFILE" \
      --values="$(basename "$0")" --values="$(cat "$LOGFILE")" --values="$PAGERDUTY_CUSTOM_FIELD"
}

#------------------------------------------------------------------------------
# Function die() - log message, send email/SNS/PagerDuty, and exit 1.
# If $LOGFILE is defined, write message to the log file, deliver the log
# (SNS if SNS_ALERT_TOPIC_ARN is set; else PagerDuty if PAGERDUTY_ROUTING_KEY
# is set and /usr/local/bin/pd exists; else email), and exit.
# If $LOGFILE is not defined, write message to the stdout, and skip
# log delivery.
# If in terminal session, display message to stderr as well.
# The $LOGS/ckp_running.txt semaphore file is removed before exiting.
#------------------------------------------------------------------------------
function die () {
   # mail the error (with more helpful subject line than cron)
   log "ERROR!!! - $HOSTNAME $P4SERVER $0: $*"

   if [[ "${LOGFILE:-Unset}" != Unset ]]; then
      if [[ "${SNS_ALERT_TOPIC_ARN:-Unset}" != Unset ]]; then
         log "Using SNS for log file delivery..."
         sns_log_file "ERROR!!! - $HOSTNAME $P4SERVER $0: $*"
      elif [[ "${PAGERDUTY_ROUTING_KEY:-Unset}" != Unset ]] && [[ -f "/usr/local/bin/pd" ]]; then
         log "Using PagerDuty for log file delivery..."
         pd_log_file "ERROR!!! - $HOSTNAME $P4SERVER $0: $*"
      else
         log "Using email for log file delivery..."
         mail_log_file "ERROR!!! - $HOSTNAME $P4SERVER $0: $*"
      fi
   fi

   # if running from terminal, also send to stderr
   if tty >/dev/null; then
      echo -e "$@" >&2
   fi

   rm -f "${LOGS}/ckp_running.txt"

   exit 1
}

#------------------------------------------------------------------------------
# Convert various size values (K,M,G,T,% or a plain byte count) to bytes.
# Pass in values such as 1024K, 512M, 1G, 2T, 10% or 262144000.
#
# Input:
# $1 - value to convert. Unit suffixes are binary multiples (K = 1024) and
#      are accepted in upper or lower case. A value with no suffix is taken
#      to be a byte count already.
# $2 - total size in bytes; only used when $1 is a percentage.
#
# Output: the size in bytes on stdout.
#
# Return Codes:
# 0: Value converted.
# 1: Value not recognized (nothing is printed).
#------------------------------------------------------------------------------
function convert_to_bytes () {
   local value=$1
   local totalsize=${2:-Undefined}
   local size=
   local unit=

   # Break up value into size (numeric) and unit (K,M,G,T,%). For a value
   # with no unit suffix, both size and unit end up holding the number.
   size=$("$GREP" -Eo '[[:alpha:]%]+|[0-9]+' <<< "$value" | head -1)
   unit=$("$GREP" -Eo '[[:alpha:]%]+|[0-9]+' <<< "$value" | tail -1)

   # Reject anything that does not start with a number, rather than letting
   # the arithmetic below evaluate arbitrary text as a variable name.
   [[ "$size" =~ ^[0-9]+$ ]] || return 1

   # Force base 10 so a leading zero is not interpreted as octal.
   size=$((10#$size))

   # Based on unit, convert to bytes
   case "$unit" in
      K|k) echo $((size * 1024)) ;;
      M|m) echo $((size * 1024**2)) ;;
      G|g) echo $((size * 1024**3)) ;;
      T|t) echo $((size * 1024**4)) ;;
      %) echo $((totalsize * size / 100)) ;;
      *)
         # No unit suffix: the value is already a byte count.
         if [[ "$unit" =~ ^[0-9]+$ ]]; then
            echo "$size"
         else
            return 1
         fi
         ;;
   esac
}

#------------------------------------------------------------------------------
# Write a semaphore file, $LOGS/ckp_running.txt. This file is written at
# the start of processing, and removed upon successful completion. It
# prevents multiple concurrent operations from being launched accidentally
# e.g. by multiple human admins, or a human inadvertently competing with a
# cron job.
#
# It is also intended to get human admins to determine the root cause of
# checkpoint failures.
#------------------------------------------------------------------------------
function ckp_running() {
   if [[ -f "${LOGS}/ckp_running.txt" ]]; then
      die "Last checkpoint not complete. Check the backup process or contact support."
   fi
   echo "Checkpoint running on $(date)." > "${LOGS}/ckp_running.txt"
}

#------------------------------------------------------------------------------
# Remove the ckp_running.txt semaphore file when checkpoint processing is
# complete.
#------------------------------------------------------------------------------
function ckp_complete() {
   rm -f "${LOGS}/ckp_running.txt"
}

#------------------------------------------------------------------------------
# Ensure key directories are writable. Abort if they are not.
# Input:
# $1 - serverType, specify 1 (default) to check dirs for p4d, 2 to check
#      dirs for a standalone proxy or broker.
#
# On an edge server (EDGESERVER=1), the edge-specific checkpoints directory
# ${CHECKPOINTS}.<ServerID without the 'p4d_' prefix> is checked as well.
#------------------------------------------------------------------------------
function check_dirs () {
   local -i serverType=${1:-1}
   # Check that key dirs are writable
   local -i dirsOK=1
   local dirList
   local dir

   # shellcheck disable=SC2153
   case "$serverType" in
      (1) dirList="$OFFLINE_DB $CHECKPOINTS $LOGS $P4TMP";;
      (2) dirList="$LOGS $P4TMP";;
      (*) dirList="$LOGS $P4TMP";;
   esac

   [[ "$EDGESERVER" -eq 1 ]] && dirList+=" ${CHECKPOINTS}.${SERVERID#p4d_}"

   # dirList is a space-delimited list and is intentionally word-split.
   for dir in $dirList; do
      if [[ ! -d "$dir" || ! -w "$dir" ]]; then
         log "Error: Dir $dir does not exist or is not writable."
         dirsOK=0
      fi
   done

   [[ "$dirsOK" -eq 1 ]] || die "Some expected dirs are missing or not writable. Aborting."
}

#------------------------------------------------------------------------------
# Add the output of 'p4 diskspace' to the log file.
#------------------------------------------------------------------------------
function check_disk_space () {
   log "Checking disk space..."
   $P4BIN diskspace >> "$LOGFILE" 2>&1
}

#------------------------------------------------------------------------------
# Check value of journal; ensure it is an integer. Abort via die() if not.
# Input:
# $1 - journal counter value to validate.
#------------------------------------------------------------------------------
function check_journalnum () {
   local JNLNUM=${1:-Unset}
   local re='^[0-9]+$'
   if ! [[ $JNLNUM =~ $re ]] ; then
      die "The journal counter value [$JNLNUM] is invalid. It must be numeric."
   fi
}

#------------------------------------------------------------------------------
# Check the checkpoints directory for the oldest checkpoint.
#
# Sets OLDESTCHECKPOINT to the counter of the least recently modified entry
# whose name contains '.ckp' (ignoring *.md5 files). On an edge server the
# edge-specific checkpoints directory is examined instead of $CHECKPOINTS.
#------------------------------------------------------------------------------
function get_ckpnum () {
   local ckpDir="${CHECKPOINTS}"
   local oldestSuffix=

   if [[ "$EDGESERVER" -ne 0 ]]; then
      ckpDir="${CHECKPOINTS}.${SERVERID#p4d_}"
   fi

   # awk yields whatever follows '.ckp.', e.g. '123.gz' or '123'.
   # shellcheck disable=SC2012 disable=SC2016
   oldestSuffix=$(ls -1tr "${ckpDir}/" | "$GREP" \\.ckp | "$GREP" -v \\.md5$ | head -n 1 | "$AWK" -F '.ckp.' '{ print $(2) }')

   # Keep only the counter by dropping everything from the first dot on.
   # (The previous 'tr -d .gz' deleted the individual characters '.', 'g'
   # and 'z', which left junk behind for any suffix other than '.gz'.)
   # shellcheck disable=SC2034
   OLDESTCHECKPOINT="${oldestSuffix%%.*}"
}

#------------------------------------------------------------------------------
# Determine journal counter by checking counter in db.counters.
#
# Sets JOURNALNUM and CHECKPOINTNUM (JOURNALNUM + 1).
#------------------------------------------------------------------------------
function get_journalnum () {
   # get the current journal and checkpoint serial numbers.
   local nextCheckpointNum
   if [[ -r "$P4ROOT/db.counters" ]]; then
      # Dump from a temp copy of the table rather than from P4ROOT itself.
      copy_jd_table "db.counters" "$P4ROOT"
      nextCheckpointNum=$("$P4DBIN" -r "$JDTmpDir" -k db.counters -v track=-1 -jd - 2>&1 | grep @journal@ | cut -d '@' -f 8)
      remove_jd_tables

      if [[ -n "$nextCheckpointNum" ]]; then
         check_journalnum "$nextCheckpointNum"
         JOURNALNUM="$nextCheckpointNum"
      else
         # Special case: If db.counters is empty, then we have a new/empty data
         # set, so just set the value to 0.
         JOURNALNUM=0
      fi
   else
      # Special case: If db.counters doesn't exist, then we have a new/empty
      # data set, so just set the value to 0.
      JOURNALNUM=0
   fi

   # If we are on an edge server, the journal has already rotated, so we have to decrement the value
   # so that we replay the correct journal file and create the correct checkpoint number on the
   # edge server.
   #
   # In the case of a standby server, the journal rotation occurs on the master server,
   # so we don't need to increment the journal number again, so we decrement by 1.
   # Also, when replaying the journals to the offline db, we don't want to play to the live journal
   # because it is still being replicated.
   if [[ "$EDGESERVER" -eq 1 || "$REPLICASERVER" -eq 1 || "$STANDBYSERVER" -eq 1 ]]; then
      JOURNALNUM=$((JOURNALNUM - 1))
   fi
   CHECKPOINTNUM=$((JOURNALNUM + 1))
}

#------------------------------------------------------------------------------
# Determine journal space usage and minimum disk space requirement.
#
# Sets P4JOURNALMIN, P4JOURNALFREE and P4JOURNALTOTAL.
#------------------------------------------------------------------------------
function get_journal_stats () {
   # Get minimum disk space required on server journal filesystem before server rejects commands
   # This will return the configured and default value, but grab the configured value which shows first
   # If a configured value is not present, it will use the default value
   # shellcheck disable=SC2034 disable=SC2016
   P4JOURNALMIN=$("$P4BIN" configure show filesys.P4JOURNAL.min | "$AWK" '{ print $1 }' | "$CUT" -d'=' -f2 | head -1)
   # Get current journal free disk space
   # shellcheck disable=SC2034
   P4JOURNALFREE=$("$P4BIN" -ztag -F "%freeBytes%" diskspace P4JOURNAL)
   # Get total available disk space for journal
   # shellcheck disable=SC2034
   P4JOURNALTOTAL=$("$P4BIN" -ztag -F "%totalBytes%" diskspace P4JOURNAL)
}

#------------------------------------------------------------------------------
# Verify that the offline databases are usable by checking the existence
# of a 'offline_db_usable.txt' file that is written only when databases
# are in a known-good state, following successful recovery from a checkpoint.
# Also require that the offline db.counters table exists. Abort via die() if
# either check fails.
#------------------------------------------------------------------------------
function check_offline_db_usable () {
   # Check it is OK
   if [[ ! -f "$OFFLINE_DB/offline_db_usable.txt" ]]; then
      die "Offline database not in a usable state. Check the backup process."
   fi

   if [[ ! -f "$OFFLINE_DB/db.counters" ]]; then
      die "Offline database not found. Consider creating it with live_checkpoint.sh. Be aware that live_checkpoint.sh locks the live system and may take a long time. Aborting."
   fi
}

#------------------------------------------------------------------------------
# Determine journal counter in offline databases.
#
# Exports OFFLINEJNLNUM; aborts via die() if it cannot be determined or is
# not numeric.
#------------------------------------------------------------------------------
function get_offline_journal_num () {
   check_offline_db_usable

   if [[ -r "$OFFLINE_DB/db.counters" ]]; then
      copy_jd_table "db.counters" "$OFFLINE_DB"
   else
      die "Cannot check offline journal counter; offline_db is missing db.counters."
   fi

   # Get the journal number of the offline database
   export OFFLINEJNLNUM
   OFFLINEJNLNUM=$("$P4DBIN" -r "$JDTmpDir" -v track=-1 -jd - db.counters 2>&1 | grep '@journal@' | cut -d "@" -f 8 2>> "$LOGFILE") || die "Cannot get the offline journal number. Abort!"
   remove_jd_tables
   check_journalnum "$OFFLINEJNLNUM"
   log "Offline journal number is: $OFFLINEJNLNUM"
}

#------------------------------------------------------------------------------
# Request replica checkpoints.
#
# Runs 'p4 admin checkpoint -Z' in the background with output appended to
# $LOGFILE, adding parallel ('-p -N <Threads>') and multifile ('-m') options
# according to CreateParallelCheckpoint and CreateMultifileParallelCheckpoint.
#------------------------------------------------------------------------------
function request_replica_checkpoint () {
   local checkpointCmd=

   if [[ "$CreateParallelCheckpoint" -eq 1 ]]; then
      if [[ "$CreateMultifileParallelCheckpoint" -eq 1 ]]; then
         checkpointCmd="$P4BIN admin checkpoint -Z -p -m -N ${Threads:-4}"
      else
         checkpointCmd="$P4BIN admin checkpoint -Z -p -N ${Threads:-4}"
      fi
   else
      checkpointCmd="$P4BIN admin checkpoint -Z"
   fi

   log "Running: $checkpointCmd"
   # The command string is intentionally word-split into command and arguments.
   $checkpointCmd >> "$LOGFILE" 2>&1 &
}

#------------------------------------------------------------------------------
# Cleanup old checkpoint and numbered journal files.
#------------------------------------------------------------------------------
# Removes checkpoints whose counter is below (latest - KEEPCKPS + 1) and
# numbered journals whose counter is below (latest - KEEPJNLS), where
# 'latest' is the counter parsed from $CHECKPOINT after a successful call to
# get_latest_checkpoint_with_md5.
#
# Globals read: KEEPCKPS, KEEPJNLS, CheckpointDumpSkipped, CHECKPOINT,
#   CHECKPOINTS, SERVERID, P4MASTER_ID, P4SERVER, P4ROOT, P4DBIN, P4TMP,
#   P4HOME, LOGS, LOGFILE, SDP_DEBUG, SDP_REMOVE_STANDBY_JOURNALS.
#
# Return Codes:
# 0: Cleanup done, or deliberately skipped.
# 1: The counter could not be parsed from the latest checkpoint name.
#------------------------------------------------------------------------------
function remove_old_checkpoints_and_journals () {
   local checkpointsDir=
   local filePrefix=
   local ckpDir=
   local ckpFile=
   local jnlFile=
   local md5File=
   local journalPrefix=
   local standbyJnlFile=
   local standbyReplicaJournalsDir=
   local tmpFile=
   local lastCheckpointJournalCounter=
   local journalCounter=
   local keepCheckpointCounter=
   local keepJournalCounter=

   if [[ "$KEEPCKPS" -eq 0 ]]; then
      log "Skipping cleanup of old checkpoints because KEEPCKPS is set to 0."
      return 0
   fi

   if [[ "$CheckpointDumpSkipped" -eq 1 ]]; then
      log "Skipping cleanup of old checkpoints and journals because the checkpoint dump was skipped."
      return 0
   fi

   if get_latest_checkpoint_with_md5; then
      # Extract the counter: the text between '.ckp.' and the next dot.
      lastCheckpointJournalCounter="${CHECKPOINT##*.ckp.}"
      lastCheckpointJournalCounter="${lastCheckpointJournalCounter%%.*}"

      if [[ "$lastCheckpointJournalCounter" =~ ^[0-9]+$ ]]; then
         if [[ "$KEEPCKPS" -lt "$lastCheckpointJournalCounter" ]]; then
            keepCheckpointCounter=$((lastCheckpointJournalCounter-KEEPCKPS+1))
         else
            log "Not removing any checkpoints or journals because KEEPCKPS ($KEEPCKPS) is greater than or equal to the highest checkpoint counter ($lastCheckpointJournalCounter)."
            return 0
         fi

         if [[ "$KEEPJNLS" -lt "$lastCheckpointJournalCounter" ]]; then
            keepJournalCounter=$((lastCheckpointJournalCounter-KEEPJNLS))
         else
            log "Not removing any numbered journals because KEEPJNLS ($KEEPJNLS) is greater than or equal to the highest checkpoint counter ($lastCheckpointJournalCounter)."
            # Sentinel: keep every numbered journal.
            keepJournalCounter=ALL
         fi

         log "Deleting obsolete checkpoints and journals since last checkpoint counter ($lastCheckpointJournalCounter). Removing checkpoints with counter less than $keepCheckpointCounter (KEEPCKPS=$KEEPCKPS) and journals with counter less than $keepJournalCounter (KEEPJNLS=$KEEPJNLS)."
      else
         log "Error: Invalid journal counter [$lastCheckpointJournalCounter] extracted from file name: $CHECKPOINT - Skipping removal of obsolete checkpoints and journals."
         return 1
      fi
   else
      log "Skipping removal of checkpoints and journal as no checkpoints are available."
      return 0
   fi

   # For the master server, we can safely rely on the SDP standard that the journalPrefix
   # is of the form '/p4/N/checkpoints/p4_N'. For replicas and edge servers, determine
   # the values dynamically based on the current journalPrefix value for the given ServerID.
   if [[ "$SERVERID" == "$P4MASTER_ID" ]]; then
      # Refer to ckp/jnl files starting like: /p4/N/checkpoints/p4_N
      checkpointsDir="${CHECKPOINTS}"
      filePrefix="${P4SERVER}"
   else
      if [[ -r "$P4ROOT/db.config" ]]; then
         copy_jd_table "db.config" "$P4ROOT"
         journalPrefix="$("$P4DBIN" -r "$JDTmpDir" -k db.config -v track=-1 -jd - | grep "@${SERVERID}@ @journalPrefix@" | cut -d '@' -f 10)"
         remove_jd_tables
      fi

      if [[ -n "$journalPrefix" ]]; then
         checkpointsDir="${journalPrefix%/*}"
         filePrefix="${journalPrefix##*/}"
      else
         log "Warning: Could not determine journalPrefix for ServerID $SERVERID."
         checkpointsDir="${CHECKPOINTS}"
         filePrefix="${P4SERVER}"
      fi
   fi

   # Find old checkpoints and journals to remove.
   if [[ -d "$checkpointsDir" ]]; then
      # First, build a list of all checkpoints based on *.md5 and *.OK files. This list will include checkpoint files and
      # parallel checkpoint directories, but will ignore uncompressed checkpoints. Ignoring uncompressed checkpoints is
      # for safety; SDP deals only in compressed checkpoints, and any uncompressed checkpoint is likely due to some kind
      # of recovery or other non-routine operation. Leave uncompressed files alone, but list them as warnings. For
      # optimal safety, ignore files that don't precisely match patterns of checkpoint and journal files that could be
      # created by p4d with the given journalPrefix using tight regular expressions.
      #
      # The *.OK files are essentially stand-ins for *.md5 files for a small range of p4d versions that did not generate top-level
      # *.md5 files for parallel checkpoint directories. For P4D 2023.2+, top-level *.md5 files are generated for parallel
      # checkpoint directories, so they'll have both *.OK and *.md5 files.
      tmpFile=$(mktemp "$P4TMP/checkpoint_MD5_files.XXXXXXXX")
      log "Finding old *.md5 and *.OK files and corresponding checkpoints to remove."
      log "Running: find \"$checkpointsDir/\" -mindepth 1 -maxdepth 1 | grep -E \"^${checkpointsDir}/${filePrefix}\.ckp\.[1-9]{1}[0-9]*(\.gz|)\.(md5|OK)\$\" \"$tmpFile\""
      find "$checkpointsDir/" -mindepth 1 -maxdepth 1 | grep -E "^${checkpointsDir}/${filePrefix}\.ckp\.[1-9]{1}[0-9]*(\.gz|)\.(md5|OK)$" > "$tmpFile"

      if [[ -n "${SDP_DEBUG:-}" ]]; then
         log "DEBUG: Contents of $tmpFile:"
         cat "$tmpFile" >> "$LOGFILE" 2>&1
      fi

      # Given an MD5 file, there are a few possibilities for the corresponding checkpoint:
      # - A compressed checkpoint file created by 'p4 admin checkpoint -Z'.
      # - An uncompressed checkpoint file created by 'p4 admin checkpoint'.
      # - A checkpoint directory created if parallel checkpoints were enabled.
      #
      # Symlinks for checkpoints and journal files are never created by SDP; such are created only by
      # external processing, e.g. by human admins. We avoid deleting those.
      while read -r md5File; do
         journalCounter="${md5File##*.ckp.}"
         journalCounter="${journalCounter%%.*}"

         if [[ "$journalCounter" =~ ^[0-9]+$ ]]; then
            if [[ "$journalCounter" -ge "$keepCheckpointCounter" ]]; then
               [[ -n "${SDP_DEBUG:-}" ]] && log "DEBUG: $md5File not removed; journal counter $journalCounter is greater than or equal to $keepCheckpointCounter."
               continue
            fi
         else
            log "Error: Invalid journal counter [$journalCounter] extracted from file name: $md5File - Not removing this file."
            continue
         fi

         # At most one of these is set below, naming what to delete alongside
         # the marker file.
         ckpFile=
         ckpDir=

         if [[ "$md5File" == *.gz.md5 ]]; then
            if [[ -r "${md5File%.md5}" ]]; then
               if [[ -f "${md5File%.md5}" ]]; then
                  ckpFile="${md5File%.md5}"
               elif [[ -L "${md5File%.md5}" ]]; then
                  log "Warning: For MD5 file [$md5File], expected checkpoint .gz file was a symlink instead. Not removing: ${md5File%.md5}"
               fi
            else
               log "Warning: For MD5 file [$md5File], did not find expected checkpoint .gz file to cleanup: ${md5File%.md5}"
            fi
         elif [[ "$md5File" == *.md5 ]]; then
            if [[ -r "${md5File%.md5}" ]]; then
               if [[ -d "${md5File%.md5}" ]]; then
                  ckpDir="${md5File%.md5}"
               elif [[ -f "${md5File%.md5}" ]]; then
                  log "Warning: For MD5 file [$md5File], expected checkpoint directory was a file, possibly an uncompressed checkpoint. Not removing: ${md5File%.md5}"
               elif [[ -L "${md5File%.md5}" ]]; then
                  log "Warning: For MD5 file [$md5File], expected checkpoint directory was a symlink. Not removing: ${md5File%.md5}"
               fi
            elif [[ -r "${md5File%.md5}.gz" ]]; then
               if [[ -f "${md5File%.md5}.gz" ]]; then
                  ckpFile="${md5File%.md5}.gz"
               elif [[ -L "${md5File%.md5}.gz" ]]; then
                  log "Warning: For MD5 file [$md5File], expected checkpoint file was a symlink. Not removing: ${md5File%.md5}.gz"
               fi
            else
               log "Warning: For MD5 file [$md5File], did not find expected checkpoint file or direcgtory to cleanup: ${md5File%.md5} or ${md5File%.md5}.gz"
            fi
         elif [[ "$md5File" == *.OK ]]; then
            if [[ -r "${md5File%.OK}" ]]; then
               if [[ -d "${md5File%.OK}" ]]; then
                  ckpDir="${md5File%.OK}"
               elif [[ -f "${md5File%.OK}" ]]; then
                  log "Warning: For MD5 file [$md5File], expected checkpoint directory was a file, possibly an uncompressed checkpoint. Not removing: ${md5File%.OK}"
               elif [[ -L "${md5File%.OK}" ]]; then
                  log "Warning: For MD5 file [$md5File], expected checkpoint directory was a symlink. Not removing: ${md5File%.OK}"
               fi
            fi
         fi

         log "Removing checkpoint MD5 file with: rm -f \"$md5File\""
         rm -f "$md5File" >> "$LOGFILE" 2>&1 ||\
            log "Warning: Error doing: rm -f \"$md5File\""

         if [[ -n "$ckpFile" ]]; then
            log "Removing checkpoint file: rm -f \"$ckpFile\""
            rm -f "$ckpFile" >> "$LOGFILE" 2>&1 ||\
               log "Warning: Error doing: rm -f \"$ckpFile\""
         fi

         if [[ -n "$ckpDir" ]]; then
            log "Removing checkpoint dir: rm -rf \"$ckpDir\""
            rm -rf "$ckpDir" >> "$LOGFILE" 2>&1 ||\
               log "Warning: Error doing: rm -rf \"$ckpDir\""
         fi
      done < "$tmpFile"

      # Keep the list around for inspection when debugging.
      [[ -n "${SDP_DEBUG:-}" ]] || rm -f "$tmpFile"

      if [[ "$keepJournalCounter" != "ALL" ]]; then
         # Use KEEPJNLS to allow for separate journal rotation at a different frequency than KEEPCKPS.
         tmpFile=$(mktemp "$P4TMP/journal_files.XXXXXXXX")
         log "Finding old numbered journal files to remove."
         log "Running: find \"$checkpointsDir/\" -mindepth 1 -maxdepth 1 | grep -E \"^${checkpointsDir}/${filePrefix}\.jnl\.[0-9]+\$\" \"$tmpFile\""
         find "$checkpointsDir/" -mindepth 1 -maxdepth 1 | grep -E "^${checkpointsDir}/${filePrefix}\.jnl\.[0-9]+$" > "$tmpFile"

         if [[ -n "${SDP_DEBUG:-}" ]]; then
            log "DEBUG: Contents of $tmpFile:"
            cat "$tmpFile" >> "$LOGFILE" 2>&1
         fi

         while read -r jnlFile; do
            journalCounter="${jnlFile##*.jnl.}"
            if [[ "$journalCounter" =~ ^[0-9]+$ ]]; then
               if [[ "$journalCounter" -ge "$keepJournalCounter" ]]; then
                  [[ -n "${SDP_DEBUG:-}" ]] && log "DEBUG: $jnlFile not removed; journal counter $journalCounter is greater than or equal to $keepJournalCounter."
                  continue
               fi
            else
               log "Error: Invalid journal counter [$journalCounter] extracted from file name: $jnlFile - Not removing this file."
               continue
            fi

            log "Removing journal file with: rm -f \"$jnlFile\""
            rm -f "$jnlFile" >> "$LOGFILE" 2>&1 ||\
               log "Warning: Error doing: rm -f \"$jnlFile\""
         done < "$tmpFile"

         [[ -n "${SDP_DEBUG:-}" ]] || rm -f "$tmpFile"
      fi
   else
      log "Warning: Expected checkpoints directory [$checkpointsDir] does not exist."
   fi

   # The 'journals.rep' structure has been obsolete for some time, but we still advise cleanup in
   # that structure in case modern SDP is installed in an environment where older SDP recently
   # had been operating.
   standbyReplicaJournalsDir="${P4HOME}/journals.rep"
   if [[ -d "$standbyReplicaJournalsDir" ]]; then
      log "Obsolete folder detected: $standbyReplicaJournalsDir - this folder can be removed after sufficient checkpoints and numbered journals appear in ${CHECKPOINTS%/*}/"
   fi

   # This is a workaround to cleanup $LOGS/journal.NNN files on standby replicas.
   # These files are normally removed by p4d during journal rotation on the standby
   # replica. Use only if standby journals are not removed due to a standby replica
   # sharing /hxdepots with its P4TARGET server. To use this workaround, add this
   # line to the end of the /p4/common/config/p4_N.vars file:
   #
   # export SDP_REMOVE_STANDBY_JOURNALS=1
   #
   # Skipped when keepJournalCounter is the 'ALL' sentinel: nothing is to be
   # removed in that case, and 'ALL' cannot be used in the numeric comparison
   # below.
   if [[ "${SDP_REMOVE_STANDBY_JOURNALS:-0}" == 1 && "$keepJournalCounter" != "ALL" && "$(is_standby "$SERVERID")" == YES ]]; then
      tmpFile=$(mktemp "$P4TMP/standby_journal_N_files.XXXXXXXX")
      log "Finding old numbered journal files to remove on standby."
      # find prints paths as "$LOGS/journal.NNN", so the directory and the
      # file name are separated by a slash in the pattern.
      log "Running: find \"$LOGS/\" -mindepth 1 -maxdepth 1 | grep -E \"^${LOGS}/journal\.[1-9]{1}[0-9]*\$\" \"$tmpFile\""
      find "$LOGS/" -mindepth 1 -maxdepth 1 | grep -E "^${LOGS}/journal\.[1-9]{1}[0-9]*$" > "$tmpFile"

      if [[ -n "${SDP_DEBUG:-}" ]]; then
         log "DEBUG: Contents of $tmpFile:"
         cat "$tmpFile" >> "$LOGFILE" 2>&1
      fi

      log "Removing excess journal.NNN files due to SDP_REMOVE_STANDBY_JOURNALS=1."
      while read -r standbyJnlFile; do
         journalCounter="${standbyJnlFile##*journal.}"
         if [[ "$journalCounter" =~ ^[0-9]+$ ]]; then
            if [[ "$journalCounter" -ge "$keepJournalCounter" ]]; then
               [[ -n "${SDP_DEBUG:-}" ]] && log "DEBUG: $standbyJnlFile not removed; journal counter $journalCounter is greater than or equal to $keepJournalCounter."
               continue
            fi
         else
            log "Error: Invalid journal counter [$journalCounter] extracted from file name: $standbyJnlFile - Not removing this file."
            continue
         fi

         log "Removing standby journal file with: rm -f \"$standbyJnlFile\""
         rm -f "$standbyJnlFile" >> "$LOGFILE" 2>&1 ||\
            log "Warning: Error doing: rm -f \"$standbyJnlFile\""
      done < "$tmpFile"

      [[ -n "${SDP_DEBUG:-}" ]] || rm -f "$tmpFile"
   fi

   return 0
}

#------------------------------------------------------------------------------
# Function: is_server_up ($server)
#
# Input:
# $1 - server, one of 'p4d', 'p4p', or 'p4broker'
#
# Output: None
#
# Return Codes:
# 0: Server is up.
# 1: Server is down.
# 2: Bad usage.
#
# Server up/down status is checked using the appropriate init script.
#------------------------------------------------------------------------------
function is_server_up () {
   local server="${1:-Unset}"

   case "$server" in
      (p4d)
         "$P4DInitScript" status > /dev/null 2>&1
         return $?
      ;;
      (p4broker)
         "$P4BrokerInitScript" status > /dev/null 2>&1
         return $?
      ;;
      (p4p)
         "$P4ProxyInitScript" status > /dev/null 2>&1
         return $?
      ;;
      (Unset)
         log "Internal Error: is_server_up(): No server type specified."
         return 2
      ;;
      (*)
         log "Internal Error: is_server_up(): Unknown server specified: $server"
         return 2
      ;;
   esac
}

#------------------------------------------------------------------------------
# Shutdown p4d using systemd if configured for systemd. Otherwise call the
# underlying init script directly.
#
# Log the shutdown activity.
#
# With systemd, wait up to SDP_MAX_STOP_DELAY_P4D seconds (default 43200) for
# the server to go down, and abort via die() if it does not.
#------------------------------------------------------------------------------
function stop_p4d () {
   log "Shutting down the ${P4DBIN##*/} server."
   local -i maxStopDelay=${SDP_MAX_STOP_DELAY_P4D:-43200}
   local -i stopVerified=0
   local -i i=0
   local -i useSystemd=0
   local serviceName=

   # Use systemd only if systemctl exists and reports something for the unit.
   if [[ -n "$(command -v systemctl)" ]]; then
      serviceName="${P4DBIN##*/}"
      if [[ -n "$(systemctl is-enabled "$serviceName" 2>/dev/null)" ]]; then
         useSystemd=1
      fi
   fi

   if [[ "$useSystemd" -eq 1 ]]; then
      { sudo systemctl stop "${P4DBIN##*/}"; } >> "$LOGFILE" 2>&1 ||\
         die "Failed to execute: sudo systemctl stop ${P4DBIN##*/}"

      # With systemd, we must independently confirm service stop,
      # waiting if needed.
      for (( i = 0; i < maxStopDelay; i++ )); do
         if is_server_up p4d; then
            sleep 1
         else
            stopVerified=1
            break
         fi
      done
   else
      "$P4DInitScript" stop >> "$LOGFILE" 2>&1
      stopVerified=1
   fi

   if [[ "$stopVerified" -eq 1 ]]; then
      log "Stopped ${P4DBIN##*/} server."
      return 0
   else
      log "Error: Server ${P4DBIN##*/} did not stop after $maxStopDelay seconds. Tailing $P4LOG:"
      tail "$P4LOG" >> "$LOGFILE" 2>&1
      die "Aborting due to failed p4d stop."
   fi
}

#------------------------------------------------------------------------------
# Shutdown p4broker using systemd if configured for systemd. Otherwise call the
# underlying init script directly.
#
# Log the shutdown activity.
#
# With systemd, wait up to SDP_MAX_STOP_DELAY_P4BROKER seconds (default 600)
# for the broker to go down, and abort via die() if it does not.
#------------------------------------------------------------------------------
function stop_p4broker () {
   log "Shutting down the ${P4BROKERBIN##*/} server."
   local -i maxStopDelay=${SDP_MAX_STOP_DELAY_P4BROKER:-600}
   local -i stopVerified=0
   local -i i=0
   local -i useSystemd=0
   local serviceName=

   if [[ -n "$(command -v systemctl)" ]]; then
      serviceName="${P4BROKERBIN##*/}"
      if [[ -n "$(systemctl is-enabled "$serviceName" 2>/dev/null)" ]]; then
         useSystemd=1
      fi
   fi

   if [[ "$useSystemd" -eq 1 ]]; then
      { sudo systemctl stop "${P4BROKERBIN##*/}"; } >> "$LOGFILE" 2>&1 ||\
         die "Failed to execute: sudo systemctl stop ${P4BROKERBIN##*/}"

      # With systemd, we must independently confirm service stop,
      # waiting if needed.
      for (( i = 0; i < maxStopDelay; i++ )); do
         if is_server_up p4broker; then
            sleep 1
         else
            stopVerified=1
            break
         fi
      done
   else
      "$P4BrokerInitScript" stop >> "$LOGFILE" 2>&1
      stopVerified=1
   fi

   if [[ "$stopVerified" -eq 1 ]]; then
      log "Stopped ${P4BROKERBIN##*/} server."
      return 0
   else
      # Report the stop timeout that was actually waited for.
      die "Server ${P4BROKERBIN##*/} did not stop after $maxStopDelay seconds."
   fi
}

#------------------------------------------------------------------------------
# Shutdown p4p using systemd if configured for systemd. Otherwise call the
# underlying init script directly.
#
# Log the shutdown activity.
#
# With systemd, wait up to SDP_MAX_STOP_DELAY_P4P seconds (default 600) for
# the proxy to go down, and abort via die() if it does not.
#------------------------------------------------------------------------------
function stop_p4p () {
   log "Shutting down the ${P4PBIN##*/} server."
   local -i maxStopDelay=${SDP_MAX_STOP_DELAY_P4P:-600}
   local -i stopVerified=0
   local -i i=0
   local -i useSystemd=0
   local serviceName=

   if [[ -n "$(command -v systemctl)" ]]; then
      serviceName="${P4PBIN##*/}"
      if [[ -n "$(systemctl is-enabled "$serviceName" 2>/dev/null)" ]]; then
         useSystemd=1
      fi
   fi

   if [[ "$useSystemd" -eq 1 ]]; then
      { sudo systemctl stop "${P4PBIN##*/}"; } >> "$LOGFILE" 2>&1 ||\
         die "Failed to execute: sudo systemctl stop ${P4PBIN##*/}"

      # With systemd, we must independently confirm service stop,
      # waiting if needed.
      for (( i = 0; i < maxStopDelay; i++ )); do
         if is_server_up p4p; then
            sleep 1
         else
            stopVerified=1
            break
         fi
      done
   else
      "$P4ProxyInitScript" stop >> "$LOGFILE" 2>&1
      stopVerified=1
   fi

   if [[ "$stopVerified" -eq 1 ]]; then
      log "Stopped ${P4PBIN##*/} server."
      return 0
   else
      die "Server ${P4PBIN##*/} did not stop after $maxStopDelay seconds."
   fi
}

#------------------------------------------------------------------------------
# Start p4d using systemd if configured for systemd. Otherwise call the
# underlying init script directly.
#
# Log the startup activity.
#
# This is a do-or-die function. It returns success upon successful server
# startup, or else dies.
#------------------------------------------------------------------------------
function start_p4d () {

   log "Starting the ${P4DBIN##*/} server."

   # Number of one-second polls to wait for p4d to come up.
   local -i maxStartDelay=${SDP_MAX_START_DELAY_P4D:-120}
   local serviceName="${P4DBIN##*/}"

   if _sdp_service_uses_systemd "$serviceName"; then
      { sudo systemctl start "$serviceName"; } ||\
         die "Failed to execute: sudo systemctl start $serviceName"
   else
      "$P4DInitScript" start >> "$LOGFILE" 2>&1
   fi

   # Confirm that p4d started, waiting if needed.
   if _sdp_wait_for_server_up p4d "$maxStartDelay"; then
      log "Server ${P4DBIN##*/} started successfully."
      return 0
   fi

   # On failure, capture the tail of the server log in LOGFILE before dying.
   log "Error: Server ${P4DBIN##*/} did not start after $maxStartDelay seconds. Tailing $P4LOG:"
   tail "$P4LOG" >> "$LOGFILE" 2>&1
   die "Aborting due to failed p4d start."
}

#------------------------------------------------------------------------------
# Start p4broker using systemd if configured for systemd. Otherwise call the
# underlying init script directly.
#
# Log the startup activity.
#
# This is a do-or-die function. It returns success upon successful server
# startup, or else dies.
#------------------------------------------------------------------------------
function start_p4broker () {

   log "Starting the ${P4BROKERBIN##*/} server."

   # Number of one-second polls to wait for p4broker to come up.
   local -i maxStartDelay=${SDP_MAX_START_DELAY_P4BROKER:-60}
   local serviceName="${P4BROKERBIN##*/}"

   if _sdp_service_uses_systemd "$serviceName"; then
      { sudo systemctl start "$serviceName"; } ||\
         die "Failed to execute: sudo systemctl start $serviceName"
   else
      "$P4BrokerInitScript" start >> "$LOGFILE" 2>&1
   fi

   # Confirm that p4broker started, waiting if needed.
   if _sdp_wait_for_server_up p4broker "$maxStartDelay"; then
      log "Server ${P4BROKERBIN##*/} started successfully."
      return 0
   fi

   die "Server ${P4BROKERBIN##*/} did not start after $maxStartDelay seconds."
}

#------------------------------------------------------------------------------
# Start p4p using systemd if configured for systemd. Otherwise call the
# underlying init script directly.
#
# Log the startup activity.
#
# This is a do-or-die function. It returns success upon successful server
# startup, or else dies.
#------------------------------------------------------------------------------
function start_p4p () {

   log "Starting the ${P4PBIN##*/} server."

   # Number of one-second polls to wait for p4p to come up.
   local -i maxStartDelay=${SDP_MAX_START_DELAY_P4P:-60}
   local serviceName="${P4PBIN##*/}"

   if _sdp_service_uses_systemd "$serviceName"; then
      { sudo systemctl start "$serviceName"; } ||\
         die "Failed to execute: sudo systemctl start $serviceName"
   else
      "$P4ProxyInitScript" start >> "$LOGFILE" 2>&1
   fi

   # Confirm that p4p started, waiting if needed.
   if _sdp_wait_for_server_up p4p "$maxStartDelay"; then
      log "Server ${P4PBIN##*/} started successfully."
      return 0
   fi

   die "Server ${P4PBIN##*/} did not start after $maxStartDelay seconds."
}

#------------------------------------------------------------------------------
# Private helper: _sdp_service_uses_systemd ($ServiceName)
#
# Returns 0 if systemctl is available and 'systemctl is-enabled' prints
# anything on stdout for the given service name; returns non-zero otherwise.
#------------------------------------------------------------------------------
function _sdp_service_uses_systemd () {
   local serviceName="${1:-}"

   [[ -n "$(command -v systemctl)" ]] || return 1
   [[ -n "$(systemctl is-enabled "$serviceName" 2>/dev/null)" ]]
}

#------------------------------------------------------------------------------
# Private helper: _sdp_wait_for_server_up ($ServerType, $MaxDelay)
#
# Calls 'is_server_up $ServerType' up to $MaxDelay times, sleeping one second
# after each unsuccessful check. Returns 0 as soon as the server is reported
# up, or 1 if it never is (including when $MaxDelay is zero or less).
#------------------------------------------------------------------------------
function _sdp_wait_for_server_up () {
   local serverType="${1:-}"
   local -i maxDelay="${2:-0}"
   local -i i=0

   for (( i=0; i<maxDelay; i++ )); do
      if is_server_up "$serverType"; then
         return 0
      fi
      sleep 1
   done

   return 1
}

#------------------------------------------------------------------------------
# Do a front-door 'p4 admin journal' command to rotate the current/active
# journal file on the master server, starting a fresh new P4JOURNAL file.
#
# In a distributed topology with replicas/edge servers, this function must
# be called only on the master/commit server.
#------------------------------------------------------------------------------
function truncate_journal () {
   local CheckpointFile="${CHECKPOINTS}/${P4SERVER}.ckp.${CHECKPOINTNUM}.gz"
   local JournalFile="${CHECKPOINTS}/${P4SERVER}.jnl.${JOURNALNUM}"
   local JournalCmd=

   if [[ "$SERVERID" == "$P4MASTER_ID" ]]; then
      # Refuse to rotate if the files this rotation would produce already exist.
      [[ -f "$CheckpointFile" ]] && \
         die "Checkpoint $CheckpointFile already exists, check the backup process."
      [[ -f "$JournalFile" ]] && \
         die "Journal $JournalFile already exists, check the backup process."

      log "Truncating journal..."
      # During journal rotation, either by a front-door 'p4 admin journal' or a
      # back-door 'p4d -jj', p4d does a copy-then-delete rather than an mv at
      # the OS level. During rotation, the perforce server will pause
      # responses to clients (just as with a checkpoint), but this should be
      # for a short period of time even for large data sets, as the journal
      # typically represents a single day of metadata.
      # Curly braces capture output of 'time'.
      "$P4CBIN"/p4login -p "$P4MASTERPORT"
      # JournalCmd is deliberately expanded unquoted below so that it splits
      # into the command and its arguments.
      JournalCmd="$P4BIN -p $P4MASTERPORT admin journal"
      log "Running: $JournalCmd"
      { time $JournalCmd; } >> "$LOGFILE" 2>&1 || { die "Journal rotation failed. Abort!"; }

      # Wait until the journal file exists in the checkpoints directory before
      # proceeding. The first check happens after an initial 5 second sleep.
      while :; do
         sleep 5
         [[ -f "$JournalFile" ]] && break
      done

      "$P4CBIN"/p4login
   else
      log "Warning: The truncate_journal () function should only be called on the master server. It is ignored on edge and replica replica servers."
   fi
}

#------------------------------------------------------------------------------
# Run 'p4 admin journal' against P4MASTERPORT to rotate the current/active
# journal file on the master server from an edge or standby server, starting a
# fresh new P4JOURNAL file there.
#
# In a distributed topology with edge and standby servers, this function can be
# used to trigger a journal rotation on master/commit server. It's not meant to
# be used from the master server itself.
#
# After the rotation, the function waits for the rotated journal to appear
# locally. The expected file name is known only when EDGESERVER or
# STANDBYSERVER is 1; otherwise the wait is skipped with a warning.
#------------------------------------------------------------------------------
function truncate_journal_on_master () {
   # Increment Edge journal number since the journal will increment on the master after calling journal rotation
   local EdgeJournalNum=$((JOURNALNUM + 1))
   # If using journalcopy, have to add 2 since live journal is in checkpoints folder
   local StandbyJournalNum=$((JOURNALNUM + 2))
   local JournalCmd=
   local JournalFile=

   if [[ "$EDGESERVER" -eq 1 ]]; then
      # Refer to ckp/jnl files starting like (example ServerID=p4d_edge_nyc):
      # /p4/N/checkpoints.edge_nyc/p4_N.edge_nyc
      JournalFile="${CHECKPOINTS}.${SERVERID#p4d_}/${P4SERVER}.${SERVERID#p4d_}.jnl.${EdgeJournalNum}"
   elif [[ "$STANDBYSERVER" -eq 1 ]]; then
      JournalFile="${CHECKPOINTS}/${P4SERVER}.jnl.${StandbyJournalNum}"
   fi

   if [[ "$SERVERID" != "$P4MASTER_ID" ]]; then
      [[ -f "$JournalFile" ]] && \
         die "Journal $JournalFile already exists, check the backup process."

      log "Truncating journal on ${P4MASTERPORT}."
      # Journal rotation does a copy-then-delete, instead of a simple mv.
      # During rotation the perforce server will hang the responses to clients,
      # this should be for a very short period of time even for large data
      # sets, as the journal represents a single day of metadata.
      # Curly braces capture output of 'time'.
      "$P4CBIN"/p4login -p "$P4MASTERPORT"
      # JournalCmd is deliberately expanded unquoted below so that it splits
      # into the command and its arguments.
      JournalCmd="$P4BIN -p $P4MASTERPORT admin journal"
      log "Running: $JournalCmd"
      { time $JournalCmd; } >> "$LOGFILE" 2>&1 || { die "Journal rotation failed. Abort!"; }

      if [[ -n "$JournalFile" ]]; then
         # Wait until the journal file exists in the checkpoints directory
         # before proceeding. The first check happens after an initial 5
         # second sleep.
         while :; do
            sleep 5
            [[ -f "$JournalFile" ]] && break
         done
      else
         # With no expected file name, '[[ -f "" ]]' could never succeed and
         # the wait above would never end.
         log "Warning: Server $SERVERID is neither an edge nor a standby server, so the rotated journal file name is unknown; not waiting for it to appear."
      fi

      "$P4CBIN"/p4login -service
   else
      log "Warning: truncate_journal_on_master () function should not be called on a master server. Ignoring."
   fi
}

#------------------------------------------------------------------------------
# Similar to truncate_journal() above, p4d_truncate_journal() is intended to be
# usable from the p4d_base init script, to allow journal rotation just before
# p4d is started. As it may be called from the init script, it may be called
# on the master, a replica, or an edge. However, it will only do the journal
# rotation if called on the master.
#------------------------------------------------------------------------------
function p4d_truncate_journal () {
   local JournalFile="${CHECKPOINTS}/${P4SERVER}.jnl.${JOURNALNUM}"

   if [[ "$SERVERID" == "$P4MASTER_ID" ]]; then
      [[ -f "$JournalFile" ]] && \
         die "Journal $JournalFile already exists, check the backup process."
      log "Rotating journal prior to starting p4d."
      # Back-door rotation: p4d itself rotates the journal via '-jj'.
      "$P4DBIN" -r "$P4ROOT" -J "$P4JOURNAL" -jj >> "$LOGFILE" 2>&1 ||\
         die "Failed to rotate journal. Aborting p4d server start."
   else
      log "Warning: The p4d_truncate_journal() function has no effect if called on a server other than the master. Ignoring."
   fi
}

#------------------------------------------------------------------------------
# Replay any and all numbered journal files into the offline databases.
#------------------------------------------------------------------------------
function replay_journals_to_offline_db () {
   # $1 (optional, default 0): if 1, on a non-master server obtain journalPrefix
   # from get_target_config_value instead of from the local db.config.
   local checkpointCmd=
   local checkpointsDir=
   local filePrefix=
   local numberedJournal=
   local journalPrefix=
   local useTargetJournalPrefix=${1:-0}
   local -i j=0

   log "Replay any unreplayed journals to the offline database."
   check_offline_db_usable

   # For the master server, we can safely rely on the SDP standard that the
   # journalPrefix is of the form '/p4/N/checkpoints/p4_N'. For replicas and
   # edge servers, determine the values dynamically based on the current journal
   # Prefix value for the given ServerID.
   if [[ "$SERVERID" == "$P4MASTER_ID" ]]; then
      # Refer to ckp/jnl files starting like: /p4/N/checkpoints/p4_N
      checkpointsDir="${CHECKPOINTS}"
      filePrefix="${P4SERVER}"
   else
      if [[ "$useTargetJournalPrefix" -eq 1 ]]; then
         journalPrefix=$(get_target_config_value journalPrefix)
      elif [[ -r "$P4ROOT/db.config" ]]; then
         # Dump a copy of db.config rather than the live table.
         copy_jd_table "db.config" "$P4ROOT"
         journalPrefix="$("$P4DBIN" -r "$JDTmpDir" -k db.config -v track=-1 -jd - | grep "@${SERVERID}@ @journalPrefix@" | cut -d '@' -f 10)"
         remove_jd_tables
      fi

      if [[ -n "$journalPrefix" ]]; then
         # Split journalPrefix into its directory and file-name-prefix parts.
         checkpointsDir="${journalPrefix%/*}"
         filePrefix="${journalPrefix##*/}"
      else
         log "Warning:  Could not determine journalPrefix for ServerID $SERVERID."
         checkpointsDir="${CHECKPOINTS}"
         filePrefix="${P4SERVER}"
      fi
   fi

   for (( j=OFFLINEJNLNUM; j <= JOURNALNUM; j++ )); do
      numberedJournal="${checkpointsDir}/${filePrefix}.jnl.${j}"
      log "Replay journal $numberedJournal to offline db."
      # The marker file is removed for the duration of the replay and written
      # again only after the replay succeeds.
      rm -f "${OFFLINE_DB}/offline_db_usable.txt" >> "$LOGFILE" 2>&1
      # Curly braces capture output of 'time'.
      # checkpointCmd is deliberately expanded unquoted so that it splits into
      # the command and its arguments.
      checkpointCmd="$P4DBIN -r $OFFLINE_DB -jr -f $numberedJournal"
      log "Running: $checkpointCmd"
      { time $checkpointCmd; } >> "$LOGFILE" 2>&1 || { die "Offline journal replay failed. Abort!"; }
      echo "Offline db file restored successfully." > "${OFFLINE_DB}/offline_db_usable.txt"
   done
}

#------------------------------------------------------------------------------
# Replay the live, active P4JOURNAL file into the offline database.
#
# Dies if, on a standby server, the journal number cannot be read from
# db.counters, or if the replay command fails.
#------------------------------------------------------------------------------
function replay_active_journal_to_offline_db () {
   log "Replay active journal to offline db."

   local ActiveJournal=
   local CheckpointCmd=

   # On a standby server, the current/active journal is named /p4/N/logs/journal.<jnlNum>.
   # On the master and other server types, the active journal is $P4JOURNAL.
   if [[ "$STANDBYSERVER" -eq 1 ]]; then
      local _JNLNUM=
      if [[ -r "$P4ROOT/db.counters" ]]; then
         copy_jd_table "db.counters" "$P4ROOT"
         _JNLNUM=$("$P4DBIN" -r "$JDTmpDir" -v track=-1 -jd - db.counters 2>&1 | grep '@journal@' | cut -d "@" -f 8 2>> "$LOGFILE")
         remove_jd_tables
         # The pipeline's exit status is that of 'cut', so it cannot reveal a
         # failed dump or a missing counter; check the extracted value instead.
         [[ "$_JNLNUM" =~ ^[0-9]+$ ]] ||\
            die "Cannot get $P4ROOT journal number. Abort!"
         ActiveJournal="$LOGS/journal.$_JNLNUM"
      else
         die "Cannot get $P4ROOT journal number. No db.counters found in $P4ROOT. Abort!"
      fi
   else
      ActiveJournal="$P4JOURNAL"
   fi

   # Curly braces capture output of 'time'.
   # CheckpointCmd is deliberately expanded unquoted so that it splits into
   # the command and its arguments.
   CheckpointCmd="$P4DBIN -r $OFFLINE_DB -jr -f $ActiveJournal"
   log "Running: $CheckpointCmd"
   { time $CheckpointCmd; } >> "$LOGFILE" 2>&1 || { die "Active Journal replay failed. Abort!"; }
}

#------------------------------------------------------------------------------
# Function: get_target_config_value($configName)
#
# Get a 'p4 configure' configuration value from the P4TARGET server.
#
# The P4TARGET port is looked up for the current SERVERID in a copy of the
# local db.config. The value is written to stdout; the output is empty if
# db.config is not readable, no P4TARGET is found, or the value cannot be
# obtained from the target server.
#
# Example:
#
# targetJournalPrefix=$(get_target_config_value journalPrefix)
#------------------------------------------------------------------------------
function get_target_config_value() {
   local configName=${1:-}
   local configValue=
   local targetPort=
   local grepCmd=

   if [[ -r "$P4ROOT/db.config" ]]; then
      copy_jd_table "db.config" "$P4ROOT"
      # Prepare grep to be case sensitive or insensitive based on SDP instance
      # configuration.  IsP4DCaseSensitive is set in set_vars().
      # Sensitive data can have case-varied server specs, but exactly one will
      # match grep without the -i.  Insensitive data can have only one server
      # spec.
      # grepCmd is deliberately expanded unquoted below so that it splits into
      # the command and its options.
      grepCmd="$GREP"
      [[ "${IsP4DCaseSensitive:-0}" -eq 0 ]] && grepCmd+=" -i"
      [[ "${UseGrepM:-0}" -eq 1 ]] && grepCmd+=" -m 1"

      # Keep only the first matching line: via 'grep -m 1' when UseGrepM is 1,
      # otherwise via 'head -1'.
      if [[ "${UseGrepM:-0}" -eq 1 ]]; then
         targetPort=$("$P4DBIN" -r "$JDTmpDir" -cshow | $grepCmd "${SERVERID}: P4TARGET" | cut -d ' ' -f 4)
      else
         targetPort=$("$P4DBIN" -r "$JDTmpDir" -cshow | $grepCmd "${SERVERID}: P4TARGET" | head -1 | cut -d ' ' -f 4)
      fi
      remove_jd_tables
   fi

   if [[ -n "$targetPort" ]]; then
      # From the first output line, keep what follows the last '=' and drop
      # everything from the first space onward.
      configValue=$("$P4BIN" -p "$targetPort" configure show "$configName" 2>/dev/null | head -1 | sed -E -e 's/^.*=//g' -e 's/ .*$//g')
   fi

   echo "$configValue"
}

#------------------------------------------------------------------------------
# Function: get_latest_checkpoint_with_md5 ()
#
# This function returns the most recent available checkpoint for which a *.md5
# file exists, based on timestamps on the .md5 files. The md5 files are
# generated by p4d upon successful completion of the checkpoint.
#
# The checkpoint can be a file or a directory (for parallel checkpoints), and
# may be compressed or uncompressed.
#
# This function reads several global variables. It sets the global
# LoadParallelCheckpoint=1 if the checkpoint is a directory (requiring
# parallel replay options), or 0 if a file (for serial replay)
# It sets the global CHECKPOINT to the path to the checkpoint file.
#
# The function returns a 0 (happy) if a checkpoint is found (a checkpoint file
# or directory), or 1 (unhappy) otherwise.
#------------------------------------------------------------------------------
function get_latest_checkpoint_with_md5 () {
   # $1 (optional, default 0): if 1, on a non-master server obtain journalPrefix
   # from get_target_config_value instead of from the local db.config.
   local useTargetJournalPrefix=${1:-0}
   local checkpointsDir=
   local filePrefix=
   local journalPrefix=
   local lastCheckpointMD5=
   local lastCheckpoint=
   local tmpFile=
   local md5File=
   local journalCounter=0
   local highJournalCounter=0

   # For the master server, we can safely rely on the SDP standard that the journalPrefix
   # is of the form '/p4/N/checkpoints/p4_N'.  For replicas and edge servers, determine
   # the values dynamically based on the current journalPrefix value for the given ServerID.
   if [[ "$SERVERID" == "$P4MASTER_ID" ]]; then
      # Refer to ckp/jnl files starting like: /p4/N/checkpoints/p4_N
      checkpointsDir="${CHECKPOINTS}"
      filePrefix="${P4SERVER}"
   else
      if [[ "$useTargetJournalPrefix" -eq 1 ]]; then
         journalPrefix=$(get_target_config_value journalPrefix)
      elif [[ -r "$P4ROOT/db.config" ]]; then
         # Dump a copy of db.config rather than the live table.
         copy_jd_table "db.config" "$P4ROOT"
         journalPrefix="$("$P4DBIN" -r "$JDTmpDir" -k db.config -v track=-1 -jd - | grep "@${SERVERID}@ @journalPrefix@" | cut -d '@' -f 10)"
         remove_jd_tables
      fi

      if [[ -n "$journalPrefix" ]]; then
         # Split journalPrefix into its directory and file-name-prefix parts.
         checkpointsDir="${journalPrefix%/*}"
         filePrefix="${journalPrefix##*/}"
      else
         log "Warning:  Could not determine journalPrefix for ServerID $SERVERID."
         checkpointsDir="${CHECKPOINTS}"
         filePrefix="${P4SERVER}"
      fi
   fi

   # Make sure we have at least one checkpoint available, which may be a file or
   # directory. Search for the most recent among the *.md5 and *.OK files.
   #
   # Set lastCheckpoint to the path of the most recent valid checkpoint, or
   # set it to empty to indicate no checkpoint is available.
   tmpFile=$(mktemp "$P4TMP/md5_files.XXXXXXXX") || {
      log "Error: Could not create a temporary file in [$P4TMP] to list checkpoint *.md5 files."
      export CHECKPOINT=
      return 1
   }

   log "Seeking highest journal counter among checkpoint *.md5 files with prefix $checkpointsDir/$filePrefix"
   log "Running: find \"$checkpointsDir/\" -mindepth 1 -maxdepth 1 | grep -E \"^${checkpointsDir}/${filePrefix}\.ckp\.[1-9]{1}[0-9]*(\.gz|)\.(md5|OK)\$\" > \"$tmpFile\""
   find "$checkpointsDir/" -mindepth 1 -maxdepth 1 | grep -E "^${checkpointsDir}/${filePrefix}\.ckp\.[1-9]{1}[0-9]*(\.gz|)\.(md5|OK)$" > "$tmpFile"

   [[ -n "${SDP_DEBUG:-}" ]] && log "Contents of tmpFile[$tmpFile]:\\n$(cat "$tmpFile")"

   # Pick the marker file with the highest journal counter in its name. Among
   # several marker files with the same counter, the last one listed wins.
   while read -r md5File; do
      # Extract N from <prefix>.ckp.N[.gz].(md5|OK).
      journalCounter="${md5File##*.ckp.}"
      journalCounter="${journalCounter%%.*}"
      if [[ "$journalCounter" =~ ^[0-9]+$ ]]; then
         [[ "$journalCounter" -gt "$highJournalCounter" ]] && highJournalCounter="$journalCounter"
         [[ "$journalCounter" -eq "$highJournalCounter" ]] && lastCheckpointMD5="$md5File"
      else
         log "Error: Invalid journal counter [$journalCounter] extracted from file name: $md5File"
      fi
   done < "$tmpFile"

   [[ -n "${SDP_DEBUG:-}" ]] && log "DEBUG: lastCheckpointMD5=$lastCheckpointMD5"

   # The listing is kept for inspection when SDP_DEBUG is set.
   [[ -n "${SDP_DEBUG:-}" ]] || rm -f "$tmpFile"

   if [[ -n "$lastCheckpointMD5" ]]; then
      # Account for the idiosyncrasy that MD5 files for checkpoints may look
      # like p4_N.ckp.gz.md5, p4_N.ckp.md5, or p4_N.ckp.OK.
      if [[ "$lastCheckpointMD5" == *.gz.md5 ]]; then
         lastCheckpoint="${lastCheckpointMD5%.md5}"
         LoadParallelCheckpoint=0
         if [[ ! -r "$lastCheckpoint" ]]; then
            if [[ -f "${lastCheckpointMD5%.gz.md5}" ]]; then
               lastCheckpoint="${lastCheckpointMD5%.gz.md5}"
               log "Warning: Missing last checkpoint [${lastCheckpointMD5%.md5}], which is expected to exist because this exists: [$lastCheckpointMD5]. However, an uncompressed checkpoint [$lastCheckpoint] was found, so using that instead."
            else
               log "Error: Missing last checkpoint [$lastCheckpoint], which is expected to exist because this exists: $lastCheckpointMD5"
               lastCheckpoint=
            fi
         fi
      elif [[ "$lastCheckpointMD5" == *.md5 ]]; then
         if [[ -r "${lastCheckpointMD5%.md5}" ]]; then
            lastCheckpoint="${lastCheckpointMD5%.md5}"
            # A directory means a parallel checkpoint; a file means serial.
            if [[ -d "$lastCheckpoint" ]]; then
               LoadParallelCheckpoint=1
            else
               LoadParallelCheckpoint=0
            fi
         elif [[ -r "${lastCheckpointMD5%.md5}.gz" ]]; then
            lastCheckpoint="${lastCheckpointMD5%.md5}.gz"
            LoadParallelCheckpoint=0
         else
            # lastCheckpoint is still empty here, so both candidate names are
            # derived from lastCheckpointMD5.
            log "Error: Missing last checkpoint [${lastCheckpointMD5%.md5}.gz] or [${lastCheckpointMD5%.md5}], one of which is expected to exist because this exists: $lastCheckpointMD5"
            lastCheckpoint=
         fi
      elif [[ "$lastCheckpointMD5" == *.OK ]]; then
         lastCheckpoint="${lastCheckpointMD5%.OK}"
         LoadParallelCheckpoint=1
         if [[ ! -r "$lastCheckpoint" ]]; then
            log "Error: Missing last checkpoint [$lastCheckpoint], which is expected to exist because this exists: $lastCheckpointMD5"
            lastCheckpoint=
         fi
      else
         log "Error: Could not determine checkpoint file from last checkpoint MD5 file [$lastCheckpointMD5]."
      fi
   else
      if [[ "$SERVERID" == "$P4MASTER_ID" ]]; then
         log "Error: No checkpoints found while scanning for checkpoint files or parallel checkpoint directories, compressed or uncompressed, with a journalPrefix of '$checkpointsDir/$filePrefix'. Consider creating a live checkpoint here on the commit server, which will cause a period of server unresponsiveness, like so:\\n\\tnohup live_checkpoint.sh $SDP_INSTANCE < /dev/null > /dev/null 2>&1 &\\n\\ttail -f $LOGS/checkpoint.log\\n"
      else
         log "Error: No checkpoints found while scanning for checkpoint files or parallel checkpoint directories, compressed or uncompressed, with a journalPrefix of '$checkpointsDir/$filePrefix'.\\n"
      fi
      LoadParallelCheckpoint="$CreateParallelCheckpoint"
   fi

   # The primary job of this function is to set a value for CHECKPOINT, and
   # return happy (0) if a valid value was set, or unhappy (1) otherwise.
   export CHECKPOINT="$lastCheckpoint"

   [[ -z "$CHECKPOINT" ]] && return 1
   return 0
}

#------------------------------------------------------------------------------
# Recreate offline databases from the latest checkpoint.
#
# $1 (optional, default 0) is passed through to get_latest_checkpoint_with_md5.
#
# Does nothing and returns 0 if CheckpointDumpSkipped is 1. Dies if no
# checkpoint is available, if OFFLINE_DB is empty, or if the replay fails.
#------------------------------------------------------------------------------
# shellcheck disable=SC2120
function recreate_offline_db_files () {
   local checkpointCmd=
   local useTargetJournalPrefix=${1:-0}

   if [[ "$CheckpointDumpSkipped" -eq 1 ]]; then
      log "Skipping recreation of offline_db because the checkpoint dump was skipped."
      return 0
   fi

   get_latest_checkpoint_with_md5 "$useTargetJournalPrefix" ||\
      die "Error: Aborting due to unavailable checkpoint."

   # At this point we know we have a valid checkpoint to load,
   # referenced by $CHECKPOINT, and we know that
   # $LoadParallelCheckpoint is correctly set based on the
   # latest checkpoint found above.

   # With an empty OFFLINE_DB the globs below would expand against '/'.
   [[ -n "${OFFLINE_DB:-}" ]] ||\
      die "Error: OFFLINE_DB is not set; refusing to remove db.* files."

   {
      rm -f "${OFFLINE_DB}"/offline_db_usable.txt
      rm -f "${OFFLINE_DB}"/db.*
      rm -f "${OFFLINE_DB}"/save/db.*
   } >> "$LOGFILE" 2>&1

   if [[ "$LoadParallelCheckpoint" -eq 1 ]]; then
      log "Recovering from parallel checkpoint directory: $CHECKPOINT"
      checkpointCmd="$P4DBIN -r $OFFLINE_DB -z -N ${Threads:-4} -jrp $CHECKPOINT"
   else
      log "Recovering from serial checkpoint file: $CHECKPOINT"
      checkpointCmd="$P4DBIN -r $OFFLINE_DB -jr $CHECKPOINT"
   fi

   log "Running: $checkpointCmd"
   # Curly braces capture output of 'time'.
   # checkpointCmd is deliberately expanded unquoted so that it splits into
   # the command and its arguments.
   { time $checkpointCmd; } >> "$LOGFILE" 2>&1 || { die "Restore of checkpoint to $OFFLINE_DB failed!"; }
   echo "Offline db file restored successfully." > "${OFFLINE_DB}/offline_db_usable.txt"
}

#------------------------------------------------------------------------------
# Take a live checkpoint from db.* files in P4ROOT.
#
# On the commit server (SERVERID equals P4MASTER_ID), p4d is run directly
# against P4ROOT with -jc (serial), -jcp (parallel) or -jcpm (multifile
# parallel), as selected by CreateParallelCheckpoint and
# CreateMultifileParallelCheckpoint. After a successful parallel checkpoint an
# empty <checkpoint dir>.OK marker file is written.
#
# On any other server, the journalPrefix for SERVERID is read from a copy of
# db.config, a checkpoint is requested with request_replica_checkpoint.sh, and
# this function then polls every 5 seconds (with no timeout) until the
# expected *.md5 file appears, before rebuilding offline_db.
#
# Calls die() if the checkpoint command fails or if the journalPrefix cannot
# be determined.
#------------------------------------------------------------------------------
function checkpoint () {
   local checkpointCmd=
   local checkpointsDir=
   local filePrefix=
   local parallelCheckpointDir=
   local parallelCheckpointOKFile=
   local journalPrefix=
   local expectedMD5File=
   local -i isCommitServer=0

   log "Create a new checkpoint from live db files in $P4ROOT."

   if [[ "$SERVERID" == "$P4MASTER_ID" ]]; then
      # Refer to ckp/jnl files starting like: /p4/N/checkpoints/p4_N
      checkpointsDir="${CHECKPOINTS}"
      filePrefix="${P4SERVER}"
      isCommitServer=1
   else
      isCommitServer=0
   fi

   if [[ "$isCommitServer" -eq 1 ]]; then
      if [[ "$CreateParallelCheckpoint" -eq 1 ]]; then
         parallelCheckpointDir="${checkpointsDir}/${filePrefix}.ckp.${CHECKPOINTNUM}"
         parallelCheckpointOKFile="${parallelCheckpointDir}.OK"
         if [[ "$CreateMultifileParallelCheckpoint" -eq 1 ]]; then
            checkpointCmd="$P4DBIN -r $P4ROOT -Z -N ${Threads:-4} -jcpm ${checkpointsDir}/${filePrefix}"
         else
            checkpointCmd="$P4DBIN -r $P4ROOT -Z -N ${Threads:-4} -jcp ${checkpointsDir}/${filePrefix}"
         fi
      else
         checkpointCmd="$P4DBIN -r $P4ROOT -jc -Z ${checkpointsDir}/${filePrefix}"
      fi

      log "Running: $checkpointCmd"
      # Curly braces capture output of 'time'.
      # $checkpointCmd is deliberately unquoted so it splits into arguments.
      { time $checkpointCmd; } >> "$LOGFILE" 2>&1 || { die "ERROR - New live checkpoint failed!"; }

      if [[ "$CreateParallelCheckpoint" -eq 1 ]]; then
         touch "$parallelCheckpointOKFile"
         log "Live parallel checkpoint completed OK. Writing: $parallelCheckpointOKFile"
      else
         log "Live checkpoint completed OK."
      fi
   else
      # Dump a private copy of db.config to look up this server's
      # journalPrefix without taking locks in P4ROOT.
      if [[ -r "$P4ROOT/db.config" ]]; then
         copy_jd_table "db.config" "$P4ROOT"
         journalPrefix="$("$P4DBIN" -r "$JDTmpDir" -k db.config -v track=-1 -jd - | grep "@${SERVERID}@ @journalPrefix@" | cut -d '@' -f 10)"
         remove_jd_tables
      fi

      if [[ -n "$journalPrefix" ]]; then
         checkpointsDir="${journalPrefix%/*}"
         filePrefix="${journalPrefix##*/}"
      else
         die "Could not determine journalPrefix for ServerID $SERVERID."
      fi

      log "Starting live checkpoint process for ServerID $SERVERID with journal $((JOURNALNUM+2))."
      log "Calling: $P4CBIN/request_replica_checkpoint.sh $SDP_INSTANCE -now"
      "$P4CBIN"/request_replica_checkpoint.sh "$SDP_INSTANCE" -now
      expectedMD5File="$checkpointsDir/${filePrefix}.ckp.$((JOURNALNUM+2)).md5"
      log "Waiting for MD5 to appear for server $SERVERID: $expectedMD5File"

      # Wait until the *.md5 file appears before proceeding.
      while true; do
         sleep 5
         [[ -f "$expectedMD5File" ]] && break
      done

      "$P4CBIN"/p4login
      # shellcheck disable=SC2119
      recreate_offline_db_files
   fi
}

#------------------------------------------------------------------------------
# Take a checkpoint from the ROOTDIR, typically either /p4/N/root or
# /p4/N/offline_db.
#
# Globals read: ROOTDIR, SERVERID, P4MASTER_ID, CHECKPOINTS, P4SERVER,
# CHECKPOINTNUM, CreateParallelCheckpoint, CreateMultifileParallelCheckpoint,
# P4DBIN, P4ROOT, Threads (default 4), SNAPSHOT_SCRIPT (optional), LOGFILE.
#
# Globals written: CheckpointDumpSkipped is set to 1 (and the function returns)
# when a checkpoint and its MD5 file already exist for CHECKPOINTNUM.
#
# Calls die() if only one of checkpoint/MD5 exists, if the dump fails, or if
# the optional site-specific snapshot script fails.
#------------------------------------------------------------------------------
function dump_checkpoint () {
   local CheckpointCmd=
   local CheckpointsDir=
   local NewCheckpointFile=
   local NewCheckpointDir=
   local NewCheckpointMD5=
   local FilePrefix=
   local JournalPrefix=
   local -i DoSnapshot=0
   local -i SnapshotOK=1
   local -i CheckpointOK=1
   local ParallelCheckpointDir=
   local ParallelCheckpointOKFile=

   # shellcheck disable=SC2153
   log "Dump out new checkpoint from db files in $ROOTDIR."

   # For the master server, we can safely rely on the SDP standard that the journalPrefix
   # is of the form '/p4/N/checkpoints/p4_N'. For replicas and edge servers, determine
   # the values dynamically based on the current journalPrefix value for the given ServerID.
   if [[ "$SERVERID" == "$P4MASTER_ID" ]]; then
      # Refer to ckp/jnl files starting like: /p4/N/checkpoints/p4_N
      CheckpointsDir="${CHECKPOINTS}"
      FilePrefix="${P4SERVER}"
   else
      if [[ -r "$P4ROOT/db.config" ]]; then
         copy_jd_table "db.config" "$P4ROOT"
         JournalPrefix="$("$P4DBIN" -r "$JDTmpDir" -k db.config -v track=-1 -jd - | grep "@${SERVERID}@ @journalPrefix@" | cut -d '@' -f 10)"
         remove_jd_tables
      fi

      if [[ -n "$JournalPrefix" ]]; then
         CheckpointsDir="${JournalPrefix%/*}"
         FilePrefix="${JournalPrefix##*/}"
      else
         # Unlike checkpoint(), fall back to the SDP standard location.
         log "Warning: Could not determine journalPrefix for ServerID $SERVERID."
         CheckpointsDir="${CHECKPOINTS}"
         FilePrefix="${P4SERVER}"
      fi
   fi

   # Only one of NewCheckpointDir/NewCheckpointFile is set; the other stays
   # empty, so its existence test below is always false.
   if [[ "$CreateParallelCheckpoint" -eq 1 ]]; then
      NewCheckpointDir="${CheckpointsDir}/${FilePrefix}.ckp.${CHECKPOINTNUM}"
      NewCheckpointMD5="${NewCheckpointDir}.md5"
   else
      NewCheckpointFile="${CheckpointsDir}/${FilePrefix}.ckp.${CHECKPOINTNUM}.gz"
      NewCheckpointMD5="${NewCheckpointFile}.md5"
   fi

   log "Ensuring MD5 file $NewCheckpointMD5 does not already exist."
   if [[ -r "$NewCheckpointMD5" ]]; then
      if [[ -r "$NewCheckpointFile" ]] ; then
         log "Warning: Skipping this checkpoint dump due to existing checkpoint file $NewCheckpointFile. Verified MD5 file exists: $NewCheckpointMD5. Checkpoints should continue after next journal rotation."
         CheckpointDumpSkipped=1
         return
      elif [[ -d "$NewCheckpointDir" ]]; then
         log "Warning: Skipping this checkpoint dump due to existing checkpoint dir $NewCheckpointDir. Verified MD5 file exists: $NewCheckpointMD5. Checkpoints should continue after next journal rotation."
         CheckpointDumpSkipped=1
         return
      else
         # The MD5 file exists but the checkpoint does not, in file or
         # directory form: abort, naming the form this mode expects.
         if [[ "$CreateParallelCheckpoint" -eq 1 ]]; then
            die "Found file $NewCheckpointMD5, but did not see expected checkpoint dir $NewCheckpointDir. Aborting checkpoint dump."
         else
            die "Found file $NewCheckpointMD5, but did not see expected checkpoint file $NewCheckpointFile. Aborting checkpoint dump."
         fi
      fi
   # The checkpoint exists but its MD5 file does not: abort.
   elif [[ -r "$NewCheckpointFile" ]]; then
      die "Aborting because checkpoint file $NewCheckpointFile exists but $NewCheckpointMD5 does not."
   elif [[ -d "$NewCheckpointDir" ]]; then
      die "Aborting because checkpoint dir $NewCheckpointDir exists but $NewCheckpointMD5 does not."
   fi

   if [[ "$CreateParallelCheckpoint" -eq 1 ]]; then
      ParallelCheckpointDir="${CheckpointsDir}/${FilePrefix}.ckp.${CHECKPOINTNUM}"
      ParallelCheckpointOKFile="${ParallelCheckpointDir}.OK"
      if [[ "$CreateMultifileParallelCheckpoint" -eq 1 ]]; then
         CheckpointCmd="$P4DBIN -r $ROOTDIR -z -N ${Threads:-4} -jdpm $ParallelCheckpointDir"
      else
         CheckpointCmd="$P4DBIN -r $ROOTDIR -z -N ${Threads:-4} -jdp $ParallelCheckpointDir"
      fi
   else
      CheckpointCmd="$P4DBIN -r $ROOTDIR -jd -z ${CheckpointsDir}/${FilePrefix}.ckp.${CHECKPOINTNUM}.gz"
   fi

   log "Running: $CheckpointCmd"

   # Curly braces capture output of 'time'.
   # $CheckpointCmd is deliberately unquoted so it splits into arguments.
   if { time $CheckpointCmd; } >> "$LOGFILE" 2>&1; then
      CheckpointOK=1
      if [[ "$CreateParallelCheckpoint" -eq 1 ]]; then
         log "Parallel checkpoint dump completed OK. Writing: $ParallelCheckpointOKFile"
         touch "$ParallelCheckpointOKFile"
      else
         log "Checkpoint dump completed OK."
      fi
   else
      CheckpointOK=0
   fi

   # The snapshot script, if configured, runs whether or not the dump worked;
   # the combined outcome is reported below.
   if [[ -n "${SNAPSHOT_SCRIPT:-}" ]]; then
      DoSnapshot=1
      log "Calling site-specific snapshot script: $SNAPSHOT_SCRIPT"
      if "$SNAPSHOT_SCRIPT" >> "$LOGFILE" 2>&1; then
         SnapshotOK=1
      else
         SnapshotOK=0
      fi
   fi

   if [[ "$DoSnapshot" -eq 0 ]]; then
      if [[ "$CheckpointOK" -eq 1 ]]; then
         log "New checkpoint dump succeeded."
      else
         die "New checkpoint dump FAILED."
      fi
   else
      if [[ "$CheckpointOK" -eq 0 && "$SnapshotOK" -eq 0 ]]; then
         die "Both checkpoint dump and snapshot FAILED."
      elif [[ "$CheckpointOK" -eq 1 && "$SnapshotOK" -eq 0 ]]; then
         die "New checkpoint dump succeeded, but snapshot FAILED."
      elif [[ "$CheckpointOK" -eq 0 && "$SnapshotOK" -eq 1 ]]; then
         die "New checkpoint dump FAILED, but snapshot succeeded."
      else
         log "New checkpoint dump and snapshot succeeded."
      fi
   fi
}

#------------------------------------------------------------------------------
# Compare journal numbers between live and offline databases, to ensure
# they can be safely swapped out.
#
# The journal counter is read from a temporary copy of db.counters in each of
# OFFLINE_DB and P4ROOT. Calls die() if either value cannot be read, or if the
# P4ROOT journal number is greater than the OFFLINE_DB journal number.
#------------------------------------------------------------------------------
function compare_journal_numbers () {
   local _OFFLINEJNLNUM
   if [[ -r "$OFFLINE_DB/db.counters" ]]; then
      copy_jd_table "db.counters" "$OFFLINE_DB"
   else
      die "Cannot get $OFFLINE_DB journal number. No db.counters found in $OFFLINE_DB. Abort!"
   fi
   _OFFLINEJNLNUM=$("$P4DBIN" -r "$JDTmpDir" -v track=-1 -jd - db.counters 2>&1 | grep '@journal@' | cut -d "@" -f 8 2>> "$LOGFILE") ||\
      die "Cannot get $OFFLINE_DB journal number. Abort!"
   remove_jd_tables
   check_journalnum "$_OFFLINEJNLNUM"

   # Get the journal number of the root database
   if [[ ! -f "$P4ROOT/db.counters" ]]; then
      die "$P4ROOT database not found. Something is seriously wrong since the server was just running a minute ago! Contact support-helix-core@perforce.com"
   fi
   local _JNLNUM
   if [[ -r "$P4ROOT/db.counters" ]]; then
      copy_jd_table "db.counters" "$P4ROOT"
      _JNLNUM=$("$P4DBIN" -r "$JDTmpDir" -v track=-1 -jd - db.counters 2>&1 | grep '@journal@' | cut -d "@" -f 8 2>> "$LOGFILE") ||\
         die "Cannot get $P4ROOT journal number. Abort!"
      remove_jd_tables
   else
      die "Cannot get $P4ROOT journal number. No db.counters found in $P4ROOT. Abort!"
   fi
   check_journalnum "$_JNLNUM"

   if [[ "$_JNLNUM" -gt "$_OFFLINEJNLNUM" ]]; then
      log "$P4ROOT journal number is: $_JNLNUM"
      log "$OFFLINE_DB journal number is: $_OFFLINEJNLNUM"
      die "$OFFLINE_DB journal number is less than $P4ROOT, cannot switch."
   fi
}

#------------------------------------------------------------------------------
# Function: rsync_with_preflight ($src, $path, $safetyThresholdKB)
#
# Mirror a directory with 'rsync -a --delete', but only if the target
# filesystem would keep at least the safety threshold free afterwards.
#
# Input:
# $1 - Source directory (required). A trailing '/' is appended for rsync.
# $2 - Target path (required). Free space is measured on its parent directory.
# $3 - Minimum free space, in KB, that must remain after the copy
#      (optional, default 1048576, i.e. 1 GB).
#
# Returns 0 if the rsync completed. Returns 1 if an argument is missing, the
# transfer size or free space cannot be determined, there is not enough space,
# or the rsync fails.
#------------------------------------------------------------------------------
function rsync_with_preflight () {
   local srcPath=${1:-}
   local tgtPath=${2:-}
   local safetyThresholdKB=${3:-1048576}
   local rsyncCmd=
   local rsyncTransferBytes=
   local rsyncTransferSize=
   local tgtAvail=
   local remainingSpace=

   [[ -n "$srcPath" && -n "$tgtPath" ]] || return 1

   # Display form, used for logging only. The commands themselves are run
   # with directly quoted arguments rather than through 'eval', so paths with
   # shell metacharacters are passed through intact.
   rsyncCmd="rsync -a --delete \"$srcPath/\" \"$tgtPath\""

   log "Planned rsync: $rsyncCmd"

   # Get the size of the data rsync would transfer. The --stats value is in
   # bytes and may be printed with thousands separators (e.g. 1,234,567), so
   # strip every non-digit before using it in arithmetic.
   rsyncTransferBytes=$(rsync -a --delete --dry-run --stats "$srcPath/" "$tgtPath" |
      awk '/Total transferred file size:/ {gsub(/[^0-9]/, "", $5); print $5}')
   if [[ ! "$rsyncTransferBytes" =~ ^[0-9]+$ ]]; then
      log "Error: Could not determine the rsync transfer size for $srcPath. Aborting rsync operation."
      return 1
   fi

   # Convert bytes to KB, rounding up, to match the units of 'df -k'.
   rsyncTransferSize=$(( (10#$rsyncTransferBytes + 1023) / 1024 ))
   log "Pre-rsync safety check: Disk space required for rsync is $rsyncTransferSize KB."

   # Get available space on target filesystem in KB.
   tgtAvail=$(df -k --output=avail "${tgtPath%/*}" | tail -n 1 | tr -d '[:space:]')
   if [[ ! "$tgtAvail" =~ ^[0-9]+$ ]]; then
      log "Error: Could not determine available space in ${tgtPath%/*}. Aborting rsync operation."
      return 1
   fi
   log "Pre-rsync safety check: Available space in ${tgtPath%/*} is $tgtAvail KB."

   # Calculate remaining space after rsync
   remainingSpace=$((tgtAvail - rsyncTransferSize))

   # Check against safety threshold
   log "Pre-rsync safety check results: Required to transfer: $rsyncTransferSize KB, Available: $tgtAvail KB, Safety Threshold: $safetyThresholdKB KB."
   if (( remainingSpace < safetyThresholdKB )); then
      log "Error: Not enough disk space to safely rsync to $tgtPath. Aborting rsync operation."
      return 1
   else
      log "Disk space sufficient. Proceeding with rsync."
   fi

   log "Proceeding with this command: $rsyncCmd"

   if rsync -a --delete "$srcPath/" "$tgtPath" >> "$LOGFILE" 2>&1; then
      log "The rsync completed OK."
   else
      log "The rsync failed to copy."
      return 1
   fi

   return 0
}

#------------------------------------------------------------------------------
# Function: copy_readonly_clients_dir()
#
# Duplicate the data for partitioned clients. This is part of the procedure to
# swap P4ROOT and offline_db. It must be called when the p4d service is stopped,
# and before the call to switch_db_files(), so the P4ROOT still points at the
# live/current data.
#
# The client.readonly.dir value is taken from 'p4d -cshow' output, preferring
# the entry for SERVERID over the 'any' entry.
#
# Returns 0 if nothing needed copying or the copy succeeded, 1 if the copy
# failed.
#------------------------------------------------------------------------------
function copy_readonly_clients_dir() {
   local tmpFile=
   local srcPath=
   local tgtPath=
   local safetyThresholdKB=10485760 # 10 GB in KB (adjust as needed)
   #local safetyThresholdKB=5242880 # 5G in KB
   #local safetyThresholdKB=1048576 # 1G in KB

   log "Checking to see if client.readonly.dir is set."
   tmpFile=$(mktemp)
   "$P4DBIN" -r "$P4ROOT" -cshow > "$tmpFile" 2>&1

   if grep -q "$SERVERID: client.readonly.dir = " "$tmpFile"; then
      srcPath=$(grep "$SERVERID: client.readonly.dir = " "$tmpFile" | cut -d= -f2)
      srcPath=${srcPath# }
   elif grep -q "any: client.readonly.dir = " "$tmpFile"; then
      srcPath=$(grep "any: client.readonly.dir = " "$tmpFile" | cut -d= -f2)
      srcPath=${srcPath# }
   fi

   rm -f "$tmpFile" >> "$LOGFILE" 2>&1

   # NOTE(review): a relative client.readonly.dir value is tested and copied
   # relative to the current directory here; presumably callers run this with
   # P4ROOT as the working directory -- confirm.
   if [[ -n "$srcPath" && -d "$srcPath" ]]; then
      log "The client.readonly.dir value is set to: $srcPath"

      # If the client.readonly.dir value looks like "/p4/N/root/<SOMETHING>",
      # the copy target path is determined by changing /p4/N/root to /p4/N/offline_db.
      if [[ "$srcPath" == "$P4ROOT/"* ]]; then
         # Strip only the leading "$P4ROOT/", treating it as a literal string.
         tgtPath="$OFFLINE_DB/${srcPath#"$P4ROOT/"}"
      # If the clientReadonly.Dir path is an absolute path but not under
      # /p4/N/root, we don't need to handle it, as its location is outside
      # SDP control.
      elif [[ "$srcPath" == /* ]]; then
         log "The client.readonly.dir is outside P4ROOT, so copying is not needed. Skipping."
         tgtPath=
      # If the client.readonly.dir value is a relative path to P4ROOT,
      # the copy target path is just the same directory under /p4/N/offline_db.
      else
         tgtPath="$OFFLINE_DB/$srcPath"
      fi

      if [[ -n "$tgtPath" ]]; then
         if rsync_with_preflight "$srcPath" "$tgtPath" "$safetyThresholdKB"; then
            log "The client.readonly.dir copied OK."
         else
            log "Error: Failed to copy the client.readonly.dir."
            return 1
         fi
      fi
   elif [[ -n "$srcPath" ]]; then
      log "The client.readonly.dir value is set to: $srcPath, but the directory does not exist, so a copy is not needed. Skipping."
      return 0
   else
      log "The client.readonly.dir is not set, so no need to copy it."
   fi

   return 0
}

#------------------------------------------------------------------------------
# Swap out live db.* database files in P4ROOT with those in offline_db.
#
# P4ROOT and OFFLINE_DB are expected to be symlinks: the swap is done by
# reading both link targets, unlinking both links, and re-creating each link
# pointing at the other's former target. Before the swap, the live db.* files
# are moved to P4ROOT/save, and the license, rdb.lbr, state* and server.id*
# files are moved into OFFLINE_DB so they are under P4ROOT afterwards.
#
# Calls die() if verify_sdp.sh fails, if compare_journal_numbers() rejects the
# swap, or if either symlink cannot be re-created.
#------------------------------------------------------------------------------
function switch_db_files () {
   local verifyCmd="$P4CBIN/verify_sdp.sh -skip crontab,excess,init,license,masterid,version -L off"

   log "Calling 'verify_sdp.sh' before swapping db.* files:\\n$verifyCmd"
   $verifyCmd >> "$LOGFILE" 2>&1 ||\
      die "Error: Cannot confirm all is well with $P4CBIN/verify_sdp.sh. Aborting"

   # Compare the Offline and Master journal numbers before switching to make
   # sure they match.
   compare_journal_numbers

   log "Switching root and offline_db links."
   [[ -d "${P4ROOT}"/save ]] || mkdir -p "${P4ROOT}"/save >> "$LOGFILE" 2>&1

   # Marker files written to both directories for the duration of the swap;
   # they are removed again at the end of this function.
   # shellcheck disable=SC2129
   echo "P4ROOT is not available during switch_db_files() processing." > "$P4ROOT/P4ROOT_not_usable.txt" 2>> "$LOGFILE"
   echo "P4ROOT is not available during switch_db_files() processing." > "$OFFLINE_DB/P4ROOT_not_usable.txt" 2>> "$LOGFILE"

   # shellcheck disable=SC2129
   rm -f "${P4ROOT}"/save/db.* >> "$LOGFILE" 2>&1
   rm -rf "${P4ROOT}"/server.locks >> "$LOGFILE" 2>&1
   mv "${P4ROOT}"/db.* "${P4ROOT}"/save/. >> "$LOGFILE" 2>&1

   if [[ -r "$P4ROOT"/license ]]; then
      mv "${P4ROOT}"/license "${OFFLINE_DB}/." >> "$LOGFILE" 2>&1
   fi

   if [[ -n "$(ls "$P4ROOT"/license* 2>/dev/null)" ]]; then
      mv "${P4ROOT}"/license* "${OFFLINE_DB}/." >> "$LOGFILE" 2>&1
   fi

   if [[ -r "${P4ROOT}"/rdb.lbr ]]; then
      mv "${P4ROOT}"/rdb.lbr "${OFFLINE_DB}/." >> "$LOGFILE" 2>&1
   fi

   if [[ -n "$(ls "$P4ROOT"/state* 2>/dev/null)" ]]; then
      mv "${P4ROOT}"/state* "${OFFLINE_DB}/." >> "$LOGFILE" 2>&1
   fi

   if [[ -r "${P4ROOT}"/server.id ]]; then
      mv "${P4ROOT}"/server.id "${OFFLINE_DB}/." >> "$LOGFILE" 2>&1
   fi

   if [[ -n "$(ls "$P4ROOT"/server.id* 2>/dev/null)" ]]; then
      mv "${P4ROOT}"/server.id* "${OFFLINE_DB}/." >> "$LOGFILE" 2>&1
   fi

   rm -f "${OFFLINE_DB}/offline_db_usable.txt" >> "$LOGFILE" 2>&1

   # Swap the two symlinks. LinkOfflineDB and LinkP4ROOT are not declared
   # local, so they remain set in the calling shell.
   LinkOfflineDB="$(readlink "$OFFLINE_DB")"
   LinkP4ROOT="$(readlink "$P4ROOT")"
   unlink "$OFFLINE_DB"
   unlink "$P4ROOT"
   ln -s "$LinkOfflineDB" "$P4ROOT" >> "$LOGFILE" 2>&1 ||\
      die "Link of $LinkOfflineDB to $P4ROOT failed."
   ln -s "$LinkP4ROOT" "$OFFLINE_DB" >> "$LOGFILE" 2>&1 ||\
      die "Link of $LinkP4ROOT to $OFFLINE_DB failed."

   rm -f "$P4ROOT/P4ROOT_not_usable.txt" >> "$LOGFILE" 2>&1
   rm -f "$OFFLINE_DB/P4ROOT_not_usable.txt" >> "$LOGFILE" 2>&1
}

#------------------------------------------------------------------------------
# Function: trim_log_file ($LogToTrim, $MaxLines)
#
# For log files expected to be short, keep them at a max size.
#
# When they get too big, trim them from the top first, so the most recent
# output is retained.
#
# This function treats its processing as non-essential; most errors are
# silently ignored and output discarded. Only error output related to replacing
# the original log is retained in LOGFILE or displayed.
#
# Input:
# $1 - LogToTrim: path of the log to trim; must exist and be writable.
# $2 - MaxLines: number of trailing lines to keep (optional, default 5000).
#------------------------------------------------------------------------------
function trim_log_file () {
   local LogToTrim="${1:-}"
   local MaxLines="${2:-5000}"
   local TmpFile=
   local Lines=

   [[ -w "$LogToTrim" ]] || return

   # Abort if MaxLines isn't numeric.
   [[ "$MaxLines" =~ ^[0-9]+$ ]] || return

   # Read from stdin so wc prints only the count, then drop any padding some
   # wc implementations add around it.
   Lines=$(wc -l < "$LogToTrim" 2>/dev/null)
   Lines=${Lines//[[:space:]]/}

   # Confirm Lines is a number, else just abort.
   [[ "$Lines" =~ ^[0-9]+$ ]] || return

   # If the file isn't big enough to need trimming, abort.
   [[ "$Lines" -gt "$MaxLines" ]] || return

   log "Trimming log $LogToTrim from $Lines to $MaxLines lines."

   TmpFile="${LogToTrim}.trimming.$$.$RANDOM"

   # If the trimming fails, discard output, clean up and just return.
   if tail -n "$MaxLines" "$LogToTrim" > "$TmpFile" 2>/dev/null; then
      if [[ -n "${LOGFILE:-}" ]]; then
         mv -f "$TmpFile" "$LogToTrim" >> "$LOGFILE" 2>&1
      else
         mv -f "$TmpFile" "$LogToTrim"
      fi
   else
      # Don't leave a partial temp file next to the log.
      rm -f "$TmpFile" 2>/dev/null
      return
   fi
}

#------------------------------------------------------------------------------
# Rotate specified log files, and compress with gzip.
#
# Input:
# $1 - LogToRotate: log file name, resolved relative to $LOGS (this function
#      changes into $LOGS for the duration). Nothing is done if it is empty or
#      the file does not exist.
# $2 - GzExt: extension of the compressed log, e.g. ".gz" (optional). The
#      rotated log is gzipped only if this is non-empty.
#
# The log is renamed to <name>.<YYYY-MM-DD_HH-MM-SS>. If a compressed log with
# that name already exists, it is first moved aside to a name with a numeric
# suffix. Output goes to LOGFILE when LOGFILE is set.
#------------------------------------------------------------------------------
function rotate_log_file () {
   local LogToRotate="${1:-}"
   local GzExt="${2:-}"
   local -i i=1
   local Datestamp=
   local RotatedLog=
   local RotatedZippedLog=

   [[ -n "$LogToRotate" ]] || return

   if [[ -n "${LOGFILE:-}" ]]; then
      pushd "$LOGS" > /dev/null 2>> "$LOGFILE" || die "Could not cd to: $LOGS"
   else
      pushd "$LOGS" > /dev/null || die "Could not cd to: $LOGS"
   fi

   Datestamp=$(date +'%Y-%m-%d_%H-%M-%S')
   RotatedLog="${LogToRotate}.${Datestamp}"

   if [[ -f "${LogToRotate}" ]]; then
      if [[ -n "${LOGFILE:-}" ]]; then
         mv -f "${LogToRotate}" "${RotatedLog}" >> "$LOGFILE" 2>&1

         if [[ -n "$GzExt" ]]; then
            RotatedZippedLog="${RotatedLog}${GzExt}"

            # If needed, move existing zipped log aside.
            if [[ -e "$RotatedZippedLog" ]]; then
               # 'i' is declared with -i, so '+=' increments numerically.
               while [[ -e "${LogToRotate}.${Datestamp}.${i}${GzExt}" ]]; do
                  i+=1
               done
               log "Moving pre-existing $RotatedZippedLog aside to ${LogToRotate}.${Datestamp}.${i}${GzExt}" >> "$LOGFILE" 2>&1
               mv -f "$RotatedZippedLog" "${LogToRotate}.${Datestamp}.${i}${GzExt}" >> "$LOGFILE" 2>&1
            fi

            gzip "$RotatedLog" >> "$LOGFILE" 2>&1
         fi
      else
         mv -f "${LogToRotate}" "${RotatedLog}"

         if [[ -n "$GzExt" ]]; then
            RotatedZippedLog="${RotatedLog}${GzExt}"

            # If needed, move existing zipped log aside.
            if [[ -e "$RotatedZippedLog" ]]; then
               while [[ -e "${LogToRotate}.${Datestamp}.${i}${GzExt}" ]]; do
                  i+=1
               done
               log "Moving pre-existing $RotatedZippedLog aside to ${LogToRotate}.${Datestamp}.${i}${GzExt}"
               mv -f "$RotatedZippedLog" "${LogToRotate}.${Datestamp}.${i}${GzExt}"
            fi

            gzip "$RotatedLog"
         fi
      fi
   fi

   if [[ -n "${LOGFILE:-}" ]]; then
      popd > /dev/null 2>> "$LOGFILE" || die "Could not cd to: $OLDPWD"
   else
      popd > /dev/null || die "Could not cd to: $OLDPWD"
   fi
}

#------------------------------------------------------------------------------
# At the start of each run for live_checkpoint.sh, daily_checkpoint.sh, and
# recreate_db_checkpoint.sh, before *any* logging activity occurs, rotate the
# logs from the most recent prior run, always named "checkpoint.log" or "log".
#------------------------------------------------------------------------------
function rotate_last_run_logs () {
   local logName=

   # Rotate prior log file for the current script; it is not compressed.
   rotate_log_file "$LOGFILE"

   # Rotate and compress, in this order: the prior server log, broker log,
   # audit log, monitor metrics log, p4triggers log, and the proxy-related
   # logs (p4p.log, rotate_proxy.log, p4pcm.log).
   for logName in \
      "log" \
      "p4broker.log" \
      "audit.log" \
      "monitor_metrics.log" \
      "p4triggers.log" \
      "p4p.log" \
      "rotate_proxy.log" \
      "p4pcm.log"; do
      rotate_log_file "$logName" ".gz"
   done
}

#------------------------------------------------------------------------------
# Remove log files matching a specified name prefix, preserving a specified
# number of the recent logs.
#
# Input:
# $1 - removeLog: file name prefix; every path matching "<prefix>*" is a
#      candidate, resolved relative to the current directory unless the
#      prefix is an absolute path.
# $2 - keepNum: number of the most recently modified matches to keep. Must be
#      a non-negative integer.
#
# Returns 1 if either argument is missing or keepNum is not a non-negative
# integer (nothing is removed in that case), 0 otherwise.
#------------------------------------------------------------------------------
function remove_log_files () {
   local removeLog=${1:-}
   local keepNum=${2:-}
   local oldLog=

   [[ -n "$removeLog" && -n "$keepNum" ]] || return 1

   # A non-numeric count would compare as 0 in awk and so select every
   # matching file for removal; refuse it instead.
   [[ "$keepNum" =~ ^[0-9]+$ ]] || return 1

   # List matches newest first and act on everything after the first keepNum
   # entries. 'ls -d' keeps a matching directory from being expanded into its
   # contents. Names are read one per line so spaces in names are preserved.
   # AWK is left unquoted, as before, in case it carries arguments.
   # shellcheck disable=SC2012,SC2086
   while IFS= read -r oldLog; do
      [[ -n "$oldLog" ]] || continue
      log "rm -f $oldLog"
      rm -f "$oldLog"
   done < <(ls -td "${removeLog}"* 2>/dev/null | ${AWK:-awk} -v keep="$keepNum" 'NR > keep')

   return 0
}

#------------------------------------------------------------------------------
# Remove old SDP logs.
#
# Does nothing (returns 0) if the checkpoint dump was skipped. Otherwise, in
# $LOGS, prunes checkpoint logs down to KEEPJNLS and the other known logs down
# to KEEPLOGS; a value of 0 for either setting disables that cleanup. Finally
# removes temporary directories older than one day that are owned by OSUSER
# and named tmp.* in /tmp or tmp_jdtmpdir.* in P4TMP.
#------------------------------------------------------------------------------
function remove_old_logs () {
   local logName=

   if [[ "$CheckpointDumpSkipped" -eq 1 ]]; then
      log "Skipping cleanup of old logs because the checkpoint dump was skipped."
      return 0
   fi

   pushd "$LOGS" > /dev/null 2>> "$LOGFILE" || die "Could not cd to: $LOGS"

   # First remove checkpoint logs. For checkpoint logs specifically, use
   # KEEPJNLS rather than KEEPLOGS to determine how many to keep, so we keep
   # the same number of checkpoint logs as we keep checkpoints.
   if [[ "$KEEPJNLS" -eq 0 ]]; then
      log "Skipping cleanup of old checkpoint logs because KEEPJNLS is set to 0."
   else
      log "Deleting old checkpoint logs. Keeping latest $KEEPJNLS, per KEEPJNLS setting in p4_vars."
      remove_log_files "checkpoint.log" "$KEEPJNLS"
   fi

   # Next, use KEEPLOGS to determine how many other logs to cleanup.
   if [[ "$KEEPLOGS" -eq 0 ]]; then
      log "Skipping cleanup of old server logs because KEEPLOGS is set to 0."
   else
      log "Deleting old server logs. Keeping latest $KEEPLOGS, per KEEPLOGS setting in p4_vars."
      # p4triggers.log is included because rotate_last_run_logs() rotates it;
      # without it, rotated trigger logs would never be pruned.
      for logName in \
         "log" \
         "p4broker.log" \
         "broker_rotate.log" \
         "audit.log" \
         "sync_replica.log" \
         "replica_status.log" \
         "replica_cleanup.log" \
         "request_checkpoint.log" \
         "recreate_offline_db.log" \
         "edge_shelf_replicate.log" \
         "upgrade.log" \
         "p4login" \
         "p4verify.log" \
         "journal_watch.log" \
         "refresh_P4ROOT_from_offline_db.log" \
         "purge_revisions.log" \
         "monitor_metrics.log" \
         "p4triggers.log" \
         "p4p.log" \
         "rotate_proxy.log" \
         "p4pcm.log"; do
         remove_log_files "$logName" "$KEEPLOGS"
      done
   fi

   popd > /dev/null 2>>"$LOGFILE" || die "Could not cd to: $OLDPWD"

   # Cleanup old/excess JDTmpDir
   find /tmp/ -mindepth 1 -maxdepth 1 -type d -name "tmp.*" -user "$OSUSER" -mtime +1 -exec rm -rf {} \;
   find "$P4TMP/" -mindepth 1 -maxdepth 1 -type d -name "tmp_jdtmpdir.*" -user "$OSUSER" -mtime +1 -exec rm -rf {} \;
}

#------------------------------------------------------------------------------
# Set the SDP Checkpoint counter to indicate last successful SDP checkpoint
# operation for a given ServerID.
#
# The counter is named LastSDPCheckpoint.<SERVERID> and is set on the server
# at P4MASTERPORT to the current time, as epoch seconds followed by a
# human-readable timestamp. Failure to set the counter is logged as a warning
# and is not fatal.
#------------------------------------------------------------------------------
function set_counter() {
   local counterName="LastSDPCheckpoint.${SERVERID:-UnsetServerID}"
   local counterValue=

   "$P4CBIN"/p4login -p "$P4MASTERPORT"

   counterValue="$(date +'%s (%Y/%m/%d %H:%M:%S %z %Z)')"
   log "Setting checkpoint counter $counterName to value $counterValue."
   echo "$P4BIN -s -u $P4USER -p $P4MASTERPORT counter $counterName \"$counterValue\"" >> "$LOGFILE"

   "$P4BIN" -s -u "$P4USER" -p "$P4MASTERPORT" counter "$counterName" "$counterValue" >> "$LOGFILE" 2>&1 ||\
      log "Warning: Unable to set checkpoint counter $counterName to value [$counterValue]."
}
# | Change | User | Description | Committed | |
#79 | 31204 | Will Kreitzmann |
Released SDP 2024.2.31193 (2025/01/17). Copy Up using 'p4 copy -r -b perforce_software-sdp-dev'. |
#78 | 31077 | C. Thomas Tyler |
Released SDP 2024.2.31075 (2024/12/20). Copy Up using 'p4 copy -r -b perforce_software-sdp-dev'. |
#77 | 30915 | C. Thomas Tyler |
Released SDP 2024.1.30913 (2024/11/20). Copy Up using 'p4 copy -r -b perforce_software-sdp-dev'. |
#76 | 30388 | C. Thomas Tyler |
Released SDP 2024.1.30385 (2024/06/11). Copy Up using 'p4 copy -r -b perforce_software-sdp-dev'. |
#75 | 30297 | C. Thomas Tyler |
Released SDP 2023.2.30295 (2024/05/08). Copy Up using 'p4 copy -r -b perforce_software-sdp-dev'. |
#74 | 30043 | C. Thomas Tyler |
Released SDP 2023.2.30041 (2023/12/22). Copy Up using 'p4 copy -r -b perforce_software-sdp-dev'. |
#73 | 29954 | C. Thomas Tyler |
Released SDP 2023.1.29949 (2023/12/01). Copy Up using 'p4 copy -r -b perforce_software-sdp-dev'. |
#72 | 29891 | C. Thomas Tyler |
Released SDP 2023.1.29699 (2023/07/11). Copy Up using 'p4 copy -r -b perforce_software-sdp-dev'. |
#71 | 29612 | C. Thomas Tyler |
Released SDP 2023.1.29610 (2023/05/25). Copy Up using 'p4 copy -r -b perforce_software-sdp-dev'. |
#70 | 29443 | C. Thomas Tyler |
Released SDP 2022.2.29441 (2023/02/27). Copy Up using 'p4 copy -r -b perforce_software-sdp-dev'. |
#69 | 29401 | C. Thomas Tyler |
Released SDP 2022.2.29399 (2023/02/06). Copy Up using 'p4 copy -r -b perforce_software-sdp-dev'. |
#68 | 29252 | C. Thomas Tyler |
Released SDP 2022.2.29250 (2022/12/08). Copy Up using 'p4 copy -r -b perforce_software-sdp-dev'. |
#67 | 29143 | C. Thomas Tyler |
Released SDP 2022.1.29141 (2022/10/29). Copy Up using 'p4 copy -r -b perforce_software-sdp-dev'. |
#66 | 28858 | C. Thomas Tyler |
Released SDP 2022.1.28855 (2022/05/27). Copy Up using 'p4 copy -r -b perforce_software-sdp-dev'. |
#65 | 28651 | C. Thomas Tyler |
Released SDP 2021.2.28649 (2022/03/03). Copy Up using 'p4 copy -r -b perforce_software-sdp-dev'. |
#64 | 28412 | C. Thomas Tyler |
Released SDP 2021.2.28410 (2021/11/24). Copy Up using 'p4 copy -r -b perforce_software-sdp-dev'. |
#63 | 28240 | C. Thomas Tyler |
Released SDP 2021.1.28238 (2021/11/12). Copy Up using 'p4 copy -r -b perforce_software-sdp-dev'. |
#62 | 27761 | C. Thomas Tyler |
Released SDP 2020.1.27759 (2021/05/07). Copy Up using 'p4 copy -r -b perforce_software-sdp-dev'. |
#61 | 27463 | C. Thomas Tyler |
Released SDP 2020.1.27457 (2021/02/17). Copy Up using 'p4 copy -r -b perforce_software-sdp-dev'. |
#60 | 27331 | C. Thomas Tyler |
Released SDP 2020.1.27325 (2021/01/29). Copy Up using 'p4 copy -r -b perforce_software-sdp-dev'. |
#59 | 26496 | C. Thomas Tyler |
Released SDP 2019.3.26494 (2020/04/23). Copy Up using 'p4 copy -r -b perforce_software-sdp-dev'. |
#58 | 26480 | C. Thomas Tyler |
Released SDP 2019.3.26478 (2020/04/12). Copy Up using 'p4 copy -r -b perforce_software-sdp-dev'. |
#57 | 26475 | C. Thomas Tyler |
Released SDP 2019.3.26472 (2020/04/10). Copy Up using 'p4 copy -r -b perforce_software-sdp-dev'. |
#56 | 26470 | C. Thomas Tyler |
Released SDP 2019.3.26468 (2020/04/10). Copy Up using 'p4 copy -r -b perforce_software-sdp-dev'. |
#55 | 26411 | C. Thomas Tyler |
Released SDP 2019.3.26407 (2020/03/28). Copy Up using 'p4 copy -r -b perforce_software-sdp-dev'. |
#54 | 26403 | C. Thomas Tyler |
Released SDP 2019.3.26400 (2020/03/28). Copy Up using 'p4 copy -r -b perforce_software-sdp-dev'. |
#53 | 26246 | C. Thomas Tyler |
Released SDP 2019.3.26239 (2020/01/08). Copy Up using 'p4 copy -r -b perforce_software-sdp-dev'. |
#52 | 26161 | C. Thomas Tyler |
Released SDP 2019.3.26159 (2019/11/06). Copy Up using 'p4 copy -r -b perforce_software-sdp-dev'. |
#51 | 25596 | C. Thomas Tyler |
Released SDP 2019.2.25594 (2019/05/02). Copy Up using 'p4 copy -r -b perforce_software-sdp-dev'. |
#50 | 25380 | C. Thomas Tyler |
Released SDP 2019.1.25374 (2019/03/21). Copy Up using 'p4 copy -r -b perforce_software-sdp-dev'. |
#49 | 25279 | C. Thomas Tyler |
Released SDP 2019.1.25276 (2019/03/06). Copy Up using 'p4 copy -r -b perforce_software-sdp-dev'. |
#48 | 25245 | C. Thomas Tyler |
Released SDP 2019.1.25238 (2019/03/02). Copy Up using 'p4 copy -r -b perforce_software-sdp-dev'. |
#47 | 23331 | C. Thomas Tyler |
Released SDP 2017.4.23329 (2017/12/05). Copy Up using 'p4 copy -r -b perforce_software-sdp-dev'. |
#46 | 23044 | C. Thomas Tyler |
Released SDP 2017.3.23041 (2017/10/24). Copy Up using 'p4 copy -r -b perforce_software-sdp-dev'. |
#45 | 23006 | C. Thomas Tyler |
Released SDP 2017.3.23003 (2017/10/19). Copy Up using 'p4 copy -r -b perforce_software-sdp-dev'. |
#44 | 22685 | Russell C. Jackson (Rusty) | Update main with current changes from dev. | ||
#43 | 22185 | C. Thomas Tyler |
Released SDP 2017.2.22177 (2017/05/17). Copy Up using 'p4 copy -r -b perforce_software-sdp-dev'. |
#42 | 21723 | C. Thomas Tyler |
Released SDP 2017.1.21720 (2017/02/17). Copy Up using 'p4 copy -r -b perforce_software-sdp-dev'. |
#41 | 21338 | C. Thomas Tyler |
Released SDP 2016.2.21328 (2016/12/16). Copy Up using 'p4 copy -r -b perforce_software-sdp-dev'. |
#40 | 21193 | Russell C. Jackson (Rusty) | Update main from dev. | ||
#39 | 20974 | C. Thomas Tyler |
Released SDP 2016.2.20972 (2016/11/01). Copy Up using 'p4 copy -r -b perforce_software-sdp-dev'. |
#38 | 20858 | C. Thomas Tyler |
Released SDP 2016.2.20856 (2016/10/04). Copy Up using 'p4 copy -r -b perforce_software-sdp-dev'. |
#37 | 20767 | C. Thomas Tyler |
Released SDP 2016.2.20755 (2016/09/29). Copy Up using 'p4 copy -r -b perforce_software-sdp-dev'. |
#36 | 20353 | C. Thomas Tyler |
Released SDP 2016.1.20348. Copy Up using 'p4 copy -r -b perforce_software-sdp-dev', with selective removal of changes related to work-in-progress changes. |
#35 | 19898 | C. Thomas Tyler |
Released SDP/MultiArch/2016.1/19888 (2016/07/07). Copy Up using 'p4 copy -r -b perforce_software-sdp-dev'. |
#34 | 19835 | C. Thomas Tyler |
Released Rev. SDP/MultiArch/2016.1/19768 (2016/06/24). Copy Up using 'p4 copy -r -b perforce_software-sdp-dev'. |
#33 | 19694 | C. Thomas Tyler |
Released SDP/MultiArch/2016.1/19661 (2016/06/08). Copy Up using 'p4 copy -r -b perforce_software-sdp-dev'. |
#32 | 19414 | C. Thomas Tyler | Released SDP/MultiArch/2016.1/19410 (2016/05/17). | ||
#31 | 18961 | C. Thomas Tyler | Released: SDP/MultiArch/2016.1/18958 (2016/04/08). | ||
#30 | 18619 | Russell C. Jackson (Rusty) | Updating main with current changes. | ||
#29 | 18530 | Russell C. Jackson (Rusty) | Update main from dev. | ||
#28 | 16155 | Russell C. Jackson (Rusty) |
Removed check code that probably hasn't ever worked. Deleted mirror_ldap* since that functionality is built into the server now. |
#27 | 15856 | C. Thomas Tyler |
Replaced the big license comment block with a shortened form referencing the LICENSE file included with the SDP package, and also by the URL for the license file in The Workshop. |
#26 | 15784 | Russell C. Jackson (Rusty) | Added missing bracket. | ||
#25 | 15780 | C. Thomas Tyler |
Per Robert: Style police causing problems :) Fixed! |
#24 | 15777 | C. Thomas Tyler |
No functional changes. Style policing on bash scripts only. Normalized indentation, line breaks, and general whitespace usage, and removed offending tabs. |
#23 | 15609 | C. Thomas Tyler | Pushing SDP 2015.1.15607 (2015/09/02). | ||
#22 | 15197 | Russell C. Jackson (Rusty) | Corrected versions from testing. | ||
#21 | 15193 | Russell C. Jackson (Rusty) |
Added semaphore file to indicate state of the offline database and added check into the backup process to fail if the state of the offline db is not good. |
#20 | 15190 | Russell C. Jackson (Rusty) |
Added a semaphore file to prevent the checkpoint process from running if another one hasn't finished. Added a check to make sure the journal number is numeric. |
#19 | 13928 | dsp |
Set lastSDPCounter after a successful SDP checkpoint. p4 admin checkpoint sets lastCheckpointAction, which is useful for monitoring, in particular when checkpoint age should be observed from the outside through p4. However, the SDP is using p4d directly to create checkpoints and will not set lastCheckpointAction. In order to distinguish human actions from the SDP cronjobs, set a new counter lastSDPCounter in a similar format. |
#18 | 13908 | C. Thomas Tyler | Pushing SDP 2015.1.13906. | ||
#17 | 12171 | Russell C. Jackson (Rusty) | Merge in changes to remove the need for p4master_run. | ||
#16 | 11950 | Russell C. Jackson (Rusty) |
Made die function record ERROR!!! $HOSTNAME and $P4SERVER in subject. Cleaned up message passed to die command and corrected a typo. |
#15 | 11929 | Russell C. Jackson (Rusty) | Updated die function to just pass parameter to mail_log_file instead of echo. | ||
#14 | 11919 | Russell C. Jackson (Rusty) |
Added a SERVERID variable to p4_vars and updated backup_functions to use it. Changed the location and the names of the config files so that they could live in /p4/common/config (You're welcome, Tom). The file names are: p4_$INSTANCE.vars and p4_$INSTANCE.p4review.cfg. p4_$INSTANCE.vars will now set P4REPLICA to FALSE if SERVERID matches MASTERNAME, otherwise it is TRUE. This change means that a user must change server.id now in order to change the role of the server rather than changing the instance vars file. This makes more sense to a user that is reading the admin guide about server.id rather than overwriting the file based on a setting that isn't in the admin guide. Changed mkdirs to reflect all of the above changes. |
#13 | 11908 | adrian_waters | Use set -u to trap unbounded variables | ||
#12 | 11886 | Russell C. Jackson (Rusty) | Changed $prog to $0 so that we don't have to set prog in the calling functions. | ||
#11 | 11766 | Robert Cowham |
Missed a reference to $MAIL in @11758. Tweaked run_tests.sh to output more error messages on failure, though this still doesn't show output of individual failed commands. |
#10 | 11758 | Russell C. Jackson (Rusty) |
Change MAIL variable to SDPMAIL to avoid conflicts with customer variables. Changed sdp_sync.sh to use get_mail_opts from backup_functions to avoid duplicate functions. |
#9 | 11730 | Russell C. Jackson (Rusty) |
Moved P4SERVER variable to p4_vars so that all scripts can use it properly. replica_status.sh referenced it, but it wasn't working since it was only in backup_functions.sh |
#8 | 11710 | Russell C. Jackson (Rusty) |
Changed die function to call new email function. Added su to OSUSER functionality to p4master_run to avoid problems with people running scripts manually as root by mistake. |
#7 | 11707 | Robert Cowham |
Refactored sending of mail to a common function. Make the setting of "MAILFROM" work for Ubuntu (GNU Mailutils) as well as CentOS |
#6 | 11570 | Russell C. Jackson (Rusty) |
Brought in changes from Mark Foundry to add -S $MAILFROM to mail commands. Changed sync_replica.sh and weekly_sync_replica.sh to use $LOGFILE for consistency. Added mail command to both files as well. |
#5 | 11540 | Russell C. Jackson (Rusty) | Converted to unix format. | ||
#4 | 11534 | Russell C. Jackson (Rusty) | Added -f to -jr to cover offline obliterates where the entries are already removed from the offline database. | ||
#3 | 11524 | Russell C. Jackson (Rusty) | Released updated version of the SDP from Dev. | ||
#2 | 11130 | Robert Cowham | Check for the existence of offline database and log error message if not found. | ||
#1 | 10148 | C. Thomas Tyler | Promoted the Perforce Server Deployment Package to The Workshop. |