#!/bin/bash
#==============================================================================
# Copyright and license info is available in the LICENSE file included with
# the Helix Management System (hms), and also available online:
# https://swarm.workshop.perforce.com/projects/perforce_software-hms/view/main/LICENSE
#------------------------------------------------------------------------------
# Treat references to unset variables as errors from here on.
set -u
#==============================================================================
# Environment Setup
# Allow override of P4U_HOME, which is set only when testing P4U scripts.
export P4U_HOME=${P4U_HOME:-/p4/common/bin}
export CBIN=${CBIN:-$P4U_HOME}
export P4U_ENV=$P4U_HOME/p4u_env.sh
# Logging is off unless '-L <log>' is given on the command line.
export P4U_LOG="off"
# Environment isolation. For stability and security reasons, prepend CBIN to
# PATH so that the known-good, tested scripts in that directory take
# precedence over same-named executables found later on the PATH.
# NOTE(review): '~/bin' and '.' are appended as fallbacks; having '.' on the
# PATH sits oddly with the isolation goal -- confirm it is still needed.
export PATH=$CBIN:$PATH:~/bin:.
unset P4CONFIG
# Refuse to continue without the P4U environment file.
[[ -r "$P4U_ENV" ]] || {
   echo -e "\nError: Cannot load environment from: $P4U_ENV\n\n"
   exit 1
}
# Quote the sourced paths so an overridden P4U_HOME containing whitespace
# is handled the same way as in the readability test above.
. "$P4U_ENV"
. "$P4U_HOME/libcore.sh"
# Default to the highest verbosity; '-v<n>' can lower it.
export VERBOSITY=5
# Script-level settings, populated during command line processing.
declare CfgFile=""
declare Mode=""
declare Instance=""
declare Master=""
declare Backup=""
declare -i SilentMode=0
#==============================================================================
# Local Functions
#------------------------------------------------------------------------------
# Function: usage (required function)
#
# Input:
# $1 - style, either -h (for short form) or -man (for man-page like format).
#------------------------------------------------------------------------------
function usage
{
   # 'declare' inside a function makes the variable local. With no argument
   # the short form (-h) is assumed.
   declare style=${1:--h}

   # The synopsis is always shown. The text lines below are part of the
   # string and deliberately start in column 0.
   echo "USAGE:
$THISSCRIPT {-s|-u} -i <#> -M <host> -B <host> [-L <log>] [-si] [-v<n>] [-n] [-D]
or
$THISSCRIPT [-h|-man]
Note: This script is intended to be called by p4failover.sh. It should not
be called by a human directly.
"

   # The detailed option reference is shown only for the man-page style.
   if [[ $style == -man ]]; then
      echo -e "
OPTIONS:
-s Specifies Scheduled failover mode. Unlike HA Failover, there is no
difference in the function of Local Failover (i.e. this script) for
Scheduled vs. Unscheduled failover modes. It is captured merely for
recording purposes.
-u Specifies Unscheduled failover mode.
-i Specify a Perforce server instance to failover. Required.
-M Specify the master host. Required.
-B Specify the backup host. Required.
-n Illustrates commands that would be executed in a
failover situation, without changing anything.
\"No-Op\" mode.
-v<n> Set verbosity 1-5 (-v1 = quiet, -v5 = highest).
-L <log>
Specify the path to a log file, or the special value 'off' to disable
logging. By default, all output (stdout and stderr) goes to:
${P4U_LOG}.
NOTE: This script is self-logging. That is, output displayed on the
screen is simultaneously captured in the log file. Do not run this
script with redirection operators like '> log' or '2>&1',
and do not use it with 'tee.'
-si Operate silently. All output (stdout and stderr) is redirected to
the log only; no output appears on the terminal. This cannot be
used with '-L off'.
-D Set extreme debugging verbosity.
HELP OPTIONS:
-h Display short help message
-man Display man-style help message
DESCRIPTION:
This script executes local failover. It is intended
to be called by p4failover.sh, rather than being called
by a human directly.
"
   fi

   # Displaying usage always ends the script with a non-zero status, even
   # when help was explicitly requested.
   exit 1
}
#------------------------------------------------------------------------------
# Function: terminate
function terminate
{
   # Exit handler, installed via 'trap' for EXIT, SIGINT and SIGTERM.
   # Reads:  P4U_LOG, THISSCRIPT, OverallReturnStatus.
   # Exits:  with the value of OverallReturnStatus.

   # Remove the traps first so that the 'exit' at the bottom does not
   # re-enter this handler.
   trap - EXIT SIGINT SIGTERM

   # Housekeeping helper supplied outside this file.
   cleanTrash

   # Close out the log only if logging was actually enabled.
   if [[ "${P4U_LOG}" != off ]]; then
      stoplog
   fi

   # Report the final status on its own line, then leave with that status.
   echo -e "\n$THISSCRIPT: PEXITCODE: $OverallReturnStatus"
   exit $OverallReturnStatus
}
#==============================================================================
# Command Line Processing
# Count of option values still to be discarded after the flag itself has
# been shifted off the argument list.
declare -i shiftArgs=0

# Option values are taken from $2, which may be absent, so unset-variable
# checking is suspended for the duration of the parse.
set +u
while (( $# > 0 )); do
   case $1 in
      (-h)   usage -h;;
      (-man) usage -man;;
      (-s)   Mode="Scheduled";;
      (-u)   Mode="Unscheduled";;
      (-i)   Instance=$2; shiftArgs=1;;
      (-M)   export P4F_MASTER_HOST=$2; shiftArgs=1;;
      (-B)   export P4F_BACKUP_HOST=$2; shiftArgs=1;;
      # -v1 through -v5: the digit after '-v' is the verbosity level.
      (-v[12345]) export VERBOSITY=${1#-v};;
      (-L)   export P4U_LOG=$2; shiftArgs=1;;
      (-si)  SilentMode=1;;
      (-n)   export NO_OP=1;;
      (-D)   set -x;; # Debug; use 'set -x' mode.
      (*)    usageError "Unknown arg ($1).";;
   esac

   # Drop the flag, then drop any value it consumed. Running out of
   # arguments while a value is still owed means the value was missing.
   shift
   while (( shiftArgs > 0 )); do
      if (( $# == 0 )); then
         usageError "Bad usage."
      fi
      shiftArgs=$(( shiftArgs - 1 ))
      shift
   done
done

# Silent mode sends everything to the log, so a log must be configured.
if [[ $SilentMode -eq 1 && $P4U_LOG == off ]]; then
   usageError "Cannot use '-si' with '-L off'."
fi
set -u
#==============================================================================
# Main Program
# Declare the overall status before installing the trap, so that terminate()
# can never run with OverallReturnStatus unset while 'set -u' is active.
declare -i OverallReturnStatus=0
trap terminate EXIT SIGINT SIGTERM

# Self-logging: unless logging is 'off', route this script's own output into
# the log file. The log path is quoted throughout so that a path containing
# whitespace is not word-split or rejected as an ambiguous redirect.
if [[ "${P4U_LOG}" != off ]]; then
   touch "${P4U_LOG}" || bail "Couldn't touch log file [${P4U_LOG}]."
   # Redirect stdout and stderr to a log file.
   if [[ $SilentMode -eq 0 ]]; then
      # Normal mode: stdout is fed through tee, which writes the log and
      # also passes the output on to the original stdout.
      exec > >(tee "${P4U_LOG}")
      exec 2>&1
   else
      # Silent mode: output goes to the log only.
      exec >"${P4U_LOG}"
      exec 2>&1
   fi
   initlog
fi
#------------------------------------------------------------------------------
# Extra command line checking.
# The instance is mandatory and must be a single digit from 1 through 9.
if [[ -z "$Instance" ]]; then
   bail "Instance number not set. The '-i <#>' argument is required."
else
   # Anything other than exactly one of the digits 1-9 is rejected: record
   # the failure and report the offending value before aborting.
   if [[ $Instance != [123456789] ]]; then
      OverallReturnStatus=1
      errmsg "Invalid instance [$Instance] specified."
   fi
   if [[ $OverallReturnStatus -ne 0 ]]; then
      bail "Aborting: Invalid instance specified."
   fi
fi
#------------------------------------------------------------------------------
# Load SDP Environment. Note that core SDP scripts do not honor 'set -u'
# mode, so we temporarily disable variable declaration checking with 'set +u'
# when loading the SDP variables.
# Announce the load, then pull in the SDP settings and helper libraries for
# the validated instance. The statements below are order-sensitive.
msg "Loading SDP environment for instance ${Instance}."
# Unset-variable checking is suspended only for the span of these loads.
set +u
# p4_vars is sourced with the instance passed as its argument.
. /p4/common/bin/p4_vars ${Instance}
. /p4/common/bin/backup_functions.sh
. /p4/common/bin/p4failover.cfg
# check_vars and set_vars are not defined in this file; presumably they are
# supplied by one of the files sourced just before -- confirm.
check_vars
set_vars
# NOTE(review): state_engine_functions.sh is loaded only after set_vars has
# run; presumably it depends on values set there -- confirm before reordering.
. /p4/common/bin/state_engine_functions.sh
# Restore strict unset-variable checking for the rest of the script.
set -u
# Core local failover sequence: stop the server, swap the offline databases
# into P4ROOT, replay the live journal into them, then restart the server and
# confirm it reports a status. Any failed step that would leave the databases
# in an unusable state aborts via bail.
msg "Executing LOCAL Failover."
runCmd "${P4INIT} stop" \
   "Stopping Perforce instance ${P4INSTANCE} (it may already be down)."

# Report the recorded SDP state. With no state message to show, warn if the
# offline databases are not flagged OK.
# NOTE(review): SDP_STATE_MSG is presumably set by get_sdp_state -- confirm.
get_sdp_state
if [[ -z "${SDP_STATE_MSG}" ]]; then
   if [[ $(get_offline_db_status) != OK ]]; then
      warnmsg "SDP state indicates that offline databases may not be reliable."
   fi
else
   echo "${SDP_STATE_MSG}"
fi

# Set state to indicate that primary databases are not reliable, as that
# is the assumption when initiating local failover.
runCmd "$CBIN/set_sdp_state.sh RESET:NR:NR:NR -i ${Instance}"

# Sanity/safety check: Don't do anything if the offline databases aren't there!
dbcount_offline=$(ls ${OFFLINE_DB}/db.*|wc -l)
msg "Offline db count sanity check ${dbcount_offline} / ${P4F_EXPECTED_DBCOUNT}."
[[ ${dbcount_offline} -lt $P4F_EXPECTED_DBCOUNT ]] && \
   bail "Sanity check failed - offline databases are missing!"

# Offline DBs passed the sanity check.
runCmd "$CBIN/set_sdp_state.sh RESET:NR:OK:NR -i ${Instance}"

# Clearing the 'save' folder is best-effort; a failure here is ignored.
runCmd "/bin/rm -f ${SAVEDIR}/db.*" \
   "Removing extra databases." ||:

# From here on each step must succeed, or the failover is aborted.
runCmd "/bin/mv -f ${P4ROOT}/db.* ${SAVEDIR}/." \
   "Moving live databases aside to 'save' folder." || \
   bail "Failed to move live DBs aside."
runCmd "/bin/mv -f ${OFFLINE_DB}/db.* ${P4ROOT}/." \
   "Moving offline databases into P4ROOT folder." || \
   bail "Failed to move offline DBs to P4ROOT."
runCmd "$P4D -r ${P4ROOT} -f -jr ${P4JOURNAL}" \
   "Replaying live journal into offline databases." || \
   bail "Failed to replay live journal to offline DBs."

# The live DBs should be good. Offline are not yet ready.
runCmd "$CBIN/set_sdp_state.sh INITIAL:OK:NR:NR -i ${Instance}"

# All went well to this point. Get Perforce back online!
msg "Starting Perforce instance ${P4INSTANCE}."
# NO_OP is exported by this script only when '-n' is given. Default it to 0
# here so that an unset value cannot trip 'set -u' and abort the script at
# the very point where the server has to be brought back up.
if [[ ${NO_OP:-0} -eq 0 ]]; then
   vmsg "Executing: ${P4INIT} start"
   ${P4INIT} start
else
   msg "NO-OP: Would execute: ${P4INIT} start"
fi

# Pause briefly before querying the status.
sleep 1
msg "Checking perforce server status."
CMDOUTPUT=$(${P4INIT} status 2>&1)
# Quoted so the status text is shown verbatim: no word splitting, and no
# pathname expansion of any glob characters it may contain.
echo "$CMDOUTPUT"
# The status output must mention "Server"; otherwise give up.
[[ ${CMDOUTPUT} != *"Server"* ]] && bail "Aborting."
# Now that Perforce is up, repopulate local offline databases from a checkpoint.
# Rebuild the offline databases, locally and on the backup host, from the
# most recent checkpoint, then report the overall result and elapsed time.
ckp=$(get_last_checkpoint)
if [[ -n "$ckp" ]]; then
   runCmd "$P4D -r ${OFFLINE_DB} -z -jr $ckp" \
      "Rebuilding offline DBs from last known-good checkpoint on $(hostname -s)."
   [[ $CMDEXITCODE -ne 0 ]] && bail "Aborting."

   # Now the offline databases are good. They won't have the metadata from the
   # current/active journal file, but they're as up-to-date as offline databases
   # are expected to be.
   runCmd "$CBIN/set_sdp_state.sh READY:OK:OK:NR -i ${Instance}"

   # NOTE(review): the 'db.*' patterns in the ssh commands below are not
   # protected from the local shell. Depending on how runCmd executes its
   # argument, they may be expanded against local files before being sent to
   # the backup host -- confirm against runCmd's implementation.

   # Clearing the remote 'save' folder is best-effort.
   runCmd "ssh ${P4F_BACKUP_HOST} /bin/rm -f ${SAVEDIR}/db.*" \
      "Removing extra DBs on ${P4F_BACKUP_HOST}." ||:

   # A failed remote move is recorded as an error but does not stop the run.
   runCmd "ssh ${P4F_BACKUP_HOST} /bin/mv -f ${OFFLINE_DB}/db.* ${SAVEDIR}/." \
      "Moving offline DBs to 'save' on ${P4F_BACKUP_HOST}." || \
      OverallReturnStatus=1

   runCmd "ssh ${P4F_BACKUP_HOST} $P4D -r ${OFFLINE_DB} -z -jr ${ckp}" \
      "Rebuilding offline DBs from last checkpoint on ${P4F_BACKUP_HOST}." || \
      bail "Failed to rebuild remote offline DBs."
else
   # NOTE(review): this branch reports an error but leaves
   # OverallReturnStatus untouched, so the run can still be summarized as
   # successful below -- confirm that is intended.
   errmsg "Couldn't determine latest known-good checkpoint. Take a live checkpoint when possible to populate Offline dbs."
fi

# Summarize the outcome based on the accumulated status.
if [[ $OverallReturnStatus -eq 0 ]]; then
   msg "${H}\nLOCAL Failover completed successfully on $THIS_HOST."
else
   msg "${H}\nProcessing completed, but with errors. Scan above output carefully."
fi

# SECONDS is bash's built-in count of seconds since the shell started.
msg "LOCAL Failover on $(hostname -s) took about $((SECONDS / 3600)) hours $((SECONDS % 3600 / 60)) minutes $((SECONDS % 60)) seconds."

# See the terminate() function, which is really where this script exits.
exit $OverallReturnStatus