failover_p4d_to_this_host.sh #1

  • //
  • guest/
  • perforce_software/
  • hms/
  • dev/
  • p4/
  • common/
  • site/
  • hms/
  • scripts/
  • failover_p4d_to_this_host.sh
  • View
  • Commits
  • Open Download .zip Download (5 KB)
#!/bin/bash
Version=1.1.2

# Usage: failover_p4d_to_this_host.sh SDP_INSTANCE Style Type NewServerID NO_OP
# This is intended to be called by 'hms', not directly by a human.
#
# Arguments (all five are required):
#   SDP_INSTANCE  SDP instance identifier; passed to /p4/common/bin/p4_vars and
#                 used to locate /p4/<instance>/bin/p4d_<instance>_init.
#   Style         Failover style. The value "Scheduled" requires a journal file
#                 from the old master ($P4JOURNAL.from_old_master), which is
#                 replayed on non-standby replicas.
#   Type          Failover type. Values ending in "Full" also run
#                 'p4verify.sh <instance> -recent' after p4d is started.
#   NewServerID   ServerID assigned to this server with 'p4d -xD'.
#   NO_OP         0 executes commands; 1 enables preview mode, in which
#                 commands are displayed rather than run.
#
# Exit codes:
#   0  Failover completed.
#   1  Fatal error reported by bail (default bail exit code).
#   2  p4d was started, but verification of recent archive files failed.

# Micro-functions.
# Print all arguments joined into a single line, interpreting backslash
# escapes such as \n and \t (echo -e semantics).
function msg () {
   local text="$*"
   echo -e "$text"
}
# Display an error message and terminate the script.
#   $1 - error text (default: "Unknown Error")
#   $2 - exit code  (default: 1)
function bail () {
   local errText=${1:-Unknown Error}
   local exitCode=${2:-1}
   msg "\nError: ${errText}\n"
   exit $exitCode
}
# Display the usage summary held in $UsageSummary and exit with status 2.
function usage () {
   msg "Usage:\n\t$UsageSummary\n"
   exit 2
}
# Run a command, or only display it when in preview mode.
#   $@ - the command and its arguments.
# Globals: NO_OP (read) - 0 means execute; any other value means preview only.
# Returns: the command's exit status when executed; 0 in preview mode.
function cmd () {
   if [[ $NO_OP -eq 0 ]]; then
      msg "Running: $*"
      # "$@" hands each argument to the command exactly as the caller passed
      # it; an unquoted $* would split arguments on whitespace and expand
      # glob characters a second time.
      "$@"
      return $?
   else
      msg "NO_OP: Would run: $*"
      return 0
   fi
}

# Do nothing unless usage is correct.
# Positional parameters are captured first so that UsageSummary can be shown
# if the argument count is wrong.
export SDP_INSTANCE=${1:-Unset}
declare Style=${2:-Unset}
declare Type=${3:-Unset}
declare NewServerID=${4:-Unset}
declare NO_OP=${5:-Unset}
declare ThisScript=${0##*/}
declare UsageSummary="$ThisScript SDP_INSTANCE Style Type NewServerID NO_OP"
declare P4DInitScript=/p4/$SDP_INSTANCE/bin/p4d_${SDP_INSTANCE}_init
declare VerifyCmd=
declare TmpLog=
declare JournalCopyCounter=
declare JournalCopyJournal=
declare -i JournalCopyReplica=
declare -i ExitCode=0

[[ $# -eq 5 ]] || bail "Bad Usage!  Usage:\n\t$UsageSummary\n"

# Create the scratch log only after the usage check so a bad invocation leaves
# nothing behind.  The assignment is separate from 'declare' so that a mktemp
# failure is detected rather than masked by declare's own exit status.
TmpLog=$(mktemp) || bail "Failed to create a temporary file with mktemp."

# Remove the scratch log on every exit path, including exits made by bail.
trap '/bin/rm -f "$TmpLog"' EXIT

msg "Starting $ThisScript v$Version on ${HOSTNAME%%.*} at $(date)."
msg "Args: I=$SDP_INSTANCE S=$Style T=$Type NS=$NewServerID N=$NO_OP"

# Pull in the SDP-managed shell environment for this instance.  The variables
# used below (OSUSER, P4ROOT, P4JOURNAL, SERVERID, ...) are not set in this
# script; presumably p4_vars provides them.
if ! source /p4/common/bin/p4_vars "$SDP_INSTANCE"; then
   bail "Failed to load SDP environment for instance $SDP_INSTANCE."
fi

# Refuse to run as any account other than the configured OS user.
if [[ $(id -u -n) != $OSUSER ]]; then
   bail "$0 can only be run by $OSUSER"
fi

if [[ $NO_OP -eq 1 ]]; then
   msg "NO_OP: Preview Mode Enabled."
fi

# The init script is needed to stop and start p4d.
if [[ ! -x $P4DInitScript ]]; then
   bail "$P4DInitScript is not executable."
fi

if [[ ! -r $P4ROOT/server.id ]]; then
   bail "Missing file $P4ROOT/server.id. Aborting."
fi

# A Scheduled failover needs the journal handed over by the old master.  The
# check is skipped in preview mode.
if [[ $Style == Scheduled && ! -r $P4JOURNAL.from_old_master && $NO_OP -eq 0 ]]; then
   bail "The journal file from the master [$P4JOURNAL.from_old_master] is expected with Scheduled Failover but is missing. Aborting."
fi

# A ServerID containing "master" (in any letter case) marks this server as
# already being a master, so there is nothing to fail over to.
if [[ "${SERVERID^^}" == *"MASTER"* ]]; then
   bail "Instance $SDP_INSTANCE ServerID ($SERVERID) indicates it is already a master server! Aborting Failover."
fi

# Inspect the serverServices field to decide whether this server is a
# standby/journalcopy replica: a value ending in "standby" marks it as one.
# Standby replicas are assumed to operate with rpl.journalcopy.location=1,
# meaning the journal file appears as $LOGS/journal.N, where N is the current
# journal counter.
case "$($P4BIN -ztag -F %serverServices% info -s)" in
   *"standby") JournalCopyReplica=1 ;;
   *) JournalCopyReplica=0 ;;
esac

if (( JournalCopyReplica == 1 )); then
   # Capture everything the end-journal call prints so it can be shown on
   # failure and parsed for the journal counter on success.
   if ! cmd $P4BIN -s admin end-journal > $TmpLog 2>&1; then
      bail "Failed to execute 'p4 admin end-journal' command on standby replica. Got this:\n$(cat $TmpLog)\nAborting."
   fi

   # Keep only the text that follows the last " journal " in the matching
   # log lines; that text is used as the journal counter.
   # NOTE(review): cmd's own "Running:"/"NO_OP:" line is also written to the
   # log and matches the grep, so in preview mode the value parsed here is
   # not a real counter - confirm this is acceptable.
   JournalCopyCounter=$(grep journal $TmpLog)
   JournalCopyCounter=${JournalCopyCounter##* journal }
   JournalCopyJournal="$LOGS/journal.$JournalCopyCounter"
   /bin/rm -f "$TmpLog"
fi

# Stop the p4d service using its init script.  The result is not checked, so
# the failover proceeds regardless of the init script's exit status.
cmd $P4DInitScript stop

if [[ "$JournalCopyReplica" -eq 1 ]]; then
   # On a journalcopy replica the live P4JOURNAL is expected to be an empty
   # file; a non-empty one is treated as a reason to abort.
   if [[ -e "$P4JOURNAL" && ! -s "$P4JOURNAL" ]]; then
      msg "Verified: P4JOURNAL file exists and is zero-length, as expected for a journalcopy replica."
   elif [[ -s "$P4JOURNAL" ]]; then
      bail "P4JOURNAL file has a non-zero size, which is unexpected for a journalcopy replica. Aborting."
   else
      msg "Warning: P4JOURNAL does not exist."
   fi

   msg "Copying journalcopy journal to P4JOURNAL location."
   # Abort if the copy fails, consistent with the other file operations in
   # this script; otherwise the failover would continue without the journal.
   cmd cp -p "$JournalCopyJournal" "$P4JOURNAL" ||\
      bail "Failed to copy $JournalCopyJournal to $P4JOURNAL."
fi

# Scheduled failover of a non-standby replica: swap in the journal handed over
# by the old master and replay it.
if [[ "$Style" == "Scheduled" && $JournalCopyReplica -eq 0 ]]; then
   # Keep the replica's own journal under a timestamped name.
   cmd /bin/mv -f "$P4JOURNAL" "$P4JOURNAL.moved_by_hms.$(date +'%Y%m%d-%H%M%S')" ||\
      bail "Failed to move $P4JOURNAL aside on ${HOSTNAME%%.*}."
   cmd /bin/mv -f "$P4JOURNAL.from_old_master" "$P4JOURNAL" ||\
      bail "Failed to move $P4JOURNAL.from_old_master to $P4JOURNAL."

   msg "Replaying journal from the master just in case this replica wasn't fully caught up."
   cmd $P4DBIN -r "$P4ROOT" -f -jr "$P4JOURNAL" || bail "Failed to replay journal from old master."
fi

# Set the existing server.id file aside under a timestamped name.
if ! cmd mv $P4ROOT/server.id $P4ROOT/server.id.moved_by_hms.$(date +'%Y%m%d-%H%M%S'); then
   bail "Failed to move $P4ROOT/server.id file aside."
fi

# Have p4d assign the new ServerID.
if ! cmd $P4DBIN -xD $NewServerID; then
   bail "Failed to set new ServerID to $NewServerID."
fi

# Read the ID back from disk to confirm it took effect.  In preview mode
# nothing was changed, so a mismatch there is reported as a simulated success.
SERVERID=$(cat ${P4ROOT}/server.id)
export SERVERID
if [[ "$SERVERID" == "$NewServerID" ]]; then
   msg "Verified: New ServerID ($SERVERID) is set."
elif [[ $NO_OP -eq 0 ]]; then
   bail "Failed to confirm new ServerID was set. Aborting failover."
else
   msg "Verified: New ServerID ($NewServerID) is set. (SIMULATED CHECK)."
fi

###
### Key Decisions:  If some archive files are lost, do we want to prevent a return
### to service?  For now, go live with information.
###

# Bring p4d back up using its init script.
cmd $P4DInitScript start

# Only failover types ending in "Full" (Full and EdgeFull) go on to verify
# recent archive files; every other type is finished once p4d is started.
if [[ "$Type" != *"Full" ]]; then
   msg "\n$Style $Type Failover of p4d to ${HOSTNAME%%.*} completed successfully at $(date)."
else
   # Brief pause before verifying, skipped in preview mode (presumably to give
   # p4d time to come up - confirm).
   if [[ $NO_OP -eq 0 ]]; then
      sleep 2
   fi
   VerifyCmd="$P4CBIN/p4verify.sh $SDP_INSTANCE -recent"

   msg "Verifying recent changes to see if any where not replicated."
   # Output of the verify run is discarded here; its log file is displayed
   # immediately afterwards instead.
   cmd $VerifyCmd < /dev/null > /dev/null 2>&1
   ExitCode=$?
   cat $LOGS/p4verify.log

   if [[ $ExitCode -ne 0 ]]; then
      # The server stays in service; the failure is reported via exit code 2.
      msg "\nError: Databases are OK, but couldn't verify recent archive files survived failover."
      msg "\nMetadata for $Style $Type Failover of p4d to ${HOSTNAME%%.*} completed at $(date)."
      ExitCode=2
   else
      msg "\n$Style $Type Failover of p4d to ${HOSTNAME%%.*}, including verification of recent archive files, completed successfully at $(date)."
   fi
fi

# Report the elapsed run time, taken from bash's SECONDS counter and broken
# into hours, minutes and seconds, then exit with the accumulated status.
msg "That took $((SECONDS / 3600)) hours $((SECONDS % 3600 / 60)) minutes $((SECONDS % 60)) seconds.\n"

exit $ExitCode
# Change User Description Committed
#1 29182 C. Thomas Tyler Moved HMS files from /p4/common/bin -> /p4/common/site/bin.
Moved HMS files from /p4/common/lib -> /p4/common/site/lib.
Removed dependency on SDP libs so that HMS can be deployed
with a wider variety of SDP versions.
//guest/perforce_software/hms/dev/p4/common/hms/scripts/failover_p4d_to_this_host.sh
#1 25533 C. Thomas Tyler Copied updated and new files from SDP into the new HMS "overlay" structure.

A 'p4 copy' was done in all cases, so files in this change match what they did in
the SDP.  Corresponding files in the SDP are to be deleted.  Some files will need
modification to adapt to the new HMS structure, e.g. the 'setup' tree.
//guest/perforce_software/sdp/dev/Server/Unix/p4/common/hms/scripts/failover_p4d_to_this_host.sh
#6 24241 C. Thomas Tyler For HMS failover of a full replica (not metadata-only), fixed issue
with error detection.  Bypassing pre-commit review.

#review-24242
#5 24235 C. Thomas Tyler Enhanced HMS failover for standby/journalcopy replicas to use 'p4 admin end-journal'.

Bypassing pre-commit review to enable further testing.

#review-24236
#4 24224 C. Thomas Tyler Enhanced HMS failover to support failover of edge server.

Bypassing pre-commit review as this has been tested in
the Battle School lab and contains a needed fix.

#review-24225
#3 24132 C. Thomas Tyler Fixed comment error; bypassing review.
#2 21921 C. Thomas Tyler HMS change, to simplify setup on compliant platforms (i.e.
those using bash 4.x).
The shebang line for all bash scripts has been standardized to /bin/bash, and
bash checks for $BASH_VERSION added where needed.  References to having a
custom-built /usr/local/bin/bash have been removed.

As a trade-off, non-compliant platforms (way-old Linux and Mac using bash 3.x) will
have a bit more work to do to work with shebang lines.
#1 20745 C. Thomas Tyler Approving as is since it isn't changing core SDP functionality, and reviewing it all line by line will take some time.
We can do that as we move forward with it.

First addition of HMS v1.0 files.  This change is a soft launch HMS
for initial deployment and testing.

Updates to HMS-related files are expected and will bypass pre-commit
code review until stabilized.