#!/bin/bash
#==============================================================================
# Copyright and license info is available in the LICENSE file included with
# the Server Deployment Package (SDP), and also available online:
# https://swarm.workshop.perforce.com/projects/perforce_software-hms/view/main/LICENSE
#------------------------------------------------------------------------------

#==============================================================================
# Declarations and Environment
#
# This section:
#   1. Verifies the running bash is version 4 or later.
#   2. Sources the SDP environment file (SDP_ENV, or the default p4_vars path)
#      with 'hms' as its argument.
#   3. Re-executes itself as $OSUSER when started by root, and refuses to run
#      as any other user.
#   4. Exports the P4U_* and HMS_* settings and sources the bash libraries.

set -u

if [[ ${BASH_VERSINFO[0]} -lt 4 ]]; then
   echo -e "\n\nERROR: $0 requires bash version 4.x+; current bash version is $BASH_VERSION."
   exit 1
fi

EnvFile=${SDP_ENV:-/p4/common/bin/p4_vars}

if [[ -r "$EnvFile" ]]; then
   source "$EnvFile" hms
else
   echo -e "\n\nERROR: The environment file $EnvFile is missing.\n\n"
   exit 1
fi

if [[ $(id -u) -eq 0 ]]; then
   # Re-quote the script path and every argument with %q so that arguments
   # containing whitespace or shell metacharacters reach the re-executed
   # script intact, instead of being re-split by the shell that su starts.
   exec su - "$OSUSER" -c "$(printf '%q ' "$0" "$@")"
elif [[ $(id -u -n) != "$OSUSER" ]]; then
   # The right-hand side is quoted so it is compared literally rather than
   # being interpreted as a glob pattern.
   echo "$0 can only be run by root or $OSUSER"
   exit 1
fi

# Allow override of P4U_HOME, which is set only when testing P4U utility scripts.
export P4U_HOME=${P4U_HOME:-$P4CBIN}
export P4U_LIB=${P4U_LIB:-$P4CLIB}
export P4U_ENV=$P4U_LIB/p4u_env.sh
export P4U_LOG=Unset
export HMS_HOME=${HMS_HOME:-/p4/common/hms}
export HMS_SCRIPTS=$HMS_HOME/scripts

# Indicate whether we are called by the broker wrapper.
export HMS_CALLED_BY_WRAPPER=${HMS_CALLED_BY_WRAPPER:-0}

# Load bash libs. BASH_LIBS is kept as a space-delimited string (rather than
# an array) so that its form is unchanged for any sourced code that reads it.
declare BASH_LIBS=$P4U_ENV
BASH_LIBS+=" $P4U_LIB/libcore.sh"
BASH_LIBS+=" $P4U_LIB/libp4u.sh"
BASH_LIBS+=" $P4U_LIB/hms_actions.sh"
BASH_LIBS+=" $P4U_LIB/hms_load_and_verify.sh"

# The unquoted expansion in the 'for' list is intentional: it splits the
# space-delimited list into individual library paths.
for bash_lib in $BASH_LIBS; do
   source "$bash_lib"
done

# After sourcing bash libs, set default VERBOSITY for this program, which can
# be overridden on the command line.
export VERBOSITY=3

# SDP Instance data, with associative arrays (i.e. arrays that can be indexed
# by a string rather than a purely numeric name). These Instance* vars are
# indexed by the SDP instance name.
declare -A InstanceUserPorts declare -A InstanceMasterHost declare -A InstanceServerPort declare -A InstanceBrokerPort declare -A InstanceManaged declare -A InstanceDesc declare -A InstanceComponents declare -A InstanceFailoverOptions # Helix Topology Components data, indexed by the fully qualified component name # of the form "SDPInstance:ComponentName", e.g. "1:p4d-r01". declare -A ComponentType declare -A ComponentMasterHost declare -A ComponentURL declare -A ComponentBackupHost declare -A ComponentManaged declare -A ComponentDesc declare -A ComponentStatusCode declare -A ComponentStatusMsg declare -A ComponentMajorVersion declare -A ComponentMinorVersion declare -A ComponentVersion # Failover Options, indexed by the fully qualified failover option name of the # form "SDPInstance:FailoverOptionName", e.g. "1:dr". declare -A FailoverType declare -A FailoverMasterHost declare -A FailoverBackupHost declare -A FailoverInstanceList declare -A FailoverActive declare -A FailoverDesc declare -i SilentMode=0 # Increment the Version manually on each submit. (We avoid +k filetype of # Perforce due to various complications moving across p4d instances). declare Version=1.0.36 # The RequiredCfgVersion check is done with a lexiographic compare vs. # the value defined in the Helix Topology file. Helix Topology files define # their file format version with 3 digits, e.g. 3.0.0 or 3.0.1. The # RequiredCfgVersion here must be defined with two digits, e.g. "3.0" if #"3.0.0" is good enough, since 3.0.0 is lexiographically greater than 3.0. declare RequiredCfgVersion=1.0 #============================================================================== # Local Functions #------------------------------------------------------------------------------ # Function: terminate function terminate { # Disable signal trapping. trap - EXIT SIGINT SIGTERM # Don't litter. cleanTrash vvmsg "$THISSCRIPT: EXITCODE: $OverallReturnStatus" # Stop logging. 
[[ "${P4U_LOG}" == off ]] || stoplog # With the trap removed, exit. exit $OverallReturnStatus } #------------------------------------------------------------------------------ # Function: usage (required function) # # Input: # $1 - style, either -h (for short form) or -man (for man-page like format). # The default is -h. # # $2 - error message (optional). Specify this if usage() is called due to # user error, in which case the given message displayed first, followed by the # standard usage message (short or long depending on $1). If displaying an # errror, usually $1 should be -h so that the longer usage message doesn't # obsure the error message. # # Sample Usage: # usage # usage -h # usage -man # usage -h "Incorrect command line usage." #------------------------------------------------------------------------------ function usage { declare style=${1:--h} declare errorMessage=${2:-Unset} if [[ $errorMessage != Unset ]]; then echo -e "\n\nUsage Error:\n\n$errorMessage\n\n" fi echo -e "USAGE for $THISSCRIPT v$Version: $THISSCRIPT show $THISSCRIPT status {all|[:]} [-o] $THISSCRIPT {start|stop} [:] $THISSCRIPT dfm {on|off|status} $THISSCRIPT {update|upgrade} [:] *** NOT YET IMPLEMENTED *** $THISSCRIPT failover {i|h}: {u|s} $THISSCRIPT [-h|-man|-V] " if [[ $style == -man ]]; then echo -e " DESCRIPTION: This is the Perforce Helix Management System, $THISSCRIPT v$Version. This script is self-logging. That is, all output displayed on the screen (stdout and stderr) is simultaneously captured in a log file. You do not need to run this script with redirection operators like '> log' or '2>&1', and do not need to use 'tee.' The default log file is: $LOGS/hms...log HMS CONFIG FILES: The HMS config file defines a Helix global topology. HMS COMMANDS: status {all|[:]} [-o] Get status of components defined in the Helix Topology file. Specify all for a site-wide global status, an instance name for all components related to a given instance or a specific component within an instance. 
start|stop [:] Start or stop a specific component in the topology, or start/stop all components associated with a given instance. dfm {on|off|status} The 'dfm on' command puts all brokers for the specified SDP instance into \"Down For Maintenance\" mode. This involves stopping the broker running with the default config (p4_N.broker.cfg) and starting the broker using the DFM config file, p4_N.broker.dfm.cfg. The dfm broker config file is expected to be configured to reject all user commands and display a friendly message for users telling them that the Helix Server is offline for maintenance. The 'dfm off' command brings all brokers for the specified SDP instance (or all instances) back online with the default config, after shutting down brokers running with the dfm config. The 'dfm status' command checks the status of brokers running with the dfm config. update [:] [-n] Update to the latest versions of software available for the current major release for the given component. An update will not upgrade to a new major version. Specify 'all' to perform a site-wide update of all defined Helix topology components. Updates are done one instance at a time. Update processing aborts in event of a failure to upgrade any component. Specify an instance name to update all components associated with a given Helix instance. Update processing aborts in event of a failure to update any component. The update processing varies depending on the component to be updated. It essentially consists of: 1. Stop the service. 2. Replace the executable (e.g .p4d, p4broker, Swarm files) to the latest patched executable. 3. Update SDP symlinks as needed. 4. Restart the service. For p4d, a database upgrade is not done, and no checkpoint processing occurs. For any given instance, updates occur in the proper order, with the master/commit server being updated last. In case of daisy chain of replicas, those farthest removed from the commit server are updated first. 
Use with the '-n' flag to see what versions would be updated. upgrade [:] [-n] Upgrade to the latest versions of software available for the latest GA release, upgrading to new major versions if needed. Specify 'all' to perform a site-wide upgrade of all defined Helix topology components. upgrades are done one instance at a time. Upgrade processing aborts in event of a failure to upgrade or update any component. Specify an instance name to upgrade all components associated with a given Helix instance. Upgrade processing aborts in event of a failure to upgrade any component. The upgrade processing varies depending on the component to be upgraded. It essentially consists of: 1. Stop the service. 2. Replace the executable (e.g .p4d, p4broker, Swarm files) to the latest patched executable. 3. Restart the service. For p4d, an offline checkpoint is initiated at the start of processing. For any given instance, upgrades occur in the proper order, with the master/commit server being upgraded last. In case of a daisy chain of replicas, those farthest removed from the commit server are upgraded first. Use with the '-n' flag to see what versions would be updated or upgraded. failover {h|i}: {u|s} [-n] Execute a failover using a pre-defined plan name defined in the Helix Topology file. Multiple failover plans can be defined for a given SDP instance, defining (for example) a "local" failover plan (using local offline databases on the same host), an HA plan, and one or more DR plans. The must be prefixed with i: or h:, as in i: or h:. If an SDP instance is specified, that instance fails over to the designated backup host for the plan selected. If a machine is specified, all instances currently mastered on that machine failover. In event of hardware failure of a host, the host option should be considered. When a machine is specified for the scope, the plan should be valid for all instances on that machine. 
Any instances for which the plan is not valid will be skipped in the failover processing (but will not prevent it from handling other instances). The last paramter {u|s} indicates whether the failover is unscheduled ('u') or scheduled ('s'). This impacts how the failover process occurs. In a scheduled failover, all systems are expected to be operating normally at the start of the failover process, and the failover only proceeds if all systems are operating normally at the start of the process. Scheduled failover can be used to take a perfectly working machine offline for a time, e.g. to add memory. An 'unscheduled' failover is executed in reaction to a problem of some kind, such as hardware failure. An unscheduled failover should be used when it is known that master server is offline or otherwise not usable. If it can be reached, the master server is shutdown as part of an unscheduled failover. The 'failover' command can be abbreviated as 'fo'. HOST ALIAS UPDATE: External to processing of this script, the host alias used by users to taget the master server must be updated. This may involve a DNS change, virtual IP change, anycast update, etc. This must be done before the failover is complete and service can be restored for users. HMS GENERAL COMMAND OPTIONS (valid for all commands): -c Specify the path to your hand-crafted Helix Topology configuration file. See documentation in the default config file regarding the required format for topology configuration. The default file is $P4CCFG/HelixTopology.cfg. Specify something like '-c /full/path/HelixTopology.cfg' or '-c HelixTopology.test.cfg'. With the latter example, the file is presumed to be in the $P4CCFG directory. The toplogy configuration file defines SDP Instances, Helix Topology Components, and Failover Options. This option is intended for development of changes to Helix Topology configuration files. 
HMS COMMAND OPTIONS (valid for certain commands as indicated): -o Specify '-o' to be optimistic regarding status of components for which status logic is not yet implememented. By default, components that don't yet have status logic coded report as failed. With '-o', components of known types report as OK. Components of unknown type always report as failed. This can be helpful in verifying whether the Helix Topology config file contains only valid/known component types. Components of unknown types should be commented out or marked as unmanaged. -n No-Op preview mode. This shows operations that would be performed, but takes no actions that affect the live system or data. This is valid only for command for which it is indicated. GENERAL OPTIONS: -v Set verbosity 1-5 (-v1 = quiet, -v5 = highest). -L Specify the path to a log file, or the special value 'off' to disable logging. By default, all output (stdout and stderr) is captured in a log file named: $LOGS/hms...log -C Check the HMS config file and then stop. This can be used to verify the syntax of the HMS config file without starting an actual failover. -si Operate silently. All output (stdout and stderr) is redirected to the log only; no output appears on the terminal. This cannot be used with '-L off'. -D Set extreme debugging verbosity. -S Step mode - if running interactively prompt user to enter return before progressing to the next step. The '-S' option is not supported when called via p4broker (i.e. called as 'p4 hms' rather than calling the $THISSCRIPT script directly). THIS FLAG IS NOT IMPLEMENTED AND WILL BE DEFERRED TO hms 2.0 or beyond. HELP OPTIONS: -h Display short help message -man Display man-style help message -V Dispay version info for this script and its libraries. DEPENDENCIES: The bash shell version must be 4.0 or higher. This precludes operation on Mac OSX, at least up to and including OSX 10.14.5 (Mojave), which ships with bash 3.x. 
This script operates in an SDP environment, and the 'hms' instance (/p4/hms) must be configured. EXAMPLES: Get a status in the master p4d for Instance 1: $THISSCRIPT status 1:master Get a status in the second replica p4d for Instance 2: $THISSCRIPT status 2:p4d-r02 Get a status for all known components, optimisitically assuming those we haven't coded status logic for yet are configured are happy: $THISSCRIPT status all -o Show whether the second broker for Instance 2 can be updated to a newer patch version of p4broker, within the current major version: $THISSCRIPT update 1:p4b02 -n Upgrade the second broker for Instance 2 to the latest patch version of p4broker, within the current major version: $THISSCRIPT update 1:p4b02 Show what components in Instance 1 would be affected by site-wide topology upgrade: $THISSCRIPT upgrade 1 -n Upgrade all components in Instance 1 in the correct order. $THISSCRIPT upgrade 1 -n Perform an unscheduled failover for all instances mastered on host bos-helix-01: $THISSCRIPT failover ha h:bos-helix-01 u Perform a scheduled failover for all instances mastered on host syd-helix-04: $THISSCRIPT failover ha h:syd-helix-04 s Perform an unscheduled local failover for Instance 1 on its currently configured master host: $THISSCRIPT failover local i:1 u " fi exit 1 } #============================================================================== # Command Line Processing declare HelixTopologyCfg=$P4CCFG/HelixTopology.cfg declare GlobalOptions= declare Command= declare SubCommand= declare Target= declare FailoverPath= declare FailoverScope= declare FailoverStyle= declare -i PreflightCheck=0 declare -i RunTestSuite=0 declare -i OptimisticStatus=0 declare -i Interactive=1 declare -i StepMode=0 declare -i OverallReturnStatus=0 declare -i shiftArgs=0 set +u while [[ $# -gt 0 ]]; do case $1 in (show) Command=show;; (pull) Command=pull Target=$2 shiftArgs=1 ;; (df) Command=df Target=$2 shiftArgs=1 ;; (dfm) Command=dfm usageMsg="Invalid Usage. 
Usage for 'hms $Command' command is:\n\nhms $Command {on|off|status} " [[ $# -lt 3 ]] && bail "$usageMsg" SubCommand=$2 [[ $SubCommand =~ ^(on|off|status)$ ]] || bail "$usageMsg" Target=$3 shiftArgs=2 ;; (start|stop|status) Command=$1 usageMsg="Invalid Usage. Usage for 'hms $Command' command is:\n\nhms $Command [:]" [[ $# -lt 2 ]] && bail "$usageMsg" Target=$2 shiftArgs=1 ;; (failover|fo) Command=failover usageMsg="Invalid Usage. Usage for 'hms failover' command is:\n\nhms failover {i|h}: