# hms_actions.sh Version=1.0.17
#==============================================================================
# Copyright and license info is available in the LICENSE file included with
# the Server Deployment Package (SDP), and also available online:
# https://swarm.workshop.perforce.com/projects/perforce-software-sdp/view/main/LICENSE
#------------------------------------------------------------------------------

#==============================================================================
# HMS Library Functions.

#------------------------------------------------------------------------------
# Function: failover_instance ($instance, $path, $style)
#
# This function executes failover of all managed components for the instance.
#
# This is called by failover(), which does data verification.  This routine
# can thus dispense with some of the "defensive programming" done in
# failover(), allowing us to assume our data model (implemented primarily
# with associative arrays) has what we need.
#
# Input:
# $1 - SDP instance name.
# $2 - Failover path name (combined with the instance as "instance:path").
# $3 - Failover style, passed through to the per-component routines.
#
# Returns: 0 if every attempted component failover succeeded; 1 if any
# argument is missing, the component list cannot be obtained, or any
# component failover failed.
#------------------------------------------------------------------------------
function failover_instance () {
   vvmsg "CALL: failover_instance ($*)"
   declare instance=${1:-Unset}
   declare path=${2:-Unset}
   declare style=${3:-Unset}
   declare componentList=
   declare componentType=
   declare componentMasterHost=
   declare brokerDfmOutput=
   declare fqComponent=
   declare -i summaryExitCode=0

   [[ $instance == Unset || $path == Unset || $style == Unset ]] && return 1

   msg "Getting list of managed components for instance $instance."
   if ! componentList=$(get_component_list "$instance" 1); then
      errmsg "Could not get a list of valid components for instance $instance. Aborting failover."
      return 1
   fi

   # Put all (available) brokers in Down For Maintenance mode.
   # This is a fire-and-forget operation; we don't wait for confirmation that
   # this worked, as we don't want to hold up failover.
   msg "Setting Down For Maintenance message on all available brokers."
   # $componentList is a whitespace-delimited list; word splitting is intended.
   for fqComponent in $componentList; do
      componentType=${ComponentType[$fqComponent]:-Unset}
      [[ $componentType == p4broker ]] || continue
      componentMasterHost=${ComponentMasterHost[$fqComponent]:-Unset}
      brokerDfmOutput=$LOGS/dfm_broker.${fqComponent/:/_}.ON.log
      vmsg "Running: ssh -q -n -tt -l $OSUSER $componentMasterHost \"$HMS_SCRIPTS/dfm_broker.sh I=$instance on NO_OP=$NO_OP\""
      nohup ssh -q -n -tt -l "$OSUSER" "$componentMasterHost" "$HMS_SCRIPTS/dfm_broker.sh $instance on $NO_OP" > "$brokerDfmOutput" 2>&1 &
   done

   # Fail over edge servers first. Components of other types are skipped
   # (not a reason to stop scanning the list).
   for fqComponent in $componentList; do
      componentType=${ComponentType[$fqComponent]:-Unset}
      [[ $componentType == p4d-e ]] || continue
      failover_p4d_edge "$path" "$fqComponent" "$style" ||\
         summaryExitCode=1
   done

   # Then fail over the master/commit server(s).
   for fqComponent in $componentList; do
      componentType=${ComponentType[$fqComponent]:-Unset}
      [[ $componentType == p4d-mc ]] || continue
      failover_p4d_master "$path" "$fqComponent" "$style" ||\
         summaryExitCode=1
   done

   # Report component types that have no failover logic, or no type at all.
   for fqComponent in $componentList; do
      componentType=${ComponentType[$fqComponent]:-Unset}
      case "$componentType" in
         (p4p|p4dtg|p4web|p4gf|swarm)
            warnmsg "Failover for component type [$componentType] not impelmented."
         ;;
         (Unset)
            errmsg "ComponentType not defined for component $fqComponent."
            summaryExitCode=1
         ;;
      esac
   done

   # Finally, bring all (available) brokers back online.
   # This is a fire-and-forget operation; we don't wait for confirmation that
   # this worked.
   # So long as p4d failover worked, we have at least a qualified success.
   for fqComponent in $componentList; do
      componentType=${ComponentType[$fqComponent]:-Unset}
      [[ $componentType == p4broker ]] || continue
      componentMasterHost=${ComponentMasterHost[$fqComponent]:-Unset}
      brokerDfmOutput=$LOGS/dfm_broker.${fqComponent/:/_}.OFF.log
      vmsg "Running: ssh -q -n -tt -l $OSUSER $componentMasterHost \"$HMS_SCRIPTS/dfm_broker.sh I=$instance off NO_OP=$NO_OP\""
      nohup ssh -q -n -tt -l "$OSUSER" "$componentMasterHost" "$HMS_SCRIPTS/dfm_broker.sh $instance off $NO_OP" > "$brokerDfmOutput" 2>&1 &
   done

   return $summaryExitCode
}

#------------------------------------------------------------------------------
# Function: failover_p4d_edge ($path, $fqComponent, $style)
#
# Placeholder for edge server failover. It validates that all three
# arguments were supplied and derives the instance name from $fqComponent,
# but performs no failover yet.
#
# Returns: 1 if any argument is missing, otherwise 0.
#------------------------------------------------------------------------------
function failover_p4d_edge () {
   vvmsg "CALL: failover_p4d_edge ($*)"
   declare path=${1:-Unset}
   declare fqComponent=${2:-Unset}
   declare style=${3:-Unset}
   declare backupHost=
   declare instance=

   [[ $path == Unset || $fqComponent == Unset || $style == Unset ]] && return 1

   instance=${fqComponent%%:*}

   ### THIS IS A STUB FUNCTION.
   ### WORK IN PROGRESS.
   return 0
}

#------------------------------------------------------------------------------
# Function: failover_p4d_master ($path, $fqComponent, $style)
#
# Fail over a master server component, according to the FailoverType
# configured for "<instance>:$path":
#
#  Local   - run failover_p4d_local.sh on the master host.
#  MO/Full - run failover_p4d_from_this_host.sh on the master host, then
#            failover_p4d_to_this_host.sh on the backup host.
#
# Output of each remote command is captured in a log under $LOGS (named
# after the fully qualified component) and then displayed.
#
# Returns: 0 on success; non-zero if an argument is missing, the failover
# type is unknown, or a remote step failed (for Local, the remote exit code).
#------------------------------------------------------------------------------
function failover_p4d_master () {
   vvmsg "CALL: failover_p4d_master ($*)"
   declare path=${1:-Unset}
   declare fqComponent=${2:-Unset}
   declare style=${3:-Unset}
   declare masterHost=
   declare backupHost=
   declare instance=
   declare failoverType=
   declare failoverOutput=
   declare fqPath=
   declare logTag=

   ### DEMO NOTE: This ServerID of the master, typically 'master'.  Hard coded here for demo,
   ### but this needs to determined from the master server and persisted/configured, since
   ### it won't always be 'master' (e.g. in servers with P4AUTH set, it must be different).
   declare newServerID=master
   declare -i exitCode=0 summaryExitCode=0

   [[ $path == Unset || $fqComponent == Unset || $style == Unset ]] && return 1

   instance=${fqComponent%%:*}
   fqPath=$instance:$path
   # Log file names use the component name with ':' replaced by '_'.
   logTag=${fqComponent/:/_}

   masterHost=${ComponentMasterHost[$fqComponent]:-Unset}
   backupHost=${ComponentBackupHost[$fqComponent]:-Unset}
   failoverType=${FailoverType[$fqPath]:-Unset}

   if [[ $failoverType == Local ]]; then
      failoverOutput=$LOGS/failover.$logTag.Local.out
      vmsg "Running: ssh -q -n -tt -l $OSUSER $masterHost \"$HMS_SCRIPTS/failover_p4d_local.sh I=$instance S=$style NO_OP=$NO_OP\""
      ssh -q -n -tt -l "$OSUSER" "$masterHost" "$HMS_SCRIPTS/failover_p4d_local.sh $instance $style $NO_OP" > "$failoverOutput" 2>&1
      exitCode=$?
      cat "$failoverOutput"
      msg "\nEXIT_CODE: $exitCode"
      summaryExitCode=$exitCode
   elif [[ $failoverType == MO || $failoverType == Full ]]; then
      failoverOutput=$LOGS/failover.$logTag.$failoverType.old_master.log

      # The difference between Scheduled and Unscheduled failover is partly one
      # of expectations.  In Unscheduled Failover, we don't expect the master
      # to respond.  But in either case, we do our best to avoid split brain,
      # and try to shut it down.
      if [[ $style == Scheduled ]]; then
         msg "Shutting down master server for Scheduled Failover.  We expect it to shutdown cleanly."
      else
         msg "Attempting shutdown of master server for Unscheduled Failover.  It may or may not respond."
      fi

      vmsg "Running: ssh -q -n -tt -l $OSUSER $masterHost \"$HMS_SCRIPTS/failover_p4d_from_this_host.sh I=$instance S=$style T=$failoverType BH=$backupHost NO_OP=$NO_OP\""
      ssh -q -n -tt -l "$OSUSER" "$masterHost" "$HMS_SCRIPTS/failover_p4d_from_this_host.sh $instance $style $failoverType $backupHost $NO_OP" > "$failoverOutput" 2>&1
      exitCode=$?
      cat "$failoverOutput"
      msg "\nEXIT_CODE: $exitCode"

      if [[ $exitCode -ne 0 ]]; then
         if [[ $style == Scheduled ]]; then
            errmsg "Shutdown of master server FAILED during Scheduled Failover. Aborting failover of $fqComponent."
            return 1
         else
            warnmsg "Shutdown of master server failed during Unscheduled Failover. Continuing with failover of $fqComponent."
         fi
      fi

      failoverOutput=$LOGS/failover.$logTag.$failoverType.new_master.log
      vmsg "Running: ssh -q -n -tt -l $OSUSER $backupHost \"$HMS_SCRIPTS/failover_p4d_to_this_host.sh I=$instance S=$style T=$failoverType SID=$newServerID NO_OP=$NO_OP\""
      ssh -q -n -tt -l "$OSUSER" "$backupHost" "$HMS_SCRIPTS/failover_p4d_to_this_host.sh $instance $style $failoverType $newServerID $NO_OP" > "$failoverOutput" 2>&1
      exitCode=$?
      cat "$failoverOutput"
      msg "\nEXIT_CODE: $exitCode"

      if [[ $exitCode -ne 0 ]]; then
         errmsg "Failover of $fqComponent to backup host [$backupHost] failed."
         summaryExitCode=1
      fi
   else
      errmsg "Unknown failover type ($failoverType), aborting failover of component $fqComponent."
      return 1
   fi

   return $summaryExitCode
}

#------------------------------------------------------------------------------
# Function: failover ($path, $scope, $style)
#
# Validate a failover request, confirm intent, and run failover_instance()
# for each targeted instance.
#
# Input:
# $1 - Failover path name.
# $2 - Scope: "i:<instance>" for one SDP instance, or "h:<host>" for every
#      managed instance whose master host is <host>.
# $3 - Failover style, passed through to failover_instance().
#
# With host scope, instances with an inactive or misconfigured path are
# skipped (misconfiguration also makes the final status non-zero).  With
# instance scope, any such problem aborts.
#
# If $Interactive is 1 the user is prompted to confirm; otherwise intent is
# taken as confirmed.
#
# Returns: 0 on success; 1 on validation failure, declined confirmation,
# skipped misconfigured instances, or failure of any instance failover.
#------------------------------------------------------------------------------
function failover () {
   vvmsg "CALL: failover ($*)"
   declare path=${1:-Unset}
   declare scope=${2:-Unset}
   declare style=${3:-Unset}
   declare scopeType=
   declare scopeHost=
   declare scopeInstance=
   declare fqPath=
   declare instance=
   declare instanceList=
   declare instanceMasterHost=
   declare host=
   declare pathActive=
   declare failoverType=
   declare masterHost=
   declare backupHost=
   declare reply=Unset
   declare -i found=0
   declare -i status=0

   # Validate instance scope.
   if [[ $scope == "i:"* ]]; then
      scopeType=Instance
      scopeInstance=${scope#i:}
      found=0
      for instance in "${!InstanceManaged[@]}"; do
         if [[ $scopeInstance == "$instance" ]]; then
            found=1
            break
         fi
      done

      if [[ $found -eq 0 ]]; then
         errmsg "Invalid SDP instance specified with scope of $scope."
         return 1
      fi
   # Validate host scope.
   elif [[ $scope == "h:"* ]]; then
      scopeType=Host
      scopeHost=${scope#h:}
      found=0
      # Host names are single words; splitting the array values is intended.
      for host in ${FailoverMasterHost[*]}; do
         if [[ $scopeHost == "$host" ]]; then
            found=1
            break
         fi
      done

      if [[ $found -eq 0 ]]; then
         errmsg "Invalid failover host specified with scope of $scope."
         return 1
      fi
   else
      errmsg "Badly formatted scope value [$scope]."
      return 1
   fi

   if [[ $scopeType == Instance ]]; then
      msg "Verified: Scope instance [$scopeInstance] is valid."
   else
      msg "Verified: Scope host [$scopeHost] is valid."
   fi

   # Verify the specified path is valid and active for each instance.
   if [[ $scopeType == Host ]]; then
      msg "Finding instances mastered on host $scopeHost."
      found=0
      for instance in "${!InstanceManaged[@]}"; do
         fqPath="$instance:$path"
         pathActive=${FailoverActive[$fqPath]:-Unset}
         instanceMasterHost="${InstanceMasterHost[$instance]:-Unset}"

         [[ $scopeHost == "$instanceMasterHost" ]] || continue

         if [[ $pathActive == 1 ]]; then
            msg "Verified: Failover path [$path] is active for instance [$instance]."
         elif [[ $pathActive == 0 ]]; then
            warnmsg "Failover path [$path] is inactive for instance [$instance]. Skipping it."
            continue
         else
            errmsg "No failover path [$path] is configured for instance [$instance]. Skipping it."
            status=1
            continue
         fi

         # Next, confirm we have a configured target failover host (unless
         # FailoverType is 'local').
         failoverType=${FailoverType[$fqPath]:-Unset}
         if [[ $failoverType =~ ^(Full|MO)$ ]]; then
            backupHost=${FailoverBackupHost[$fqPath]:-Unset}
            if [[ $backupHost == Unset ]]; then
               errmsg "No backup host configured for instance:path [$fqPath]. Skipping it."
               status=1
               continue
            else
               msg "Instance $instance will failover from $scopeHost to $backupHost."
            fi
         elif [[ $failoverType == Local ]]; then
            # In host scope, the master host is the scope host.
            msg "Local Failover will use offline databases for instance $instance on host [$scopeHost]."
            backupHost=localhost
         else
            errmsg "Could not determine failover type for [$fqPath]. Skipping it."
            status=1
            continue
         fi

         # Count every targeted instance so the summary below is accurate.
         found=$((found + 1))
         instanceList+=" $instance"
      done
      instanceList=${instanceList# }

      if [[ $found -gt 0 ]]; then
         msg "$found instance(s) masterd on $scopeHost are targeted for failover: $instanceList."
      else
         errmsg "No instances mastered on host $scopeHost. Aborting failover."
         return 1
      fi
   else
      instance=$scopeInstance
      instanceList="$instance"
      fqPath="$instance:$path"
      pathActive=${FailoverActive[$fqPath]:-Unset}

      if [[ $pathActive == 1 ]]; then
         msg "Verified: Failover path [$path] is active for instance [$instance]."
      elif [[ $pathActive == 0 ]]; then
         errmsg "Failover path [$path] is inactive for instance [$instance]. Aborting."
         return 1
      else
         errmsg "No failover path [$path] is configured for instance [$instance]. Aborting."
         return 1
      fi

      failoverType=${FailoverType[$fqPath]:-Unset}
      masterHost=${InstanceMasterHost[$instance]:-Unset}

      if [[ $masterHost == Unset ]]; then
         errmsg "No master host is configured for instance [$instance]. Aborting."
         return 1
      fi

      if [[ $failoverType =~ ^(Full|MO)$ ]]; then
         backupHost=${FailoverBackupHost[$fqPath]:-Unset}
         if [[ $backupHost == Unset ]]; then
            errmsg "No backup host configured for instance:path [$fqPath]. Aborting."
            return 1
         else
            msg "Instance $instance will failover from $masterHost to $backupHost."
         fi
      elif [[ $failoverType == Local ]]; then
         msg "Local Failover will use offline databases for instance $instance on host [$masterHost]."
         backupHost=localhost
      else
         errmsg "Could not determine failover type for [$fqPath]. Skipping it."
         return 1
      fi
   fi

   # Confirm user intent.
   if [[ $Interactive -eq 1 ]]; then
      while [[ $reply == Unset ]]; do
         printf '\nConfirm your intent to initiate failover: [Y/y/N/n]: '
         # If input is exhausted (EOF), treat it as a "no" rather than
         # prompting forever.
         if ! read -e -r reply; then
            reply=N
         fi
         [[ ${reply^^} == Y || ${reply^^} == N ]] && break
         msg "\nInvalid input specified ('$reply'), specify Y or N only.\n"
         reply=Unset
      done
   else
      msg "Failover intent confirmed non-interactively with '-y'."
      reply=Y
   fi

   if [[ ${reply^^} == N ]]; then
      msg "Failover not confirmed.  Failover aborted."
      return 1
   fi

   msg "Starting Failover for SDP instance(s): $instanceList."

   for instance in $instanceList; do
      failover_instance "$instance" "$path" "$style" || status=1
   done

   return $status
}

#------------------------------------------------------------------------------
# Function: get_component_list ($target, $onlyManaged)
#
# Given a target as specified on the command line, return a valid list of
# components, or return 1 if we fail to return a valid list of components.
#
# The target may look like: all|<instance>[:<component>]
#
# If called with a specified component, i.e. something like 1:p4d01, then
# show a warning if that component is not managed.
#
# If called with 'all' or an instance name, get the list of all defined
# components (globally or for the specified instance).  If $onlyManaged
# is 1, only managed components are listed; by default (0) components are
# listed regardless of whether they are managed.
#
# Output: The output to stdout is the valid component list.  Any warnings or
# errors are sent to stderr.  An exit code of 0 indicates valid components
# were returned.
#
# Sample Usage: list=$(get_component_list "abc") || bail "No valid instances!"
#
#------------------------------------------------------------------------------
function get_component_list () {
   vvmsg "CALL: get_component_list ($*)"
   declare target=${1:-Unset}
   declare onlyManaged=${2:-0}
   declare component=
   declare fqComponent=
   declare instanceComponents=
   declare componentList=
   declare componentManaged=

   [[ $target == Unset ]] && return 1

   if [[ $target == "all" ]]; then
      for component in "${!ComponentManaged[@]}"; do
         # Build a list of all defined components, or only managed ones,
         # depending on $onlyManaged.
         if [[ $onlyManaged -eq 1 ]]; then
            componentManaged=${ComponentManaged[$component]:-Invalid}
            if [[ $componentManaged == 1 ]]; then
               componentList+=" $component"
            elif [[ $componentManaged == Invalid ]]; then
               errmsg "Component $component is invalid." >&2
            fi
         else
            componentList+=" $component"
         fi
      done
      # Every entry was appended with a leading space; drop the first one.
      componentList=${componentList# }
   elif [[ $target == *":"* ]]; then
      # If target contains a colon, user specified a single component
      # named as "instance:component".
      componentList=$target
      componentManaged=${ComponentManaged[$target]:-Invalid}
      if [[ $componentManaged != Invalid ]]; then
         if [[ $componentManaged -eq 0 ]]; then
            warnmsg "Component $target is not managed by HMS." >&2
         fi
      else
         errmsg "The specified component $target is invalid." >&2
         return 1
      fi
   else
      # Get all components for a given instance.  In this case, the
      # $target was specified as just an SDP instance name.
      instanceComponents=${InstanceComponents[$target]:-Unset}
      if [[ $instanceComponents == Unset ]]; then
         errmsg "No components configured for instance $target." >&2
         return 1
      fi

      # $instanceComponents is a whitespace-delimited list of short names.
      for component in $instanceComponents; do
         fqComponent=$target:$component
         # Build a list of all defined components, or only managed ones,
         # depending on $onlyManaged.
         if [[ $onlyManaged -eq 1 ]]; then
            componentManaged=${ComponentManaged[$fqComponent]:-Invalid}
            if [[ $componentManaged == 1 ]]; then
               componentList+=" $fqComponent"
            elif [[ $componentManaged == Invalid ]]; then
               errmsg "Component $fqComponent is invalid." >&2
            fi
         else
            componentList+=" $fqComponent"
         fi
      done
      componentList=${componentList# }
   fi

   # An empty list is a failure: callers rely on the exit code.
   [[ -n "$componentList" ]] || return 1

   printf '%s\n' "$componentList"
   return 0
}

#------------------------------------------------------------------------------
# Function: get_component_status ($component, $optimistic)
#
# Input:
# $1 - A fully qualified component name of the form <instance>:<component>
#
# $2 - "Optimistic" setting, 1 or 0 (the default).  By default, components that
# don't yet have status logic coded report as failed.  If 1, components of
# known types report as OK.  Components of unknown type always report as
# failed.  (Any such components should be marked inactive in the topology
# config file).
#
# Side effects: sets ComponentStatusCode[$component] and
# ComponentStatusMsg[$component], and calls get_component_version().
#
# Status codes:
#  -3  Component host not configured.
#  -2  Component type not defined.
#  -1  Status not implemented for, or unknown, component type.
#   0  OK (also used for components not managed by HMS).
#   1  FAILED.
#   2  Offline.
#
# Returns: 1 if no component was given; otherwise the status code, with
# negative codes mapped into the 0-255 range (-1 => 255, -2 => 254,
# -3 => 253).
#------------------------------------------------------------------------------
function get_component_status () {
   vvmsg "CALL: get_component_status ($*)"
   declare component=${1:-Unset}
   declare optimistic=${2:-0}
   declare instance=
   declare componentType=
   declare componentURL=
   declare componentManaged=
   declare componentMasterHost=
   declare componentP4PORT=
   declare componentStatusCmd=
   declare -i componentStatusCode=0
   declare componentStatusMsg=
   declare componentVersion=
   declare brokerStatusFile=$P4U_TMPDIR/broker_status.${component/:/_}.out

   [[ $component == Unset ]] && return 1

   get_component_version "$component"
   componentVersion=${ComponentVersion[$component]}

   instance=${component%%:*}
   componentType=${ComponentType[$component]:-Unset}
   componentURL=${ComponentURL[$component]:-Unset}
   componentManaged=${ComponentManaged[$component]:-Unset}
   componentMasterHost=${ComponentMasterHost[$component]:-Unset}

   if [[ $componentManaged == 1 ]]; then
      case "$componentType" in
         (p4d-mc|p4d-e|p4d-r)
            componentP4PORT="$componentMasterHost:$componentURL"
            componentStatusCmd="$P4BIN -s -p $componentP4PORT info -s"
            componentStatusCode=0
            run "$componentStatusCmd" \
               "3:Checking status for $component of type $componentType.|4:Checking with command: $componentStatusCmd" 0 0 ||\
               componentStatusCode=1
         ;;
         (p4broker)
            componentStatusCmd="/p4/${instance}/bin/p4broker_${instance}_init status"
            if [[ $componentMasterHost != Unset ]]; then
               msg "Checking status of $component on $componentMasterHost"
               ssh -q -n -tt -l "$OSUSER" "$componentMasterHost" "$componentStatusCmd" > "$brokerStatusFile" 2>&1
               # Classify by the text the init script printed.
               if grep -Fq "is running as" "$brokerStatusFile" 2>/dev/null; then
                  componentStatusCode=0
                  msg "Verified: $component is OK."
               elif grep -Fq "is NOT running" "$brokerStatusFile" 2>/dev/null; then
                  componentStatusCode=2
                  msg "Verified: $component is offline."
               else
                  componentStatusCode=1
                  errmsg "Component $component status unknown."
               fi
               rm -f "$brokerStatusFile"
            else
               errmsg "No master host configured for component $component."
               componentStatusCode=-3
            fi
         ;;
         (p4p|p4dtg|p4web|p4gf)
            warnmsg "Sorry, can't status components of type $componentType just yet."
            componentStatusCode=-1
            [[ $optimistic -eq 1 ]] && componentStatusCode=0
         ;;
         (swarm)
            warnmsg "Sorry, can't status components of type swarm just yet."
            msg "Check the Swarm worker queue with wget using the configured URL ($componentURL)."
            componentStatusCode=-1
            [[ $optimistic -eq 1 ]] && componentStatusCode=0
         ;;
         # 'Unset' must be tested before the catch-all, or it is never reached.
         (Unset)
            errmsg "ComponentType not defined for component $component."
            componentStatusCode=-2
         ;;
         (*)
            warnmsg "Unknown component type ($componentType) configured."
            componentStatusCode=-1
         ;;
      esac
   else
      warnmsg "Component $component is not managed by HMS.  Assuming it is OK."
      componentStatusCode=0
   fi

   case "$componentStatusCode" in
      (-3) componentStatusMsg="Component host not configured.";;
      (-2) componentStatusMsg="Unknown component type.";;
      (-1) componentStatusMsg="Status not yet implemented for this component type.";;
      (0) componentStatusMsg=OK;;
      (1) componentStatusMsg=FAILED;;
      (2) componentStatusMsg=Offline;;
      (*) componentStatusMsg="Unknown Status Code [$componentStatusCode]";;
   esac

   ComponentStatusCode[$component]=$componentStatusCode
   ComponentStatusMsg[$component]=$componentStatusMsg

   msg "Component $component v$componentVersion status is $componentStatusMsg (code $componentStatusCode)."

   # Exit statuses are 0-255.  Map negative codes explicitly instead of
   # passing a negative argument to 'return', which some Bash versions
   # reject as an invalid option.
   return $(( componentStatusCode & 255 ))
}

#------------------------------------------------------------------------------
# Function: get_component_version ($component)
#
# Set values in ComponentMajorVersion, ComponentMinorVersion, and
# ComponentVersion associative arrays, indexed by the fully qualified
# component name (i.e. <instance>:<component>).
#
# Output: This get_component_version() is a quiet function.  It does its work
# silently.  In event of problems, it just sets the version values to UNKNOWN.
# It doesn't complain.
# This is called by get_component_status, which takes
# care of notifying the user of problems.
#
# Input:
# $1 - A fully qualified component name of the form <instance>:<component>
#
# How the version is obtained depends on ComponentType[$component]:
#  p4d-*, p4broker, p4p - run the instance executable with -V on the
#     component's master host and parse the "Rev. " line.
#  swarm - read /p4/<instance>/swarm/Version on the master host.
#  anything else (including no configured type) - UNKNOWN.
#
# ComponentVersion is "<major>.<minor>", or UNKNOWN if the major version
# could not be determined.
#------------------------------------------------------------------------------
function get_component_version () {
   vvmsg "CALL: get_component_version ($*)"
   declare component=${1:-Unset}
   declare instance=
   declare componentMasterHost=
   declare componentType=
   declare componentExe=
   declare versionLine=
   # Default to UNKNOWN so that every path, including unrecognized component
   # types, leaves well-defined values.
   declare componentMajorVersion=UNKNOWN
   declare componentMinorVersion=UNKNOWN
   declare componentVersionFile=$P4U_TMPDIR/component_version.${component/:/_}.out

   instance=${component%%:*}
   componentType=${ComponentType[$component]:-Unset}
   componentMasterHost=${ComponentMasterHost[$component]:-Unset}

   case "$componentType" in
      (p4d-*|p4broker|p4p)
         # The executable is named for the base type, e.g. p4d-mc => p4d_<instance>.
         componentExe="/p4/$instance/bin/${componentType%-*}_${instance}"
         ssh -q -n -tt -l "$OSUSER" "$componentMasterHost" "$componentExe -V" > "$componentVersionFile" 2>&1
         versionLine=$(grep -F "Rev. " "$componentVersionFile" 2>/dev/null)
         # 'ssh -tt' allocates a tty, so lines may end with a carriage return.
         versionLine=${versionLine%$'\r'}
         if [[ -n "$versionLine" ]]; then
            # The Rev. line is '/'-delimited: field 3 is the release and
            # field 4 starts with the patch level.
            componentMajorVersion=$(cut -d '/' -f 3 <<< "$versionLine")
            componentMinorVersion=$(cut -d '/' -f 4 <<< "$versionLine")
            componentMinorVersion=${componentMinorVersion%% *}
         fi
         rm -f "$componentVersionFile"
      ;;
      (p4dtg|p4web|p4gf)
         warnmsg "Sorry, can't get version for component of type $componentType just yet."
      ;;
      (swarm)
         ssh -q -n -tt -l "$OSUSER" "$componentMasterHost" "cat /p4/$instance/swarm/Version" > "$componentVersionFile" 2>&1

         # A Swarm version file has 3 lines and looks like this:
         # RELEASE = 2016 1 ;
         # PATCHLEVEL = 1400259 ;
         # SUPPDATE = 2016 06 13 ;
         # Extract the RELEASE and PATCHLEVEL and normalized to our standard format.
         versionLine=$(grep -F RELEASE "$componentVersionFile" 2>/dev/null)
         versionLine=${versionLine%$'\r'}
         if [[ -n "$versionLine" ]]; then
            componentMajorVersion=${versionLine##* = }
            componentMajorVersion=${componentMajorVersion% ;}
            # "2016 1" => "2016.1"
            componentMajorVersion=${componentMajorVersion/ /.}
         fi

         versionLine=$(grep -F PATCHLEVEL "$componentVersionFile" 2>/dev/null)
         versionLine=${versionLine%$'\r'}
         if [[ -n "$versionLine" ]]; then
            componentMinorVersion=${versionLine##* = }
            componentMinorVersion=${componentMinorVersion% ;}
         fi

         rm -f "$componentVersionFile"
      ;;
      (*)
         # Unknown or unconfigured type: stay quiet and leave UNKNOWN values.
         :
      ;;
   esac

   ComponentMajorVersion[$component]=$componentMajorVersion
   ComponentMinorVersion[$component]=$componentMinorVersion
   if [[ $componentMajorVersion != UNKNOWN ]]; then
      ComponentVersion[$component]="$componentMajorVersion.$componentMinorVersion"
   else
      ComponentVersion[$component]=UNKNOWN
   fi
}