#!/bin/bash
#==============================================================================
# Copyright and license info is available in the LICENSE file included with
# the Server Deployment Package (SDP), and also available online:
# https://swarm.workshop.perforce.com/projects/perforce-software-sdp/view/main/LICENSE
#------------------------------------------------------------------------------
set -u

#==============================================================================
# Declarations and Environment

# Case conversion (${var^^}) used later requires bash 4+.
if [[ ${BASH_VERSINFO[0]} -lt 4 ]]; then
   echo -e "\\n\\nERROR: $0 requires bash version 4.x+; current bash version is $BASH_VERSION."
   exit 1
fi

# Allow override of P4U_HOME, which is set only when testing P4U scripts.
export P4U_HOME=${P4U_HOME:-/p4/common/bin}
export SDP_ENV=${SDP_ENV:-/p4/common/bin/p4_vars}
export P4U_LIB=${P4U_LIB:-/p4/common/lib}
export P4U_ENV=$P4U_LIB/p4u_env.sh
export P4U_LOG=Unset
export VERBOSITY=${VERBOSITY:-3}

# Environment isolation.  For stability and security reasons, prepend
# PATH to include dirs where known-good scripts exist.
# known/tested PATH and, by implication, executables on the PATH.
export PATH=$P4U_HOME:$PATH:~/bin:.
export P4CONFIG=${P4CONFIG:-.p4config}
export P4ENVIRO=/dev/null/.p4enviro

[[ -r "$P4U_ENV" ]] || {
   echo -e "\\nError: Cannot load environment from: $P4U_ENV\\n\\n"
   exit 1
}

# Libraries are loaded in this order; the list is deliberately word-split.
declare BASH_LIBS=$P4U_ENV
BASH_LIBS+=" $P4U_LIB/libcore.sh"
BASH_LIBS+=" $P4U_LIB/libp4u.sh"

for bash_lib in $BASH_LIBS; do
   # shellcheck disable=SC1090
   source "$bash_lib" ||\
      { echo -e "\\nFATAL: Failed to load bash lib [$bash_lib]. Aborting.\\n"; exit 1; }
done

declare Version=3.1.1
declare ScriptArgs=$*
declare -i StartupCmdNumFirst
declare -i StartupCmdNumLast
declare -i PreflightOK=1
declare RandomPassword=
declare JournalPrefix=
declare -i OverallReturnStatus=0
declare ServerSpec=
declare ServerSpecFile=
declare ShortServerSpec=
declare ServiceUser=
declare ServiceUserSpecFile=
declare ServiceUsersGroup=ServiceUsers
declare TargetPort=
declare TmpDir=
declare TmpFile=
declare ProtectsFile=
declare GroupSpecFile=
declare ServicePasswdFile=
declare PreOpScript=/p4/common/site/mkrep/pre-mkrep.sh
declare PreOpCmd=
declare PostOpScript=/p4/common/site/mkrep/post-mkrep.sh
declare PostOpCmd=
declare -i OverwriteServerSpec=0
# NOTE(review): this unconditionally resets VERBOSITY, discarding any value
# inherited from the environment above; the -v<n> flags can still change it.
export VERBOSITY=3

#==============================================================================
# Local Functions

#------------------------------------------------------------------------------
# Function: terminate
# Exit handler installed with 'trap'.  Stops logging (unless logging is off),
# cleans up via cleanTrash, and exits with $OverallReturnStatus.
function terminate
{
   # Disable signal trapping.
   trap - EXIT SIGINT SIGTERM

   vvmsg "$THISSCRIPT: EXITCODE: $OverallReturnStatus"

   # Stop logging.
   [[ "${P4U_LOG}" == off ]] || stoplog

   # Don't litter.
   cleanTrash

   # With the trap removed, exit.
   exit "$OverallReturnStatus"
}

#------------------------------------------------------------------------------
# Function: bail ($message, $rc)
# Print a fatal error message to stderr and exit.
#
# Input:
# $1 - error message.  Default is 'Unknown Error'.
# $2 - exit code.  Default is 1.
function bail
{
   local msg="${1:-Unknown Error}"
   local -i rc
   rc="${2:-1}"
   echo -e "\\n$THISSCRIPT (line: ${BASH_LINENO[0]}): FATAL: $msg\\n\\n" >&2

   # Once the EXIT trap is installed, terminate() decides the final exit code
   # from OverallReturnStatus; record the failure there so that a fatal error
   # is not reported as exit code 0.
   OverallReturnStatus=$rc
   exit "$rc"
}

#------------------------------------------------------------------------------
# Function: infer_type_tag ($serverID, $type)
# Determine the type tag.  In some cases we can determine the type tag just
# from the name.
#
# Input:
# $1 - ServerID of the server to replicate from.
# $2 - 'Services' value of that server's server spec.
#
# Output: The type tag (master, edge, fr, ffr, ro, or bo) is written to stdout.
function infer_type_tag ()
{
   local fromServerID=${1:-}
   local type=${2:-}
   local typeTag=
   local specFile=
   local -i isFiltered=0

   case "$type" in
      (*standby)
         usageError "The ServerID specified with '-f $fromServerID' cannot refer to a standby or forwarding-standby, as those replica types do not support downstream replicas."
      ;;
      (commit-server|standard) typeTag=master;;
      (edge-server) typeTag=edge;;
      (forwarding-replica)
         # For a forwarding replica, we need to know if it is filtered, so we
         # can determine whether the type tag should be 'fr' or 'ffr'. (An
         # edge server can also be filtered, but that doesn't affect the steps
         # provided to the user as guidance in Phase 2, whereas the
         # difference does matter between 'fr' vs. 'ffr'.)
         specFile=$(mktemp)
         "$P4BIN" server -o "$fromServerID" > "$specFile" ||\
            bail "Could not do: $P4BIN server -o $fromServerID .GT. $specFile"

         # A replica is filtered if the server spec makes use of any of the
         # *DataFilter fields in the server spec, and/or if it has a startup
         # thread that makes use of the '-T' option to filter by db table.
         if grep -qE '^(Archive|Client|Revision)DataFilter:' "$specFile"; then
            isFiltered=1
         elif grep -qE '^\s+startup.*=.* -T ' "$specFile"; then
            isFiltered=1
         fi

         # The spec copy is only needed for the checks above; don't leave it
         # behind in the temp directory.
         rm -f "$specFile"

         if [[ "$isFiltered" -eq 1 ]]; then
            typeTag=ffr
         else
            typeTag=fr
         fi
      ;;
      (replica) typeTag="ro";;
      (build-server) typeTag="bo";; ### Unsupported/Undocumented; build-server type is replaced by 'build edge'.
      (*)
         bail "Internal error: Unhandled server type [$type] passed to infer_type_tag(). Aborting."
      ;;
   esac

   echo "$typeTag"
}

#------------------------------------------------------------------------------
# Function: usage (required function)
#
# Input:
# $1 - style, either -h (for short form) or -man (for man-page like format).
#      The default is -h.
#
# $2 - error message (optional).  Specify this if usage() is called due to
#      user error, in which case the given message is displayed first, followed
#      by the standard usage message (short or long depending on $1).  If
#      displaying an error, usually $1 should be -h so that the longer usage
#      message doesn't obscure the error message.
#
# Sample Usage:
#    usage
#    usage -h
#    usage -man
#    usage -h "Incorrect command line usage."
#------------------------------------------------------------------------------
# Display the short usage summary, plus the full manual text when called with
# '-man', then exit with status 1.
function usage
{
   declare style=${1:--h}
   declare errorMessage=${2:-Unset}

   if [[ "$errorMessage" != Unset ]]; then
      msg "\\n\\nUsage Error:\\n\\n$errorMessage\\n\\n"
   fi

   echo "USAGE for $THISSCRIPT v$Version:

$THISSCRIPT -t <Type> -s <Site_Tag> -r <Replica_Host> [-f <From_ServerID>] [-os] [-p] [-N <N>] [-i <SDP_Instance>] [-L <log>] [-v<n>] [-n] [-D]

or

$THISSCRIPT [-h|-man|-V]
"
   if [[ $style == -man ]]; then
      echo -e "
DESCRIPTION:
	This script simplifies the task of creating Helix Core replicas and edge
	servers, and helps ensure they are setup with best practices.

	This script executes as two phases. In Phase 1, this script does all the
	metadata configuration to be executed on the master server that must be
	baked into a seed checkpoint for creating the replica/edge. This
	essentially captures the planning for a new replica, and can be done
	before the physical infrastructure (e.g. hardware, storage, and
	networking) is ready. Phase 1, fully automated by this script, takes only
	seconds to run.

	In Phase 2, this script provides information for the manual steps needed
	to create, transfer, and load seed checkpoints onto the replica/edge. The
	guidance is specific to type of replica created, based on the command line
	flags provided to this script. This processing can take a while for large
	data sets, as it involves creating and transporting checkpoints.

	Before using this script, a set of geographic site tags must be defined.
	See the FILES: below for details on a site tags.

	This script adheres to the these SDP Standards:
	* Server Spec Naming Standard: https://swarm.workshop.perforce.com/projects/perforce-software-sdp/view/main/doc/SDP_Guide.Unix.html#_server_spec_naming_standard
	* Journal Prefix Standard: https://swarm.workshop.perforce.com/projects/perforce-software-sdp/view/main/doc/SDP_Guide.Unix.html#_the_journalprefix_standard

	In Phase 1, this script does the following to help create a replica or
	edge server:
	* Generates the server spec for the the replica.
	* Generates a server spec for master server (if needed).
	* Sets configurables ('p4 configure' settings) for replication.
	* Selects the correct 'Services' based on replica type.
	* Creates service user for the replica, and sets a password.
	* Creates service user for the master (if needed), and sets a password.
	* Adds newly created service users to the group '$ServiceUsersGroup'.
	* Verifies the group $ServiceUsersGroup is granted super access in the
	  protections table (and with the '-p', updates Protections).

	After these steps are completed, in Phase 2, detailed instructions are
	presented to guide the user through the remaining steps needed to complete
	the deployment of the replica. This starts with creating a new checkpoint
	to capture all the metadata changes made by this script in Phase 1.

SERVICE USERS:
	Service users created by this type are always of type 'service', and so
	will not consume a licensed seat.

	Service users also have an 'AuthMethod' of 'perforce' (not 'ldap') as is
	required by 'p4d' for 'service' users. Passwords set for service users are
	long 32 character random strings that are not stored, as they are never
	needed. Login tickets for service users are generated using:

	p4login -service -v

OPTIONS:
 -t <Type>[N]
	Specify the replica type tag. The type corresponds to the 'Type:' and
	'Services:' field of the server spec, which describes the type of services
	offered by a given replica.

	Valid type values are:
	* ha: High Availability standby replica, for 'p4 failover' (P4D 2018.2+)
	* ham: High Availability metadata-only standby replica, for 'p4 failover' (P4D 2018.2+)
	* ro: Read-Only standby replica. (Discouraged; Use 'ha' instead for 'p4 failover' support.)
	* rom: Read-Only standby replica, Metadata only. (Discouraged; Use 'ham' instead for 'p4 failover' support.)
	* fr: Forwarding Replica (Unfiltered).
	* fs: Forwarding Standby (Unfiltered).
	* frm: Forwarding Replica (Unfiltered, Metadata only).
	* fsm: Forwarding Standby (Unfiltered, Metadata only).
	* ffr: Filtered Forwarding Replica. Not a valid failover target.
	* edge: Edge Server. Filtered by definition.

	Replicas with 'standby' are always unfiltered, and use the 'journalcopy'
	method of replication, which copies a byte-for-byte verbatim journal file
	rather than one that is merely logically equivalent.

	The tag has several purposes:
	1. Short Hand. Each tag represents a combination of 'Type:' and fully
	qualified 'Services:' values used in server specs.

	2. Distillation. Only the most useful Type/Services combinations have a
	shorthand form

	3. For forwarding replicas, the name includes the critical distinction of
	whether any replication filtering is used; as filtering of any kind
	disqualifies a replica from being a potential failover target. (No such
	distinction is needed for edge servers, which are filtered by definition).

 -s <Site_Tag>
	Specify a geographic site tag indicating the location and/or data center
	where the replica will physically be located. Valid site tags are defined
	in the site tags file:

	$SiteTagsFile

	A sample SiteTags.cfg file that is here:

	$SiteTagsSample

 -r <Replica_Host>
	Specify the DNS name of the server machine on which the new replica will
	run. This is used in the 'ExternalAddress:' field of the replica's
	ServerID, and also used in instructions to the user for steps after
	metadata configuration is done by this script.

 -f <From_ServerID>
	Specify ServerID of the P4TARGET server from which we are replicating.
	This is used to populate the 'ReplicatingFrom' field of the server spec.
	The value must be a valid ServerID.

	By default, this is determined dynamically by checking the ServerID of the
	master server. This option should be used if the target is something OTHER
	THAN the master. For example, to create an HA replica of an edge server,
	you might specify something like '-f p4d_edge_syd'.

 -os
	Specify the '-os' option to overwrite an exising server spec. By default,
	this script will abort of the server spec to be generated already exists
	on the Helix Core server. Specify this option to overwrite the existing
	server spec.

 -p
	This script performs a check to ensure that the Protections table grants
	super access to the group $ServiceUsersGroup.

	By default, an error is displayed if the check fails, i.e. if super user
	access for the group $ServiceUsersGroup cannot be verified. This is
	because, by default, we want to avoid making changes to the Protections
	table. Some sites have local policies or custom automation that requires
	site-specific procedures to update the Protections table.

	If '-p' is specified, an attempt is made to append the Protections table
	an entry like:

	super group $ServiceUsersGroup * //...

 -N <N>
	Specify '-N <N>', where N is a positive integer, to indicate that multiple
	replicas of the same type are to be created at the same site. This affects
	the ServerID to ee generated. For example, the options '-t edge -s syd'
	would result in a ServerID of p4d_edge_syd. To create a second edge in the
	same site, use '-t edge -s syd -N 2' to generate p4d_edge2_syd.

 -i <SDP_Instance>
	Specify the SDP Instance. If not specifed and the SDP_INSTANCE environment
	is defined, that value is used. If SDP_INSTANCE is not defined, the
	'-i <SDP_Instance>' argument is required.

 -v<n>
	Set verbosity 1-5 (-v1 = quiet, -v5 = highest).

 -L <log>
	Specify the path to a log file, or the special value 'off' to disable
	logging. By default, all output (stdout and stderr) goes in the logs
	directory referenced by \$LOGS environment variable, in a file named
	mkrep.<timestamp>.log

	NOTE: This script is self-logging. That is, output displayed on the screen
	is simultaneously captured in the log file. Do not run this script with
	redirection operators like '> log' or '2>&1', and do not use 'tee.'

 -n
	No-Op. Prints commands instead of running them.

 -D
	Set extreme debugging verbosity.

HELP OPTIONS:
 -h	Display short help message
 -man	Display man-style help message
 -V	Display version info for this script and its libraries.

FILES:
	This Site Tags file defines the list of valid geographic site tags:

	$SiteTagsFile

	The contains one-line entries of the form:

	<tag>: <description>

	where <tag> is a short alphanumeric tag name for a geographic location,
	data center, or other useful distinction. This tag is incorporated into
	the ServerID of replicas or edge servers created by this script. Tag names
	should be kept short, ideally no more than about 5 characters in length.

	The <description> is a one-line text description of what the tag refers
	to, which may contain spaces and ASCII punctuation.

	Blank lines and lines starting with a '#' are considered comments and are
	ignored.

REPLICA SERVER MACHINE SETUP:
	The replica/edge server machine must be have the SDP structure installed,
	either using the mkdirs.sh script included in the SDP, or the Helix
	Installer for 'green field' installations.

	When setting up an edge server, a replica of an edge server, or filtered
	replica, confirm that the JournaPrefix Standard (see URL above) structure
	has the separate checkpoints folder as identified in the 'Second Form' in
	the standard.

	A baseline SDP structure can typically be extended by running commands
	like like these samples (assuming a ServerID of p4d_edge_syd or
	p4d_ha_edge_syd):

	mkdir /hxdepots/p4/1/checkpoints.edge_syd
	cd /p4/1
	ln -s /hxdepots/p4/1/checkpoints.edge_syd

CUSTOM PRE- AND POST- OPERATION AUTOMATION HOOKS:
	This script can execute custom pre- and post- processing scripts. This can
	be useful to incorporate site-specifc elements of replica setup.

	If the file /p4/common/site/mkrep/pre-mkrep.sh exists and is executable,
	it will be executed before mkrep.sh processing. If the file
	/p4/common/site/mkrep/post-mkrep.sh exists and is executable, it will be
	executed after mkrep.sh processing.

	Pre- and post- processing scripts are called with the same command line
	arguments passed to this mkrep.sh script.

	The pre- and post- processing scripts can use or ignore arguments as
	needed, though it is required to implement the '-n' flag to operate in
	preview mode, taking no actions that affect data (just as this script
	behaves).

	Pre- and post- processing scripts are expected to exit with a zero exit
	code to indicate success, and non-zero to indicate failure.

	The custom pre-processing script is executed after standard preflight
	checks complete successfully. If a custom pre-processing script indicates
	a failure, processing is aborted before standard mkrep.sh processing
	occurs. The post-processing custom script is executed after the standard
	mkrep.sh processing is successful. If a post-processing custom script is
	detected, the instructions that would be provided to the user in Phase 2
	are not displayed, as it is expected that the custom post- processing will
	alter or handle these steps.

	Success or failure of pre- and post- processing scripts is reported in the
	log. These scripts do not require independent logging, as all standard and
	error output is captured in the log of this mkrep.sh script.

	TIP: Be sure to fully test custom scripts in a test environment before
	incorporating them into production systems.

EXAMPLES:
	EXAMPLE 1 - Set up a High Availability (HA) Replica of the master.

	Add an HA replica to instance 1 to run on host bos-helix-02:

	$THISSCRIPT -i 1 -t ha -s bos -r bos-helix-02

	EXAMPLE 2 - Add an Edge Server to the topology.

	Add an Edge server to instance acme to run on host syd-helix-04:

	$THISSCRIPT -i acme -t edge -s syd -r syd-helix-04

	EXAMPLE 3 - Setup an HA replica of an edge server.

	Add a HA replica of the edge server to instance acme to run on host syd-helix-05:

	$THISSCRIPT -i acme -t ha -f p4d_edge_syd -s syd -r syd-helix-05

	EXAMPLE 4 - Add a second edge server in the same site as another edge.

	$THISSCRIPT -i acme -t edge -N 2 -s syd -r syd-helix-04
"
   fi

   exit 1
}

#==============================================================================
# Command Line Processing

declare ReplicaHost=Unset
declare ReplicaTypeTag=Unset
declare ReplicaType=
declare ExtraReplicaNumber=
declare FromServerID=Unset
declare FromServerType=
declare FromServerTypeTag=
declare FromServerP4PORT=
declare FromServerJournalPrefix=
declare FromServerHost=
declare -i CreateFromServerID=0
declare -i CreateMasterServiceUser=0
declare SiteTag=Unset
declare SiteTagsFile="${P4CCFG:-/p4/common/config}/SiteTags.cfg"
declare SiteTagsSample="${P4CCFG:-/p4/common/config}/SiteTags.cfg.sample"
declare SDPInstance=${SDP_INSTANCE:-Unset}
#declare -i Interactive=1
declare -i MetadataOnly=0
declare -i shiftArgs=0
declare -i UpdateProtections=0

# 'set -u' is relaxed while parsing so that a trailing flag with a missing
# value ($2 unset) reaches the "Bad usage." check below instead of aborting.
set +u
while [[ $# -gt 0 ]]; do
   case $1 in
      (-h) usage -h;;
      (-man) usage -man;;
      (-r) ReplicaHost="$2"; shiftArgs=1;;
      (-t) ReplicaTypeTag="$2"; shiftArgs=1;;
      (-N) ExtraReplicaNumber="$2"; shiftArgs=1;;
      (-i) SDPInstance="$2"; shiftArgs=1;;
      (-s) SiteTag="$2"; shiftArgs=1;;
      (-f) FromServerID="$2"; shiftArgs=1;;
      (-os) OverwriteServerSpec=1;;
      (-p) UpdateProtections=1;;
      (-V) show_versions; exit 1;;
      (-v1) export VERBOSITY=1;;
      (-v2) export VERBOSITY=2;;
      (-v3) export VERBOSITY=3;;
      (-v4) export VERBOSITY=4;;
      (-v5) export VERBOSITY=5;;
      (-L) export P4U_LOG="$2"; shiftArgs=1;;
      (-n) export NO_OP=1;;
      (-D) set -x;; # Debug; use bash 'set -x' mode.
      (*) usageError "Unknown arg ($1).";;
   esac

   # Shift (modify $#) the appropriate number of times.
   shift; while [[ $shiftArgs -gt 0 ]]; do
      [[ $# -eq 0 ]] && usageError "Bad usage."
      shiftArgs=$shiftArgs-1
      shift
   done
done
set -u

#==============================================================================
# Command Line Verification

TmpFile=$(mktemp)

# shellcheck disable=SC1090
source "$SDP_ENV" "$SDPInstance" || bail "Could not do: source $SDP_ENV $SDPInstance"

[[ "$P4U_LOG" == Unset ]] && \
   P4U_LOG=${LOGS:-/tmp}/mkrep.$(date +'%Y%m%d-%H%M').log

[[ $SDPInstance == Unset ]] && usageError "\\nThe '-i <SDP_Instance>' parameter is required unless SDP_INSTANCE is defined."
[[ $ReplicaHost == Unset ]] && usageError "\\nThe '-r <Replica_Host>' parameter is required."
[[ $ReplicaTypeTag == Unset ]] && usageError "\\nThe '-t <Type>' parameter is required."
[[ $SiteTag == Unset ]] && usageError "\\nThe '-s <Site_Tag>' parameter is required."

# Map the short type tag to the 'Services:' value of the server spec.
case "$ReplicaTypeTag" in
   (ha) ReplicaType=standby;; # HA Standby replica.
   (ham) ReplicaType=standby; MetadataOnly=1;; # HA Standby replica, Metadata only.
   (ro) ReplicaType=standby;; # Read-Only Standby replica.
   (rom) ReplicaType=standby; MetadataOnly=1;; # Read-Only Standby replica, Metadata only.
   (fr) ReplicaType=forwarding-replica;; # Forwarding Replica (Unfiltered).
   (fs) ReplicaType=forwarding-standby;; # Forwarding Standby (Unfiltered).
   (frm) ReplicaType=forwarding-replica; MetadataOnly=1;; # Forwarding Replica (Unfiltered), Metadata only.
   (fsm) ReplicaType=forwarding-standby; MetadataOnly=1;; # Forwarding Standby (Unfiltered), Metadata only.
   (ffr) ReplicaType=forwarding-replica;; # Filtered Forwarding Replica
   (edge) ReplicaType=edge-server;; # Edge Server. Filtered by def'n, cannot be Metadata only.
   (*) usageError "The specified replica type tag [$ReplicaTypeTag] is invalid.";;
esac

if [[ -n "$ExtraReplicaNumber" ]]; then
   if [[ ! "$ExtraReplicaNumber" =~ ^[0-9]+$ ]]; then
      bail "The value specified with '-N', [$ExtraReplicaNumber], is not valid. It must be a numeric value. Left-padding with zeroes is allowed."
   fi
fi

if [[ "$FromServerID" == "Unset" ]]; then
   FromServerTypeTag=master
   ServerSpec="p4d_${ReplicaTypeTag}${ExtraReplicaNumber}_${SiteTag}"
else
   FromServerP4PORT=$("$P4BIN" -ztag -F %ExternalAddress% server -o "$FromServerID")
   FromServerType=$("$P4BIN" -ztag -F %Services% server -o "$FromServerID")

   # This assumes journalPrefix path can never have spaces, which is a safe
   # given the journalPrefix Standard.  The value is taken as the 4th
   # space-delimited field of the 'p4 configure show' output.
   FromServerJournalPrefix=$("$P4BIN" configure show "$FromServerID#journalPrefix" | cut -d ' ' -f 4)

   # Reduce the ExternalAddress to the host: drop the trailing ':<port>' and
   # any leading '<prefix>:' (e.g. ssl:).
   FromServerHost=${FromServerP4PORT%:*}
   FromServerHost=${FromServerHost#*:}

   if [[ -n "$FromServerType" ]]; then
      FromServerTypeTag=$(infer_type_tag "$FromServerID" "$FromServerType")
   else
      usageError "The type of the ServerID specified with with '-f $FromServerID' could not be determined."
   fi

   if [[ "$FromServerTypeTag" == "master" ]]; then
      ServerSpec="p4d_${ReplicaTypeTag}${ExtraReplicaNumber}_${SiteTag}"
   else
      # This will generate a ServerID like p4d_ha_edge_syd, for an HA replica of
      # an edge server at site syd.
      ServerSpec="p4d_${ReplicaTypeTag}${ExtraReplicaNumber}_${FromServerTypeTag}_${SiteTag}"
   fi
fi

ShortServerSpec="${ServerSpec#p4d_}"

if "$P4BIN" server --exists -o "$ServerSpec" > "$TmpFile" 2>&1; then
   if [[ "$OverwriteServerSpec" -eq 1 ]]; then
      msg "Overwriting existing server spec [$ServerSpec] due to '-os'."
   else
      bail "Server spec to be generated already exists [$ServerSpec]. If you intend to overwrite this server spec, use '-os'."
   fi
else
   # A failure only proves non-existence if the server said so explicitly.
   if grep -qE 'Server .* doesn.t exist.' "$TmpFile"; then
      msg "Verified: This server spec does not exist: $ServerSpec"
   else
      bail "Could not determine if this server spec exists: $ServerSpec."
   fi
fi

rm -f "$TmpFile"

# We set JournalPrefix value based on the JournalPrefixStandard referenced
# in this script's manual page.  There are 3 possibilities:
# Per the journalPrefix standard, replicas with unique data sets (edge, ffr) OR those
# that share /hxdepots with their P4TARGET server (fsm, frm, ham, rom), use the Second Form
# of the journalPrefix with a distinct value incorporating the short form of the ServerID.
# Otherwise, we use the First Form of the journalPrefix.
if [[ "$ReplicaTypeTag" == "edge" || "$ReplicaTypeTag" == "ffr" || "$ReplicaTypeTag" == "fsm" || "$ReplicaTypeTag" == "frm" || "$ReplicaTypeTag" == "ham" || "$ReplicaTypeTag" == "rom" ]]; then
   # shellcheck disable=SC2153
   JournalPrefix="$P4HOME/checkpoints.${ShortServerSpec}/p4_${SDPInstance}.${ShortServerSpec}"
elif [[ -n "$FromServerJournalPrefix" ]]; then
   JournalPrefix="$FromServerJournalPrefix"
else
   JournalPrefix="$P4HOME/checkpoints/p4_${SDPInstance}"
fi

# Confirm the requested site tag is defined in the site tags file.
declare -i tagFound=0
if [[ -r "$SiteTagsFile" ]]; then
   while read -r line; do
      # Skip comment lines.  Only the '#' is quoted, so the '*' acts as a glob.
      [[ $line == "#"* ]] && continue
      # Skip blank lines (unquoted echo collapses whitespace-only lines).
      # shellcheck disable=SC2086 disable=SC2116
      [[ -z "$(echo $line)" ]] && continue
      [[ "$line" == *":"* ]] || continue
      tag=${line%%:*}
      if [[ "$tag" == "$SiteTag" ]]; then
         tagFound=1
         break
      fi
   done < "$SiteTagsFile"
else
   bail "Missing site tag configuration file [$SiteTagsFile]. See the sample file: $SiteTagsSample\\nAborting."
fi

[[ $tagFound -eq 1 ]] ||\
   bail "Failed to find specified site tag [$SiteTag] in the tag configuration file [$SiteTagsFile]. Aborting."

#------------------------------------------------------------------------------
# Detect optional custom pre- and post- processing hooks.
if [[ -x "$PreOpScript" ]]; then
   PreOpCmd="$PreOpScript $ScriptArgs"
   msg "\\nA custom pre-processing script exists and will be executed if preflight checks\\nare successful. The pre-processing command line will be:\\n\\t$PreOpCmd\\n"
fi

if [[ -x "$PostOpScript" ]]; then
   PostOpCmd="$PostOpScript $ScriptArgs"
   msg "\\nA custom post-processing script exists and will be executed if the processing is\\nsuccessful. The post-processing command line will be:\\n\\t$PostOpCmd\\n"
fi

#==============================================================================
# Main Program

trap terminate EXIT SIGINT SIGTERM

TmpDir=$(mktemp -d)
ProtectsFile="$TmpDir/protect.p4s"
GroupSpecFile="$TmpDir/group.$ServiceUsersGroup.p4s"
GARBAGE+=" $TmpDir"

if [[ "${P4U_LOG}" != off ]]; then
   touch "${P4U_LOG}" || bail "Couldn't touch log file [${P4U_LOG}]."

   # Redirect stdout and stderr to a log file.
   exec > >(tee "${P4U_LOG}")
   exec 2>&1

   initlog
fi

msg "Starting $THISSCRIPT v$Version as $OSUSER@${HOSTNAME%%.*} at $(date) as:\\n$CMDLINE\\n"

msg "Server spec to be generated is: $ServerSpec"

msg "\\nPhase 1 is automated processing, which this script performs.\\n"

if [[ -n "$PostOpCmd" ]]; then
   msg "Phase 2 is deferred to custom automation implemented with: $PostOpScript"
else
   msg "Phase 2 is manual processing, for which this script provides guidance."
fi

msg "${H}\\nPhase 1.0: Environment Setup and Preflight Checks."

# If the '-f <from_serverid>' was specified, use it.  If not, get the ServerID of the
# master/commit server, one with a Services value of 'commit-server' or simply 'standard'.
# As a last resort, e.g. if the master server does not have a server spec, fall back
# to the P4MASTER_ID setting.
msg "Preflight check for master ServerID."
if [[ "$FromServerID" == Unset ]]; then
   # From the list of server specs, find the master/commit server spec.
   FromServerID=$("$P4BIN" -ztag -F "%ServerID% %Services%" servers |\
      grep -E ' (standard|commit-server)')

   if [[ -n "$FromServerID" ]]; then
      # Keep only the ServerID, i.e. the text before the first space.
      FromServerID=${FromServerID%% *}
      msg "Verified: Master ServerID ($FromServerID) exists."
      if [[ -n "$("$P4BIN" -ztag -F %Update% user -o "svc_${FromServerID}")" ]]; then
         msg "Verified: Service user for ServerID $FromServerID exists: svc_${FromServerID}"
      else
         msg "Service user for ServerID $FromServerID will be created: svc_${FromServerID}"
         CreateMasterServiceUser=1
      fi
   else
      if [[ -n "${P4MASTER_ID:-}" ]]; then
         FromServerID="$P4MASTER_ID"
         CreateFromServerID=1
         CreateMasterServiceUser=1
         msg "Server spec $FromServerID and service user will be created: svc_${FromServerID}"
      else
         errmsg "No server spec with Services value of 'commit-server' or 'standard' found., and P4MASTER_ID is not set.\\n"
         PreflightOK=0
      fi
   fi
fi

msg "Checking if Protections table references group $ServiceUsersGroup."
if [[ "$("$P4BIN" protects -g "$ServiceUsersGroup" -m)" == "super" ]]; then
   msg "Verified: Protections table grants super access to group $ServiceUsersGroup."
else
   if [[ "$UpdateProtections" -eq 1 ]]; then
      msg "Protections does not grant access to group $ServiceUsersGroup as required, but '-p' was specified to mitigate this."
   else
      errmsg "Protections does not grant access to group $ServiceUsersGroup as required,\\nand '-p' not specified. Specify '-p' or adjust the Protections table to add\\nthis line near the bottom:\\n\\n\\tsuper group $ServiceUsersGroup * //...\\n"
      PreflightOK=0
   fi
fi

# Version check: Require P4D 2018.1+ for using 'standby' replica.
# The '>' comparisons below are string comparisons of $P4D_VERSION.
# shellcheck disable=SC2072
if [[ "$ReplicaType" == *"standby" && "$P4D_VERSION" > "2018.1" ]]; then
   msg "P4D is 2018.1+, as recommended for standby replicas."
elif [[ "$P4D_VERSION" > "2016.2" ]]; then
   msg "P4D is 2016.2+, as supported for $ReplicaType replicas."
else
   # Record the failure like the other preflight checks do, so the remaining
   # checks still run and the preflight verdict is reported in one place.
   errmsg "P4D must be 2018.1+ if using journalcopy replicas, P4D_VERSION is $P4D_VERSION.."
   PreflightOK=0
fi

# Version check: Require P4D 2018.2 for using 'ha' replica, i.e. a 2018.2-style
# standby replica with the 'ReplicatingFrom:' field set.
if [[ "$ReplicaTypeTag" == "ha"* ]]; then
   # String comparison of $P4D_VERSION, as in the other version checks.
   # shellcheck disable=SC2072
   if [[ ! "$P4D_VERSION" > "2018.2" ]]; then
      errmsg "P4D must be 2018.2+ if using HA replicas, P4D_VERSION is $P4D_VERSION. Aborting."
      PreflightOK=0
   else
      msg "P4D is 2018.2+, as required for 'ha' type replicas that use 'p4 failover'."
   fi
fi

# Stop here if any preflight check recorded a failure.
[[ "$PreflightOK" -eq 1 ]] ||
   bail "\\nPreflight checks did not pass. Aborting."
msg "\\nPreflight checks passed. Continuing."

#------------------------------------------------------------------------------
# Preflight checks completed.  Continue on!
if [[ -n "$PreOpCmd" ]]; then
   msg "${H}\\nCustom Pre-Processing Phase: Executing custom pre-processing command:\\n\\t$PreOpCmd"

   # The command string is intentionally left unquoted so that it is split
   # into the hook script path and its arguments.
   $PreOpCmd ||
      bail "\\nAlthough the standard preflight checks were successful, the custom pre-processing command indicated failure. Aborting."
   msg "\\nThe custom pre-processing command indicated success."
fi

#--------------------------------------------------------------
msg "${H}\\nPhase 1.1: Define Server Spec."

# Create a server spec for the master if preflight found none.
if [[ "$CreateFromServerID" -eq 1 ]]; then
   msg "Creating required master server spec [$FromServerID]."
   ServerSpecFile="${TmpDir}/${FromServerID}.server.p4s"

   echo -e "ServerID: $FromServerID\\n
Type: server\\n
Name: $FromServerID\\n
Services: commit-server\\n
Description:\\n\\tMaster server." > "$ServerSpecFile" ||
      bail "Failed to initialize server spec file [$ServerSpecFile]."

   if [[ "$NO_OP" -ne 0 ]]; then
      msg "NO_OP: Would run: $P4BIN -s server -i .LT. ${ServerSpecFile##*/}\\nContents of ${ServerSpecFile##*/}:\\n$(grep -v '^#' "$ServerSpecFile")\\n"
   else
      "$P4BIN" -s server -i < "$ServerSpecFile" ||
         bail "Failed to load server spec from file: $ServerSpecFile\\n$(cat "$ServerSpecFile")\\n"
   fi
fi

ServerSpecFile="$TmpDir/$ServerSpec.server.p4s"

# Carry over the 'ssl:' (or ssl4:/ssl6:/...) prefix of P4PORT, if there is one.
SSLPrefix=
if [[ "$P4PORT" =~ ^ssl[46]*: ]]; then
   SSLPrefix="${P4PORT%%:*}:"
fi

# Strip off ssl: and host: element from front of value to just leave numeric port.
ReplicaPortNum=${P4MASTERPORT##*:}

# Standby-type replicas (which covers every 'ha*' tag) carry two extra fields
# in their server spec; all other types omit them.
StandbySpecFields=
if [[ "$ReplicaTypeTag" == "ha"* || "$ReplicaType" == *"standby" ]]; then
   StandbySpecFields="Options: nomandatory\\n
ReplicatingFrom: $FromServerID\\n
"
fi

# Write every field up to the 'Description:' label; the description text
# itself is appended further below.
echo -e "ServerID: $ServerSpec\\n
Type: server\\n
Name: $ServerSpec\\n
${StandbySpecFields}Services: $ReplicaType\\n
ExternalAddress: ${SSLPrefix}${ReplicaHost}:${ReplicaPortNum}\\n
Description:" > "$ServerSpecFile" ||
   bail "Failed to initialize server spec file [$ServerSpecFile]."

# Only 'ha*' replicas get the guidance about the 'mandatory' option.
if [[ "$ReplicaTypeTag" == "ha"* ]]; then
   msg "The server spec $ServerSpec may be configured to use the 'mandatory' setting in the
Options field if desired. However, it must initially configured as 'nomandatory' to
prevent undue stalling of a global topology while a fresh new standby replica gets
caught up. As of P4D 2019.1, new standby replicas cannot be made 'mandatory' until
they are online.

After this replica is brought online and is seen to be replicating properly (and
up-to-date), consider making it a 'mandatory' replica. A 'mandatory' replica is one
that you can trust is at least as current as all other replicas. This helps ensure
a smooth failover from the master server.

However, if the 'mandatory' replica fails, it cannot be easily ignored -- if it
stalls, the global topology stalls. Should that ever occur, you can modify the
server spec manually on the master server, changing the 'mandatory' value to
'nomandatory'. That will enable global replication to move on. It should then be
a high priority to figure out what went wrong with the standby replica so that it
can be brought back online.\\n"
fi

# Every description ends with the upper-cased site tag; only the leading
# text depends on the replica type tag.
case "$ReplicaTypeTag" in
   (ha) DescLead="High Availability Standby Replica (Unfiltered)";;
   (ham) DescLead="High Availability Metadata-only Standby Replica (Unfiltered)";;
   (ro) DescLead="Read-Only Standby Replica (Unfiltered)";;
   (rom) DescLead="Read-Only Standby Replica (Unfiltered, Metadata Only)";;
   (fr) DescLead="Forwarding Replica (Unfiltered)";;
   (fs) DescLead="Forwarding Standby Replica (Unfiltered)";;
   (frm) DescLead="Forwarding Replica (Unfiltered, Metadata Only)";;
   (fsm) DescLead="Forwarding Standby Replica (Unfiltered, Metadata Only)";;
   (ffr) DescLead="Filtered Forwarding Replica";;
   (edge) DescLead="Edge server";;
   (*) bail "\\nInternal Error: Unrecognized replica type tag [$ReplicaTypeTag].";;
esac
Desc="$DescLead in ${SiteTag^^}."

echo -e "\\t$Desc\\n" >> "$ServerSpecFile" ||
   bail "Failed to complete server spec file [$ServerSpecFile]."

msg "Creating server spec $ServerSpec with these contents:"
msg "${H}"
cat "$ServerSpecFile"
msg "${H}"

# Load the spec into the server, or just preview it in NO_OP mode.
if [[ "$NO_OP" -ne 0 ]]; then
   msg "NO_OP: Would run: $P4BIN -s server -i .LT. ${ServerSpecFile##*/}\\nContents of ${ServerSpecFile##*/}:\\n$(grep -v '^#' "$ServerSpecFile")\\n"
else
   "$P4BIN" -s server -i < "$ServerSpecFile" ||
      bail "Failed to load server spec from file: $ServerSpecFile\\n$(cat "$ServerSpecFile")\\n"
fi

#--------------------------------------------------------------
msg "${H}\\nPhase 1.2: Set configurables."
# Names derived from the new ServerID: the replica's service user and the
# temp file that will hold that user's spec.
ServiceUser="svc_${ServerSpec}"
ServiceUserSpecFile="${TmpDir}/${ServiceUser}.user.p4s"

# Cleared to 0 by any failed 'p4 configure set' command.
declare -i ConfigureOK=1

# P4TARGET is the master's port when replicating from the master; otherwise
# it is the port of the server named as the replication source.
case "$FromServerID" in
   ("$P4MASTER_ID") TargetPort="$P4MASTERPORT";;
   (*) TargetPort="$FromServerP4PORT";;
esac

# Configurables for the new ServerID, as name=value pairs, listed in the
# order in which they are applied.
declare -a ReplicaConfigurables=(
   "P4TARGET=$TargetPort"
   "db.replication=readonly"
   "rpl.forward.all=1"
   "rpl.compress=4"
   "server=4"
   "monitor=2"
   "serviceUser=$ServiceUser"
)

# Only servers whose Services value ends in 'standby' get the journalcopy
# location setting.
if [[ "$ReplicaType" == *"standby" ]]; then
   ReplicaConfigurables+=("rpl.journalcopy.location=1")
fi

ReplicaConfigurables+=("journalPrefix=$JournalPrefix")

for Configurable in "${ReplicaConfigurables[@]}"; do
   run "$P4BIN configure set $ServerSpec#$Configurable" || ConfigureOK=0
done

# Startup commands, set next, depend on the kind of replica:
#
# * 'journalcopy' replicas (Services value of *'standby'): startup.1 is the
#   'journalcopy' command that pulls the raw P4JOURNAL file from the P4TARGET
#   server, and startup.2 is a 'pull' command with the '-L' flag that replays
#   P4JOURNAL records into the db. With the SDP, the pulled journal appears
#   as a file $LOGS/journal.N, where N is the journal counter; the
#   rpl.journalcopy.location=1 setting enables this desired behavior.
#
# * Non-journalcopy replicas (including any filtered replicas, and edge
#   servers, which are filtered by nature): startup.1 is a single 'pull'
#   command that both pulls journal chunks and replays them into the database.
#--------------------------------------------------------------
# Set the startup.N commands for the new ServerID.
#
# Servers with a Services value ending in 'standby' use journalcopy:
# startup.1 copies the journal and startup.2 replays it locally with
# 'pull -L', leaving startup.3 through startup.7 for archive pull threads.
# All other types use a single metadata 'pull' as startup.1, leaving
# startup.2 through startup.6 for archive pull threads.
#
# The decision keys on $ReplicaType (the Services value) so that it agrees
# with the test used for rpl.journalcopy.location, rather than on the wording
# of the human-readable description.
if [[ "$ReplicaType" == *"standby" ]]; then
   if [[ "$NO_OP" -eq 0 ]]; then
      vmsg "Executing: $P4BIN configure set $ServerSpec#startup.1='journalcopy -i 0'"
      # shellcheck disable=SC2140
      "$P4BIN" configure set "$ServerSpec#startup.1"="journalcopy -i 0" || ConfigureOK=0
      vmsg "Executing: $P4BIN configure set $ServerSpec#startup.2='pull -i 1 -L'"
      # shellcheck disable=SC2140
      "$P4BIN" configure set "$ServerSpec#startup.2"="pull -i 1 -L" || ConfigureOK=0
   else
      vmsg "NO_OP: Would execute: $P4BIN configure set $ServerSpec#startup.1=\"journalcopy -i 0\""
      vmsg "NO_OP: Would execute: $P4BIN configure set $ServerSpec#startup.2=\"pull -i 1 -L\""
   fi
   StartupCmdNumFirst=3
   StartupCmdNumLast=7
else
   if [[ "$NO_OP" -eq 0 ]]; then
      vmsg "Executing: $P4BIN configure set $ServerSpec#startup.1='pull -i 1'"
      # shellcheck disable=SC2140
      "$P4BIN" configure set "$ServerSpec#startup.1"="pull -i 1" || ConfigureOK=0
   else
      vmsg "NO_OP: Would execute: $P4BIN configure set $ServerSpec#startup.1=\"pull -i 1\""
   fi
   StartupCmdNumFirst=2
   StartupCmdNumLast=6
fi

# Full replicas get five archive pull threads ('pull -u') in the startup
# slots reserved above; metadata-only replicas share archives instead.
if [[ "$MetadataOnly" -eq 0 ]]; then
   run "$P4BIN configure set $ServerSpec#lbr.replication=readonly" || ConfigureOK=0

   for ((i = StartupCmdNumFirst; i <= StartupCmdNumLast; i++)); do
      if [[ "$NO_OP" -eq 0 ]]; then
         vmsg "Executing: $P4BIN configure set $ServerSpec#startup.$i='pull -i 1 -u'"
         # shellcheck disable=SC2140
         "$P4BIN" configure set "$ServerSpec#startup.$i"="pull -i 1 -u" || ConfigureOK=0
      else
         vmsg "NO_OP: Would execute: $P4BIN configure set $ServerSpec#startup.$i='pull -i 1 -u'"
      fi
   done
else
   run "$P4BIN configure set $ServerSpec#lbr.replication=shared" || ConfigureOK=0
fi

if [[ $ConfigureOK -eq 1 ]]; then
   msg "Verified: All configurables were set OK."
   run "$P4BIN configure show allservers" "Showing all persistent configurables." 0 1 0
else
   bail "Errors encountered setting configurables. See the output above. Aborting."
fi

#--------------------------------------------------------------
msg "${H}\\nPhase 1.3: Create replica service user $ServiceUser."

# Spec fields must begin in column 0, so write one field per line, each
# followed by a blank line. A leading '#' is stripped from MAILFROM.
printf '%s\n\n' \
   "User: $ServiceUser" \
   "Email: ${MAILFROM#\#}" \
   "FullName: Replication Server User for $ServerSpec" \
   "Type: service" \
   "AuthMethod: perforce" > "$ServiceUserSpecFile" ||
   bail "Failed to initialize user spec file [$ServiceUserSpecFile]."

vmsg "Contents of $ServiceUserSpecFile:"
vmsg "${H}"
[[ $VERBOSITY -gt 2 ]] && cat "$ServiceUserSpecFile"
vmsg "${H}"

if [[ "$NO_OP" -eq 0 ]]; then
   "$P4BIN" user -f -i < "$ServiceUserSpecFile" ||
      bail "Failed to load user spec from file: $ServiceUserSpecFile\\n$(cat "$ServiceUserSpecFile")\\n"
else
   msg "NO_OP: Would run: $P4BIN user -f -i .LT. ${ServiceUserSpecFile##*/}:\\nContents of ${ServiceUserSpecFile##*/}:\\n$(grep -v '^#' "$ServiceUserSpecFile")\\n"
fi

ServicePasswdFile="$TmpDir/.p4passwd.$P4SERVER.$ServiceUser"
touch "$ServicePasswdFile" || bail "Failed to initialize password file $ServicePasswdFile."

# Generate a 32-character random password. The current time alone is
# guessable, so bytes from /dev/urandom are mixed in when that device is
# readable. The strongest available digest utility is used: sha256sum, else
# md5sum, else sum as a last resort.
if [[ -n "$(command -v sha256sum)" ]]; then
   RandomPassword=$({ date +%s; head -c 64 /dev/urandom 2>/dev/null; } | sha256sum | base64 | head -c 32)
elif [[ -n "$(command -v md5sum)" ]]; then
   RandomPassword=$({ date +%s; head -c 64 /dev/urandom 2>/dev/null; } | md5sum | base64 | head -c 32)
else
   RandomPassword=$({ date +%s; head -c 64 /dev/urandom 2>/dev/null; } | sum | base64 | head -c 32)
fi

# Restrict permissions before the password is written. The password appears
# twice because 'p4 passwd' reads the new password and its confirmation.
chmod 600 "$ServicePasswdFile"
echo "$RandomPassword" > "$ServicePasswdFile"
echo "$RandomPassword" >> "$ServicePasswdFile"

if [[ "$NO_OP" -eq 0 ]]; then
   msg "Setting password for service user $ServiceUser."
   "$P4BIN" passwd "$ServiceUser" < "$ServicePasswdFile"
else
   msg "NO_OP: Would run: $P4BIN passwd $ServiceUser .LT. $ServicePasswdFile"
fi

# Optionally create a service user for the server being replicated from,
# using the same steps and the same generated password as above.
if [[ "$CreateMasterServiceUser" -eq 1 ]]; then
   MasterServiceUser=svc_${FromServerID}
   MasterServiceUserSpecFile="${TmpDir}/${MasterServiceUser}.user.p4s"
   msg "Creating service $MasterServiceUser for master server spec."

   printf '%s\n\n' \
      "User: $MasterServiceUser" \
      "Email: ${MAILFROM#\#}" \
      "FullName: Server User for $FromServerID" \
      "Type: service" \
      "AuthMethod: perforce" > "$MasterServiceUserSpecFile" ||
      bail "Failed to initialize user spec file [$MasterServiceUserSpecFile]."

   vmsg "Contents of $MasterServiceUserSpecFile:"
   vmsg "${H}"
   [[ $VERBOSITY -gt 2 ]] && cat "$MasterServiceUserSpecFile"
   vmsg "${H}"

   if [[ "$NO_OP" -eq 0 ]]; then
      "$P4BIN" user -f -i < "$MasterServiceUserSpecFile" ||
         bail "Failed to load user spec from file: $MasterServiceUserSpecFile\\n$(cat "$MasterServiceUserSpecFile")\\n"
   else
      msg "NO_OP: Would run: $P4BIN user -f -i .LT. ${MasterServiceUserSpecFile##*/}:\\nContents of ${MasterServiceUserSpecFile##*/}:\\n$(grep -v '^#' "$MasterServiceUserSpecFile")\\n"
   fi

   MasterServicePasswdFile="$TmpDir/.p4passwd.$P4SERVER.$MasterServiceUser"
   touch "$MasterServicePasswdFile" ||
      bail "Failed to initialize password file $MasterServicePasswdFile."

   chmod 600 "$MasterServicePasswdFile"
   echo "$RandomPassword" > "$MasterServicePasswdFile"
   echo "$RandomPassword" >> "$MasterServicePasswdFile"

   if [[ "$NO_OP" -eq 0 ]]; then
      msg "Setting password for master service user $MasterServiceUser."
      "$P4BIN" passwd "$MasterServiceUser" < "$MasterServicePasswdFile"
   else
      msg "NO_OP: Would run: $P4BIN passwd $MasterServiceUser .LT. $MasterServicePasswdFile"
   fi
fi

#--------------------------------------------------------------
msg "${H}\\nPhase 1.4: Make replica service user a super user with unlimited timeout."

msg "Checking if Protections table references group $ServiceUsersGroup."

# When UpdateProtections is set, append a 'super' entry for the service
# users group to the current Protections table and load the result.
if [[ "$UpdateProtections" -eq 1 ]]; then
   msg "Adding protections table entry to reference group $ServiceUsersGroup."
   "$P4BIN" protect -o | grep -v '^#' | grep -v '^Update:' > "$ProtectsFile" ||
      bail "Failed to dump protections to tmp file: $ProtectsFile"

   echo -e "\\tsuper group $ServiceUsersGroup * //..." >> "$ProtectsFile" ||
      bail "Failed to update file: $ProtectsFile"

   vmsg "Contents of updated $ProtectsFile:"
   vmsg "${H}"
   [[ $VERBOSITY -gt 3 ]] && cat "$ProtectsFile"
   vmsg "${H}"

   if [[ "$NO_OP" -eq 0 ]]; then
      "$P4BIN" protect -i < "$ProtectsFile" ||
         bail "Failed to load updated Protections table from file: $ProtectsFile"
   else
      msg "NO_OP: Would run: $P4BIN protect -i .LT. ${ProtectsFile##*/}"
   fi
fi

msg "Checking if service user $ServiceUser is in service users group $ServiceUsersGroup."

# shellcheck disable=SC2143
if [[ -n $("$P4BIN" groups "$ServiceUser" | grep "^$ServiceUsersGroup$") ]]; then
   msg "Verified: Service user $ServiceUser is in service users group $ServiceUsersGroup."
else
   # This logic will create the group spec for service users if it does not
   # already exist, or add our new service user to the group if it already
   # exists. The 'p4 group -o' command generates a valid group spec whether
   # the spec actually exists on the server or not.
   #
   # The sed script changes 43200 to 'unlimited' and replaces the last line
   # of the generated spec with the tab-indented user name(s).
   if [[ "$CreateMasterServiceUser" -eq 1 ]]; then
      msg "Adding service users $ServiceUser and $MasterServiceUser to group $ServiceUsersGroup."
      "$P4BIN" group -o "$ServiceUsersGroup" | grep -v '^#' |\
         sed "s:43200:unlimited:g;\$ s/.*/\t$ServiceUser\n\t$MasterServiceUser/" > "$GroupSpecFile" ||
         bail "Failed to update group spec file: $GroupSpecFile"
   else
      msg "Adding service user $ServiceUser to group $ServiceUsersGroup."
      "$P4BIN" group -o "$ServiceUsersGroup" | grep -v '^#' |\
         sed "s:43200:unlimited:g;\$ s/.*/\t$ServiceUser/" > "$GroupSpecFile" ||
         bail "Failed to update group spec file: $GroupSpecFile"
   fi

   vmsg "Contents of $GroupSpecFile:"
   vmsg "${H}"
   [[ $VERBOSITY -gt 3 ]] && cat "$GroupSpecFile"
   vmsg "${H}"

   if [[ "$NO_OP" -eq 0 ]]; then
      "$P4BIN" -s group -i < "$GroupSpecFile" ||
         bail "Failed to load group spec from file: $GroupSpecFile\\n$(cat "$GroupSpecFile")\\n"
   else
      msg "NO_OP: Would run: $P4BIN -s group -i .LT. ${GroupSpecFile##*/}\\nContents of ${GroupSpecFile##*/}:\\n$(grep -v '^#' "$GroupSpecFile")\\n"
   fi
fi

#--------------------------------------------------------------
# Wrap up Phase 1. If a custom post-processing command is defined, run it;
# otherwise display numbered, step-by-step guidance for the manual Phase 2.
# N is the running step number; SampleCheckpoint is an illustrative
# checkpoint path used in the generated instructions.
if [[ $OverallReturnStatus -eq 0 ]]; then
   declare -i N=1
   declare SampleCheckpoint=

   msg "${H}\\nAll automated processing in Phase 1 completed successfully.\\n\\n"

   if [[ -n "$PostOpCmd" ]]; then
      msg "\\nCustom Post-Processing Phase: Executing custom post-processing command:\\n\\t$PostOpCmd"

      # PostOpCmd is a command line held in a string; it is deliberately
      # left unquoted so that it splits into a command and its arguments.
      if $PostOpCmd; then
         msg "\\nThe custom post-processing command indicated success."
      else
         errmsg "\\nAlthough the primary processing was successful, the custom post-mkrep command indicated failure."
         OverallReturnStatus=1
      fi
   else
      msg "${H}\\nNow Phase 2, manual operations, can begin. This can occur immediately or or long\\nafter Phase 1. Use the most current checkpoints when Phase 2 is executed.\\n"

      msg "First, create a seed checkpoint. Every edge or replica needs an initial seed checkpoint.\\n"

      if [[ "$FromServerID" == "p4d_edge"* && "$ReplicaTypeTag" == "ha"* ]]; then
         msg "STEP $N. Login as ${OSUSER} on the server machine ${FromServerHost} where the edge server (ServerID=$FromServerID) is running."
         N+=1
      else
         msg "STEP $N. Login as ${OSUSER} on the master server machine (${P4MASTERHOST})."
         N+=1
      fi

      msg "STEP $N. Set your shell environment with:\\n\\tcd $P4CBIN\\n\\tsource p4_vars $SDPInstance\\n"
      N+=1

      if [[ "$ReplicaTypeTag" == "ffr" ]]; then
         msg "STEP $N. Define replication filtering. If you choose to filter by using ArchiveDataFilter and/or ClientDataFilter fields of the server spec, make those changes:\\n\\tp4 server $ServerSpec\\n\\nAlternately, if you choose to filter by database table, use 'p4 configure' commands to modify the $ServerSpec#startup.<n> settings related to the ServerID, adding the '-T' flag to the single 'pull' startup command that pulls metadata.\\n"
         N+=1
      fi

      if [[ "$ReplicaTypeTag" == "edge" || "$ReplicaTypeTag" == "ffr" ]]; then
         msg "STEP $N. Do a journal rotation to update offline_db:\\n\\trotate_journal.sh ${SDPInstance}\\n\\nThis should take only a few minutes, although may take longer for very large data sets (perhaps up to 30 minutes). The duration is driven by the time it takes to do a journal rotation and replay the rotated journal to the offline_db. This will typically be a small fraction of the duration of a checkpoint.\\n\\n"
         N+=1
      fi

      # Seed checkpoint creation steps differ by replica type.
      if [[ "$ReplicaTypeTag" == "edge" ]]; then
         SampleCheckpoint="/p4/${SDPInstance}/checkpoints/p4_${SDPInstance}.${ServerSpec#p4d_}.seed.NNNN.gz"
         msg "STEP $N. As this is an edge server, create the special edge seed checkpoint:\\n\\tnohup edge_dump.sh ${SDPInstance} ${ServerSpec} < /dev/null > /dev/null 2>&1 &\\n"
         N+=1
         msg "STEP $N. Monitor the log until successful completion:\\n\\ttail -f \$(ls -t \$LOGS/edge_dump.*.log|head -1)\\n"
         N+=1
      elif [[ "$ReplicaTypeTag" == "ffr" ]]; then
         SampleCheckpoint="/p4/${SDPInstance}/checkpoints/p4_${SDPInstance}.${ServerSpec#p4d_}.seed.ckp.gz"
         msg "STEP $N. As this is a filtered replica, create the special filtered seed checkpoint:\\n\\tnohup p4d_${SDPInstance} -r /p4/${SDPInstance}/offline_db -P $ServerSpec -J off -Z -jd $SampleCheckpoint < /dev/null > \$LOGS/seed.$ServerSpec.log 2>&1 &\\n"
         N+=1
         msg "STEP $N. Monitor the log until successful completion:\\n\\ttail -f \$LOGS/seed.${ServerSpec}.log\\n"
         N+=1
      elif [[ "$FromServerID" == "p4d_edge"* && "$ReplicaTypeTag" == "ha"* ]]; then
         msg "STEP $N. As this a replica of an edge server, first take a checkpoint on the edge server by running this script:\\n\\trequest_replica_checkpoint.sh ${SDPInstance}\\n\\nThis will execute instantly, as it only requests a checkpoint on the next journal rotation of the master server, and does not directly start checkpoint processing."
         N+=1
         msg "STEP $N. Login as ${OSUSER} on the master server machine ${P4MASTERHOST}."
         N+=1
         msg "STEP $N. Do a journal rotation on the master to trigger the edge server to start\\nits checkpoint. This should take only a few minutes, although may take longer\\nfor very large data sets (perhaps up to 30 minutes). The duration is driven by\\nthe time it takes to do a journal rotation and replay the rotated journal to the\\noffline_db. This will typically be a small fraction of the duration of a checkpoint operation. Do the journal rotation with this command:\\n\\trotate_journal.sh ${SDPInstance}\\n\\n"
         N+=1
         msg "STEP $N. Back on the server machine on which you are creating the edge\\ncheckpoint, monitor the checkpoint until completion. Do so by using the 'watch'\\ncommand to monitor an 'ls -lrt' command to observe the new checkpoint being\\ncreated, and looking out for the creation of a *.md5 file. When the new *.md5\\nfile is created, you know the checkpoint get created. The 'watch' command would look like:\\n\\twatch ls -lrt $JournalPrefix*\\n\\n"
         N+=1
      else
         SampleCheckpoint="/p4/${SDPInstance}/checkpoints/p4_${SDPInstance}.ckp.NNNN.gz"
         msg "STEP $N. Create a new regular checkpoint to seed the replica. Execute this command:\\n\\tnohup daily_checkpoint.sh $SDPInstance < /dev/null > /dev/null 2>&1 &\\n\\nNote: This step can be skipped if you choose to wait until the next regular daily checkpoint is created before proceeding on to PART 2."
         N+=1
         msg "STEP $N. Monitor the checkpoint.log file until successful completion:\\n\\ttail -f \$LOGS/checkpoint.log\\n"
         N+=1
      fi

      msg "\\n=== PART 2 - Load Checkpoint on Replica ===\\n"

      msg "STEP $N. Login as ${OSUSER}@${ReplicaHost}."
      N+=1

      msg "STEP $N. Set your environment with:\\n\\tcd /p4/common/bin\\n\\tsource p4_vars $SDPInstance\\n"
      N+=1

      # Checkpoint copy steps differ by replica type.
      if [[ "$ReplicaTypeTag" == "edge" ]]; then
         msg "STEP $N. Copy the edge seed checkpoint file created in the steps above from\\n${P4MASTERHOST}:${CHECKPOINTS}. Successfully completed checkpoint files have a\\ncorresponding *.md5 file which must also be copied. That might look like:\\n\\tcd $CHECKPOINTS\\n\\tscp -p $P4MASTERHOST:${SampleCheckpoint/gz/gz.md5} .\\n\\tscp -p ${P4MASTERHOST}:${SampleCheckpoint} .\\n\\nReplace NNNN with the appropriate journal counter number.\\n"
         N+=1
      elif [[ "$ReplicaTypeTag" == "ffr" ]]; then
         msg "STEP $N. Copy the filtered replica seed checkpoint file and created in the steps above from\\n${P4MASTERHOST}:${CHECKPOINTS}. Successfully completed checkpoint files have a corresponding\\n*.md5 file which must also be copied. That might look like:\\n\\tcd $CHECKPOINTS\\n\\tscp -p ${P4MASTERHOST}:${SampleCheckpoint}.md5 .\\n\\tscp -p ${P4MASTERHOST}:${SampleCheckpoint} .\\n"
         N+=1
      elif [[ "$FromServerID" == "p4d_edge"* && "$ReplicaTypeTag" == "ha"* ]]; then
         SampleCheckpoint="/p4/$SDPInstance/checkpoints.${FromServerID#p4d_}/p4_${SDPInstance}.${FromServerID#p4d_}.ckp.NNN.gz"
         msg "STEP $N: Copy the checkpoint from the edge server machine ($FromServerHost)\\nto the new edge HA machine. As $OSUSER@${ReplicaHost}, that might look like:\\n\\tcd /p4/$SDPInstance/checkpoints.${FromServerID#p4d_}\\n\\tscp -p ${FromServerHost}:${SampleCheckpoint/gz/md5} .\\n\\tscp -p ${FromServerHost}:${SampleCheckpoint} .\\n\\n"
         N+=1
      else
         msg "STEP $N. Copy the latest regular checkpoint file created in the steps above from\\n${P4MASTERHOST}:${CHECKPOINTS}. Successfully completed checkpoint files have a corresponding\\n*.md5 file which must also be copied. That might look like this:\\n\\tcd \$CHECKPOINTS\\n\\tscp -p ${P4MASTERHOST}:${SampleCheckpoint/gz/md5} .\\n\\tscp -p ${P4MASTERHOST}:${SampleCheckpoint} .\\n\\nReplace NNNN with the appropriate journal counter number.\\n"
         N+=1
      fi

      msg "STEP $N. Create $P4ROOT/server.id file like so:\\n\\techo $ServerSpec > $P4ROOT/server.id\\n"
      N+=1

      if [[ "$ReplicaTypeTag" == "ha"* && "$FromServerID" != "p4d_edge"* ]]; then
         msg "STEP $N. As this machine is a potential target for a 'p4 failover' from the master, it will need a license\\nfile. The IP address in the license file should match that returned by running\\nthe command 'hostname -I' on the replica server machine. The license file should\\nbe copied to this file on that machine: $P4ROOT/license\\n"
         N+=1
      fi

      msg "STEP $N. Verify that you have enough disk space, e.g. with:\\n\\tdf -h $P4ROOT\\n\\nAt least 30x (zipped checkpoint size) is recommended.\\n"
      N+=1

      msg "STEP $N: Login super super user and replication service user to P4TARGET server, like so:\\n\\tp4 -p $TargetPort login -a < $SDP_ADMIN_PASSWORD_FILE\\n\\tp4 -p $TargetPort login $ServiceUser\\n\\nNote: Do not try to use the SDP p4login script, as tries to detect the service user from data in P4ROOT which isn't there yet.\\n"
      N+=1

      msg "STEP $N. Load the checkpoint like so:\\n\\tnohup load_checkpoint.sh $SampleCheckpoint -i ${SDPInstance} -y < /dev/null > /dev/null 2>&1 &\\n"
      N+=1

      msg "STEP $N. Monitor the log until successful completion:\\n\\ttail -f \$(ls -t \$LOGS/load_checkpoint.*.log|head -1)\\n"
      N+=1

      if [[ "$MetadataOnly" -eq 0 ]]; then
         msg "STEP $N. OPTIONAL: Kick off a verify to pull over archive files:\\n\\tnohup p4verify.sh $SDPInstance < /dev/null > /dev/null 2>&1 &\\n\\nWait about one minute, then check that it is off to a good start:\\n\\ttail \$LOGS/p4verify.log. That may run for a long while depending on the scale of the versioned file tree."
         N+=1
      fi
   fi
else
   msg "${H}\\nProcessing completed, but with errors. Scan above output carefully.\\n"
fi

# Illustrate using $SECONDS to display runtime of a script.
msg "That took $((SECONDS/3600)) hours $((SECONDS%3600/60)) minutes $((SECONDS%60)) seconds.\\n"

# See the terminate() function, which is really where this script exits.
exit "$OverallReturnStatus"
# | Change | User | Description | Committed | |
---|---|---|---|---|---|
#76 | 30419 | C. Thomas Tyler | Restored mkrep.sh#74 so Mark can continue work on '-p4config' option. | ||
#75 | 30376 | C. Thomas Tyler |
Made '-p4config' an undoc feature for now as testing is still in progress and we're shipping SDP r2024.1 now-ish. It has been regression tested and existing functionality works as expected. #review-30377 @mark_zinthefer |
||
#74 | 30355 | C. Thomas Tyler | Fixed usage message typo propagated from similar typo in p4verify.sh. | ||
#73 | 30354 | C. Thomas Tyler | Fixed quoting issue with P4BIN assignment in mkrep.sh. | ||
#72 | 30351 | C. Thomas Tyler |
In mkrep.sh, added some guardrails on setting of 'monitor' and 'serveri' configurables, deferring to commit server values if those are good enough. #review-30352 |
||
#71 | 30345 | Mark Zinthefer |
Added code to handle p4config files. No testing done yet. |
||
#70 | 30321 | C. Thomas Tyler | Fixed unbound variable error in mkrep.sh | ||
#69 | 30241 | C. Thomas Tyler |
mkrep.sh now adds ReplicatingFrom: field to all generated server specs. Previously the ReplicatingFrom: field was only added where required by p4d, i.e. for standby type replicas. However, it's useful to have for all server specs. Driving reason: This is now required for HMS orchestrated global topology upgrades, as it uses the ReplicatingFrom: field of the server spec to determine the outer-to-inner order of upgrades. |
||
#68 | 30238 | C. Thomas Tyler | mkrep.sh: Adapted to use documented usage tip for load_checkpoint.sh; use load.log. | ||
#67 | 30155 | C. Thomas Tyler |
mkrep.sh v3.2.2: Fixed doc typos. Non-functional change. Thanks, Andrei! |
||
#66 | 30012 | C. Thomas Tyler |
mkrep.sh v3.2.1: * Enhanced error handling if 2+ 'standard' or 'commit' server specs exist. * Checks for and gives error for condition where P4MASTER_ID is wrong. #review @robert_cowham @mark_zinthefer |
||
#65 | 29533 | C. Thomas Tyler |
Corrected journalPrefix for a standard 'ha' replica in new logic that better handles 'ham' replicas. Fix to unreleased dev-branch version. |
||
#64 | 29470 | C. Thomas Tyler |
Fixed bad ServerID if '-f <FromServerID>' was specified for master ServerID. Added preflight check for running on commit server (P4MASTER_ID == SERVERID). Fixed bug with infer_type_tag() needed if '-f <FromServerID>' is not edge. Simplified docs for '-f' option. #review-29471 @lee_marzke |
||
#63 | 29336 | C. Thomas Tyler |
Tweaks to output (error from copy/paste). This is a harmless non-functional change. |
||
#62 | 29303 | C. Thomas Tyler |
mkrep.sh v3.1.0: * Added infer_type_tag() function with logic to determine if P4TARGET is filtered or not. * Discouraged use of 'ro' and 'rom' types (as they don't support 'p4 failover'). * Clarified usage to indicate daisy chaining is acceptable from forwarding replicas. * Added preflight check to avoid accidental overwrite of existing server spec. * Added '-os' option to overwrite existing server spec intentionally. * Re-added test for mkrep.sh to default test suite after fixing the test. * Added '-N' option to fully support server spec naming standard that allows for things like 'p4d_edge2_syd' and 'p4d_edge3_syd'. Added docs for same. #review-29304 |
||
#61 | 28619 | C. Thomas Tyler |
Fixed bug in mkrep.sh SSL detection logic. #review-28620 |
||
#60 | 28595 | C. Thomas Tyler | For mkrep.sh: Added hooks to support custom pre- and post-processing. | ||
#59 | 28535 | C. Thomas Tyler |
Fixed typo ('super super') and added some missing dots in the instructions for calling scp. |
||
#58 | 28207 | C. Thomas Tyler |
mkrep.sh no longer requires the '-i <SDP_Instance>' parameter if the SDP_INSTANCE variable is defined. #review-28205 |
||
#57 | 28195 | C. Thomas Tyler | Refined location of SiteTags.cfg.sample file. | ||
#56 | 28193 | C. Thomas Tyler |
Renamed sample files (e.g. SiteTags.cfg) in SDP tarball tree, appending a .sample tag, to make rsync overlay of /p4/common/config safe. Updated related docs in mkrep.sh referring to the sample file, to improve an error message to guide the user to find the sample file. #review-28194 |
||
#55 | 27987 | C. Thomas Tyler |
Doc correction. Non-functional change. |
||
#54 | 27897 | C. Thomas Tyler |
Fixed typo in last change. #review-27898 @d_benedict |
||
#53 | 27895 | C. Thomas Tyler |
Clarified intent of '-r' flag value in docs. This is a doc-only change. #review-27896 @d_benedict |
||
#52 | 27894 | C. Thomas Tyler |
Non-functional internal comment correction in mkrep.sh. Comments implied that HA replicas must be configured as 'Mandatory' (i.e. using the 'mandatory' value in the 'Options:' field of the server spec. The use of 'mandatory' is a choice with trade-offs, not an absolute. |
||
#51 | 27722 | C. Thomas Tyler |
Refinements to @27712: * Resolved one out-of-date file (verify_sdp.sh). * Added missing adoc file for which HTML file had a change (WorkflowEnforcementTriggers.adoc). * Updated revdate/revnumber in *.adoc files. * Additional content updates in Server/Unix/p4/common/etc/cron.d/ReadMe.md. * Bumped version numbers on scripts with Version= def'n. * Generated HTML, PDF, and doc/gen files: - Most HTML and all PDF are generated using Makefiles that call an AsciiDoc utility. - HTML for Perl scripts is generated with pod2html. - doc/gen/*.man.txt files are generated with .../tools/gen_script_man_pages.sh. #review-27712 |
||
#50 | 27640 | C. Thomas Tyler |
Clarify -f flag is only required for edge servers as upstream. #review-27612 |
||
#49 | 27634 | C. Thomas Tyler | Fixed typo in doc; non-functional change. | ||
#48 | 27633 | C. Thomas Tyler |
Tweak in error message; changed from: So if '-f <FromServerID>' is specified with '-t ha' to: So if '-f <FromServerID>' is specified with '-t ha[m]' The error message references the '-t ha', but could also be '-t ham' (for a metadata-only HA replica). |
||
#47 | 27266 | C. Thomas Tyler |
mkrep.sh v2.7.2: * Added smarts to drop info about pulling archive files if replica is set up as a metadata-only replica. * Fixed grammar error in instructions. |
||
#46 | 27265 | C. Thomas Tyler |
mkrep.sh v2.7.1: * Fixed issue with setting P4TARGET. * Fixed doc typos. * Corrected bad use of 'mandatory'. |
||
#45 | 27250 | C. Thomas Tyler |
Adjusted JournalPrefix standard to account for shared /hxdepots. The JournalPrefix standard now allows for unfiltered replicas (such as HA/DR replicas) to use same journalPrefix value as filtered replicas and edge servers, using per-ServerID checkpoints folder, if they share the same /hxdepots (e.g. NFS-mounted) with the master (e.g. when lbr.replication=shared). Related code change made to mkdirs.sh and mkrep.sh to support the tweaks to the standard. #review-27251 |
||
#44 | 27103 | C. Thomas Tyler |
Removed 'Mandatory' from Description in ServerID in mkrep.sh. The word 'Mandatory' was removed from the generated Description field of the server spec for the HA server. The use of 'Mandatory' is a valid choice, but since the Description field isn't updated dynamically and 'Mandatory' isn't a required part of the nature of an HA replica, it was removed from the Description. |
||
#43 | 27071 | C. Thomas Tyler |
Added Example 3, creating a replica of an edge server. Now fully supports workflow for creating a replica of an edge server. Fixed issue where P4TARGET was set incorrectly when '-f <FromServerID>' is specified. Updated output to more clearly delineate when automated processing completes and when humans pick up the baton. Updated generated guidance to include steps needed when creating a replica of an edge server. General doc enhancements, including adding REPLICA SERVER MACHINE SETUP section. #review-27072 |
||
#42 | 27066 | C. Thomas Tyler |
mkrep.sh v2.5.2: * Corrected guidance suggesting logging a service user with 'p4 login -a'; the '-a' flag doesn't apply to service users. Also added a note on why the SDP 'p4login' can't be used in that step. |
||
#41 | 27053 | C. Thomas Tyler | Corrected Version id issue. | ||
#40 | 27050 | C. Thomas Tyler |
mkrep.sh v2.5.0: * Added doc references to SDP Server Spec and journalPrefix standards. * Updated journalPrefix value definition to follow the standard. * Removed '-si' "Silent Mode" option, a carry over from the template, as there is no need for it in this script. * Removed display of current SiteTags from '-man' output as it won't generate reliably for AsciiDoc. * Removed '-ssh_opts' and '-skip_ssh' flags from doc. * Removed 'mandatory' from definition of HA replica. This is now deemed a valid configuration, but not part of the definition of HA. * Removed extraneous text related to 'full setup mode' (from a custom installed version). [A future full setup may be added relying on the Helix Installer.] * Removed including of backup_functions.sh, and dropped logic handled better by the 'p4login' script. * Generated passwords for service users are now discarded. * Fixed various bugs in error messages. To Do in a follow-on changelist: * Update test_MultiSDP.py (which tests mkrep.sh) #review-27051 |
||
#39 | 27010 | C. Thomas Tyler |
Fixed issue generating man page on Mac where 'sha256sum' is not as readily available. Since this is just used to generate a random password, fallback logic is now used: 'sha256sum' is used if available, else 'md5sum' is used if it is available, else 'sum' is used as a last resort. |
||
#38 | 26718 | Robert Cowham |
Rename P4MASTER to P4MASTERHOST for clarity with comments in: - mkdirs.cfg/mkdirs.sh - p4_<instance>.vars - other files which reference Remove unnecessary sed for p4p.template |
||
#37 | 26637 | Robert Cowham |
Include script help within doc Requires a couple of tags in the scripts themselves. |
||
#36 | 26445 | C. Thomas Tyler |
mkrep.sh v2.4.x: * ServerID for master server spec now created if needed. * Service user for master ServerID created and password set if needed. * Enhanced preflight checks to do all checks before bailing. * Moved check for ServiceUsers group in Protections into preflight. |
||
#35 | 26381 | C. Thomas Tyler |
mkrep.sh v2.3.2: * Added '-f <From_ServerID>' flag to specify the ServerID from which to replicate, a value used for the ReplicatingFrom: field. * Added safety check to check that if '-t ha' is used with '-f', the value for '-f' refers to an edge server. * Added logic to replace the default value for ReplicatingFrom, replacing naming-convention based logic with logic to dynamically determine the value. This is done by finding the server spec with Services of 'commit-server' or or 'standard', and using it. Or, if that bares no fruit, doing 'p4 info' on the current server to extract the ServerID. * Enhancements to guidance info for creating edge servers and filtered forwarding replicas, with details on using edge_dump.sh or generating filtered replica checkpoints. * Doc content and formatting improvements. * Maintained shellcheck compliance. |
||
#34 | 26139 | Robert Cowham | Allow mkrep.sh to create a replica from a replica. | ||
#33 | 26089 | Robert Cowham |
Fix message which refers to p4d instead of p4 for login. Means it can't be copy/pasted currently. |
||
#32 | 25800 | C. Thomas Tyler |
mkrep.sh will no longer allow a standby replica to initially be 'mandatory' on startup, as required by p4d 2019.1. If it was intended to be mandatory (i.e. has an 'ha' in the ServerID), a new warning message is displayed describing why converting it to be mandatory should be considered after it is caught up. #review @michael_shields |
||
#31 | 25796 | C. Thomas Tyler |
mkrep.sh will no longer allow a standby replica to initially be 'mandatory' on startup, as required by p4d 2019.1. If it was intended to be mandatory (i.e. has an 'ha' in the ServerID), a new warning message is displayed describing why converting it to be mandatory should be considered after it is caught up. #review @michael_shields |
||
#30 | 25567 | Robert Cowham | Remove -b 5 on journalcopy threads - not required | ||
#29 | 25561 | Robert Cowham | Fix minor typos | ||
#28 | 25557 | Robert Cowham | Fix failing test | ||
#27 | 25550 | C. Thomas Tyler |
Split HMS from SDP, such that HMS will live on as a separate product, layered on the SDP. See: https://swarm.workshop.perforce.com/jobs/SDP-356 Many of the files deleted here have already been copied to HMS. See these HMS changes: https://swarm.workshop.perforce.com/changes/25531 https://swarm.workshop.perforce.com/changes/25533 And this branch spec: SDP_Split_2019.2_HMS In mkrep.sh, HMS-awareness is removed from mkrep.sh, a core SDP script, and the '-f' (Full Setup) flag has been temporarily removed. This useful functionality will be restored in an HMS script. This is a big change, so shellcheck v0.6.0 was implemented to support it. |
||
#26 | 25258 | Robert Cowham |
Rework the containers in preparation for multi container testing mkrep changes: Remove -c cfg option which was unused anyway Converted tabs to spaces Fixed logic error causing forwarding replicas to bail due to unsupported p4d version Standby servers require ReplicatingFrom field Add seconds to log file name (useful for testing to avoid overwriting files) |
||
#25 | 25210 | Robert Cowham |
Bail on server error #review @tom_tyler |
||
#24 | 25204 | Robert Cowham |
Fix problem with creating server spec #review @tom_tyler |
||
#23 | 25161 | C. Thomas Tyler |
Tweaked replication per input from Perforce Engineering: The 'startup.2' command for a standby replica that does a 'p4 pull -i 1 -L' is a local operation, so the '-b 5' network retry flags were nonsensical and have been removed. #review-25162 @michael_shields |
||
#22 | 25156 | C. Thomas Tyler |
Fixed logic issue handling replicas, so P4D2018.2+ is required only for 'ha' type replicas. Also tweaked to ensure ReplicationFrom: and Options: fields are not applied to server specs for possibly older P4D versions that don't support them. #review-25157 @robert_cowham |
||
#21 | 25152 | Robert Cowham | If bailing then include line no of error from calling script | ||
#20 | 24869 | C. Thomas Tyler |
Various enhancements to mkrep.sh, including adding support for 2018.2 'ha' failover replicas. Moved default location for mkrep.cfg to /p4/common/config. Now references global SDP variable $SDP_ADMIN_PASSWORD_FILE to define location of the admin password file. |
||
#19 | 24248 | C. Thomas Tyler |
Removed all references to journals.rep value for journalPrefix for journalcopy replicas. P4D 2018.1+ is now required for creating standby replicas. Note that the SDP validator (new in a coming SDP release) will help spot journals.rep usage that should be converted when upgrading to 2018.1 of P4D/SDP. To Do: Rewrite the 'seed' stuff to use mkdirs.sh instead. |
||
#18 | 24243 | C. Thomas Tyler |
Enhanced mkrep.sh handling for journalcopy/standby replicas. Changed lbr.replication value for metadata-only replicas from 'ondemand' to 'shared'. There is no behavior change as those are synonyms, but 'shared' more clearly conveys the intent. Adjusted number of pull threads to be 5 for all full replicas (i.e. not metadata-only replicas). |
||
#17 | 23885 | C. Thomas Tyler | Fixed typo detected in code review after submit. | ||
#16 | 23881 | C. Thomas Tyler |
Set rpl.journalcopy.location=1 for 2018.1+ standby replicas. #review @michael_shields |
||
#15 | 23522 | C. Thomas Tyler |
mkrep.sh tweaks: * Documented existing '-ssh_opts' option. * Implemented and documented new '-skip_ssh' option. * Enhanced cosmetics of showing current geographic site tags. * Updated Version to 2.0.2 to reflect big changes with Chris Geen's last update, plus this small update. * Added command line parsing logic to make new '-skip_ssh' logic mutually exclusive with '-f' (full setup) flag. Bypassing pre-commit review; there are still more changes to make: * Make it so it benefits from 'Tight Ship' style of management if used (as it does not), but does not introduce a dependency on it. The last change caused a dependency on Tight Ship management; we want to remove that dependency, but keep the benefit of Tight Ship, if used. Tight Ship means using a '/p4/hms' SDP instance to version changes to things like SDP config files in /p4/common/config. #review-23523 @cgeen |
||
#14 | 23453 | cgeen |
Automation of the edge server creation with mkrep script. Two new variables into the script -f and -se. Details in the man. A valid HMS instance is needed to use the full automation, this is validated first. This works by doing a journal rotate and then syncing the last checkpoint over and the subsequent journals. Currently it only deals with new edge servers and not replicas or edge replicas. Reviewer Comment: I love what this is doing, I'm approving so I can play with it. Separately we're working on net suite enhancements, and that will need to be updated to handle this. |
||
#13 | 23266 | C. Thomas Tyler |
Fixes and Enhancements: * Enabled daily_checkpoint.sh to operate on edge servers, to keep /p4/N/offline_db current on those hosts for site-local recovery w/o requiring a site-local replica (though having a site-local replica can still be useful). * Disabled live_checkpoint.sh for edge servers. * More fully support topologies using edge servers, in both geographically distributed and horizontal scaling "workspace server" solutions. * Fix broken EDGESERVER value definition. * Modified name of SDP counter that gets set when a checkpoint is taken to incorporate ServerID, so now the counter name will look like lastSDPCheckpoint.master.1, or lastSDPCheckpoint.p4d_edge_sfo, rather than just lastSDPCheckpoint. There will be multiple such counters in a topology that uses edge servers, and/or which takes checkpoints on replicas. * Added comments for all functions. For the master server, journalPrefix remains: /p4/N/checkpoints/p4_N The /p4/N/checkpoints is reserved for writing by the master/commit server only. For non-standby (possibly filtered) replicas and edge servers, journalPrefix is: /p4/N/checkpoints.<ShortServerID>/p4_N.<ShortServerID> Here, ShortServerID is just the ServerID with the 'p4d_' prefix trimmed, since it is redundant in this context. See mkrep.sh, which enshrines a ServerID (server spec) naming standard, with values like 'p4d_fr_bos' (forwarding replica in Boston) and p4d_edge_blr (Edge server in Bangalore). So the journalPrefix for the p4d_edge_bos replica would be: /p4/N/checkpoints.edge_bos/p4_N.edge_bos For "standby" (aka journalcopy) replicas, journalPrefix is set to /p4/N/journals.rep, which is written to the $LOGS volume, due to the nature of standby replicas using journalPrefix to write active server logs to pre-rotated journals. Some take-away to be updated in docs: * The /p4/N/checkpoints folder must be reserved for checkpoints that originate on the master.
It should be safe to rsync this folder (with --delete if desired) to any replica or edge server. This is consistent with the current SDP. * I want to change 'journals.rep' to 'checkpoints.<ShortServerID>' for non-standby replicas, to ensure that checkpoints and journals taken on those hosts are written to a volume where they are backed up. * In sites with multiple edge servers, some sharing archive files ('workspace servers'), multiple edge servers will share the same SAN. So we want one checkpoints dir per ServerID, and we want that dir to be on the /hxdepots volume. Note that the journalPrefix for replicas was a fixed /p4/N/journals.rep. This was on the /hxlogs volume - a presumably fast-for-writes volume, but typically NOT backed up and not very large. This change puts it under /p4/N/checkpoints.* for edge servers and non-standby replicas, but ensures other replica types and edge servers can generate checkpoints to a location that is backed up and has plenty of storage capacity. For standby replicas only (which cannot be filtered), the journalPrefix remains /p4/N/journals.rep on the /hxlogs volume. |
||
#12 | 23230 | cgeen |
Updates to the mkrep.sh script. Adds ssh_opts for automation, by default in batch. Updates logging to point at $SDPInstance as hms might not be installed. Uses the instance pw file as it might differ from hms. |
||
#11 | 22628 | C. Thomas Tyler |
Fixed minor order-of-processing bug resulting in a harmless error appearing at the end of script processing as cleanTrash() was called to clean garbage files. The run() function was called to clean garbage files/dirs just as a directory that function depended on got cleaned up. The fix was applied to scripts that used libcore.sh, including the template.sh template script. Also corrected comments in p4u_env.sh. Bypassing pre-commit review as this has been well tested. #review-22629 |
||
#10 | 21962 | C. Thomas Tyler |
Updated various scripts to use run() and rrun() functions in favor of predecessor runCmd() and runRemoteCmd(). The older functions won't be removed to avoid breaking scripts that rely on their behavior and have no issues with them. The newer functions are more scalable and avoid erroneous "Argument list too long" from bash due to buffer overruns when used with commands with large amounts of output. Enhanced runRemoteCmd() to clean up after itself, as it generated files in /tmp that didn't get automatically cleaned up. If used in scripts called very often (e.g. every 5 minutes in a crontab), this leads to significant issues with /tmp filling up with garbage files over a period of several weeks. Enhanced test_utils.sh to test new run() and rrun() calls. |
||
#9 | 21921 | C. Thomas Tyler |
HMS change, to simplify setup on compliant platforms (i.e. those using bash 4.x). The shebang line for all bash scripts has been standardized to /bin/bash, and bash checks for $BASH_VERSION added where needed. References to having a custom-built /usr/local/bin/bash have been removed. As a trade-off, non-compliant platforms (way-old Linux and Mac using bash 3.x) will have a bit more work to do to work with shebang lines. |
||
#8 | 21030 | C. Thomas Tyler |
mkrep.sh: Added missing 'journalcopy' startup command for standby replicas. Corrected typo in script name. |
||
#7 | 20797 | C. Thomas Tyler |
Added more replica types, forwarding-standby and the metadata-only variant. Added '-L' to 'p4 pull' in startup.1 command for standby replicas. |
||
#6 | 20777 | C. Thomas Tyler |
Tweaked HMS mkrep.sh script to avoid using p4login, as p4login assumes it is being run on a server where the given SDP instance is configured. Normally the way p4login is used, that's a safe assumption. But in this case mkrep.sh is intended to be called on the HMS server, which may not actually host the SDP instance for which a replica is being made. They will have their p4_N.vars files in /p4/common/config, but may not have a /p4/N/root or even /p4/N directory on the HMS server itself. This introduces a dependency, deemed acceptable for HMS purposes, that all instances that use this mkrep.sh script have the same password for the 'perforce' super user as the hms instance, as it uses only the /p4/common/config/.p4passwd.p4_hms.admin password file (for the /p4/hms instance). |
||
#5 | 20708 | C. Thomas Tyler |
Per discussion: s/checkpoints.rep/journals.rep/g This directory name change, used in the journalPrefix configurable, is intended to clarify that the directory should be targeted to a FAST volume for use with journalcopy, rather than the LARGE volume as would be implied when using a directory with "checkpoints" in the name. |
||
#4 | 20430 | C. Thomas Tyler |
Tweak to mkrep.sh: Tweaked to use p4login script. |
||
#3 | 20363 | C. Thomas Tyler |
Removed references to legacy names for checkpoint scripts. No functional changes. Bypassing pre-commit code review. #review-20364 |
||
#2 | 20170 | Russell C. Jackson (Rusty) |
Moved password and users into the config directory to allow for instance specific users and passwords. Ran into a case where two different teams were sharing the same server hardware and needed this type of differentiation. Surprised that we haven't hit this sooner. Also defaulted mkdirs to use the numeric ports since this is the most common installation. |
||
#1 | 20154 | C. Thomas Tyler |
Added mkrep.sh script to create a replica. This current version does most of the steps of creating a replica in an SDP environment, benefitting from server spec naming standards and conventions. It then provides guidance on the remaining steps (e.g. transferring the checkpoint to the replica host, etc.) It greatly reduces the complexity of creating a replica by distilling the many potential replica types to just those few commonly used, 'fr' (forwarding replica), 'ro' (read-only replica), and 'edge'. The script is self-documenting with a '-h' (short usage) and '-man' (detailed man page) options. This first version is functional but is definitely a work in progress. It will evolve as part of the HMS project, but this part of HMS lives in the stock SDP. |