p4d_base #15

  • //
  • guest/
  • russell_jackson/
  • sdp/
  • Server/
  • Unix/
  • p4/
  • common/
  • bin/
  • p4d_base
  • View
  • Commits
  • Open Download .zip Download (12 KB)
#!/bin/bash
#==============================================================================
# Copyright and license info is available in the LICENSE file included with
# the Server Deployment Package (SDP), and also available online:
# https://swarm.workshop.perforce.com/projects/perforce-software-sdp/view/main/LICENSE
#------------------------------------------------------------------------------

# Helix Server base init script

# Positional parameters: $1 is the SDP instance name (mandatory) and $2 is the
# operating mode, which falls back to 'usage' when it is omitted.
export SDP_INSTANCE="${1:-Unset}"
export OS_INIT_MECHANISM="${OS_INIT_MECHANISM:-Unset}"

# How this script was invoked.
declare ThisScript="${0##*/}"
declare CmdLine="$0 $*"
declare OpMode="${2:-usage}"

# Integer settings and flags. StartDelay comes from SDP_START_DELAY when that
# is set, and is 2 otherwise.
declare -i StartDelay="${SDP_START_DELAY:-2}"
declare -i TailCmdUsable=0
declare -i UseSystemd=0
declare -i ExitCode=0

# String globals that receive their values further down.
declare Log= TmpDir= TmpLog= TmpJnl= MustUseSystemdMsg= ServiceName=

# Refuse to go any further without an instance name.
[[ "$SDP_INSTANCE" != "Unset" ]] || {
   echo -e "\\nError: The SDP_INSTANCE is not defined.\\n."
   exit 1
}

# Pull in the SDP-controlled shell environment for the requested instance.
# shellcheck disable=SC1091
source /p4/common/bin/p4_vars "$SDP_INSTANCE"
ExitCode=$?

(( ExitCode == 0 )) || {
   echo -e "\\nError: Failed to load SDP environment for instance $SDP_INSTANCE.\\n"
   exit 1
}

# Pull in the shared SDP function library from $P4CBIN.
# shellcheck disable=SC1090
source "$P4CBIN/backup_functions.sh"
ExitCode=$?

(( ExitCode == 0 )) || {
   echo -e "\\nError: Failed to load SDP lib $P4CBIN/backup_functions.sh.\\n"
   exit 1
}

# The environment must have supplied P4D_FLAGS; without it p4d cannot be
# started further down.
[[ "${P4D_FLAGS:-Unset}" != "Unset" ]] || {
   echo -e "\\nError: P4D_FLAGS is unset after loading SDP environment for instance $SDP_INSTANCE.\\n"
   exit 1
}

# Only root and $OSUSER may run this script. Root re-executes the same command
# line as $OSUSER through 'su'; any other account is turned away.
if [[ "$(id -u)" -ne 0 ]]; then
   if [[ "$(id -u -n)" != "$OSUSER" ]]; then
      echo "$0 can only be run by root or $OSUSER"
      exit 1
   fi
else
   exec su - "$OSUSER" -c "$0 $*"
fi

# Nothing below can work without an executable p4d binary.
[[ -x "$P4DBIN" ]] || {
   echo -e "\\nError: $P4DBIN is not executable."
   exit 2
}

# Guarantee that P4D_FLAGS carries '--pid-file', appending it when absent.
# This guards against SDP upgrades in which the
# /p4/common/config/p4_N.vars file was not refreshed from the new template.
case "$P4D_FLAGS" in
   (*--pid-file*)
      # Already present; leave the flags alone.
   ;;
   (*)
      P4D_FLAGS="$P4D_FLAGS --pid-file"
      export P4D_FLAGS
   ;;
esac

# Determine if systemd is configured for this instance.
#if [[ -n "$(command -v systemctl)" ]]; then
#   ServiceName="${P4DBIN##*/}"
#   if systemctl cat "$ServiceName" > /dev/null 2>&1; then
#      UseSystemd=1
#      MustUseSystemdMsg="\\nError: It appears an attempt was made to start or stop the $ServiceName\\nservice without using systemd. When a systemd unit file is configured, only the\\nsystemd init mechanism can be used to start and stop the service, so that\\nsystemd maintains a more reliable indication of the status of the service. Run\\nas root or as $OSUSER with sudo. For example:\\n\\n\\tsudo systemctl start $ServiceName\\n\\tsudo systemctl stop $ServiceName\\n\\tsystemctl status $ServiceName\\n\\nNote that the 'status' check does not require sudo.\\n\\nThe systemd unit file is:\\n\\n$(systemctl cat "$ServiceName")"
#   fi
#fi

# Adjust the daemonizing flag in P4D_FLAGS for newer p4d versions:
#   * without systemd, ' -d ' becomes ' --daemonsafe ';
#   * with systemd, ' -d ' and ' --daemonsafe ' are both dropped so that
#     systemd handles the process forking.
# Only the first occurrence surrounded by spaces is rewritten.
# The version test is a deliberate string comparison (strictly greater than
# "2017.1"), hence the shellcheck exemption.
# shellcheck disable=SC2072
if [[ "$P4D_VERSION" > "2017.1" ]]; then
   if (( UseSystemd != 1 )); then
      export P4D_FLAGS="${P4D_FLAGS/ -d / --daemonsafe }"
   else
      P4D_FLAGS="${P4D_FLAGS/ -d / }"
      export P4D_FLAGS="${P4D_FLAGS/ --daemonsafe / }"
   fi
fi

# Messages from this script are appended to the init log under $LOGS; the same
# path is exported as LOGFILE.
Log="$LOGS/p4d_init.log"
LOGFILE="$Log"
export LOGFILE

# Create a scratch directory. Try the plain form of mktemp first, and the
# templated form only if that did not yield a directory.
TmpDir=$(mktemp -d 2>/dev/null)
[[ -d "$TmpDir" ]] || TmpDir=$(mktemp -d -t 'tmp_p4d_base')

[[ -d "$TmpDir" ]] || {
   echo -e "\\nError: Could not initialize TmpDir [$TmpDir]\\n" | tee -a "$Log"
   exit 1
}

TmpLog="${TmpDir}/tmp.p4d_base.log"

# Record the invocation in the init log.
echo -e "\\nCalled $ThisScript called with command line:\\n$CmdLine" >> "$Log"

# set_vars is not defined in this file -- presumably it comes from the sourced
# backup_functions.sh; confirm there.
set_vars

# Decide whether 'tail' can be relied on later in this script. A tail that
# answers '--version' is taken to be usable; one that does not (e.g. on OSX)
# likely will not accept the 'tail -<number>' form used by the journal check,
# so TailCmdUsable is left as it was.
if command -v tail > /dev/null 2>&1 && tail --version > /dev/null 2>&1; then
   TailCmdUsable=1
fi

# Dispatch on the requested operating mode ($2 on the command line).
#
#   force_start  Start p4d without preflight checks, even when P4ROOT is
#                flagged as unusable.
#   start        Run preflight checks (db check, journal tail check), then
#                start p4d.
#   status       Show pid-file information and whether 'p4 info' responds.
#   admin_stop   Shut p4d down with 'p4 admin stop'.
#   stop         Shut p4d down with SIGTERM to the pid in server.pid; falls
#                back to admin_stop when there is no pid file.
#   restart      Run stop, then start.
#
# Each arm either exits directly or leaves its result in ExitCode, which is
# what the script exits with after this statement.
case "$OpMode" in
   (force_start)
      if [[ -e "$P4ROOT/P4ROOT_not_usable.txt" ]]; then
         echo -e "\\nWarning: The $P4ROOT/P4ROOT_not_usable.txt file exists, indicating\\ndatabases in P4ROOT are not safe to use. Contents:\\n$(cat "$P4ROOT/P4ROOT_not_usable.txt")\\nIgnoring this due to use of force_start option. Review the bottom of this log:\\n$Log\\n" | tee -a "$Log"
      fi

      if [[ "$UseSystemd" -eq 1 && "$OS_INIT_MECHANISM" != "systemd" ]]; then
         echo -e "\\nWarning: An attempt was made to start the $ServiceName service\\nwithout using systemd on a system configured to use systemd. Due to use of\\nforce_start, this will be allowed.\\n" | tee -a "$Log"
      fi

      echo "Starting $P4DBIN $P4D_FLAGS" | tee -a "$Log"
      # Delay start $StartDelay seconds, unless P4ROOT is empty.
      [[ -r "$P4ROOT/db.domain" ]] && sleep "$StartDelay"

      # Run 'p4d -xu' first (added to avoid startup issues during upgrades).
      "$P4DBIN" -r "$P4ROOT" -J "$P4JOURNAL" -xu > "$TmpLog" 2>&1

      # P4D_FLAGS is deliberately unquoted so it splits into separate flags.
      # Append (>>) so the '-xu' output captured above is not truncated away
      # before it is copied to the log.
      # shellcheck disable=SC2086
      "$P4DBIN" $P4D_FLAGS >> "$TmpLog" 2>&1
      ExitCode=$?
      cat "$TmpLog" >> "$Log"
      cat "$TmpLog"
   ;;

   (start)
      if [[ -e "$P4ROOT/P4ROOT_not_usable.txt" ]]; then
         echo -e "\\nError: The $P4ROOT/P4ROOT_not_usable.txt file exists, indicating\\ndatabases in P4ROOT are not safe to use. Contents:\\n$(cat "$P4ROOT/P4ROOT_not_usable.txt")\\nRefusing to start p4d.  Review the bottom of this log:\\n$Log\\n" | tee -a "$Log"
         exit 1
      fi

      if [[ "$UseSystemd" -eq 1 && "$OS_INIT_MECHANISM" != "systemd" ]]; then
         echo -e "$MustUseSystemdMsg"
         exit 1
      fi

      # Preflight checks only make sense when databases already exist.
      if [[ -r "$P4ROOT/db.domain" ]]; then
         echo "Preflight check: $P4DBIN -r $P4ROOT -xvU" > "$TmpLog"
         "$P4DBIN" -r "$P4ROOT" -xvU >> "$TmpLog" 2>&1
         ExitCode=$?
         echo "EXIT_CODE=$ExitCode" >> "$TmpLog"
         cat "$TmpLog" >> "$Log"
         cat "$TmpLog"

         if [[ "$ExitCode" -ne 0 ]]; then
            echo -e "\\nError: DB check with 'p4d -xvU' failed.  Database integrity is in question.\\nPlease Contact Perforce Support (support@perforce.com).  The force_start option\\nis available, but not recommended.  Review the bottom of this log:\\n$Log\\n" | tee -a "$Log"
            exit 1
         fi

         if [[ -e "$P4JOURNAL" && "$TailCmdUsable" -eq 1 ]]; then
            echo "Preflight journal health check" > "$TmpLog"
            TmpJnl="$TmpDir/jnl.test"
            # Take the last 10000 journal lines, minus any line containing
            # '@vv@', as the sample to replay.
            tail -10000 "$P4JOURNAL" 2>/dev/null | grep -av "@vv@" > "$TmpJnl" 2>> "$TmpLog"

            # Check for first full record. Some records are multiline and we
            # may be part way through.
            FirstLine=$(grep -a -En "^@(ex|nx|pv|rv|dv)@" "$TmpJnl" 2>&1 | head -1 | cut -d: -f1)
            if [[ "$FirstLine" -gt "1" ]]; then
               # Skip lines if necessary to start with a full record.
               mv "$TmpJnl" "${TmpJnl}.1"
               tail -n "+$FirstLine" "${TmpJnl}.1" > "$TmpJnl" 2>> "$Log"
            fi

            # Replay the sample into the scratch directory, not into P4ROOT.
            "$P4DBIN" -r "$TmpDir" -f -jr "$TmpJnl" >> "$TmpLog" 2>&1
            ExitCode=$?
            echo "EXIT_CODE=$ExitCode" >> "$TmpLog"
            cat "$TmpLog" >> "$Log"
            cat "$TmpLog"

            if [[ "$ExitCode" -ne 0 ]]; then
               echo -e "\\nError: possible corruption at end of journal detected. Journal is being rotated so any corruption is at end of file. You may wish to contact Perforce Support (support@perforce.com).  Server is still being started as normal. NOTE If there is any corruption then replicas will likely stop replicating until this is fixed!! If there are no problems with replication then this error can be ignored. Review this file:\\n$Log\\n" | tee -a "$Log"
               get_journalnum
               p4d_truncate_journal
               subject="ERROR!!! - $HOSTNAME $P4SERVER Possible journal corruption detected."
               mail_sender_opt=$(get_mail_sender_opt)
               # get_mail_sender_opt is defined outside this file; its output
               # may be empty or consist of several words (an option plus its
               # value). Split it into an array so an empty result adds no
               # argument and a multi-word result is passed as separate words.
               read -r -a mail_sender_args <<< "$mail_sender_opt"
               echo "Sending mail: $SDPMAIL -s $subject $mail_sender_opt $MAILTO" | tee -a "$Log"
               "$SDPMAIL" -s "$subject" "${mail_sender_args[@]}" "$MAILTO" < "$Log"
            fi
         fi
      fi

      echo "Starting $P4DBIN $P4D_FLAGS" | tee -a "$Log"

      # Delay start $StartDelay seconds, unless P4ROOT is empty.
      [[ -r "$P4ROOT/db.domain" ]] && sleep "$StartDelay"

      # Run 'p4d -xu' first (added to avoid startup issues during upgrades).
      # The preflight output in TmpLog was already copied to the log, so it is
      # safe to truncate here.
      "$P4DBIN" -r "$P4ROOT" -J "$P4JOURNAL" -xu > "$TmpLog" 2>&1

      # P4D_FLAGS is deliberately unquoted so it splits into separate flags.
      # Append (>>) so the '-xu' output captured above is not truncated away
      # before it is copied to the log.
      # shellcheck disable=SC2086
      "$P4DBIN" $P4D_FLAGS >> "$TmpLog" 2>&1
      ExitCode=$?

      cat "$TmpLog" >> "$Log"
      cat "$TmpLog"
   ;;

   (status)
      # Informational: show the process named in the pid file, if there is one.
      if [[ -r "$P4ROOT/server.pid" ]]; then
         pid=$(cat "$P4ROOT/server.pid")
         echo -e "\\nThe $P4ROOT/server.pid file contains pid $pid.  Pid info:"
         "$PS" -f -p "$pid" > "$TmpLog"
         ExitCode=$?

         cat "$TmpLog"

         if [[ "$ExitCode" -ne 0 ]]; then
            echo -e "\\nError: A server.pid file exists, but that process id is not running.  This could indicate abnormal process termination.\\n"
         fi
      fi

      # The exit status is decided by whether the server answers 'p4 info'.
      if "$P4BIN" -p "$P4PORT" info -s > "$TmpLog" 2>&1; then
         ExitCode=0
      else
         # If we get an SSL trust error, then the server is online.
         if grep -lq 'The authenticity of' "$TmpLog"; then
            # Do a fire & forget attempt to fix the 'p4 trust' issue.
            "$P4BIN" -p "$P4PORT" trust -f -y > /dev/null 2>&1
            ExitCode=0
         else
            ExitCode=1
         fi
      fi

      cat "$TmpLog"
   ;;

   (admin_stop)
      if [[ "$UseSystemd" -eq 1 && "$OS_INIT_MECHANISM" != "systemd" ]]; then
         echo -e "$MustUseSystemdMsg"
         exit 1
      fi

      # If there is no server.pid file, shut down the old fashioned way.
      echo -n "Shutting down $P4DBIN: " | tee -a "$Log"
      if [[ "${P4REPLICA}" == "FALSE" ]]; then
         "$P4CBIN/p4login"
      fi
      echo "$P4BIN -p $P4PORT -u $P4USER admin stop" | tee -a "$Log"
      "$P4BIN" -p "$P4PORT" -u "$P4USER" admin stop 2>&1 | tee -a "$Log"
      sleep 5
      # A server that still answers 'p4 info' has not shut down.
      "$P4BIN" -p "$P4PORT" info > /dev/null 2>&1
      ExitCode=$?
      if [[ "$ExitCode" -eq 0 ]]; then
         echo -e "\\nError: Server shutdown failed." | tee -a "$Log"
         exit 1
      else
         exit 0
      fi
   ;;

   (stop)
      if [[ -r "$P4ROOT/server.pid" ]]; then
         pid=$(cat "$P4ROOT/server.pid")

         if "$PS" -p "$pid" > /dev/null 2>&1; then
            if [[ "$UseSystemd" -eq 1 && "$OS_INIT_MECHANISM" != "systemd" ]]; then
               echo -e "$MustUseSystemdMsg"
               exit 1
            fi

            echo -e "\\nSending SIGTERM signal to pid $pid in $P4ROOT/server.pid." | tee -a "$Log"
            kill "$pid" 2>&1 | tee -a "$Log"
            sleep 1

            "$PS" -p "$pid" > /dev/null 2>&1
            status=$?

            # Poll every 5 seconds until the process is gone; there is no
            # timeout, so this does not return while the pid is still alive.
            if [[ $status -eq 0 ]]; then
               echo -n "Waiting for p4d to shutdown ..." | tee -a "$Log"

               while [[ $status -eq 0 ]]; do
                  echo -n "."
                  sleep 5
                  "$PS" -p "$pid" > /dev/null 2>&1
                  status=$?
               done
            fi

            echo -e "\\nConfirmed shutdown of $P4DBIN." | tee -a "$Log"
         else
            echo -e "\\nError: A server.pid file exists, but that process id is not running.  This could indicate abnormal process termination.\\n" | tee -a "$Log"
            exit 1
         fi
      else
         # If there is no server.pid file, but we can still get a response from
         # 'p4 info', try to shut down with the front-door method of calling
         # 'p4 admin stop'. If there is no server.pid file and no response from
         # 'p4 info', then p4d is down.
         if "$P4BIN" -p "$P4PORT" info -s > /dev/null 2>&1; then
            echo -e "\\nWarning: Missing $P4ROOT/server.pid. Attempting shutdown with 'p4 admin stop'.\\n" | tee -a "$Log"
            echo "$0" "$SDP_INSTANCE" admin_stop | tee -a "$Log"
            "$0" "$SDP_INSTANCE" admin_stop
            # Report a failed shutdown to the caller instead of exiting 0.
            ExitCode=$?
         fi
      fi
   ;;

   (restart)
      # A failed 'stop' (e.g. a stale pid file) does not prevent the 'start'.
      "$0" "$SDP_INSTANCE" stop
      "$0" "$SDP_INSTANCE" start
      # Exit with the result of 'start' rather than an unconditional 0.
      ExitCode=$?
   ;;

   (*)
      echo -e "\\nUsage: $0 SDP_INSTANCE {start|stop|status|restart|force_start|admin_stop}\\n"
      exit 1
   ;;
esac

# Remove the whole scratch directory created with 'mktemp -d', not only the
# temp log inside it. The journal preflight check in the 'start' mode also
# writes its journal sample there and runs p4d with that directory as its
# root, so deleting only the log would leave the directory and those files
# behind on every run.
if [[ -n "$TmpDir" && -d "$TmpDir" ]]; then
   rm -rf -- "$TmpDir"
fi

exit "$ExitCode"

# Change User Description Committed
#15 29332 Russell C. Jackson (Rusty) Missed a -xu
#14 29330 Russell C. Jackson (Rusty) Added -xu to avoid startup issues during upgrades.
#13 27950 Russell C. Jackson (Rusty) Fixed issue that can result in false-positives in the pre-flight
#12 27623 Russell C. Jackson (Rusty) Turned off systemctl check
#11 27217 Russell C. Jackson (Rusty) Merged in some changes from the Perforce maintained SDP
#10 27086 Russell C. Jackson (Rusty) Fixed a typo in P4DBIN
#9 23968 Russell C. Jackson (Rusty) Added missing fi on if statement.
#8 23966 Russell C. Jackson (Rusty) Added additional check to make sure all p4d threads for the specified instance are gone instead of relying on the master pid alone.
#7 23751 Russell C. Jackson (Rusty) Change to only set P4JOURNAL on the master server.
It will replicate to the other servers.:
#6 23642 Russell C. Jackson (Rusty) Added a LOGFILE setting for the journal rotation.
#5 23134 Russell C. Jackson (Rusty) Corrected a typo.
#4 23124 Russell C. Jackson (Rusty) Added new 2017.1 flag to suppress output.
#3 22981 Russell C. Jackson (Rusty) Made files writeable so they are easier to update once on the server.
#2 22698 Russell C. Jackson (Rusty) Added check to not rotate the journal if the database doesn't exist.
#1 22693 Russell C. Jackson (Rusty) Branched a Unix only version of the SDP.
Removed extra items to create a cleaner tree.
Moved a few items around to make more sense without Windows in the mix.
//guest/perforce_software/sdp/dev/Server/Unix/p4/common/bin/p4d_base
#18 22250 C. Thomas Tyler Further refinements to the new 'rotate journal on p4d start' change:
* Fixed p4d_truncate_journal so it has less environment dependencies
(e.g. doesn't depend on LOGFILE, etc.) and doesn't try sending email.
* Introduced msg() and bail(), counterparts to log() and die() which
don't try to write to LOGFILE and don't try to send email.
* Added call to get_journalnum() before call to p4d_truncate_journal().
* Fixed logic in get_journalnum() so it gets the journal number w/o
needing p4d to be up.
 * I think I fixed the syntax error in bitwise operator check when
setting EDGE_SERVER.  It works on a non-edge server (sets
EDGESERVER=0).  For now I have it doing an
'echo EDGESERVER=$EDGESERVER', but need to test that it correctly
sets EDGESERVER=1 on an edge server.

TO DO: Remove that 'echo EDGESERVER=$EDGESERVER' once we verify it
correctly sets the value for $EDGESERVER. (Or not?)
#17 22239 Russell C. Jackson (Rusty) Change set_vars to look up the edge server directly in the database so the server does
not have to be on-line to check.

Fix for Job: SDP-223
#16 22215 Russell C. Jackson (Rusty) First step towards solving corruption problem with journals and replicas.
Rotate the journal on the master when starting. That makes it easier to remove
any corruption since it will be the last thing in the file.
#15 20559 C. Thomas Tyler Fixed recent regression with 'restart' option in init scripts;
they now need to pass in the SDP_INSTANCE parameter.  Also
corrected usage messages.
#14 20554 C. Thomas Tyler Removed harmless but unnecessary debug message from p4d_base.
#13 20491 C. Thomas Tyler Fixed an environment insulation bug in init scripts.

More testing (and thinking) revealed that the only way to truly
provide a guarantee of insulation from user-set defaults in
~perforce/.bashrc (etc.) is to pass SDP_INSTANCE as a parameter to
the *_base scripts, so that the instance name is explicitly passed
into the su/exec call (when run as root).

This change also includes minor cleanup in init scripts and *_base
scripts.
#12 20448 C. Thomas Tyler Fixed env bug seen when running init scripts as root.

Updated *_base scripts and *_init script templates to a new
standard.  Goals:

1. Init scripts that use corresponding *_base scripts are
minimized so that do nothing more than set SDP_INSTANCE and
then call the corresponding *_base script.

2. The 'su' commands always pass $* instead of $1, deferring
all processing to the *_base script.

3. The shell environment is now guaranteed to have the same
results regardless of whether it is called as 'root' or as
the defined OSUSER.

4. The p4_vars file is always sourced exactly once.  Two
calls to 'source p4_vars' appear in some cases, one
immediately before the su/exec call, and another after the
'su/exec'.  Only one or the other is sourced.

5. All init scripts have a reasonably consistent usage message.

6. All init scripts accept a 'status' argument.

This change fixes a bug where 'p4broker_N_init status' run as the
'perforce' user would report many pids unrelated to Perforce
if run as root, e.g. doing 'service p4broker_N_init status'.

This also eliminates a potential issue where the 'perforce'
user might source a p4_vars with a default instance in ~/.profile
or ~/.bashrc, thus invalidating the instance specified when the
user ran the init script as root.
#11 20348 C. Thomas Tyler Use pid to shutdown the p4d process.

Goals:
1. Make shutdown more standard using kill/SIGTERM.
2. Make shutdown more reliable; no need to be logged in, insulated from P4AUTH, etc.
3. Use the now-built-in pid mechanism.
4. On shutdown, make it so the script doesn't return until the p4d process is well
and truly down.

This was implemented with some backward-compatibility features to simplify SDP
upgrades:
1. Though the p4_N.vars file should add the required '--pid-file' flag to P4D_FLAGS,
this new p4d_base checks and adds it if it is missing.
2. The old 'p4 admin stop' logic is retained as a new 'admin_stop' function, and
is used if the server.pid file does not exist when stop is called.
#10 20170 Russell C. Jackson (Rusty) Moved password and users into the config directory to allow for instance specific
users and passwords. Ran into a case where two different teams were sharing the same
server hardware and needed this type of differentiation. Surprised that we haven't hit
this sooner.

Also defaulted mkdirs to use the numeric ports since this is the most common
installation.
#9 18686 Russell C. Jackson (Rusty) #REVIEW-18670
Added -cset of P4JOURNAL to the start section so that it is always set
to the correct location.
#8 17281 Robert Cowham When stopping, send error to /dev/null
Remove tabs

#review @rjackson @ttyler

ttyler: Looks good!
#7 16335 C. Thomas Tyler Routine Merge Down to dev from main using:
p4 merge -b perforce_software-sdp-dev
#6 16029 C. Thomas Tyler Routine merge to dev from main using:
p4 merge -b perforce_software-sdp-dev
#5 15778 C. Thomas Tyler Routine Merge Down to dev from main.
#4 13906 C. Thomas Tyler Normalized P4INSTANCE to SDP_INSTANCE to get Unix/Windows
implementations in sync.

Reasons:
1. Things that interact with SDP in both Unix and Windows
environments shouldn't have to account for this obscure
SDP difference between Unix and Windows.  (I came across
this doing CBD work).

2. The Windows and Unix scripts have different variable
names for defining the same concept, the SDP instance.
Unix uses P4INSTANCE, while Windows uses SDP_INSTANCE.

3. This instance tag, a data set identifier, is an SDP concept.
I prefer the SDP_INSTANCE name over P4INSTANCE, so I propose
to normalize to SDP_INSTANCE.

4. The P4INSTANCE name makes it look like a setting that might be
recognized by the p4d itself, which it is not.  (There are other
such things such as P4SERVER that could perhaps be renamed as
a separate task; but I'm not sure we want to totally disallow
the P4 prefix for variable names. It looks too right to be wrong
in some cases, like P4BIN and P4DBIN.  That's a discussion for
another day, outside the scope of this task).

Meanwhile:
* Fixed a bug in the Windows 2013.3 upgrade script that
was referencing undefined P4INSTANCE, as the Windows
environment defined only SDP_INSTANCE.

* Had P4INSTANCE been removed completely, this change would
likely cause trouble for users doing updates for existing
SDP installations.  So, though it involves slight technical debt,
I opted to keep a redundant definition of P4INSTANCE
in p4_vars.template, with comments indicating SDP_INSTANCE should be
used in favor of P4INSTANCE, with a warning that P4INSTANCE
may go away in a future release.  This should avoid unnecessary
upgrade pain.

* In mkdirs.sh, the variable name was INSTANCE rather than
SDP_INSTANCE.  I changed that as well.  That required manual
change rather than sub/replace to avoid corrupting other similar
variable names (e.g.  MASTERINSTANCE).

This is a trivial change technically (a substitute/replace, plus
tweaks in p4_vars.template), but impacts many files.
#3 12169 Russell C. Jackson (Rusty) Updated copyright date to 2015

 Updated shell scripts to require an instance parameter to eliminate the need
 for calling p4master_run.    Python and Perl still need it since you have to set the
environment for them to run in.

 Incorporated comments from reviewers. Left the . instead of source as that seems
more common in the field and has the same functionality.
#2 11493 Russell C. Jackson (Rusty) Removed echo for Preflight Check.
#1 10638 C. Thomas Tyler Populate perforce_software-sdp-dev.
//guest/perforce_software/sdp/main/Server/Unix/p4/common/bin/p4d_base
#1 10148 C. Thomas Tyler Promoted the Perforce Server Deployment Package to The Workshop.