#!/bin/bash
#==============================================================================
# Copyright and license info is available in the LICENSE file included with
# the Server Deployment Package (SDP), and also available online:
# https://swarm.workshop.perforce.com/projects/perforce-software-sdp/view/main/LICENSE
#------------------------------------------------------------------------------
# This script will check diskspace available to P4JOURNAL and trigger a journal
# rotation based on specified thresholds. This is useful in case you are in
# danger of running out of disk space and your rotated journal files are stored
# on a separate partition than the active journal.
#
# This script is using the following external variables:
#
# SDP_INSTANCE - The instance of Perforce that is being backed up. If not
# set in environment, pass in as argument to script.
#
# P4HOME - Server's home directory.
# P4BIN - Command line client name for the instance where disk space is monitored.
# P4DBIN - Server executable name for the instance where disk space is monitored.
# P4ROOT - Server's root directory. p4/root, p4_N/root
# P4PORT - TCP/IP port for the server instance where disk space is monitored.
# P4JOURNAL - Location of the Journal for the server where disk space is monitored.
# P4JOURNALWARN - Amount of space left (K,M,G,%) before min journal space where an email alert is sent
# P4JOURNALWARNALERT - Send an alert if warn threshold is reached (true/false, default: false)
# P4JOURNALROTATE - Amount of space left (K,M,G,%) before min journal space to trigger a journal rotation
# P4OVERRIDEKEEPJNL - Allow script to temporarily override KEEPJNL to retain enough journals to replay against oldest checkpoint (true/false, default: false)
#
# USAGE:
#   ./journal_watch.sh <SDP_INSTANCE> <P4JOURNALWARN> <P4JOURNALWARNALERT> <P4JOURNALROTATE> [P4OVERRIDEKEEPJNL]
#
# The instance is always read from the first positional argument; the
# thresholds are read from the second through fifth arguments. Any argument
# that is omitted falls back to the environment variable of the same name.
#
# EXAMPLES:
# Run from CLI (instance 1) that will warn via email if less than 20% is
# available and rotate journal when less than 10% is available
#   ./journal_watch.sh 1 20% TRUE 10% TRUE
#
# Cron job that will warn via email if less than 20% is available and rotate journal when less than 10% is available
# 30 * * * * [ -e /p4/common/bin ] && /p4/common/bin/run_if_master.sh ${INSTANCE} /p4/common/bin/journal_watch.sh ${INSTANCE} 20\% TRUE 10\% TRUE
#

# Resolve the instance: the first CLI argument wins over the environment.
export SDP_INSTANCE="${SDP_INSTANCE:-Undefined}"
export SDP_INSTANCE="${1:-$SDP_INSTANCE}"
if [[ "$SDP_INSTANCE" == Undefined ]]; then
  echo "Instance parameter not supplied."
  echo "You must supply the Perforce instance as a parameter to this script."
  exit 1
fi

# If environment variable is defined, then use that as default value if cli
# arguments not supplied.
# NOTE(review): nothing in this script rejects the "Undefined" placeholder for
# the two thresholds; confirm whether check_vars or convert_to_bytes handles it.
export P4JOURNALWARN="${P4JOURNALWARN:-Undefined}"
export P4JOURNALWARN="${2:-$P4JOURNALWARN}"
export P4JOURNALWARNALERT="${P4JOURNALWARNALERT:-false}"
export P4JOURNALWARNALERT="${3:-$P4JOURNALWARNALERT}"
export P4JOURNALROTATE="${P4JOURNALROTATE:-Undefined}"
export P4JOURNALROTATE="${4:-$P4JOURNALROTATE}"
export P4OVERRIDEKEEPJNL="${P4OVERRIDEKEEPJNL:-false}"
export P4OVERRIDEKEEPJNL="${5:-$P4OVERRIDEKEEPJNL}"

# shellcheck disable=SC1091
source /p4/common/bin/p4_vars "$SDP_INSTANCE"
# shellcheck disable=SC1091
source /p4/common/bin/backup_functions_replica.sh

LOGFILE="$LOGS/journal_watch.log"

######### Start of Script ##########
check_vars
set_vars
# Only this script's own log is rotated here.
rotate_log_file "$LOGFILE"
log "Start $P4SERVER Journal Watch"
check_uid
check_dirs
"$P4CBIN/p4login"
get_ckpnum
get_journalnum
get_journal_stats

# P4JOURNALMIN, P4JOURNALTOTAL and P4JOURNALFREE are not assigned in this
# script; they are expected to come from the sourced files / helpers above
# (presumably get_journal_stats - TODO confirm).

# Convert values to a common unit (bytes). A value ending in % is relative, so
# the total disk size is passed as a second argument to calculate bytes.
# Arguments are separated by whitespace only: a comma after the first argument
# would be passed to convert_to_bytes as part of the value itself.
if [[ "$P4JOURNALMIN" =~ .*%$ ]]; then
  P4JOURNALMINBYTES=$(convert_to_bytes "$P4JOURNALMIN" "$P4JOURNALTOTAL")
else
  P4JOURNALMINBYTES=$(convert_to_bytes "$P4JOURNALMIN")
fi

# Same conversion for the warning threshold.
if [[ "$P4JOURNALWARN" =~ .*%$ ]]; then
  P4JOURNALWARNBYTES=$(convert_to_bytes "$P4JOURNALWARN" "$P4JOURNALTOTAL")
else
  P4JOURNALWARNBYTES=$(convert_to_bytes "$P4JOURNALWARN")
fi

# Same conversion for the rotation threshold.
if [[ "$P4JOURNALROTATE" =~ .*%$ ]]; then
  P4JOURNALROTATEBYTES=$(convert_to_bytes "$P4JOURNALROTATE" "$P4JOURNALTOTAL")
else
  P4JOURNALROTATEBYTES=$(convert_to_bytes "$P4JOURNALROTATE")
fi

# Calculate available space for journal file (free space minus minimum required)
P4JOURNALAVAILABLE=$((P4JOURNALFREE-P4JOURNALMINBYTES))

# If available space is less than warning threshold, log warning and optionally
# email alert
if [[ "$P4JOURNALAVAILABLE" -lt "$P4JOURNALWARNBYTES" ]]; then
  log "Warning disk space threshold of ${P4JOURNALWARNBYTES} bytes reached, only ${P4JOURNALAVAILABLE} bytes available"

  # Convert to all lower case before comparison
  if [[ "${P4JOURNALWARNALERT,,}" == "true" ]]; then
    mail_log_file "${HOSTNAME} ${P4SERVER} Journal watch space warning"
  fi
fi

# If available space is less than rotate threshold, trigger a journal rotation
# on the master server
if [[ "$P4JOURNALAVAILABLE" -lt "$P4JOURNALROTATEBYTES" ]]; then
  log "Rotate disk space threshold of ${P4JOURNALROTATEBYTES} bytes reached, only ${P4JOURNALAVAILABLE} bytes available"

  if [[ "$EDGESERVER" -eq 0 && "$STANDBYSERVER" -eq 0 ]]; then
    # Neither an edge nor a standby server: truncate the journal locally.
    ckp_running
    truncate_journal
    ckp_complete
  else
    truncate_journal_on_master
  fi

  # A rotation is always mailed, regardless of P4JOURNALWARNALERT.
  mail_log_file "${HOSTNAME} ${P4SERVER} Journal watch space warning"
fi

# If checkpoints exist, calculate the number of journals to keep based on
# oldest checkpoint on disk to ensure enough journals are available for
# recovery (if P4OVERRIDEKEEPJNL is set to true).
# JOURNALNUM and OLDESTCHECKPOINT are not assigned in this script (presumably
# set by get_journalnum / get_ckpnum - TODO confirm).
if [[ -n "$OLDESTCHECKPOINT" && "${P4OVERRIDEKEEPJNL,,}" == "true" ]]; then
  if [[ "$EDGESERVER" -eq 0 && "$STANDBYSERVER" -eq 0 ]]; then
    KEEPJNLS=$((JOURNALNUM + 1 - OLDESTCHECKPOINT))
  else
    # Edge and standby servers keep one more journal than the other case.
    KEEPJNLS=$((JOURNALNUM + 2 - OLDESTCHECKPOINT))
  fi
  log "Based on oldest checkpoint of ${OLDESTCHECKPOINT}, temporarily setting KEEPJNLS to ${KEEPJNLS} to ensure enough journals are available for recovery."
fi

remove_old_checkpoints_and_journals
check_disk_space
remove_old_logs
log "End $P4SERVER Journal Watch"
# | Change | User | Description | Committed | |
---|---|---|---|---|---|
#5 | 26452 | ashaikh | Fix incorrect backup functions import statement. | ||
#4 | 26156 | C. Thomas Tyler |
Shellcheck v0.6.0 and style compliance changes. Fixed minor bugs related to capturing output, driven by shellcheck changes. Fixed sync_replica.sh for standby replicas with the configurable rpl.journalcopy.location=1 (SDP-424), removing an unnecessary and broken check. Fixed test for pre-existing checkpoints in function recreate_offline_db_files() so that it checks only for the master server, fixing an issue where it would report "No checkpoints found - run live_checkpoint.sh" when used on a replica where checkpoints might legitimately not exist. Also fixed the actual test itself. Replaced P4COMMITSERVER variable with P4MASTERPORT to support daisy chain scenarios, removing the assumption that all servers target only the master. (This assumption was made only in journal_watch.sh). Enhanced check_vars() to report individual missing environment variables, and to add more info on how to fix environment problems (e.g. adding to p4_vars or p4_N.vars files). Fixed bug in check_dirs() where a missing directory check intended to result in a die() call would result in a syntax error instead. These files have been field tested. |
||
#3 | 25592 | C. Thomas Tyler |
chmod +x for shell scripts. To Do: These scripts need tests and docs added. #review @ashaikh |
||
#2 | 25192 | ashaikh |
Change log rotation to only rotate journal_watch log. Currently this script rotates all logs every time it runs. However, if you run the journal_watch script on a frequent basis, you may not have much of a history of other log files. |
||
#1 | 24189 | ashaikh |
Add a new SDP script to warn and/or rotate live journal due to configurable low disk space condition. A new journal_watch.sh script is being introduced with alerting and journal truncate functionality for environments with high Perforce activity. Two separate thresholds can be configured, one for a warn alert (notify via email - optional) and another to trigger a journal rotation. Values for the threshold can be defined via K, M, G or a percentage of disk space. These values represent the available disk space on the journal volume (free space minus filesys.P4Journal.min) before the script takes action. Another optional feature is to enable KEEPJNL override which dynamically calculates the number of journals to keep based on the oldest checkpoint on disk. When this is enabled, KEEPJNL is temporarily overridden during the journal_watch.sh run to prevent removing any journals that may be required to recover from a checkpoint on disk and all the journals required to bring the server back to a current state. This script can be run on a master/commit, edge and replica server. If the edge or replica server is running low on journal space, it will trigger a journal rotation on the master/commit server (based on P4TARGET value), which then will cause the journals to rotate on the edge/replica servers. <code>#review</code> |