#!/bin/bash
#==============================================================================
# Copyright and license info is available in the LICENSE file included with
# the Server Deployment Package (SDP), and also available online:
# https://swarm.workshop.perforce.com/projects/perforce-software-sdp/view/main/LICENSE
#------------------------------------------------------------------------------

#------------------------------------------------------------------------------
# Version ID Block. Relies on +k filetype modifier.
# VersionID='$Id: //p4-sdp/dev_c2s/Unsupported/Samples/bin/purge_revisions.sh#2 $ $Change: 31472 $'

# This script will allow you to archive files and optionally purge files based
# on a configurable number of days and minimum revisions that you want to keep.
# This is useful if you want to keep a certain number of days worth of files
# instead of a specific number of revisions.
#
# Note: If you run this script with purge mode disabled, and then enable it after
# the fact, all previously archived files specified in the configuration file will
# be purged if the configured criteria are met.
#
# Prior to running this script, you may want to disable server locks for archive
# to reduce impact to end users.
# https://www.perforce.com/perforce/doc.current/manuals/cmdref/Content/CmdRef/configurables.configurables.html#server.locks.archive
#
# SDP_INSTANCE - The instance of Perforce that is being backed up. If not
# set in environment, pass in as argument to script.
#
# P4_ARCHIVE_CONFIG - The location of the config file used to determine retention.
# If not set in environment, pass in as argument to script. This can be stored on
# a physical disk or somewhere in perforce.
#
# P4_ARCHIVE_DEPOT - Depot to archive the files in (string)
#
# P4_ARCHIVE_REPORT_MODE - Do not archive revisions; report on which revisions would
# have been archived (bool - default: true)
#
# P4_ARCHIVE_TEXT - Archive text files (or other revisions stored in delta format,
# such as files of type binary+D) (bool - default: false)
#
# P4_PURGE_MODE - Enables purging of files after they are archived (bool - default: false)
#
# CONFIG FILE FORMAT:
# The config file should contain a list of file paths, the number of days and the
# minimum number of revisions to keep, in a tab-delimited format.
#
#    <PATH>    <DAYS>  <MINIMUM REVISIONS>
#
# Example:
#     //test/1.txt	10	1
#     //test/2.txt	1	3
#     //test/3.txt	10	10
#     //test/4.txt	30	3
#     //test/5.txt	30	8
#
# USAGE: ./purge_revisions.sh <SDP_INSTANCE> <P4_ARCHIVE_CONFIG> <P4_ARCHIVE_DEPOT> <P4_ARCHIVE_REPORT_MODE (Optional)> <P4_ARCHIVE_TEXT (Optional)> <P4_PURGE_MODE (Optional)>
#
# EXAMPLES:
#   Run from CLI that will archive files as defined in the config file
#      ./purge_revisions.sh 1 /p4/common/config/p4_1.p4purge.cfg archive FALSE
#
#   Cron job that will archive files as defined in the config file, including text files
#      30 0 * * * [ -e /p4/common/bin ] && /p4/common/bin/run_if_master.sh ${INSTANCE} /p4/common/bin/purge_revisions.sh ${INSTANCE} /p4/common/config/p4_1.p4purge.cfg archive FALSE TRUE
#

#------------------------------------------------------------------------------
# Function: _purge_revisions_archive
#
# Runs "p4 archive" against the configured archive depot with the shared flag
# set, appending the command output and its timing to LOGFILE.
#
# Arguments: optional extra flags (e.g. -p) followed by the file/revision spec.
# Globals read: P4BIN, P4_ARCHIVE_DEPOT, P4_ARCHIVE_ARGS, LOGFILE
#------------------------------------------------------------------------------
_purge_revisions_archive() {
    # P4_ARCHIVE_ARGS is deliberately left unquoted: it carries several
    # space-separated flags that must reach p4 as separate words.
    # shellcheck disable=SC2086
    { time $P4BIN archive -D "$P4_ARCHIVE_DEPOT" $P4_ARCHIVE_ARGS "$@"; } >> "$LOGFILE" 2>&1
}

#------------------------------------------------------------------------------
# Function: purge_revisions
#
# Archives (and, when P4_PURGE_MODE is "true", purges) old revisions of one
# depot file while keeping at least a minimum number of revisions.
#
# Arguments:
#   $1 - Depot path of the file to process.
#   $2 - Age in days; revisions before (today - days) are archive candidates.
#   $3 - Minimum number of revisions to keep.
#
# Globals read: P4BIN, P4_ARCHIVE_DEPOT, P4_ARCHIVE_ARGS, P4_PURGE_MODE, LOGFILE
# Requires a log() function to be defined by the calling script.
#
# NOTE(review): the index arithmetic below assumes "p4 files -a" lists
# revisions newest first - confirm against the server version in use.
#------------------------------------------------------------------------------
purge_revisions() {
    # Set vars from arguments
    local ARCHIVE_PATH=$1
    local ARCHIVE_DAYS=$2
    local ARCHIVE_MIN_REV=$3
    # Everything else is scratch state and must not leak into the caller
    local ARCHIVE_DATE ARCHIVE_HEAD_REV ARCHIVE_DEPOT_PATH ARCHIVE_MAX_REV
    local ARCHIVE_LAST_INDEX ARCHIVE_OLDEST_KEPT ARCHIVE_CUT_REV ARCHIVE_REVSPEC
    local -a ARCHIVE_TOTAL_REVS ARCHIVE_REVS

    # Calculate archive date based on number of days specified in config file
    ARCHIVE_DATE=$(date -d "-${ARCHIVE_DAYS} days" +%Y/%m/%d)

    # Get head revision number for file; empty or non-numeric output means the
    # file could not be queried, so there is nothing safe to do for it
    ARCHIVE_HEAD_REV=$($P4BIN -F "%headRev%" fstat "${ARCHIVE_PATH}")
    if ! [[ "$ARCHIVE_HEAD_REV" =~ ^[0-9]+$ ]]; then
        log "Unable to determine head revision for ${ARCHIVE_PATH}, skipping."
        return
    fi

    # If the head revision is less than min revision, there's nothing to do
    if [[ "$ARCHIVE_HEAD_REV" -le "$ARCHIVE_MIN_REV" ]]; then
        log "Head revision of ${ARCHIVE_HEAD_REV} for ${ARCHIVE_PATH} is less than or equal to ${ARCHIVE_MIN_REV}, skipping."
        return
    fi

    # Get all revisions that are not archived or purged, one "rev,time" token
    # each. The tokens contain neither whitespace nor glob characters, so
    # plain word splitting is safe here.
    # shellcheck disable=SC2207
    ARCHIVE_TOTAL_REVS=($($P4BIN -ztag -F "%rev%,%time%" files -a -e "${ARCHIVE_PATH}"))

    # If the total revisions is less than min revision, there's nothing to archive, even though
    # files are older than the number of days specified
    if [[ ${#ARCHIVE_TOTAL_REVS[@]} -le "$ARCHIVE_MIN_REV" ]]; then
        log "Total revisions of ${#ARCHIVE_TOTAL_REVS[@]} for ${ARCHIVE_PATH} is less than or equal to ${ARCHIVE_MIN_REV}, skipping."

        # Only purge mode has anything left to do: previously archived revisions may be purgeable
        if [[ "${P4_PURGE_MODE,,}" != "true" ]]; then
            return
        fi

        # Without any remaining revision there is no lower bound to compare against
        if [[ ${#ARCHIVE_TOTAL_REVS[@]} -eq 0 ]]; then
            log "No unarchived revisions found for ${ARCHIVE_PATH}, skipping purge check."
            return
        fi

        # Generate path of file in archive depot (drop the leading "//" of the source path)
        ARCHIVE_DEPOT_PATH="//${P4_ARCHIVE_DEPOT}/${ARCHIVE_PATH:2}"
        # Get max rev info for file in archive
        ARCHIVE_MAX_REV=$($P4BIN -ztag -F "%rev%" files -A "${ARCHIVE_DEPOT_PATH}")

        # Revision number of the last listed (lowest) revision that is still not archived
        ARCHIVE_LAST_INDEX=$(( ${#ARCHIVE_TOTAL_REVS[@]} - 1 ))
        ARCHIVE_OLDEST_KEPT=${ARCHIVE_TOTAL_REVS[ARCHIVE_LAST_INDEX]%%,*}

        # If the most recent archived rev is below the lowest revision to keep, it can be purged
        if [[ -n "$ARCHIVE_MAX_REV" && "$ARCHIVE_MAX_REV" -lt "$ARCHIVE_OLDEST_KEPT" ]]; then
            log "########## Purging previously archived files for ${ARCHIVE_PATH} before ${ARCHIVE_DATE} ##########"
            _purge_revisions_archive -p "${ARCHIVE_PATH}#${ARCHIVE_MAX_REV}"
        fi

        return
    fi

    # Get the revisions that exist as of the archive date
    # shellcheck disable=SC2207
    ARCHIVE_REVS=($($P4BIN -ztag -F "%rev%,%time%" files -a -e "${ARCHIVE_PATH}@${ARCHIVE_DATE}"))

    log "########## Archiving files for ${ARCHIVE_PATH} before ${ARCHIVE_DATE} ##########"
    if [[ $(( ${#ARCHIVE_TOTAL_REVS[@]} - ${#ARCHIVE_REVS[@]} )) -ge "$ARCHIVE_MIN_REV" ]]; then
        # Enough revisions are newer than the archive date, so everything up to the date can go
        ARCHIVE_REVSPEC="${ARCHIVE_PATH}@${ARCHIVE_DATE}"
    else
        # Fewer than the minimum would remain after a date-based cut, so archive only up to the
        # revision found at index ARCHIVE_MIN_REV of the full revision list
        log "Minimum revisions specified in config: ${ARCHIVE_MIN_REV}"
        ARCHIVE_CUT_REV=${ARCHIVE_TOTAL_REVS[ARCHIVE_MIN_REV]%%,*}
        log "Calculated number of revisions to archive before: ${ARCHIVE_CUT_REV}"
        ARCHIVE_REVSPEC="${ARCHIVE_PATH}#${ARCHIVE_CUT_REV}"
    fi

    _purge_revisions_archive "$ARCHIVE_REVSPEC"

    if [[ "${P4_PURGE_MODE,,}" == "true" ]]; then
        log "########## Purging files for ${ARCHIVE_PATH} before ${ARCHIVE_DATE} ##########"
        _purge_revisions_archive -p "$ARCHIVE_REVSPEC"
    fi
}

# Regex of path patterns that are rejected in config paths; list several
# alternatives by separating them with `|`
EXCLUDE_PATH_CHARS='\.\.\.\.\.'
# Flags passed on every "p4 archive" call; further flags are appended later
# in this script depending on the selected modes
P4_ARCHIVE_ARGS='-h'

# Required settings. A positional argument wins over the environment, and the
# placeholder "Undefined" marks a value that was supplied by neither.
export SDP_INSTANCE=${1:-${SDP_INSTANCE:-Undefined}}
[[ $SDP_INSTANCE != Undefined ]] || {
    echo "Instance parameter not supplied."
    echo "You must supply the Perforce instance as a parameter to this script."
    exit 1
}

export P4_ARCHIVE_CONFIG=${2:-${P4_ARCHIVE_CONFIG:-Undefined}}
[[ $P4_ARCHIVE_CONFIG != Undefined ]] || {
    echo "Location of the config file not supplied."
    echo "You must supply the location of the config file to this script."
    exit 1
}

export P4_ARCHIVE_DEPOT=${3:-${P4_ARCHIVE_DEPOT:-Undefined}}
[[ $P4_ARCHIVE_DEPOT != Undefined ]] || {
    echo "Archive depot not supplied."
    echo "You must supply a name of the archive depot to archive files to."
    exit 1
}

# Optional switches, resolved the same way (argument, then environment, then
# the built-in default)
export P4_ARCHIVE_REPORT_MODE=${4:-${P4_ARCHIVE_REPORT_MODE:-true}}
export P4_ARCHIVE_TEXT=${5:-${P4_ARCHIVE_TEXT:-false}}
export P4_PURGE_MODE=${6:-${P4_PURGE_MODE:-false}}

# Pull in the SDP environment for this instance plus the shared helper
# functions, then choose the log file for this run
source /p4/common/bin/p4_vars $SDP_INSTANCE
source /p4/common/bin/backup_functions.sh
LOGFILE="${LOGS}/purge_revisions.log"

######### Validate Config File ##########
#------------------------------------------------------------------------------
# Function: load_archive_config
#
# Reads the retention config named by P4_ARCHIVE_CONFIG into the global array
# P4_ARCHIVE (one element per line, trailing newline stripped) and aborts via
# die() when the config cannot be read or names the same path more than once.
#
# A value starting with "//" is fetched from Perforce with "p4 print"; any
# other value is treated as a file on local disk.
#
# Globals read:    P4_ARCHIVE_CONFIG, P4BIN, AWK (falls back to "awk")
# Globals written: P4_ARCHIVE, P4_ARCHIVE_CONFIG_CONTENTS, P4_ARCHIVE_CONFIG_DUPES
# Requires log() and die() to be defined by the calling script.
#------------------------------------------------------------------------------
load_archive_config() {
    P4_ARCHIVE=()
    P4_ARCHIVE_CONFIG_DUPES=()

    if [[ "${P4_ARCHIVE_CONFIG:0:2}" == '//' ]]; then
        # NOTE(review): this queries the server before the p4login call made
        # later in the script - confirm a valid ticket exists at this point.
        P4_ARCHIVE_CONFIG_CONTENTS=$($P4BIN print -q "${P4_ARCHIVE_CONFIG}" 2>&1)
        # If the output starts with the config path itself (as in
        # "<path> - no such file(s)."), the file was not found
        if [[ "$P4_ARCHIVE_CONFIG_CONTENTS" == "$P4_ARCHIVE_CONFIG" ||
              "$P4_ARCHIVE_CONFIG_CONTENTS" == "$P4_ARCHIVE_CONFIG"[[:space:]]* ]]; then
            die "Config file ${P4_ARCHIVE_CONFIG} does not exist or is not readable!"
        fi
        # Load only stdout so that server diagnostics never end up as config lines
        readarray -t P4_ARCHIVE < <($P4BIN print -q "${P4_ARCHIVE_CONFIG}")
    else
        if ! [[ -r "$P4_ARCHIVE_CONFIG" ]]; then
            die "Config file ${P4_ARCHIVE_CONFIG} does not exist or is not readable!"
        fi
        readarray -t P4_ARCHIVE < "$P4_ARCHIVE_CONFIG"
    fi

    # Collect every path that appears more than once. The path is the first
    # TAB-separated field (paths may contain spaces), and the list is sorted
    # first because uniq only compares adjacent lines. LC_ALL=C keeps the
    # comparison byte-exact.
    readarray -t P4_ARCHIVE_CONFIG_DUPES < <(
        printf '%s\n' "${P4_ARCHIVE[@]}" \
            | ${AWK:-awk} -F'\t' '$1 != "" { print $1 }' \
            | LC_ALL=C sort \
            | LC_ALL=C uniq -d
    )

    # Ensure no duplicate paths exist
    if [[ "${#P4_ARCHIVE_CONFIG_DUPES[@]}" -gt 0 ]]; then
        log "Duplicate path(s) found, please check config: ${P4_ARCHIVE_CONFIG}"
        log "${P4_ARCHIVE_CONFIG_DUPES[*]}"
        die "Aborting, please remove duplicate path(s) from config file!"
    fi
}

declare -a P4_ARCHIVE

# Load config file into array
load_archive_config

######### Start of Script ##########
# Pre-flight sequence. None of these helpers is defined in this file; they are
# presumably provided by the SDP scripts sourced earlier.
check_vars
set_vars
rotate_log_file $LOGFILE
log "Start $P4SERVER Purge Revisions"
check_uid
check_dirs
$P4CBIN/p4login

# Turn the (case-insensitive) boolean settings into "p4 archive" flags and
# record every enabled mode in the log
case "${P4_ARCHIVE_REPORT_MODE,,}" in
    true)
        log "########## RUNNING IN REPORT MODE ##########"
        P4_ARCHIVE_ARGS+=" -n"
        ;;
esac

case "${P4_ARCHIVE_TEXT,,}" in
    true)
        log "########## TEXT ARCHIVE MODE ENABLED ##########"
        P4_ARCHIVE_ARGS+=" -t"
        ;;
esac

# Purge mode adds no flag here; it is only announced in the log
case "${P4_PURGE_MODE,,}" in
    true)
        log "########## PURGE MODE ENABLED ##########"
        ;;
esac

#------------------------------------------------------------------------------
# Function: process_archive_config
#
# Walks every config line held in the global array P4_ARCHIVE, validates it
# and hands each resulting depot file to purge_revisions. Invalid lines are
# logged and skipped.
#
# Globals read: P4_ARCHIVE, EXCLUDE_PATH_CHARS, P4BIN
# Requires log() and purge_revisions() to be defined.
#------------------------------------------------------------------------------
process_archive_config() {
    local CONFIG_LINE ARCHIVE_PATH ARCHIVE_DAYS ARCHIVE_MIN_REV ARCHIVE_FILE
    local -a ARCHIVE_FILES

    for CONFIG_LINE in "${P4_ARCHIVE[@]}"; do
        # Split each line into separate vars; IFS is changed for this read only
        IFS=$'\t' read -r ARCHIVE_PATH ARCHIVE_DAYS ARCHIVE_MIN_REV <<< "$CONFIG_LINE"

        # Ensure there are no empty vars
        if [[ -z "$ARCHIVE_PATH" || -z "$ARCHIVE_DAYS" || -z "$ARCHIVE_MIN_REV" ]]; then
            log "One or more vars found to be empty, make sure config file is tab delimited, skipping ${CONFIG_LINE}"
        # Look for excluded characters in path
        elif [[ "$ARCHIVE_PATH" =~ ($EXCLUDE_PATH_CHARS) ]]; then
            log "Invalid characters found in path ${ARCHIVE_PATH}, skipping."
        elif ! [[ "$ARCHIVE_DAYS" =~ ^[0-9]+$ ]]; then
            log "Invalid characters found in days ${ARCHIVE_DAYS} for ${ARCHIVE_PATH}, skipping."
        elif ! [[ "$ARCHIVE_MIN_REV" =~ ^[0-9]+$ ]]; then
            log "Invalid characters found in min rev ${ARCHIVE_MIN_REV} for ${ARCHIVE_PATH}, skipping."
        elif [[ "$ARCHIVE_PATH" =~ \*|\.\.\. ]]; then
            # Wildcard path: expand it to the list of matching depot files. Reading
            # line by line keeps names with spaces intact and never lets the shell
            # glob-expand names that contain ?, [ or ].
            mapfile -t ARCHIVE_FILES < <($P4BIN -ztag -F "%depotFile%" files "${ARCHIVE_PATH}")
            # Archive/purge each file with the days/min rev configured for the pattern
            for ARCHIVE_FILE in "${ARCHIVE_FILES[@]}"; do
                [[ -n "$ARCHIVE_FILE" ]] || continue
                purge_revisions "$ARCHIVE_FILE" "$ARCHIVE_DAYS" "$ARCHIVE_MIN_REV"
            done
        else
            purge_revisions "$ARCHIVE_PATH" "$ARCHIVE_DAYS" "$ARCHIVE_MIN_REV"
        fi
    done
}

# Parse archive revision config
process_archive_config

# Final housekeeping, then mark the end of the run in the log.
# check_disk_space and remove_old_logs are not defined in this file; they are
# presumably provided by the sourced backup_functions.sh - verify there.
check_disk_space
remove_old_logs
log "End $P4SERVER Purge Revisions"
