daily_checkpoint.ps1 #3

  • //
  • guest/
  • russell_jackson/
  • sdp/
  • Server/
  • Windows/
  • p4/
  • common/
  • bin/
  • daily_checkpoint.ps1
  • View
  • Commits
  • Open Download .zip Download (43 KB)
#==============================================================================
# daily_checkpoint.ps1 - Offline checkpoint script for Perforce on Windows.
#
# Performs an offline checkpoint of the Perforce server database without
# requiring the SDP or any particular directory layout.  All paths are
# supplied as parameters or derived from a configuration file.
#
# Workflow:
#   1. Validate environment and directories
#   2. Rotate the active journal (master only; edge/replica/standby servers
#      skip rotation and wait for the already-rotated journal via replication)
#   3. Replay numbered journals to the offline database
#   4. Dump a new checkpoint from the offline database
#   5. Recreate the offline database from the new checkpoint
#   6. Compress and archive journals
#   7. Clean up old checkpoints, journals, and logs
#
# Usage:
#   .\daily_checkpoint.ps1 -P4ROOT D:\p4data\root -OfflineDB D:\p4data\offline_db `
#       -Checkpoints E:\backups\checkpoints -Journals E:\backups\journals `
#       -Logs D:\p4data\logs -P4D C:\p4\p4d.exe -P4 C:\p4\p4.exe `
#       -P4PORT ssl:1666 -P4USER perforce
#
#   .\daily_checkpoint.ps1 -ConfigFile D:\p4data\checkpoint_config.ps1
#
#   Multi-part parallel checkpoints (p4d 2023.2+, creates numbered part files):
#   .\daily_checkpoint.ps1 -ConfigFile D:\p4data\checkpoint_config.ps1 -MultiPartParallel
#
# Configuration file:
#   Source a .ps1 file that sets the $script: variables listed below.
#   See the parameter descriptions for what each one controls.
#
#==============================================================================

[CmdletBinding()]
param(
    # Path to a .ps1 configuration file that sets $script: variables.
    # When provided, all other parameters become optional overrides.
    # The file is dot-sourced by Initialize-Environment, so it is executed as
    # PowerShell code; only point this at a trusted file.
    [Parameter()]
    [string]$ConfigFile,

    # P4ROOT - the live server database directory (contains db.* files).
    # server.id and server_type.txt are also read from here when present.
    [Parameter()]
    [string]$P4ROOT,

    # Offline database directory (a second copy of db.* used for checkpoints).
    # Must contain offline_db_usable.txt and db.counters to be considered usable.
    [Parameter()]
    [string]$OfflineDB,

    # Directory where checkpoint files are stored.
    # Compressed journals (<ServerName>.jnl.<N>.gz) are written here as well.
    [Parameter()]
    [string]$Checkpoints,

    # Directory where rotated journal files are stored.
    [Parameter()]
    [string]$Journals,

    # Directory for log files.
    # Holds checkpoint.log and the ckp_running.txt semaphore file.
    [Parameter()]
    [string]$Logs,

    # Full path to p4d.exe.
    [Parameter()]
    [string]$P4D,

    # Full path to p4.exe.
    [Parameter()]
    [string]$P4,

    # Perforce server port (e.g. ssl:1666, 1666).
    [Parameter()]
    [string]$P4PORT,

    # Port of the master/commit server (defaults to P4PORT).
    # This is the port 'p4 admin journal' is run against.
    [Parameter()]
    [string]$P4MASTERPORT,

    # Perforce admin user for journal rotation and counter updates.
    # Passed as 'p4 -u' when logging in.
    [Parameter()]
    [string]$P4USER,

    # Prefix for checkpoint/journal filenames (e.g. "p4_master"). Defaults to "p4".
    [Parameter()]
    [string]$ServerName,

    # Number of checkpoints to retain. 0 = keep all.
    [Parameter()]
    [int]$KeepCheckpoints = 7,

    # Number of journals to retain. 0 = keep all.
    [Parameter()]
    [int]$KeepJournals = 112,

    # Number of log files to retain. 0 = keep all.
    [Parameter()]
    [int]$KeepLogs = 350,

    # Number of threads for parallel checkpoints (p4d 2022.1+). 0 = disable parallel.
    # Passed to p4d as the -N value for parallel and multi-part operations.
    [Parameter()]
    [int]$ParallelThreads = 32,

    # SMTP server for email notifications. Leave empty to disable.
    # Mail is only attempted when this, MailTo, and a log file are all set.
    [Parameter()]
    [string]$SmtpServer,

    # Email recipient for error notifications.
    [Parameter()]
    [string]$MailTo,

    # Email sender address.
    # When empty, "perforce@<COMPUTERNAME>" is used.
    [Parameter()]
    [string]$MailFrom,

    # Path to a password file (first line = password) for p4 login.
    # The first line is trimmed and piped to 'p4 login'; when the file is not
    # set or does not exist, no login is attempted.
    [Parameter()]
    [string]$PasswordFile,

    # Enable multi-part parallel checkpoints (p4d 2023.2+).
    # Creates numbered part files (e.g. p4.ckp.123.0.gz, p4.ckp.123.1.gz, ...)
    # instead of a single directory.  Requires ParallelThreads > 0.
    [Parameter()]
    [switch]$MultiPartParallel
)

# Strict mode makes reads of undefined variables an error, and "Stop" turns
# cmdlet errors into terminating errors, so failures surface immediately.
Set-StrictMode -Version Latest
$ErrorActionPreference = "Stop"

#==============================================================================
# Script-scope state
#
# Every $script: variable used by the functions below is initialised here so
# that it exists before any function (or a dot-sourced config file) reads it.
#==============================================================================
# Paths, binaries and connection settings; filled in by Initialize-Environment
# from the config file and/or the command-line parameters.
$script:P4ROOT       = $null
$script:OFFLINE_DB   = $null
$script:CHECKPOINTS  = $null
$script:JOURNALS     = $null
$script:LOGS         = $null
$script:P4BIN        = $null
$script:P4DBIN       = $null
$script:P4PORT       = $null
$script:P4MASTERPORT = $null
$script:P4USER       = $null
$script:P4SERVER     = $null
# Log file path; while this is $null Write-Log prints to the host instead.
$script:LOGFILE      = $null

# Server identity and topology. SERVERID and SERVER_TYPE are read from files in
# P4ROOT by Initialize-Environment; the EDGE/REPLICA/STANDBY flags (0 or 1) are
# set by Set-ServerVars.
$script:SERVERID          = $null
$script:SERVER_TYPE       = "p4d_master"
# Journal/checkpoint counters computed by Get-JournalNum / Get-OfflineJournalNum.
$script:JOURNALNUM        = 0
$script:CHECKPOINTNUM     = 0
$script:OFFLINEJNLNUM     = 0
$script:EDGESERVER        = 0
$script:REPLICASERVER     = 0
$script:STANDBYSERVER     = 0
# Checkpoint mode flags (0 or 1) and the thread count passed to p4d as -N.
$script:DoParallelCheckpoints    = 0
$script:DoMultiPartParallel      = 0
$script:NumCheckPointThreads     = 32
# Version string detected from 'p4d -V' (e.g. "2023.2"), or $null if unknown.
$script:P4D_VERSION              = $null

# Retention counts for checkpoints, journals and logs
# (presumably consumed by the cleanup step later in the file).
$script:KEEPCKPS  = 7
$script:KEEPJNLS  = 112
$script:KEEPLOGS  = 350

# Email notification and login settings used by Invoke-Die and Invoke-P4Login.
$script:MAILTO     = $null
$script:MAILFROM   = $null
$script:SMTPSERVER = $null
$script:PASSWORDFILE = $null

#==============================================================================
# Logging
#==============================================================================
function Write-Log {
    # Emits one timestamped line. The line goes to $script:LOGFILE when a log
    # file has been configured, otherwise to the host.
    param([string]$Message)

    $line = "{0} {1}: {2}" -f (Get-Date -Format "ddd MM/dd/yyyy HH:mm:ss"),
        $MyInvocation.ScriptName, $Message

    if (-not $script:LOGFILE) {
        Write-Host $line
        return
    }

    Add-Content -Path $script:LOGFILE -Value $line
}

#==============================================================================
# Die - log error, optionally send email, remove semaphore, and exit.
#==============================================================================
function Invoke-Die {
    # Fatal-error handler: logs the message, optionally mails the log file,
    # removes the checkpoint semaphore, then throws $Message to the caller.
    #
    # Parameters:
    #   Message - human-readable description of the failure.
    # Throws:
    #   Always throws $Message; this function never returns normally.
    param([string]$Message)

    Write-Log "ERROR!!! - $env:COMPUTERNAME $script:P4SERVER $($MyInvocation.ScriptName): $Message"

    # Mail is best-effort and only attempted when a log file, a recipient and
    # an SMTP server are all configured.
    if ($script:LOGFILE -and $script:MAILTO -and $script:SMTPSERVER) {
        try {
            $from = if ($script:MAILFROM) { $script:MAILFROM } else { "perforce@$env:COMPUTERNAME" }
            # -ErrorAction Stop so that a delivery failure reaches the catch
            # block and is logged instead of being silently discarded.
            Send-MailMessage -To $script:MAILTO -From $from `
                -Subject "ERROR!!! - $env:COMPUTERNAME $script:P4SERVER checkpoint" `
                -Body (Get-Content $script:LOGFILE -Raw) `
                -SmtpServer $script:SMTPSERVER -ErrorAction Stop
        } catch {
            Write-Log "Warning: Failed to send email notification: $_"
        }
    }

    # Remove semaphore. A failure here must not replace the original error, so
    # it is logged and the real message is still thrown below.
    if ($script:LOGS) {
        $semaphore = Join-Path $script:LOGS "ckp_running.txt"
        if (Test-Path $semaphore) {
            try {
                Remove-Item -Force $semaphore -ErrorAction Stop
            } catch {
                Write-Log "Warning: Failed to remove semaphore file ${semaphore}: $_"
            }
        }
    }

    throw $Message
}

#==============================================================================
# Initialize environment from parameters and optional config file.
#==============================================================================
function Initialize-Environment {
    # Populates the $script: settings from the optional config file and the
    # script parameters, then derives defaults (master port, server name),
    # reads server.id / server_type.txt from P4ROOT, detects the p4d version
    # and sets the log file path.
    #
    # Exits the process with code 1 when -ConfigFile names a missing file.

    # Load config file first (parameters override it below). The file is
    # dot-sourced inside this function, so only assignments it makes to
    # $script:-qualified variables survive after this function returns.
    if ($ConfigFile) {
        if (-not (Test-Path $ConfigFile)) {
            Write-Host "Error: Configuration file '$ConfigFile' not found."
            exit 1
        }
        . $ConfigFile
    }

    # Apply parameter overrides — parameters take precedence over config file
    if ($P4ROOT)       { $script:P4ROOT       = $P4ROOT }
    if ($OfflineDB)    { $script:OFFLINE_DB    = $OfflineDB }
    if ($Checkpoints)  { $script:CHECKPOINTS   = $Checkpoints }
    if ($Journals)     { $script:JOURNALS      = $Journals }
    if ($Logs)         { $script:LOGS          = $Logs }
    if ($P4D)          { $script:P4DBIN        = $P4D }
    if ($P4)           { $script:P4BIN         = $P4 }
    if ($P4PORT)       { $script:P4PORT        = $P4PORT }
    if ($P4MASTERPORT) { $script:P4MASTERPORT  = $P4MASTERPORT }
    if ($P4USER)       { $script:P4USER        = $P4USER }
    if ($ServerName)   { $script:P4SERVER      = $ServerName }
    if ($SmtpServer)   { $script:SMTPSERVER    = $SmtpServer }
    if ($MailTo)       { $script:MAILTO        = $MailTo }
    if ($MailFrom)     { $script:MAILFROM      = $MailFrom }
    if ($PasswordFile) { $script:PASSWORDFILE  = $PasswordFile }

    # The numeric parameters carry default values, so they are never "empty".
    # Copying them unconditionally would let a parameter *default* overwrite a
    # value the config file just set. With a config file, only parameters the
    # caller actually passed are treated as overrides; without one, the
    # parameter values (explicit or default) are used as before.
    $bound = $script:PSBoundParameters
    if (-not $ConfigFile -or $bound.ContainsKey('KeepCheckpoints')) { $script:KEEPCKPS = $KeepCheckpoints }
    if (-not $ConfigFile -or $bound.ContainsKey('KeepJournals'))    { $script:KEEPJNLS = $KeepJournals }
    if (-not $ConfigFile -or $bound.ContainsKey('KeepLogs'))        { $script:KEEPLOGS = $KeepLogs }
    if (-not $ConfigFile -or $bound.ContainsKey('ParallelThreads')) { $script:NumCheckPointThreads = $ParallelThreads }
    if ($MultiPartParallel) { $script:DoMultiPartParallel = 1 }

    # Default P4MASTERPORT to P4PORT
    if (-not $script:P4MASTERPORT) { $script:P4MASTERPORT = $script:P4PORT }

    # Default server name
    if (-not $script:P4SERVER) { $script:P4SERVER = "p4" }

    if ($script:P4ROOT) {
        # Read server.id if present. An empty file yields no line at all, so
        # guard before calling Trim().
        $serverIdFile = Join-Path $script:P4ROOT "server.id"
        if (Test-Path $serverIdFile) {
            $idLine = Get-Content $serverIdFile -First 1
            if ($idLine) { $script:SERVERID = $idLine.Trim() }
        }

        # Read server type if present (same empty-file guard).
        $serverTypeFile = Join-Path $script:P4ROOT "server_type.txt"
        if (Test-Path $serverTypeFile) {
            $typeLine = Get-Content $serverTypeFile -First 1
            if ($typeLine) { $script:SERVER_TYPE = $typeLine.Trim() }
        }
    }

    # Detect p4d version for parallel checkpoint support. The first output
    # line containing "Rev." is searched for a "YYYY.N" token.
    if ($script:P4DBIN -and (Test-Path $script:P4DBIN)) {
        try {
            $versionOutput = & $script:P4DBIN -V 2>&1
            $versionLine = $versionOutput | Where-Object { $_ -match "Rev\." } | Select-Object -First 1
            if ($versionLine -match "(\d{4}\.\d)") {
                $script:P4D_VERSION = $Matches[1]
            }
        } catch {
            # Version detection is best-effort; "0" disables version-gated features.
            $script:P4D_VERSION = "0"
        }
    }

    # Set log file
    if ($script:LOGS) {
        $script:LOGFILE = Join-Path $script:LOGS "checkpoint.log"
    }
}

#==============================================================================
# Determine server topology (edge, replica, standby).
#==============================================================================
function Set-ServerVars {
    # Decides which checkpoint mode to use from the detected p4d version and
    # thread count, and sets the EDGESERVER / REPLICASERVER / STANDBYSERVER
    # flags from this server's db.server record.
    #
    # Throws (via Invoke-Die) when multi-part parallel mode was requested but
    # the thread count or p4d version does not allow it.

    # Compare versions numerically rather than as strings. An unparseable or
    # missing version (including the "0" fallback) counts as "unknown".
    $detectedVersion = $null
    $versionKnown = $false
    if ($script:P4D_VERSION) {
        $versionKnown = [version]::TryParse([string]$script:P4D_VERSION, [ref]$detectedVersion)
    }

    # Parallel checkpoint support for p4d 2022.1+ (2022.1 itself included).
    if ($versionKnown -and $detectedVersion -ge [version]"2022.1" -and $script:NumCheckPointThreads -gt 0) {
        $script:DoParallelCheckpoints = 1
    }

    # Multi-part parallel checkpoint support for p4d 2023.2+
    if ($script:DoMultiPartParallel -eq 1) {
        if ($script:NumCheckPointThreads -le 0) {
            Invoke-Die "MultiPartParallel requires ParallelThreads > 0."
        }
        if (-not ($versionKnown -and $detectedVersion -ge [version]"2023.2")) {
            Invoke-Die "MultiPartParallel requires p4d 2023.2 or later (detected: $($script:P4D_VERSION))."
        }
        # Multi-part supersedes directory-based parallel
        $script:DoParallelCheckpoints = 0
    }

    # Detect edge/replica/standby from db.server
    if ($script:SERVERID) {
        $servicesData = $null
        try {
            $output = & $script:P4DBIN -r $script:P4ROOT -J off -L NUL -k db.server -jd - 2>&1
            # Escape the server id so characters such as '.' match literally,
            # and take only the first matching record.
            $recordPattern = "@db.server@ @$([regex]::Escape($script:SERVERID))@"
            $serverLine = $output | Where-Object { $_ -match $recordPattern } | Select-Object -First 1
            if ($serverLine) {
                $fields = "$serverLine" -split "@"
                # Field 12 of the '@'-split record is read as the services bitmask.
                if ($fields.Count -ge 13) {
                    $servicesData = [int]$fields[12]
                }
            }
        } catch {
            # Topology detection stays best-effort (the flags keep their
            # defaults), but the reason is recorded instead of being dropped.
            Write-Log "Warning: Could not determine server services from db.server: $_"
        }

        if ($servicesData) {
            $script:EDGESERVER    = if ($servicesData -band 4096) { 1 } else { 0 }
            $script:REPLICASERVER = if ($servicesData -eq 2533) { 1 } else { 0 }
            $script:STANDBYSERVER = if ($servicesData -eq 35141 -or $servicesData -eq 35301) { 1 } else { 0 }
        }
    }
}

#==============================================================================
# Validate required variables and disk space.
#==============================================================================
function Test-Vars {
    # Preflight check: every required setting must be non-empty and both
    # binaries must exist, otherwise the script exits with code 1. Afterwards
    # the volume holding the offline db must have at least 2GB free.

    # Ordered so that problems are reported in a stable, predictable order.
    $required = [ordered]@{
        "P4ROOT"      = $script:P4ROOT
        "OfflineDB"   = $script:OFFLINE_DB
        "Checkpoints" = $script:CHECKPOINTS
        "Journals"    = $script:JOURNALS
        "Logs"        = $script:LOGS
        "P4D"         = $script:P4DBIN
        "P4"          = $script:P4BIN
        "P4PORT"      = $script:P4PORT
        "P4USER"      = $script:P4USER
    }

    $failed = $false

    foreach ($setting in $required.GetEnumerator()) {
        if ($setting.Value) { continue }
        Write-Host "Error: Required parameter -$($setting.Key) is NOT set."
        $failed = $true
    }

    foreach ($exePath in @($script:P4BIN, $script:P4DBIN)) {
        # Unset binaries were already reported above; only check real paths.
        if (-not $exePath) { continue }
        if (Test-Path $exePath) { continue }
        Write-Host "Error: Binary not found: $exePath"
        $failed = $true
    }

    if ($failed) {
        Write-Host "Aborting due to errors in preflight checks."
        Write-Host "Supply required values via parameters or a -ConfigFile."
        exit 1
    }

    # Check disk space (~2GB minimum on offline_db volume). Skipped when the
    # directory does not exist or its path does not resolve to a drive.
    if (-not (Test-Path $script:OFFLINE_DB)) { return }

    $volume = (Resolve-Path $script:OFFLINE_DB).Drive
    if (-not $volume) { return }

    $freeGB = [math]::Round((Get-PSDrive $volume.Name).Free / 1GB, 2)
    if ($freeGB -lt 2) {
        Invoke-Die "Available space on offline_db volume is ${freeGB}GB, less than the required 2GB."
    }
}

#==============================================================================
# Verify required directories exist and are accessible.
#==============================================================================
function Test-Dirs {
    # Confirms that the offline db, checkpoints, journals and logs directories
    # exist and accept writes (probed by creating and deleting a small file).
    # All problems are collected first and reported in a single Invoke-Die.
    $problems = @()

    $targets = @($script:OFFLINE_DB, $script:CHECKPOINTS, $script:JOURNALS, $script:LOGS)
    foreach ($target in $targets) {
        if (-not (Test-Path $target)) {
            Write-Log "Error: Dir $target does not exist."
            $problems += "$target (missing)"
            continue
        }

        try {
            $probe = Join-Path $target ".p4_write_test"
            [IO.File]::WriteAllText($probe, "test")
            Remove-Item $probe -Force
        } catch {
            Write-Log "Error: Dir $target is not writable."
            $problems += "$target (not writable)"
        }
    }

    if ($problems.Count -gt 0) {
        Invoke-Die "Directory check failed: $($problems -join ', '). Aborting."
    }
}

#==============================================================================
# Check the offline database is in a usable state.
#==============================================================================
function Test-OfflineDbUsable {
    # The offline db counts as usable only when both the marker file
    # offline_db_usable.txt and db.counters exist in it; otherwise abort.
    $usableFile = Join-Path $script:OFFLINE_DB "offline_db_usable.txt"
    $markerPresent = Test-Path $usableFile
    if (-not $markerPresent) {
        Invoke-Die "Offline database not in a usable state. Expected file $usableFile not found. Rebuild the offline database from a checkpoint first."
    }

    $countersFile = Join-Path $script:OFFLINE_DB "db.counters"
    $countersPresent = Test-Path $countersFile
    if (-not $countersPresent) {
        Invoke-Die "Offline database file $countersFile not found. Create it by running a live checkpoint first."
    }
}

#==============================================================================
# Semaphore management - prevent concurrent checkpoint operations.
#==============================================================================
function Set-CkpRunning {
    # Creates the ckp_running.txt semaphore in the logs directory, or aborts
    # when one already exists (a previous run is unfinished or was
    # interrupted).
    #
    # Throws (via Invoke-Die) when the semaphore is already present.
    $semaphore = Join-Path $script:LOGS "ckp_running.txt"
    if (Test-Path $semaphore) {
        # Invoke-Die deletes the semaphore under $script:LOGS as part of its
        # cleanup. This semaphore belongs to another run, so deleting it would
        # let the next invocation start alongside that run. Hide the logs
        # directory from Invoke-Die for the duration of the call; logging and
        # mail use $script:LOGFILE and are unaffected.
        $logsDir = $script:LOGS
        $script:LOGS = $null
        try {
            Invoke-Die "Last checkpoint not complete. Semaphore file $semaphore exists. Check the backup process or remove this file if the prior run is confirmed to have finished."
        } finally {
            $script:LOGS = $logsDir
        }
    }
    "Checkpoint running." | Out-File -FilePath $semaphore -Encoding UTF8
}

function Remove-CkpRunning {
    # Deletes the ckp_running.txt semaphore from the logs directory; does
    # nothing when the file is not there.
    $lockPath = Join-Path $script:LOGS "ckp_running.txt"
    if (-not (Test-Path $lockPath)) { return }

    Remove-Item -Force $lockPath
}

#==============================================================================
# Get journal number from live db.counters.
#==============================================================================
function Get-JournalNum {
    # Reads the 'journal' counter from the live db.counters (dumped with
    # p4d -jd) into $script:JOURNALNUM, and sets $script:CHECKPOINTNUM to the
    # number after it. A missing db.counters or counter counts as 0.
    $liveCounters = Join-Path $script:P4ROOT "db.counters"
    $haveCounters = Test-Path $liveCounters

    if (-not $haveCounters) {
        $script:JOURNALNUM = 0
    } else {
        $dump = & $script:P4DBIN -r $script:P4ROOT -k db.counters -jd - 2>&1
        $counterLine = $dump | Where-Object { $_ -match "@journal@" }
        if (-not $counterLine) {
            $script:JOURNALNUM = 0
        } else {
            # The counter value is taken from element 7 of the '@'-split record.
            $rawValue = ($counterLine -split "@")[7]
            if ($rawValue -notmatch '^\d+$') {
                Invoke-Die "The journal counter value [$rawValue] is invalid. It must be numeric."
            } else {
                $script:JOURNALNUM = [int]$rawValue
            }
        }
    }

    # Edge/replica/standby: journal already rotated on master
    $downstream = ($script:EDGESERVER -eq 1) -or ($script:REPLICASERVER -eq 1) -or ($script:STANDBYSERVER -eq 1)
    if ($downstream) {
        $script:JOURNALNUM = $script:JOURNALNUM - 1
    }

    $script:CHECKPOINTNUM = $script:JOURNALNUM + 1
}

#==============================================================================
# Get journal number from offline db.counters.
#==============================================================================
function Get-OfflineJournalNum {
    # Reads the 'journal' counter from the offline db.counters into
    # $script:OFFLINEJNLNUM and uses the same number as $script:CHECKPOINTNUM.
    # Aborts when the offline db is unusable or the counter cannot be read.
    Test-OfflineDbUsable

    $dump = & $script:P4DBIN -r $script:OFFLINE_DB -jd - db.counters 2>&1
    $counterLine = $dump | Where-Object { $_ -match "@journal@" }

    if (-not $counterLine) {
        Invoke-Die "Cannot get the offline journal number from $($script:OFFLINE_DB)\db.counters."
    } else {
        # The counter value is taken from element 7 of the '@'-split record.
        $rawValue = ($counterLine -split "@")[7]
        if ($rawValue -notmatch '^\d+$') {
            Invoke-Die "The offline journal counter value [$rawValue] is invalid."
        } else {
            $script:OFFLINEJNLNUM = [int]$rawValue
        }
    }

    Write-Log "Offline journal number is: $($script:OFFLINEJNLNUM)"
    $script:CHECKPOINTNUM = $script:OFFLINEJNLNUM
    Write-Log "Offline checkpoint number is: $($script:CHECKPOINTNUM)"
}

#==============================================================================
# Rotate the active journal via 'p4 admin journal'.
#==============================================================================
function Wait-RotatedJournalFile {
    # Private helper for Invoke-TruncateJournal: polls until $Path exists and
    # aborts through Invoke-Die with $TimeoutMessage once $MaxPolls sleeps of
    # $PollSeconds each have elapsed.
    param(
        [string]$Path,
        [string]$TimeoutMessage,
        [int]$PollSeconds = 5,
        [int]$MaxPolls = 360
    )

    $polls = 0
    while (-not (Test-Path $Path)) {
        Start-Sleep -Seconds $PollSeconds
        $polls++
        if ($polls -ge $MaxPolls) {
            Invoke-Die $TimeoutMessage
        }
    }
}

function Invoke-TruncateJournal {
    # Makes sure the numbered journal for this run exists in the journals
    # directory.
    #   - Edge/replica/standby servers never rotate; they wait for the file
    #     to appear.
    #   - Any other server rotates the journal with 'p4 admin journal' against
    #     P4MASTERPORT and then waits for the file. A SERVER_TYPE other than
    #     "p4d_master" is handled the same way after logging a warning.
    #
    # Throws (via Invoke-Die) when the target checkpoint or journal already
    # exists, when rotation fails, or when the journal file does not appear
    # within the timeout.
    $checkpointFile = Join-Path $script:CHECKPOINTS "$($script:P4SERVER).ckp.$($script:CHECKPOINTNUM).gz"
    $journalFile    = Join-Path $script:JOURNALS "$($script:P4SERVER).jnl.$($script:JOURNALNUM)"

    $maxWait        = 360
    $pollSeconds    = 5
    $timeoutSeconds = $maxWait * $pollSeconds

    $isDownstream = ($script:EDGESERVER -eq 1) -or ($script:REPLICASERVER -eq 1) -or ($script:STANDBYSERVER -eq 1)
    if ($isDownstream) {
        # Edge/replica/standby servers do not rotate the journal themselves.
        # The master rotates it and the numbered journal arrives via replication.
        Write-Log "This is an edge/replica/standby server (SERVER_TYPE=$($script:SERVER_TYPE)). Skipping journal rotation."
        Write-Log "Waiting for rotated journal file $journalFile to appear via replication..."

        Wait-RotatedJournalFile -Path $journalFile -PollSeconds $pollSeconds -MaxPolls $maxWait `
            -TimeoutMessage "Timed out waiting for replicated journal file $journalFile after $timeoutSeconds seconds. Ensure the master has rotated its journal."

        Write-Log "Rotated journal file $journalFile found."
        return
    }

    if ($script:SERVER_TYPE -ne "p4d_master") {
        Write-Log "Warning: Unrecognized server type '$($script:SERVER_TYPE)'. Proceeding with journal rotation as master."
    }

    # Refuse to rotate when this run's outputs already exist: that means a
    # previous run got at least this far.
    if (Test-Path $checkpointFile) {
        Invoke-Die "Checkpoint $checkpointFile already exists, check the backup process."
    }
    if (Test-Path $journalFile) {
        Invoke-Die "Journal $journalFile already exists, check the backup process."
    }

    Write-Log "Truncating journal..."
    Invoke-P4Login

    # Run as the configured admin user, matching what Invoke-P4Login logged in
    # as, rather than whatever user the environment happens to default to.
    $p4Args = @()
    if ($script:P4USER) { $p4Args += @('-u', $script:P4USER) }
    $p4Args += @('-p', $script:P4MASTERPORT, 'admin', 'journal')

    $sw = [System.Diagnostics.Stopwatch]::StartNew()
    $result = & $script:P4BIN @p4Args 2>&1
    # Capture the exit code before anything else can run another program.
    $rotateExitCode = $LASTEXITCODE
    $sw.Stop()
    Write-Log "Journal rotation completed in $($sw.Elapsed.TotalSeconds) seconds."

    if ($rotateExitCode -ne 0) {
        Write-Log ($result | Out-String)
        Invoke-Die "Journal rotation on $($script:P4MASTERPORT) failed."
    }

    # Wait for the rotated journal file to appear
    Wait-RotatedJournalFile -Path $journalFile -PollSeconds $pollSeconds -MaxPolls $maxWait `
        -TimeoutMessage "Timed out waiting for journal file $journalFile after $timeoutSeconds seconds."

    Invoke-P4Login
}

#==============================================================================
# Login to Perforce.
#==============================================================================
function Invoke-P4Login {
    # Logs P4USER in to P4PORT by piping the first line of the password file
    # to 'p4 login'. Does nothing when no password file is configured or the
    # file does not exist. Login is best-effort: a failure is logged as a
    # warning and the caller carries on.
    if (-not ($script:PASSWORDFILE -and (Test-Path $script:PASSWORDFILE))) { return }

    # An empty file yields no line at all; guard before calling Trim().
    $firstLine = Get-Content $script:PASSWORDFILE -First 1
    if (-not $firstLine) {
        Write-Log "Warning: Password file $($script:PASSWORDFILE) is empty; skipping p4 login."
        return
    }

    $password = $firstLine.Trim()
    $loginOutput = $password | & $script:P4BIN -u $script:P4USER -p $script:P4PORT login 2>&1
    if ($LASTEXITCODE -ne 0) {
        # Record the failure so that a later "rotation failed" error can be
        # traced back to authentication.
        Write-Log "Warning: p4 login for user $($script:P4USER) on $($script:P4PORT) failed (exit code $LASTEXITCODE): $(($loginOutput | Out-String).Trim())"
    }
}

#==============================================================================
# Replay numbered journal files to the offline database.
#==============================================================================
function Invoke-ReplayJournalsToOfflineDb {
    # Brings the offline db up to date by replaying numbered journals, first
    # from the checkpoints directory and then from the journals directory.
    # The offline journal number is re-read before each directory is processed.
    Write-Log "Replay any unreplayed journals to the offline database."
    Test-OfflineDbUsable

    foreach ($sourceDir in @($script:CHECKPOINTS, $script:JOURNALS)) {
        Get-OfflineJournalNum
        Invoke-ReplayJournalsFromDir -Dir $sourceDir
    }
}

function Invoke-ReplayJournalsFromDir {
    # Replays numbered journals found in $Dir into the offline db, starting at
    # $script:OFFLINEJNLNUM and ending at the highest journal number present.
    # A compressed journal (.gz) is preferred over the plain file; numbers with
    # no file are skipped.
    #
    # Parameters:
    #   Dir - directory to scan for "<server>.jnl.<N>[.gz]" files.
    # Throws (via Invoke-Die) when p4d reports a replay failure.
    param([string]$Dir)

    # @() guarantees an array even for zero or one match, so .Count and
    # indexing stay valid under Set-StrictMode.
    $jnlFiles = @(Get-ChildItem -Path $Dir -Filter "*.jnl.*" -File -ErrorAction SilentlyContinue |
        Where-Object { $_.Name -match '\.jnl\.\d+(\.gz)?$' })

    if ($jnlFiles.Count -eq 0) { return }

    # Extract journal numbers
    $jnlNums = @(
        $jnlFiles |
            ForEach-Object { if ($_.Name -match '\.jnl\.(\d+)(\.gz)?$') { [int]$Matches[1] } } |
            Sort-Object -Unique
    )
    if ($jnlNums.Count -eq 0) { return }

    # The list is sorted ascending, so the ends are the minimum and maximum.
    $firstNum = $jnlNums[0]
    $lastNum  = $jnlNums[-1]

    Write-Log "FIRSTJOURNALNUM=$firstNum"
    Write-Log "LASTJOURNALNUM=$lastNum"
    Write-Log "OFFLINEJNLNUM=$($script:OFFLINEJNLNUM)"

    # Nothing is replayed unless the oldest journal here is at or before the
    # offline db's current journal number.
    if ($firstNum -gt $script:OFFLINEJNLNUM) { return }

    $usableFile = Join-Path $script:OFFLINE_DB "offline_db_usable.txt"

    for ($j = $script:OFFLINEJNLNUM; $j -le $lastNum; $j++) {
        $numberedJournal   = Join-Path $Dir "$($script:P4SERVER).jnl.$j"
        $numberedJournalGz = "${numberedJournal}.gz"

        $replayTarget = $null
        if (Test-Path $numberedJournalGz)  { $replayTarget = $numberedJournalGz }
        elseif (Test-Path $numberedJournal) { $replayTarget = $numberedJournal }
        else { continue }

        Write-Log "Replay journal $replayTarget to offline db."
        # The marker is removed for the duration of the replay so that an
        # interrupted replay leaves the offline db flagged as not usable.
        Remove-Item -Force $usableFile -ErrorAction SilentlyContinue

        $sw = [System.Diagnostics.Stopwatch]::StartNew()
        & $script:P4DBIN -r $script:OFFLINE_DB -jr -f $replayTarget 2>&1 |
            Out-File -Append -FilePath $script:LOGFILE
        # Capture the exit code before anything else can run another program.
        $replayExitCode = $LASTEXITCODE
        $sw.Stop()

        if ($replayExitCode -ne 0) {
            Invoke-Die "Offline journal replay of $replayTarget to $($script:OFFLINE_DB) failed."
        }
        Write-Log "Journal replay completed in $($sw.Elapsed.TotalSeconds) seconds."
        "Offline journal files restored successfully." | Out-File -FilePath $usableFile -Encoding UTF8
    }
}

#==============================================================================
# Dump a new checkpoint from the offline database.
#==============================================================================
function Invoke-DumpCheckpoint {
    # Writes checkpoint number $script:CHECKPOINTNUM from the offline db using
    # p4d -jd, in multi-part, parallel-directory or single-file mode depending
    # on the mode flags. Skipped when the .gz checkpoint and its .md5 are both
    # already present. Aborts through Invoke-Die on a non-zero p4d exit code.
    $dbRoot = $script:OFFLINE_DB

    Write-Log "Dump out new checkpoint from db files in $dbRoot."

    # Un-suffixed name: used as-is for multi-part/parallel, plus ".gz" otherwise.
    $ckpBase          = Join-Path $script:CHECKPOINTS "$($script:P4SERVER).ckp.$($script:CHECKPOINTNUM)"
    $newCheckpoint    = "${ckpBase}.gz"
    $newCheckpointMD5 = "${newCheckpoint}.md5"

    # Skip if checkpoint already exists with MD5
    $alreadyDumped = (Test-Path $newCheckpoint) -and (Test-Path $newCheckpointMD5)
    if ($alreadyDumped) {
        Write-Log "Warning: Skipping generation of existing checkpoint $newCheckpoint. MD5 file already exists."
        return
    }

    $exe     = $script:P4DBIN
    $threads = $script:NumCheckPointThreads

    # Pick the argument list and the matching human-readable command line.
    if ($script:DoMultiPartParallel -eq 1) {
        $dumpArgs      = @('-r', $dbRoot, '--multipart', '-z', '-jd', '-N', $threads, $ckpBase)
        $checkpointCmd = "$exe -r `"$dbRoot`" --multipart -z -jd -N $threads `"$ckpBase`""
    } elseif ($script:DoParallelCheckpoints -eq 1) {
        $dumpArgs      = @('-r', $dbRoot, '-z', '-jdpm', '-N', $threads, $ckpBase)
        $checkpointCmd = "$exe -r `"$dbRoot`" -z -jdpm -N $threads `"$ckpBase`""
    } else {
        $dumpArgs      = @('-r', $dbRoot, '-jd', '-z', $newCheckpoint)
        $checkpointCmd = "$exe -r `"$dbRoot`" -jd -z `"$newCheckpoint`""
    }

    Write-Log "Running: $checkpointCmd"

    $sw = [System.Diagnostics.Stopwatch]::StartNew()
    & $exe @dumpArgs 2>&1 |
        Out-File -Append -FilePath $script:LOGFILE
    $sw.Stop()

    if ($LASTEXITCODE -ne 0) {
        Invoke-Die "New checkpoint dump from $dbRoot FAILED. Command was: $checkpointCmd"
    }

    Write-Log "New checkpoint dump succeeded in $($sw.Elapsed.TotalSeconds) seconds."
}

#==============================================================================
# Recreate the offline database from the latest checkpoint.
#==============================================================================
function Invoke-RecreateOfflineDbFiles {
    # Rebuilds the offline db from the most recently written checkpoint:
    #   1. Pick the newest .md5 file in the checkpoints directory and derive
    #      the checkpoint (file, directory or multi-part prefix) from its name.
    #   2. Delete the existing db.* files from the offline db (and from its
    #      "save" subdirectory when present).
    #   3. Restore with p4d -jr in the mode matching the checkpoint type.
    #   4. Re-create the offline_db_usable.txt marker.
    #
    # Throws (via Invoke-Die) when no checkpoint is found, when the checkpoint
    # file is missing, or when the restore fails.
    $checkpointsDir = $script:CHECKPOINTS

    # The three modes differ only in the md5 filter and the "nothing found"
    # message.
    if ($script:DoMultiPartParallel -eq 1) {
        # Multi-part parallel creates files like p4.ckp.123.0.gz, p4.ckp.123.1.gz, ...
        # and a corresponding .md5 file for the set.
        $md5Filter   = "$($script:P4SERVER).ckp.*.md5"
        $noneMessage = "No multi-part parallel checkpoints found in $checkpointsDir. Run a live checkpoint first."
    } elseif ($script:DoParallelCheckpoints -eq 1) {
        $md5Filter   = "*.md5"
        $noneMessage = "No parallel checkpoint dirs found in $checkpointsDir. Run a live checkpoint first."
    } else {
        $md5Filter   = "$($script:P4SERVER).ckp.*.md5"
        $noneMessage = "No checkpoints found in $checkpointsDir with prefix $($script:P4SERVER). Run a live checkpoint first."
    }

    # Find the latest checkpoint. @() guarantees an array even for a single
    # match, so .Count and [0] stay valid under Set-StrictMode.
    $md5Files = @(Get-ChildItem -Path $checkpointsDir -Filter $md5Filter -File -ErrorAction SilentlyContinue |
        Sort-Object LastWriteTime -Descending)
    if ($md5Files.Count -eq 0) {
        Remove-CkpRunning
        Invoke-Die $noneMessage
    }
    $latestMD5 = $md5Files[0].FullName

    # Work out what will be restored BEFORE touching the offline db, so that a
    # missing checkpoint file cannot leave us with no offline db at all.
    $restoreSource = $latestMD5 -replace '\.md5$', ''
    $singleFile = ($script:DoMultiPartParallel -ne 1) -and ($script:DoParallelCheckpoints -ne 1)
    if ($singleFile) {
        # The md5 may be named "<ckp>.gz.md5" or "<ckp>.md5"; either way the
        # checkpoint itself ends in .gz.
        if ($latestMD5 -notmatch '\.gz\.md5$') {
            $restoreSource = $restoreSource + ".gz"
        }
        if (-not (Test-Path $restoreSource)) {
            Invoke-Die "Missing last checkpoint file: $restoreSource. Abort!"
        }
    }

    # Remove old offline db files. The usable marker goes first so that an
    # interrupted rebuild leaves the offline db flagged as not usable.
    $usableFile = Join-Path $script:OFFLINE_DB "offline_db_usable.txt"
    Remove-Item -Force $usableFile -ErrorAction SilentlyContinue
    Get-ChildItem -Path $script:OFFLINE_DB -Filter "db.*" -File -ErrorAction SilentlyContinue |
        Remove-Item -Force
    $saveDir = Join-Path $script:OFFLINE_DB "save"
    if (Test-Path $saveDir) {
        Get-ChildItem -Path $saveDir -Filter "db.*" -File -ErrorAction SilentlyContinue |
            Remove-Item -Force
    }

    # Restore from checkpoint
    $exe     = $script:P4DBIN
    $threads = $script:NumCheckPointThreads
    if ($script:DoMultiPartParallel -eq 1) {
        Write-Log "Recovering from last multi-part parallel checkpoint, $restoreSource."
        $restoreArgs = @('-r', $script:OFFLINE_DB, '--multipart', '-z', '-jr', '-N', $threads, $restoreSource)
        $restoreCmd  = "$exe -r `"$($script:OFFLINE_DB)`" --multipart -z -jr -N $threads `"$restoreSource`""
    } elseif ($script:DoParallelCheckpoints -eq 1) {
        Write-Log "Recovering from last parallel checkpoint dir, $restoreSource."
        $restoreArgs = @('-r', $script:OFFLINE_DB, '-z', '-jrp', '-N', $threads, $restoreSource)
        $restoreCmd  = "$exe -r `"$($script:OFFLINE_DB)`" -z -jrp -N $threads `"$restoreSource`""
    } else {
        Write-Log "Recovering from last full checkpoint, $restoreSource."
        $restoreArgs = @('-r', $script:OFFLINE_DB, '-jr', '-z', $restoreSource)
        $restoreCmd  = "$exe -r `"$($script:OFFLINE_DB)`" -jr -z `"$restoreSource`""
    }
    Write-Log "Running: $restoreCmd"

    $sw = [System.Diagnostics.Stopwatch]::StartNew()
    & $exe @restoreArgs 2>&1 |
        Out-File -Append -FilePath $script:LOGFILE
    # Capture the exit code before anything else can run another program.
    $restoreExitCode = $LASTEXITCODE
    $sw.Stop()

    if ($restoreExitCode -ne 0) {
        Invoke-Die "Restore of checkpoint to $($script:OFFLINE_DB) failed!"
    }

    Write-Log "Offline db restored in $($sw.Elapsed.TotalSeconds) seconds."
    "Offline db file restored successfully." | Out-File -FilePath $usableFile -Encoding UTF8
}

#==============================================================================
# Compress and move old journals to the checkpoints directory.
#==============================================================================
function Invoke-GzipMvJournals {
    # Compresses rotated (numbered) journals found in $script:JOURNALS into
    # $script:CHECKPOINTS as "<P4SERVER>.jnl.<N>.gz" and deletes the
    # uncompressed source. The two highest-numbered journals are left alone.
    # Any compression failure is fatal (Invoke-Die).
    Write-Log "Compress journals and move to checkpoints volume."

    # @() forces an array so .Count and indexing behave the same for 0, 1 or
    # many results.
    $jnlFiles = @(Get-ChildItem -Path $script:JOURNALS -Filter "*.jnl.*" -File -ErrorAction SilentlyContinue |
        Where-Object { $_.Name -notmatch '\.gz$' })

    if ($jnlFiles.Count -le 2) { return }

    # Collect the numeric suffixes of all uncompressed numbered journals.
    $jnlNums = @($jnlFiles |
        ForEach-Object {
            if ($_.Name -match '\.jnl\.(\d+)$') {
                [int]$Matches[1]
            }
        } |
        Sort-Object)

    if ($jnlNums.Count -le 2) { return }

    # Compress all but the latest
    $firstNum = $jnlNums[0]
    $lastNum  = $jnlNums[-2]

    Write-Log "FIRSTJOURNALNUM=$firstNum"
    Write-Log "LASTJOURNALNUM=$lastNum"

    for ($j = $firstNum; $j -le $lastNum; $j++) {
        $numberedJournal = Join-Path $script:JOURNALS "$($script:P4SERVER).jnl.$j"
        if (-not (Test-Path $numberedJournal)) { continue }

        Write-Log "Compressing $numberedJournal"

        $sw = [System.Diagnostics.Stopwatch]::StartNew()

        $sourceFile = $numberedJournal
        $destFile   = Join-Path $script:CHECKPOINTS "$($script:P4SERVER).jnl.${j}.gz"

        $sourceStream = $null
        $destStream   = $null
        $gzipStream   = $null
        try {
            try {
                $sourceStream = [System.IO.File]::OpenRead($sourceFile)
                $destStream   = [System.IO.File]::Create($destFile)
                $gzipStream   = New-Object System.IO.Compression.GZipStream($destStream,
                    [System.IO.Compression.CompressionMode]::Compress)
                $sourceStream.CopyTo($gzipStream)
            } finally {
                # Always release the file handles, even when the copy fails.
                # The gzip stream is disposed before the destination stream so
                # the compressed data is flushed first.
                if ($sourceStream) { $sourceStream.Dispose() }
                try {
                    if ($gzipStream) { $gzipStream.Dispose() }
                } finally {
                    if ($destStream) { $destStream.Dispose() }
                }
            }

            # Only reached when the compressed copy was written and closed
            # without error.
            Remove-Item -Force $sourceFile
        } catch {
            Invoke-Die "Compression of $numberedJournal failed: $_"
        }

        $sw.Stop()
        Write-Log "Compressed in $($sw.Elapsed.TotalSeconds) seconds."
    }
}

#==============================================================================
# Remove old checkpoints and journals based on retention settings.
#==============================================================================
function Remove-OldCheckpointsAndJournals {
    # Deletes checkpoints (multi-part files, parallel directories, single
    # files) and compressed journals in $script:CHECKPOINTS that fall outside
    # the retention window.
    #
    #   $script:KEEPCKPS - number of checkpoint generations to keep; 0 disables
    #                      ALL cleanup in this function (journals included).
    #   $script:KEEPJNLS - number of journal generations to keep; journals are
    #                      only cleaned when this is greater than 0.
    #
    # Retention is counted per checkpoint/journal NUMBER, and removal matches
    # the number as a whole dot-delimited component. A bare prefix wildcard
    # such as "ckp.1*" would also match ckp.10, ckp.11, ckp.100 ... and could
    # delete the newest generations.
    if ($script:KEEPCKPS -eq 0) {
        Write-Log "Skipping cleanup of old checkpoints because KeepCheckpoints is set to 0."
        return
    }

    Write-Log "Deleting obsolete checkpoints and journals. Keeping latest $($script:KEEPCKPS)."

    $checkpointsDir = $script:CHECKPOINTS
    if (-not (Test-Path $checkpointsDir)) { return }

    # Private helper: removes every entry in $Directory named exactly $Stem or
    # "$Stem.<anything>". Directories are included (and removed recursively)
    # only when -IncludeDirectories is given.
    function Remove-NumberedEntries {
        param(
            [string]$Directory,
            [string]$Stem,
            [switch]$IncludeDirectories
        )

        # -Filter narrows the listing cheaply; the regex enforces the exact
        # component boundary that the wildcard cannot express.
        $exactStem = '^' + [regex]::Escape($Stem) + '(\.|$)'
        Get-ChildItem -Path $Directory -Filter "${Stem}*" -File:(-not $IncludeDirectories) -ErrorAction SilentlyContinue |
            Where-Object { $_.Name -match $exactStem } |
            Remove-Item -Force -Recurse:$IncludeDirectories
    }

    $ckpStemBase = "$($script:P4SERVER).ckp"
    $jnlStemBase = "$($script:P4SERVER).jnl"

    # Clean up multi-part parallel checkpoint files (e.g. p4.ckp.123.0.gz, p4.ckp.123.1.gz, ...)
    $multiPartToRemove = Get-ChildItem -Path $checkpointsDir -Filter "${ckpStemBase}.*" -File -ErrorAction SilentlyContinue |
        ForEach-Object {
            if ($_.Name -match '\.ckp\.(\d+)\.\d+\.gz(\.md5)?$') {
                [int]$Matches[1]
            }
        } |
        Sort-Object -Unique -Descending |
        Select-Object -Skip $script:KEEPCKPS

    foreach ($num in $multiPartToRemove) {
        $stem = "${ckpStemBase}.${num}"
        Write-Log "Removing multi-part checkpoint ${stem}.*"
        Remove-NumberedEntries -Directory $checkpointsDir -Stem $stem
    }

    # Clean up parallel checkpoint directories (and their companion files,
    # e.g. "<stem>.md5").
    $dirNumsToRemove = Get-ChildItem -Path $checkpointsDir -Directory -Filter "${ckpStemBase}.*" -ErrorAction SilentlyContinue |
        ForEach-Object {
            if ($_.Name -match '\.ckp\.(\d+)$') {
                [int]$Matches[1]
            }
        } |
        Sort-Object -Unique -Descending |
        Select-Object -Skip $script:KEEPCKPS

    foreach ($num in $dirNumsToRemove) {
        $stem = "${ckpStemBase}.${num}"
        Write-Log "Removing $stem"
        Remove-NumberedEntries -Directory $checkpointsDir -Stem $stem -IncludeDirectories
    }

    # Clean up non-parallel checkpoint files
    $fileNumsToRemove = Get-ChildItem -Path $checkpointsDir -Filter "${ckpStemBase}.*.gz" -File -ErrorAction SilentlyContinue |
        ForEach-Object {
            if ($_.Name -match '\.ckp\.(\d+)\.gz$') {
                [int]$Matches[1]
            }
        } |
        Sort-Object -Unique -Descending |
        Select-Object -Skip $script:KEEPCKPS

    foreach ($num in $fileNumsToRemove) {
        $stem = "${ckpStemBase}.${num}"
        Write-Log "Removing ${stem}.*"
        Remove-NumberedEntries -Directory $checkpointsDir -Stem $stem
    }

    # Clean up old journal files
    if ($script:KEEPJNLS -gt 0) {
        $jnlNumsToRemove = Get-ChildItem -Path $checkpointsDir -Filter "${jnlStemBase}.*" -File -ErrorAction SilentlyContinue |
            ForEach-Object {
                if ($_.Name -match '\.jnl\.(\d+)') {
                    [int]$Matches[1]
                }
            } |
            Sort-Object -Unique -Descending |
            Select-Object -Skip $script:KEEPJNLS

        foreach ($num in $jnlNumsToRemove) {
            $stem = "${jnlStemBase}.${num}"
            Write-Log "Removing ${stem}.*"
            Remove-NumberedEntries -Directory $checkpointsDir -Stem $stem
        }
    }
}

#==============================================================================
# Remove old log files based on retention settings.
#==============================================================================
function Remove-OldLogs {
    # Prunes rotated logs in two passes:
    #   1. checkpoint logs, retained according to $script:KEEPJNLS
    #      (skipped when that value is not greater than 0);
    #   2. server-side logs, retained according to $script:KEEPLOGS
    #      (skipped, with a log message, when that value is 0).

    # Pass 1: checkpoint logs.
    if ($script:KEEPJNLS -gt 0) {
        Write-Log "Deleting old checkpoint logs. Keeping latest $($script:KEEPJNLS)."
        Remove-OldLogFiles -Prefix "checkpoint.log" -Keep $script:KEEPJNLS
    }

    # Pass 2: server logs.
    if ($script:KEEPLOGS -eq 0) {
        Write-Log "Skipping cleanup of old server logs because KeepLogs is set to 0."
    } else {
        Write-Log "Deleting old server logs. Keeping latest $($script:KEEPLOGS)."

        # Each entry is a file-name prefix handed to Remove-OldLogFiles.
        @(
            "log",
            "p4broker.log",
            "broker_rotate.log",
            "audit.log",
            "sync_replica.log",
            "replica_status.log",
            "replica_cleanup.log",
            "upgrade.log",
            "p4verify.log",
            "monitor_metrics.log"
        ) | ForEach-Object {
            Remove-OldLogFiles -Prefix $_ -Keep $script:KEEPLOGS
        }
    }
}

function Remove-OldLogFiles {
    # Deletes all but the $Keep most recently written files in $script:LOGS
    # whose names start with $Prefix. Each deletion is logged as "rm <path>".
    param(
        [string]$Prefix,
        [int]$Keep
    )

    # Newest first, so skipping the first $Keep entries leaves only the
    # files that are past the retention window.
    Get-ChildItem -Path $script:LOGS -Filter "${Prefix}*" -File -ErrorAction SilentlyContinue |
        Sort-Object LastWriteTime -Descending |
        Select-Object -Skip $Keep |
        ForEach-Object {
            $expiredPath = $_.FullName
            Write-Log "rm $($expiredPath)"
            Remove-Item -Force $expiredPath
        }
}

#==============================================================================
# Rotate last run log files.
#==============================================================================
function Invoke-RotateLastRunLogs {
    # Renames the previous run's logs to "<name>.<JOURNALNUM>.<timestamp>".
    # The checkpoint log ($script:LOGFILE) is only renamed; the listed server
    # logs in $script:LOGS are renamed and then gzip-compressed.
    # Everything here is best-effort: a failed move is ignored and a failed
    # compression only produces a warning in the log.
    $datestamp = Get-Date -Format "yyyyMMdd-HHmmss"

    # Rotate checkpoint log
    if ($script:LOGFILE -and (Test-Path $script:LOGFILE)) {
        $rotated = "$($script:LOGFILE).$($script:JOURNALNUM).$datestamp"
        Move-Item -Force $script:LOGFILE $rotated -ErrorAction SilentlyContinue
    }

    # Rotate server logs with compression
    $logsToRotate = @("log", "p4broker.log", "audit.log", "monitor_metrics.log")
    foreach ($logName in $logsToRotate) {
        $logPath = Join-Path $script:LOGS $logName
        if (-not (Test-Path $logPath)) { continue }

        $rotated = "${logPath}.$($script:JOURNALNUM).$datestamp"
        Move-Item -Force $logPath $rotated -ErrorAction SilentlyContinue

        # The move is allowed to fail silently; only compress what actually
        # got rotated.
        if (-not (Test-Path $rotated)) { continue }

        $sourceStream = $null
        $destStream   = $null
        $gzipStream   = $null
        try {
            try {
                $sourceStream = [System.IO.File]::OpenRead($rotated)
                $destStream   = [System.IO.File]::Create("${rotated}.gz")
                $gzipStream   = New-Object System.IO.Compression.GZipStream($destStream,
                    [System.IO.Compression.CompressionMode]::Compress)
                $sourceStream.CopyTo($gzipStream)
            } finally {
                # Release the handles on every path. Because a failure here is
                # only a warning, the script keeps running, and leaked handles
                # would keep these files locked for the rest of the run.
                if ($sourceStream) { $sourceStream.Dispose() }
                try {
                    if ($gzipStream) { $gzipStream.Dispose() }
                } finally {
                    if ($destStream) { $destStream.Dispose() }
                }
            }

            # Only reached when the compressed copy was written and closed
            # without error.
            Remove-Item -Force $rotated
        } catch {
            Write-Log "Warning: Failed to compress $rotated : $_"
        }
    }
}

#==============================================================================
# Set the checkpoint counter in Perforce.
#==============================================================================
function Set-CheckpointCounter {
    # Records the current local time (with UTC offset) in a Perforce counter
    # named "LastCheckpoint", or "LastCheckpoint.<SERVERID>" when
    # $script:SERVERID is set. For edge and standby servers the counter is
    # written through $script:P4MASTERPORT instead of $script:P4PORT.
    # A failure to set the counter is logged as a warning and does not abort.
    Invoke-P4Login

    $counterValue = "$(Get-Date -Format 'yyyy/MM/dd HH:mm:ss zzz')"
    $counterName  = "LastCheckpoint"
    if ($script:SERVERID) { $counterName = "LastCheckpoint.$($script:SERVERID)" }

    $targetPort = $script:P4PORT
    if ($script:EDGESERVER -eq 1 -or $script:STANDBYSERVER -eq 1) {
        $targetPort = $script:P4MASTERPORT
    }

    # Capture the command output instead of discarding it so that a failure
    # can be reported rather than passing silently.
    $counterOutput = & $script:P4BIN -u $script:P4USER -p $targetPort counter $counterName $counterValue 2>&1
    if ($LASTEXITCODE -ne 0) {
        Write-Log "Warning: Failed to set counter $counterName via port $targetPort (exit code $LASTEXITCODE). Output: $counterOutput"
    }
}

#==============================================================================
# Main execution
#==============================================================================
function Main {
    # Entry point for one checkpoint run. Calls the setup helpers, then the
    # checkpoint steps in the order of the workflow listed in the file header
    # (rotate journal, replay to offline db, dump checkpoint, recreate offline
    # db, compress journals, clean up). An error raised inside the try block
    # is logged and re-thrown to the caller.

    # Wall-clock timer for the whole run; reported in the "End ..." log line.
    $totalSw = [System.Diagnostics.Stopwatch]::StartNew()

    # Setup phase. These calls are outside the try block, so an error raised
    # here does not pass through the catch handler below.
    Initialize-Environment
    Test-Vars
    Set-ServerVars
    Get-JournalNum
    # Renames the previous run's logs using $script:JOURNALNUM and a
    # timestamp; runs before this run's first Write-Log call.
    Invoke-RotateLastRunLogs

    Write-Log "Start $($script:P4SERVER) Checkpoint"

    Test-Dirs
    Test-OfflineDbUsable
    # NOTE(review): presumably sets an "in progress" marker that
    # Remove-CkpRunning clears - confirm against those helpers.
    Set-CkpRunning

    try {
        Invoke-P4Login
        # NOTE(review): Get-JournalNum was already called during setup;
        # presumably repeated here to refresh the value after login - confirm.
        Get-JournalNum
        Invoke-TruncateJournal
        Invoke-ReplayJournalsToOfflineDb
        Get-OfflineJournalNum
        Invoke-DumpCheckpoint
        Invoke-RecreateOfflineDbFiles
        Invoke-GzipMvJournals
        Remove-OldCheckpointsAndJournals
        Remove-OldLogs

        # The timer is stopped before the counter update, so the reported
        # total excludes Set-CheckpointCounter and Remove-CkpRunning.
        $totalSw.Stop()
        Write-Log "End $($script:P4SERVER) Checkpoint (Total time: $($totalSw.Elapsed.TotalSeconds) seconds)"

        Set-CheckpointCounter
        Remove-CkpRunning
    } catch {
        # Log the error, run the same Remove-CkpRunning cleanup as the
        # success path, then re-throw so the caller sees the failure.
        Write-Log "ERROR: $_"
        Remove-CkpRunning
        throw
    }
}

# Script entry point: invoke Main (takes no arguments) to perform the checkpoint run.
Main
# Change User Description Committed
#3 32506 Russell C. Jackson (Rusty) changed in Invoke-TruncateJournal (line ~501):

Before: The function only checked for $script:SERVER_TYPE -eq "p4d_master" and did nothing for other server types — edge/replica/standby servers would silently skip journal handling entirely.                                 

After: The function now has three branches:                                                                                                                                                                                     

1. Edge/replica/standby (checked first via $script:EDGESERVER, $script:REPLICASERVER, $script:STANDBYSERVER): Skips p4 admin journal rotation entirely, logs that it's waiting, and polls for the already-rotated journal file to appear via replication (same 30-minute timeout as the master path).
2. Master (p4d_master): Unchanged behavior — validates no duplicate files exist, rotates the journal, waits for the file.
3. Unknown server type (fallback): Logs a warning and proceeds with rotation as if it were a master, so the script doesn't silently fail on unrecognized configurations.

The Get-JournalNum function already correctly adjusts the journal number for edge/replica/standby (subtracts 1 since the counter is already incremented by the replicated rotation), so no changes were needed there.
#2 32505 Russell C. Jackson (Rusty) Added support for parallel checkpoints.
#1 32504 Russell C. Jackson (Rusty) Windows scripts