#==============================================================================
# daily_checkpoint.ps1 - Offline checkpoint script for Perforce on Windows.
#
# Performs an offline checkpoint of the Perforce server database without
# requiring the SDP or any particular directory layout. All paths are
# supplied as parameters or derived from a configuration file.
#
# Workflow:
# 1. Validate environment and directories
# 2. Rotate the active journal (master only; edge/replica/standby servers
# skip rotation and wait for the already-rotated journal via replication)
# 3. Replay numbered journals to the offline database
# 4. Dump a new checkpoint from the offline database
# 5. Recreate the offline database from the new checkpoint
# 6. Compress and archive journals
# 7. Clean up old checkpoints, journals, and logs
#
# Usage:
# .\daily_checkpoint.ps1 -P4ROOT D:\p4data\root -OfflineDB D:\p4data\offline_db `
# -Checkpoints E:\backups\checkpoints -Journals E:\backups\journals `
# -Logs D:\p4data\logs -P4D C:\p4\p4d.exe -P4 C:\p4\p4.exe `
# -P4PORT ssl:1666 -P4USER perforce
#
# .\daily_checkpoint.ps1 -ConfigFile D:\p4data\checkpoint_config.ps1
#
# Multi-part parallel checkpoints (p4d 2023.2+, creates numbered part files):
# .\daily_checkpoint.ps1 -ConfigFile D:\p4data\checkpoint_config.ps1 -MultiPartParallel
#
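# Configuration file:
# A .ps1 file that this script dot-sources at startup. It should assign the
# $script: variables declared in the "Script-scope state" section below
# (for example $script:P4ROOT or $script:OFFLINE_DB).
# See the parameter descriptions for what each setting controls.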
#
#==============================================================================
[CmdletBinding()]
param(
# Path to a .ps1 configuration file that sets $script: variables.
# When provided, all other parameters become optional overrides.
# The file is dot-sourced by Initialize-Environment; a missing file aborts the run.
[Parameter()]
[string]$ConfigFile,
# P4ROOT - the live server database directory (contains db.* files).
# server.id and server_type.txt are also read from this directory when present.
[Parameter()]
[string]$P4ROOT,
# Offline database directory (a second copy of db.* used for checkpoints).
[Parameter()]
[string]$OfflineDB,
# Directory where checkpoint files are stored.
# Compressed journals are moved here as well.
[Parameter()]
[string]$Checkpoints,
# Directory where rotated journal files are stored.
[Parameter()]
[string]$Journals,
# Directory for log files. Also holds the ckp_running.txt semaphore file.
[Parameter()]
[string]$Logs,
# Full path to p4d.exe.
[Parameter()]
[string]$P4D,
# Full path to p4.exe.
[Parameter()]
[string]$P4,
# Perforce server port (e.g. ssl:1666, 1666).
[Parameter()]
[string]$P4PORT,
# Port of the master/commit server (defaults to P4PORT).
# This is the port 'p4 admin journal' is sent to when rotating the journal.
[Parameter()]
[string]$P4MASTERPORT,
# Perforce admin user for journal rotation and counter updates.
[Parameter()]
[string]$P4USER,
# Prefix for checkpoint/journal filenames (e.g. "p4_master"). Defaults to "p4".
[Parameter()]
[string]$ServerName,
# Number of checkpoints to retain. 0 = keep all.
# With 0 the whole checkpoint/journal cleanup step is skipped.
[Parameter()]
[int]$KeepCheckpoints = 7,
# Number of journals to retain. 0 = keep all.
# The same value limits how many checkpoint.log* files are kept.
[Parameter()]
[int]$KeepJournals = 112,
# Number of log files to retain. 0 = keep all.
[Parameter()]
[int]$KeepLogs = 350,
# Number of threads for parallel checkpoints (p4d 2022.1+). 0 = disable parallel.
[Parameter()]
[int]$ParallelThreads = 32,
# SMTP server for email notifications. Leave empty to disable.
[Parameter()]
[string]$SmtpServer,
# Email recipient for error notifications.
[Parameter()]
[string]$MailTo,
# Email sender address. When empty, perforce@<computer name> is used.
[Parameter()]
[string]$MailFrom,
# Path to a password file (first line = password) for p4 login.
# When unset or missing, no login is attempted.
[Parameter()]
[string]$PasswordFile,
# Enable multi-part parallel checkpoints (p4d 2023.2+).
# Creates numbered part files (e.g. p4.ckp.123.0.gz, p4.ckp.123.1.gz, ...)
# instead of a single directory. Requires ParallelThreads > 0.
[Parameter()]
[switch]$MultiPartParallel
)
# Fail fast: strict variable/property checking, and treat every error as terminating.
Set-StrictMode -Version Latest
$ErrorActionPreference = "Stop"
#==============================================================================
# Script-scope state
#==============================================================================
# NOTE(review): PowerShell variable names are case-insensitive and script
# parameters are script-scope variables. $script:P4ROOT, CHECKPOINTS, JOURNALS,
# LOGS, P4PORT, P4MASTERPORT, P4USER, MAILTO, MAILFROM, SMTPSERVER and
# PASSWORDFILE therefore name the same variables as the parameters -P4ROOT,
# -Checkpoints, -Journals, -Logs, -P4PORT, -P4MASTERPORT, -P4USER, -MailTo,
# -MailFrom, -SmtpServer and -PasswordFile, so the assignments below reset
# whatever was passed on the command line for them.

# Directories and binaries.
$script:P4ROOT = $null
$script:OFFLINE_DB = $null
$script:CHECKPOINTS = $null
$script:JOURNALS = $null
$script:LOGS = $null
$script:P4BIN = $null
$script:P4DBIN = $null
# Connection settings and file-name prefix.
$script:P4PORT = $null
$script:P4MASTERPORT = $null
$script:P4USER = $null
$script:P4SERVER = $null
# Log file path; set by Initialize-Environment once LOGS is known.
$script:LOGFILE = $null
# Server identity/type; read from server.id / server_type.txt in P4ROOT when present.
$script:SERVERID = $null
$script:SERVER_TYPE = "p4d_master"
# Journal/checkpoint counters filled in by Get-JournalNum / Get-OfflineJournalNum.
$script:JOURNALNUM = 0
$script:CHECKPOINTNUM = 0
$script:OFFLINEJNLNUM = 0
# Topology flags (0/1) set by Set-ServerVars.
$script:EDGESERVER = 0
$script:REPLICASERVER = 0
$script:STANDBYSERVER = 0
# Checkpoint mode flags (0/1) and thread count.
$script:DoParallelCheckpoints = 0
$script:DoMultiPartParallel = 0
$script:NumCheckPointThreads = 32
# p4d version string (e.g. "2023.2") detected from 'p4d -V'.
$script:P4D_VERSION = $null
# Retention settings (0 = keep all).
$script:KEEPCKPS = 7
$script:KEEPJNLS = 112
$script:KEEPLOGS = 350
# E-mail notification and login settings.
$script:MAILTO = $null
$script:MAILFROM = $null
$script:SMTPSERVER = $null
$script:PASSWORDFILE = $null
#==============================================================================
# Logging
#==============================================================================
function Write-Log {
    # Emits one timestamped line: appended to $script:LOGFILE when it is set,
    # otherwise written to the host.
    #   Message - text to record.
    param([string]$Message)

    $stamp = Get-Date -Format "ddd MM/dd/yyyy HH:mm:ss"
    $line = "$stamp $($MyInvocation.ScriptName): $Message"

    if (-not $script:LOGFILE) {
        Write-Host $line
        return
    }
    Add-Content -Path $script:LOGFILE -Value $line
}
#==============================================================================
# Die - log error, optionally send email, remove semaphore, and exit.
#==============================================================================
function Invoke-Die {
    # Fatal-error handler: logs the error, e-mails the log when MAILTO and
    # SMTPSERVER are configured, removes the ckp_running.txt semaphore and
    # finally throws $Message so the caller's error handling takes over.
    #   Message - description of the failure; becomes the thrown error.
    param([string]$Message)

    Write-Log "ERROR!!! - $env:COMPUTERNAME $script:P4SERVER $($MyInvocation.ScriptName): $Message"

    if ($script:LOGFILE -and $script:MAILTO -and $script:SMTPSERVER) {
        try {
            $from = if ($script:MAILFROM) { $script:MAILFROM } else { "perforce@$env:COMPUTERNAME" }
            # -ErrorAction Stop makes delivery failures terminating so the catch
            # below actually logs them instead of dropping them silently.
            Send-MailMessage -To $script:MAILTO -From $from `
                -Subject "ERROR!!! - $env:COMPUTERNAME $script:P4SERVER checkpoint" `
                -Body (Get-Content $script:LOGFILE -Raw) `
                -SmtpServer $script:SMTPSERVER -ErrorAction Stop
        } catch {
            Write-Log "Warning: Failed to send email notification: $_"
        }
    }

    # Remove semaphore. Best effort: a failure here must not replace the
    # original error that is thrown below.
    if ($script:LOGS) {
        $semaphore = Join-Path $script:LOGS "ckp_running.txt"
        try {
            if (Test-Path $semaphore) { Remove-Item -Force $semaphore -ErrorAction Stop }
        } catch {
            Write-Log "Warning: Failed to remove semaphore file ${semaphore}: $_"
        }
    }

    throw $Message
}
#==============================================================================
# Initialize environment from parameters and optional config file.
#==============================================================================
function Initialize-Environment {
    # Populates the $script: settings. Precedence, lowest to highest:
    # parameter defaults, the optional configuration file, values explicitly
    # passed on the command line. Then derives SERVERID, SERVER_TYPE,
    # P4D_VERSION and LOGFILE. Exits with code 1 if -ConfigFile is not found.

    # Command-line values are taken from the script's bound-parameter table,
    # not from the parameter variables: variable names are case-insensitive and
    # parameters live in script scope, so e.g. -P4ROOT and $script:P4ROOT are
    # the same variable and have already been reset by the state initialisation.
    $cli = @{}
    $boundVar = Get-Variable -Name PSBoundParameters -Scope Script -ErrorAction SilentlyContinue
    if ($boundVar -and $boundVar.Value) { $cli = $boundVar.Value }

    # Seed numeric settings from the parameters (defaults included) BEFORE the
    # config file is loaded so that the config file is able to override them.
    $script:KEEPCKPS = $KeepCheckpoints
    $script:KEEPJNLS = $KeepJournals
    $script:KEEPLOGS = $KeepLogs
    $script:NumCheckPointThreads = $ParallelThreads

    # Load config file first (parameters override it below)
    if ($ConfigFile) {
        if (-not (Test-Path $ConfigFile)) {
            Write-Host "Error: Configuration file '$ConfigFile' not found."
            exit 1
        }
        . $ConfigFile
    }

    # Apply parameter overrides - explicitly supplied, non-empty parameters
    # take precedence over the config file. Key = parameter, value = $script: name.
    $stringOverrides = [ordered]@{
        P4ROOT       = 'P4ROOT'
        OfflineDB    = 'OFFLINE_DB'
        Checkpoints  = 'CHECKPOINTS'
        Journals     = 'JOURNALS'
        Logs         = 'LOGS'
        P4D          = 'P4DBIN'
        P4           = 'P4BIN'
        P4PORT       = 'P4PORT'
        P4MASTERPORT = 'P4MASTERPORT'
        P4USER       = 'P4USER'
        ServerName   = 'P4SERVER'
        SmtpServer   = 'SMTPSERVER'
        MailTo       = 'MAILTO'
        MailFrom     = 'MAILFROM'
        PasswordFile = 'PASSWORDFILE'
    }
    foreach ($entry in $stringOverrides.GetEnumerator()) {
        if ($cli.ContainsKey($entry.Key) -and $cli[$entry.Key]) {
            Set-Variable -Scope Script -Name $entry.Value -Value $cli[$entry.Key]
        }
    }

    # Numeric settings: re-apply only when given explicitly (0 is a valid value).
    $numericOverrides = [ordered]@{
        KeepCheckpoints = 'KEEPCKPS'
        KeepJournals    = 'KEEPJNLS'
        KeepLogs        = 'KEEPLOGS'
        ParallelThreads = 'NumCheckPointThreads'
    }
    foreach ($entry in $numericOverrides.GetEnumerator()) {
        if ($cli.ContainsKey($entry.Key)) {
            Set-Variable -Scope Script -Name $entry.Value -Value $cli[$entry.Key]
        }
    }

    if ($MultiPartParallel) { $script:DoMultiPartParallel = 1 }

    # Default P4MASTERPORT to P4PORT
    if (-not $script:P4MASTERPORT) { $script:P4MASTERPORT = $script:P4PORT }

    # Default server name
    if (-not $script:P4SERVER) { $script:P4SERVER = "p4" }

    if ($script:P4ROOT) {
        # Read server.id if present (an empty file is ignored rather than
        # failing on a null .Trim()).
        $serverIdFile = Join-Path $script:P4ROOT "server.id"
        if (Test-Path $serverIdFile) {
            $idLine = Get-Content $serverIdFile -First 1
            if ($idLine) { $script:SERVERID = ([string]$idLine).Trim() }
        }

        # Read server type if present
        $serverTypeFile = Join-Path $script:P4ROOT "server_type.txt"
        if (Test-Path $serverTypeFile) {
            $typeLine = Get-Content $serverTypeFile -First 1
            if ($typeLine) { $script:SERVER_TYPE = ([string]$typeLine).Trim() }
        }
    }

    # Detect p4d version (e.g. "2023.2") for parallel checkpoint support
    if ($script:P4DBIN -and (Test-Path $script:P4DBIN)) {
        try {
            $versionOutput = & $script:P4DBIN -V 2>&1
            $versionLine = $versionOutput | Where-Object { $_ -match "Rev\." } | Select-Object -First 1
            if ($versionLine -match "(\d{4}\.\d)") {
                $script:P4D_VERSION = $Matches[1]
            }
        } catch {
            $script:P4D_VERSION = "0"
        }
    }

    # Set log file
    if ($script:LOGS) {
        $script:LOGFILE = Join-Path $script:LOGS "checkpoint.log"
    }
}
#==============================================================================
# Determine server topology (edge, replica, standby).
#==============================================================================
function Set-ServerVars {
    # Decides which checkpoint mode to use (single file, parallel directory,
    # multi-part) from the detected p4d version and thread count, and sets the
    # EDGESERVER / REPLICASERVER / STANDBYSERVER flags from the services value
    # stored for this server id in db.server.

    # Compare versions numerically; an unparsable value (e.g. "0") counts as
    # "too old" for every feature.
    $p4dVersion = $null
    if ($script:P4D_VERSION) {
        $parsed = $null
        if ([version]::TryParse([string]$script:P4D_VERSION, [ref]$parsed)) {
            $p4dVersion = $parsed
        }
    }

    # Parallel checkpoint support for p4d 2022.1+ (2022.1 itself included)
    if ($p4dVersion -and $p4dVersion -ge [version]"2022.1" -and $script:NumCheckPointThreads -gt 0) {
        $script:DoParallelCheckpoints = 1
    }

    # Multi-part parallel checkpoint support for p4d 2023.2+
    if ($script:DoMultiPartParallel -eq 1) {
        if ($script:NumCheckPointThreads -le 0) {
            Invoke-Die "MultiPartParallel requires ParallelThreads > 0."
        }
        if (-not ($p4dVersion -and $p4dVersion -ge [version]"2023.2")) {
            Invoke-Die "MultiPartParallel requires p4d 2023.2 or later (detected: $($script:P4D_VERSION))."
        }
        # Multi-part supersedes directory-based parallel
        $script:DoParallelCheckpoints = 0
    }

    # Detect edge/replica/standby from db.server
    if (-not $script:SERVERID) { return }

    $servicesData = $null
    try {
        $output = & $script:P4DBIN -r $script:P4ROOT -J off -L NUL -k db.server -jd - 2>&1
        # Escape the id so characters such as '.' are matched literally.
        $linePattern = "@db.server@ @$([regex]::Escape($script:SERVERID))@"
        $serverLine = $output | Where-Object { $_ -match $linePattern } | Select-Object -First 1
        if ($serverLine) {
            $fields = "$serverLine" -split "@"
            if ($fields.Count -ge 13) {
                $servicesData = [int]$fields[12]
            }
        }
    } catch {
        # Topology detection is best effort, but leave a trace of the failure.
        Write-Log "Warning: Unable to read db.server to determine server topology: $_"
    }

    if ($servicesData) {
        $script:EDGESERVER = if ($servicesData -band 4096) { 1 } else { 0 }
        $script:REPLICASERVER = if ($servicesData -eq 2533) { 1 } else { 0 }
        $script:STANDBYSERVER = if ($servicesData -eq 35141 -or $servicesData -eq 35301) { 1 } else { 0 }
    }
}
#==============================================================================
# Validate required variables and disk space.
#==============================================================================
function Test-Vars {
    # Preflight check: every required setting must be non-empty and both
    # binaries must exist (otherwise exit 1); then the volume holding the
    # offline database must have at least 2GB free (otherwise Invoke-Die).
    $required = [ordered]@{
        "P4ROOT"      = $script:P4ROOT
        "OfflineDB"   = $script:OFFLINE_DB
        "Checkpoints" = $script:CHECKPOINTS
        "Journals"    = $script:JOURNALS
        "Logs"        = $script:LOGS
        "P4D"         = $script:P4DBIN
        "P4"          = $script:P4BIN
        "P4PORT"      = $script:P4PORT
        "P4USER"      = $script:P4USER
    }

    $failed = $false
    foreach ($setting in $required.GetEnumerator()) {
        if ($setting.Value) { continue }
        Write-Host "Error: Required parameter -$($setting.Key) is NOT set."
        $failed = $true
    }

    foreach ($exe in @($script:P4BIN, $script:P4DBIN)) {
        if (-not $exe) { continue }
        if (Test-Path $exe) { continue }
        Write-Host "Error: Binary not found: $exe"
        $failed = $true
    }

    if ($failed) {
        Write-Host "Aborting due to errors in preflight checks."
        Write-Host "Supply required values via parameters or a -ConfigFile."
        exit 1
    }

    # Disk space (~2GB minimum on the offline_db volume); skipped when the
    # directory is absent or the path has no PSDrive.
    if (-not (Test-Path $script:OFFLINE_DB)) { return }
    $volume = (Resolve-Path $script:OFFLINE_DB).Drive
    if (-not $volume) { return }

    $freeGB = [math]::Round((Get-PSDrive $volume.Name).Free / 1GB, 2)
    if ($freeGB -lt 2) {
        Invoke-Die "Available space on offline_db volume is ${freeGB}GB, less than the required 2GB."
    }
}
#==============================================================================
# Verify required directories exist and are accessible.
#==============================================================================
function Test-Dirs {
    # Confirms the offline db, checkpoints, journals and logs directories exist
    # and accept a small probe file; reports all failures through Invoke-Die.
    $problems = New-Object System.Collections.Generic.List[string]
    $targets = $script:OFFLINE_DB, $script:CHECKPOINTS, $script:JOURNALS, $script:LOGS

    foreach ($path in $targets) {
        if (Test-Path $path) {
            # Writability probe: create and delete a throw-away file.
            $probe = Join-Path $path ".p4_write_test"
            try {
                [IO.File]::WriteAllText($probe, "test")
                Remove-Item $probe -Force
            } catch {
                Write-Log "Error: Dir $path is not writable."
                $problems.Add("$path (not writable)")
            }
            continue
        }

        Write-Log "Error: Dir $path does not exist."
        $problems.Add("$path (missing)")
    }

    if ($problems.Count -gt 0) {
        Invoke-Die "Directory check failed: $($problems -join ', '). Aborting."
    }
}
#==============================================================================
# Check the offline database is in a usable state.
#==============================================================================
function Test-OfflineDbUsable {
    # Dies unless the offline database carries its "usable" marker file and a
    # db.counters file. Checks run in that order; the first failure wins.
    $marker = Join-Path $script:OFFLINE_DB "offline_db_usable.txt"
    $counters = Join-Path $script:OFFLINE_DB "db.counters"

    $checks = @(
        @{ Path = $marker; Failure = "Offline database not in a usable state. Expected file $marker not found. Rebuild the offline database from a checkpoint first." },
        @{ Path = $counters; Failure = "Offline database file $counters not found. Create it by running a live checkpoint first." }
    )

    foreach ($check in $checks) {
        if (Test-Path $check.Path) { continue }
        Invoke-Die $check.Failure
    }
}
#==============================================================================
# Semaphore management - prevent concurrent checkpoint operations.
#==============================================================================
function Set-CkpRunning {
    # Creates the ckp_running.txt semaphore in the logs directory, or dies if
    # one already exists (a previous run is still active or ended abnormally).
    $semaphore = Join-Path $script:LOGS "ckp_running.txt"
    if (Test-Path $semaphore) {
        try {
            Invoke-Die "Last checkpoint not complete. Semaphore file $semaphore exists. Check the backup process or remove this file if the prior run is confirmed to have finished."
        } finally {
            # Invoke-Die deletes the semaphore as part of its cleanup. Here the
            # file belongs to the OTHER run, so put it back; otherwise the lock
            # would be released by the very check that detected it.
            if (-not (Test-Path $semaphore)) {
                "Checkpoint running." | Out-File -FilePath $semaphore -Encoding UTF8
            }
        }
    }
    "Checkpoint running." | Out-File -FilePath $semaphore -Encoding UTF8
}
function Remove-CkpRunning {
    # Deletes the ckp_running.txt semaphore from the logs directory if present.
    $lockFile = Join-Path $script:LOGS "ckp_running.txt"
    if (-not (Test-Path $lockFile)) { return }
    Remove-Item -Force $lockFile
}
#==============================================================================
# Get journal number from live db.counters.
#==============================================================================
function Get-JournalNum {
    # Reads the "journal" counter from the live db.counters (0 when the file or
    # the counter is missing), subtracts one on edge/replica/standby servers,
    # and stores the result in JOURNALNUM with CHECKPOINTNUM = JOURNALNUM + 1.
    # Dies if the counter value is not numeric.
    $current = 0
    $liveCounters = Join-Path $script:P4ROOT "db.counters"

    if (Test-Path $liveCounters) {
        $dump = & $script:P4DBIN -r $script:P4ROOT -k db.counters -jd - 2>&1
        $counterLine = $dump | Where-Object { $_ -match "@journal@" }
        if ($counterLine) {
            # The counter value is the eighth '@'-separated field of the line.
            $rawValue = ($counterLine -split "@")[7]
            if ($rawValue -notmatch '^\d+$') {
                Invoke-Die "The journal counter value [$rawValue] is invalid. It must be numeric."
            }
            $current = [int]$rawValue
        }
    }

    # Edge/replica/standby: journal already rotated on master
    $isDownstream = ($script:EDGESERVER -eq 1) -or ($script:REPLICASERVER -eq 1) -or ($script:STANDBYSERVER -eq 1)
    if ($isDownstream) {
        $current = $current - 1
    }

    $script:JOURNALNUM = $current
    $script:CHECKPOINTNUM = $current + 1
}
#==============================================================================
# Get journal number from offline db.counters.
#==============================================================================
function Get-OfflineJournalNum {
    # Reads the "journal" counter from the offline db.counters into
    # OFFLINEJNLNUM and sets CHECKPOINTNUM to the same value. Dies when the
    # offline db is unusable, the counter is missing, or it is not numeric.
    Test-OfflineDbUsable

    $dump = & $script:P4DBIN -r $script:OFFLINE_DB -jd - db.counters 2>&1
    $counterLine = $dump | Where-Object { $_ -match "@journal@" }
    if (-not $counterLine) {
        Invoke-Die "Cannot get the offline journal number from $($script:OFFLINE_DB)\db.counters."
    }

    # The counter value is the eighth '@'-separated field of the line.
    $rawValue = ($counterLine -split "@")[7]
    if ($rawValue -notmatch '^\d+$') {
        Invoke-Die "The offline journal counter value [$rawValue] is invalid."
    }

    $script:OFFLINEJNLNUM = [int]$rawValue
    Write-Log "Offline journal number is: $($script:OFFLINEJNLNUM)"
    $script:CHECKPOINTNUM = $script:OFFLINEJNLNUM
    Write-Log "Offline checkpoint number is: $($script:CHECKPOINTNUM)"
}
#==============================================================================
# Rotate the active journal via 'p4 admin journal'.
#==============================================================================
function Wait-RotatedJournalFile {
    # Private helper for Invoke-TruncateJournal: polls every 5 seconds (up to
    # 360 times, i.e. 1800 seconds) until $Path exists, then returns.
    # Dies with a timeout message otherwise.
    #   Path        - journal file to wait for.
    #   Description - how the file is named in the timeout message.
    #   Hint        - optional sentence appended to the timeout message.
    param(
        [string]$Path,
        [string]$Description = "journal file",
        [string]$Hint = ""
    )
    $pollSeconds = 5
    $maxPolls = 360
    $polls = 0
    while (-not (Test-Path $Path)) {
        Start-Sleep -Seconds $pollSeconds
        $polls++
        if ($polls -ge $maxPolls) {
            $timeoutMessage = "Timed out waiting for $Description $Path after $($maxPolls * $pollSeconds) seconds."
            if ($Hint) { $timeoutMessage = "$timeoutMessage $Hint" }
            Invoke-Die $timeoutMessage
        }
    }
}
function Invoke-TruncateJournal {
    # Makes sure the numbered journal for JOURNALNUM exists in the journals
    # directory. On edge/replica/standby servers it only waits for the file to
    # arrive; otherwise it rotates the journal with 'p4 admin journal' against
    # P4MASTERPORT and waits for the rotated file. Dies on any failure.
    $checkpointFile = Join-Path $script:CHECKPOINTS "$($script:P4SERVER).ckp.$($script:CHECKPOINTNUM).gz"
    $journalFile = Join-Path $script:JOURNALS "$($script:P4SERVER).jnl.$($script:JOURNALNUM)"

    if ($script:EDGESERVER -eq 1 -or $script:REPLICASERVER -eq 1 -or $script:STANDBYSERVER -eq 1) {
        # Edge/replica/standby servers do not rotate the journal themselves.
        # The master rotates it and the numbered journal arrives via replication.
        Write-Log "This is an edge/replica/standby server (SERVER_TYPE=$($script:SERVER_TYPE)). Skipping journal rotation."
        Write-Log "Waiting for rotated journal file $journalFile to appear via replication..."
        Wait-RotatedJournalFile -Path $journalFile -Description "replicated journal file" `
            -Hint "Ensure the master has rotated its journal."
        Write-Log "Rotated journal file $journalFile found."
        return
    }

    # Master and unrecognized server types share one rotation path; the latter
    # only adds a warning.
    if ($script:SERVER_TYPE -ne "p4d_master") {
        Write-Log "Warning: Unrecognized server type '$($script:SERVER_TYPE)'. Proceeding with journal rotation as master."
    }

    if (Test-Path $checkpointFile) {
        Invoke-Die "Checkpoint $checkpointFile already exists, check the backup process."
    }
    if (Test-Path $journalFile) {
        Invoke-Die "Journal $journalFile already exists, check the backup process."
    }

    Write-Log "Truncating journal..."
    Invoke-P4Login
    $sw = [System.Diagnostics.Stopwatch]::StartNew()
    # Run as the configured admin user, the same one Invoke-P4Login logs in.
    $result = & $script:P4BIN -u $script:P4USER -p $script:P4MASTERPORT admin journal 2>&1
    $rotateExitCode = $LASTEXITCODE
    $sw.Stop()

    # Check the result before reporting success.
    if ($rotateExitCode -ne 0) {
        Write-Log ($result | Out-String)
        Invoke-Die "Journal rotation on $($script:P4MASTERPORT) failed."
    }
    Write-Log "Journal rotation completed in $($sw.Elapsed.TotalSeconds) seconds."

    # Wait for the rotated journal file to appear
    Wait-RotatedJournalFile -Path $journalFile
    Invoke-P4Login
}
#==============================================================================
# Login to Perforce.
#==============================================================================
function Invoke-P4Login {
    # Logs P4USER in to P4PORT using the first line of the password file.
    # Does nothing when no password file is configured or it does not exist.
    # Login problems are logged as warnings; they do not abort the run.
    if (-not ($script:PASSWORDFILE -and (Test-Path $script:PASSWORDFILE))) { return }

    $firstLine = Get-Content $script:PASSWORDFILE -First 1
    if (-not $firstLine) {
        # An empty file would otherwise fail on a null .Trim().
        Write-Log "Warning: Password file $($script:PASSWORDFILE) is empty; skipping p4 login."
        return
    }

    $password = ([string]$firstLine).Trim()
    # The password goes to p4 on stdin so it never appears on a command line.
    $password | & $script:P4BIN -u $script:P4USER -p $script:P4PORT login 2>&1 | Out-Null
    if ($LASTEXITCODE -ne 0) {
        Write-Log "Warning: p4 login for user $($script:P4USER) on $($script:P4PORT) failed (exit code $LASTEXITCODE)."
    }
}
#==============================================================================
# Replay numbered journal files to the offline database.
#==============================================================================
function Invoke-ReplayJournalsToOfflineDb {
    # Brings the offline database up to date: for the checkpoints directory
    # and then the journals directory, re-reads the offline journal number and
    # replays the numbered journals found there.
    Write-Log "Replay any unreplayed journals to the offline database."
    Test-OfflineDbUsable

    foreach ($sourceDir in @($script:CHECKPOINTS, $script:JOURNALS)) {
        Get-OfflineJournalNum
        Invoke-ReplayJournalsFromDir -Dir $sourceDir
    }
}
function Invoke-ReplayJournalsFromDir {
    # Replays the numbered journals "<P4SERVER>.jnl.<n>[.gz]" found in $Dir to
    # the offline database, from OFFLINEJNLNUM up to the highest number present.
    # Nothing is replayed when the lowest number present is above OFFLINEJNLNUM.
    # The offline_db_usable.txt marker is removed before each replay and
    # rewritten after it succeeds; a failed replay dies with the marker absent.
    #   Dir - directory to scan for journal files.
    param([string]$Dir)

    # @() keeps the results arrays even for a single match; .Count on a scalar
    # is not reliable under Set-StrictMode in Windows PowerShell.
    $jnlFiles = @(Get-ChildItem -Path $Dir -Filter "*.jnl.*" -File -ErrorAction SilentlyContinue |
        Where-Object { $_.Name -match '\.jnl\.\d+(\.gz)?$' })
    if ($jnlFiles.Count -eq 0) { return }

    # Extract journal numbers
    $jnlNums = @($jnlFiles |
        ForEach-Object {
            if ($_.Name -match '\.jnl\.(\d+)(\.gz)?$') { [int]$Matches[1] }
        } |
        Sort-Object -Unique)
    if ($jnlNums.Count -eq 0) { return }

    $firstNum = $jnlNums[0]
    $lastNum = $jnlNums[-1]
    Write-Log "FIRSTJOURNALNUM=$firstNum"
    Write-Log "LASTJOURNALNUM=$lastNum"
    Write-Log "OFFLINEJNLNUM=$($script:OFFLINEJNLNUM)"

    if ($firstNum -gt $script:OFFLINEJNLNUM) { return }

    $usableFile = Join-Path $script:OFFLINE_DB "offline_db_usable.txt"
    for ($j = $script:OFFLINEJNLNUM; $j -le $lastNum; $j++) {
        $numberedJournal = Join-Path $Dir "$($script:P4SERVER).jnl.$j"
        $numberedJournalGz = "${numberedJournal}.gz"

        # Prefer the compressed copy; skip numbers with no file in this directory.
        $replayTarget = $null
        if (Test-Path $numberedJournalGz) { $replayTarget = $numberedJournalGz }
        elseif (Test-Path $numberedJournal) { $replayTarget = $numberedJournal }
        else { continue }

        Write-Log "Replay journal $replayTarget to offline db."
        Remove-Item -Force $usableFile -ErrorAction SilentlyContinue

        $sw = [System.Diagnostics.Stopwatch]::StartNew()
        # -Encoding Default matches what Add-Content (Write-Log) uses, so the
        # log file is not a mix of ANSI and UTF-16 text on Windows PowerShell.
        & $script:P4DBIN -r $script:OFFLINE_DB -jr -f $replayTarget 2>&1 |
            Out-File -Append -FilePath $script:LOGFILE -Encoding Default
        $replayExitCode = $LASTEXITCODE
        $sw.Stop()

        if ($replayExitCode -ne 0) {
            Invoke-Die "Offline journal replay of $replayTarget to $($script:OFFLINE_DB) failed."
        }
        Write-Log "Journal replay completed in $($sw.Elapsed.TotalSeconds) seconds."
        "Offline journal files restored successfully." | Out-File -FilePath $usableFile -Encoding UTF8
    }
}
#==============================================================================
# Dump a new checkpoint from the offline database.
#==============================================================================
function Invoke-DumpCheckpoint {
    # Dumps checkpoint number CHECKPOINTNUM from the offline database into the
    # checkpoints directory, in multi-part, parallel-directory or single-file
    # form depending on the mode flags. Skips the dump when the single-file
    # checkpoint and its .md5 already exist. Dies when p4d reports failure.
    $rootDir = $script:OFFLINE_DB
    Write-Log "Dump out new checkpoint from db files in $rootDir."

    $checkpointBase = Join-Path $script:CHECKPOINTS "$($script:P4SERVER).ckp.$($script:CHECKPOINTNUM)"
    $newCheckpoint = "${checkpointBase}.gz"
    $newCheckpointMD5 = "${newCheckpoint}.md5"

    # Skip if checkpoint already exists with MD5
    if ((Test-Path $newCheckpoint) -and (Test-Path $newCheckpointMD5)) {
        Write-Log "Warning: Skipping generation of existing checkpoint $newCheckpoint. MD5 file already exists."
        return
    }

    # Only the mode flags and the target differ between the three modes.
    $threads = "$($script:NumCheckPointThreads)"
    if ($script:DoMultiPartParallel -eq 1) {
        $modeFlags = @('--multipart', '-z', '-jd', '-N', $threads)
        $target = $checkpointBase
    } elseif ($script:DoParallelCheckpoints -eq 1) {
        $modeFlags = @('-z', '-jdpm', '-N', $threads)
        $target = $checkpointBase
    } else {
        $modeFlags = @('-jd', '-z')
        $target = $newCheckpoint
    }

    $checkpointCmd = "$($script:P4DBIN) -r `"$rootDir`" $($modeFlags -join ' ') `"$target`""
    Write-Log "Running: $checkpointCmd"

    $sw = [System.Diagnostics.Stopwatch]::StartNew()
    # -Encoding Default matches what Add-Content (Write-Log) uses, so the log
    # file is not a mix of ANSI and UTF-16 text on Windows PowerShell.
    & $script:P4DBIN -r $rootDir @modeFlags $target 2>&1 |
        Out-File -Append -FilePath $script:LOGFILE -Encoding Default
    $dumpExitCode = $LASTEXITCODE
    $sw.Stop()

    if ($dumpExitCode -ne 0) {
        Invoke-Die "New checkpoint dump from $rootDir FAILED. Command was: $checkpointCmd"
    }
    Write-Log "New checkpoint dump succeeded in $($sw.Elapsed.TotalSeconds) seconds."
}
#==============================================================================
# Recreate the offline database from the latest checkpoint.
#==============================================================================
function Invoke-RecreateOfflineDbFiles {
    # Rebuilds the offline database from the most recently written checkpoint
    # (chosen by the newest "<P4SERVER>.ckp.*.md5" file). The restore source is
    # validated BEFORE the existing db.* files are deleted, so a missing
    # checkpoint cannot leave the offline database empty. Writes the
    # offline_db_usable.txt marker on success; dies on any failure.
    $checkpointsDir = $script:CHECKPOINTS
    $threads = "$($script:NumCheckPointThreads)"

    if ($script:DoMultiPartParallel -eq 1) {
        $mode = "multipart"
        $noneFound = "No multi-part parallel checkpoints found in $checkpointsDir. Run a live checkpoint first."
    } elseif ($script:DoParallelCheckpoints -eq 1) {
        $mode = "parallel"
        $noneFound = "No parallel checkpoint dirs found in $checkpointsDir. Run a live checkpoint first."
    } else {
        $mode = "single"
        $noneFound = "No checkpoints found in $checkpointsDir with prefix $($script:P4SERVER). Run a live checkpoint first."
    }

    # Find the latest checkpoint. All modes use the server-name prefix so a
    # checkpoint written under another prefix is never picked up. @() keeps the
    # result an array when only one .md5 file exists.
    $md5Files = @(Get-ChildItem -Path $checkpointsDir -Filter "$($script:P4SERVER).ckp.*.md5" -File -ErrorAction SilentlyContinue |
        Sort-Object LastWriteTime -Descending)
    if ($md5Files.Count -eq 0) {
        # Invoke-Die also removes the running semaphore.
        Invoke-Die $noneFound
    }
    $latestMD5 = $md5Files[0].FullName
    $restoreSource = $latestMD5 -replace '\.md5$', ''

    # Work out flags and validate the source while the old db files still exist.
    if ($mode -eq "multipart") {
        $modeFlags = @('--multipart', '-z', '-jr', '-N', $threads)
        $recoverMessage = "Recovering from last multi-part parallel checkpoint, $restoreSource."
    } elseif ($mode -eq "parallel") {
        if (-not (Test-Path $restoreSource -PathType Container)) {
            Invoke-Die "Missing last parallel checkpoint dir: $restoreSource. Abort!"
        }
        $modeFlags = @('-z', '-jrp', '-N', $threads)
        $recoverMessage = "Recovering from last parallel checkpoint dir, $restoreSource."
    } else {
        # The .md5 may be named after either "<ckp>.gz" or "<ckp>".
        if ($latestMD5 -notmatch '\.gz\.md5$') {
            $restoreSource = "${restoreSource}.gz"
        }
        if (-not (Test-Path $restoreSource)) {
            Invoke-Die "Missing last checkpoint file: $restoreSource. Abort!"
        }
        $modeFlags = @('-jr', '-z')
        $recoverMessage = "Recovering from last full checkpoint, $restoreSource."
    }

    # Remove old offline db files
    $usableFile = Join-Path $script:OFFLINE_DB "offline_db_usable.txt"
    Remove-Item -Force $usableFile -ErrorAction SilentlyContinue
    Get-ChildItem -Path $script:OFFLINE_DB -Filter "db.*" -File -ErrorAction SilentlyContinue |
        Remove-Item -Force
    $saveDir = Join-Path $script:OFFLINE_DB "save"
    if (Test-Path $saveDir) {
        Get-ChildItem -Path $saveDir -Filter "db.*" -File -ErrorAction SilentlyContinue |
            Remove-Item -Force
    }

    # Restore from checkpoint
    Write-Log $recoverMessage
    $restoreCmd = "$($script:P4DBIN) -r `"$($script:OFFLINE_DB)`" $($modeFlags -join ' ') `"$restoreSource`""
    Write-Log "Running: $restoreCmd"

    $sw = [System.Diagnostics.Stopwatch]::StartNew()
    # -Encoding Default matches what Add-Content (Write-Log) uses, so the log
    # file is not a mix of ANSI and UTF-16 text on Windows PowerShell.
    & $script:P4DBIN -r $script:OFFLINE_DB @modeFlags $restoreSource 2>&1 |
        Out-File -Append -FilePath $script:LOGFILE -Encoding Default
    $restoreExitCode = $LASTEXITCODE
    $sw.Stop()

    if ($restoreExitCode -ne 0) {
        Invoke-Die "Restore of checkpoint to $($script:OFFLINE_DB) failed!"
    }
    Write-Log "Offline db restored in $($sw.Elapsed.TotalSeconds) seconds."
    "Offline db file restored successfully." | Out-File -FilePath $usableFile -Encoding UTF8
}
#==============================================================================
# Compress and move old journals to the checkpoints directory.
#==============================================================================
function Invoke-GzipMvJournals {
    # Gzips uncompressed numbered journals from the journals directory into
    # the checkpoints directory and deletes the originals. Nothing is done
    # unless more than two uncompressed journals exist; the highest-numbered
    # one is always left in place. Dies if a journal cannot be compressed.
    Write-Log "Compress journals and move to checkpoints volume."

    # @() keeps the result an array even for a single match; .Count on a
    # scalar is not reliable under Set-StrictMode in Windows PowerShell.
    $jnlNums = @(Get-ChildItem -Path $script:JOURNALS -Filter "*.jnl.*" -File -ErrorAction SilentlyContinue |
        Where-Object { $_.Name -notmatch '\.gz$' } |
        ForEach-Object {
            if ($_.Name -match '\.jnl\.(\d+)$') { [int]$Matches[1] }
        } |
        Sort-Object)
    if ($jnlNums.Count -le 2) { return }

    # Compress all but the latest
    $firstNum = $jnlNums[0]
    $lastNum = $jnlNums[-2]
    Write-Log "FIRSTJOURNALNUM=$firstNum"
    Write-Log "LASTJOURNALNUM=$lastNum"

    for ($j = $firstNum; $j -le $lastNum; $j++) {
        $numberedJournal = Join-Path $script:JOURNALS "$($script:P4SERVER).jnl.$j"
        if (-not (Test-Path $numberedJournal)) { continue }

        Write-Log "Compressing $numberedJournal"
        $sw = [System.Diagnostics.Stopwatch]::StartNew()
        $destFile = Join-Path $script:CHECKPOINTS "$($script:P4SERVER).jnl.${j}.gz"

        $compressed = $false
        try {
            $sourceStream = $null
            $destStream = $null
            $gzipStream = $null
            try {
                $sourceStream = [System.IO.File]::OpenRead($numberedJournal)
                $destStream = [System.IO.File]::Create($destFile)
                $gzipStream = New-Object System.IO.Compression.GZipStream($destStream,
                    [System.IO.Compression.CompressionMode]::Compress)
                $sourceStream.CopyTo($gzipStream)
            } finally {
                # Always release the handles, including on failure. The gzip
                # stream goes first so its trailer is flushed to the file.
                if ($gzipStream) { $gzipStream.Dispose() }
                if ($destStream) { $destStream.Dispose() }
                if ($sourceStream) { $sourceStream.Dispose() }
            }
            $compressed = $true
            # Delete the original only after the archive is completely written.
            Remove-Item -Force $numberedJournal
        } catch {
            if (-not $compressed) {
                # Do not leave a truncated archive that looks like a valid journal.
                Remove-Item -Force $destFile -ErrorAction SilentlyContinue
            }
            Invoke-Die "Compression of $numberedJournal failed: $_"
        }

        $sw.Stop()
        Write-Log "Compressed in $($sw.Elapsed.TotalSeconds) seconds."
    }
}
#==============================================================================
# Remove old checkpoints and journals based on retention settings.
#==============================================================================
function Remove-NumberedBackupItem {
    # Private helper for Remove-OldCheckpointsAndJournals: deletes the items in
    # $Dir named exactly "<P4SERVER>.<Kind>.<Num>" or "<P4SERVER>.<Kind>.<Num>.<anything>".
    # The exact-number match matters: a plain "<Num>*" wildcard would also
    # hit higher numbers that merely start with the same digits (10 -> 100, 101).
    #   Dir                - directory to clean.
    #   Kind               - "ckp" or "jnl".
    #   Num                - checkpoint/journal number to remove.
    #   IncludeDirectories - also remove matching directories (recursively).
    param(
        [string]$Dir,
        [string]$Kind,
        [int]$Num,
        [switch]$IncludeDirectories
    )
    $stem = "$($script:P4SERVER).$Kind.$Num"
    $exactName = '^' + [regex]::Escape($stem) + '(\..*)?$'
    $targets = @(Get-ChildItem -Path $Dir -Filter "$stem*" -ErrorAction SilentlyContinue |
        Where-Object { $_.Name -match $exactName -and ($IncludeDirectories -or -not $_.PSIsContainer) })
    foreach ($target in $targets) {
        if ($target.PSIsContainer) {
            Remove-Item -LiteralPath $target.FullName -Force -Recurse
        } else {
            Remove-Item -LiteralPath $target.FullName -Force
        }
    }
}
function Remove-OldCheckpointsAndJournals {
    # Applies the retention settings to the checkpoints directory: keeps the
    # newest KEEPCKPS checkpoint numbers of each form (multi-part files,
    # parallel directories, single files) and the newest KEEPJNLS journal
    # numbers, deleting everything older. Does nothing when KEEPCKPS is 0;
    # journal cleanup is skipped when KEEPJNLS is 0.
    if ($script:KEEPCKPS -eq 0) {
        Write-Log "Skipping cleanup of old checkpoints because KeepCheckpoints is set to 0."
        return
    }
    Write-Log "Deleting obsolete checkpoints and journals. Keeping latest $($script:KEEPCKPS)."

    $checkpointsDir = $script:CHECKPOINTS
    if (-not (Test-Path $checkpointsDir)) { return }
    $server = $script:P4SERVER

    # Clean up multi-part parallel checkpoint files (e.g. p4.ckp.123.0.gz, p4.ckp.123.1.gz, ...)
    $multiPartToRemove = @(Get-ChildItem -Path $checkpointsDir -Filter "${server}.ckp.*" -File -ErrorAction SilentlyContinue |
        ForEach-Object {
            if ($_.Name -match '\.ckp\.(\d+)\.\d+\.gz(\.md5)?$') { [int]$Matches[1] }
        } |
        Sort-Object -Unique -Descending |
        Select-Object -Skip $script:KEEPCKPS)
    foreach ($num in $multiPartToRemove) {
        Write-Log "Removing multi-part checkpoint ${server}.ckp.${num}.*"
        Remove-NumberedBackupItem -Dir $checkpointsDir -Kind "ckp" -Num $num
    }

    # Clean up parallel checkpoint directories (and their companion files)
    $dirsToRemove = @(Get-ChildItem -Path $checkpointsDir -Directory -Filter "${server}.ckp.*" -ErrorAction SilentlyContinue |
        ForEach-Object {
            if ($_.Name -match '\.ckp\.(\d+)$') { [int]$Matches[1] }
        } |
        Sort-Object -Unique -Descending |
        Select-Object -Skip $script:KEEPCKPS)
    foreach ($num in $dirsToRemove) {
        Write-Log "Removing ${server}.ckp.${num} (directory and related files)"
        Remove-NumberedBackupItem -Dir $checkpointsDir -Kind "ckp" -Num $num -IncludeDirectories
    }

    # Clean up non-parallel checkpoint files
    $filesToRemove = @(Get-ChildItem -Path $checkpointsDir -Filter "${server}.ckp.*.gz" -File -ErrorAction SilentlyContinue |
        ForEach-Object {
            if ($_.Name -match '\.ckp\.(\d+)\.gz$') { [int]$Matches[1] }
        } |
        Sort-Object -Unique -Descending |
        Select-Object -Skip $script:KEEPCKPS)
    foreach ($num in $filesToRemove) {
        Write-Log "Removing ${server}.ckp.${num}.*"
        Remove-NumberedBackupItem -Dir $checkpointsDir -Kind "ckp" -Num $num
    }

    # Clean up old journal files. Retention counts journal NUMBERS, so
    # several files sharing one number use up only one retention slot.
    if ($script:KEEPJNLS -gt 0) {
        $jnlToRemove = @(Get-ChildItem -Path $checkpointsDir -Filter "${server}.jnl.*" -File -ErrorAction SilentlyContinue |
            ForEach-Object {
                if ($_.Name -match '\.jnl\.(\d+)') { [int]$Matches[1] }
            } |
            Sort-Object -Unique -Descending |
            Select-Object -Skip $script:KEEPJNLS)
        foreach ($num in $jnlToRemove) {
            Write-Log "Removing ${server}.jnl.${num} and ${server}.jnl.${num}.*"
            Remove-NumberedBackupItem -Dir $checkpointsDir -Kind "jnl" -Num $num
        }
    }
}
#==============================================================================
# Remove old log files based on retention settings.
#==============================================================================
function Remove-OldLogs {
    # Prunes rotated log files in two passes:
    #   1. checkpoint logs, retained according to $script:KEEPJNLS
    #      (skipped when that value is not greater than 0);
    #   2. server/broker/utility logs, retained according to $script:KEEPLOGS
    #      (skipped, with a log message, when that value is 0).
    # The actual deletion is delegated to Remove-OldLogFiles.
    $checkpointLogKeep = $script:KEEPJNLS
    if ($checkpointLogKeep -gt 0) {
        Write-Log "Deleting old checkpoint logs. Keeping latest $($script:KEEPJNLS)."
        Remove-OldLogFiles -Prefix "checkpoint.log" -Keep $checkpointLogKeep
    }

    if ($script:KEEPLOGS -ne 0) {
        Write-Log "Deleting old server logs. Keeping latest $($script:KEEPLOGS)."
        # Each entry is a file-name prefix; rotated copies carry extra suffixes.
        @(
            "log", "p4broker.log", "broker_rotate.log", "audit.log",
            "sync_replica.log", "replica_status.log", "replica_cleanup.log",
            "upgrade.log", "p4verify.log", "monitor_metrics.log"
        ) | ForEach-Object {
            Remove-OldLogFiles -Prefix $_ -Keep $script:KEEPLOGS
        }
    }
    else {
        Write-Log "Skipping cleanup of old server logs because KeepLogs is set to 0."
    }
}
function Remove-OldLogFiles {
    # Deletes all but the newest $Keep files in $script:LOGS whose names start
    # with $Prefix. "Newest" is decided by LastWriteTime.
    #
    # Parameters:
    #   Prefix - leading part of the file name; matched as "<Prefix>*".
    #   Keep   - number of most recently written matches to leave in place.
    param(
        [string]$Prefix,
        [int]$Keep
    )

    $candidates = Get-ChildItem -Path $script:LOGS -Filter "${Prefix}*" -File -ErrorAction SilentlyContinue
    $newestFirst = $candidates | Sort-Object LastWriteTime -Descending
    $expired = $newestFirst | Select-Object -Skip $Keep

    foreach ($stale in $expired) {
        $stalePath = $stale.FullName
        Write-Log "rm $stalePath"
        Remove-Item -Force $stalePath
    }
}
#==============================================================================
# Rotate last run log files.
#==============================================================================
function Invoke-RotateLastRunLogs {
    # Renames the previous run's checkpoint log and a fixed set of server logs
    # to "<name>.<journal number>.<timestamp>", then gzips each rotated server
    # log and removes the uncompressed copy.
    #
    # Reads:  $script:LOGFILE, $script:LOGS, $script:JOURNALNUM
    # Errors: rename failures are ignored (best effort); a compression failure
    #         is logged as a warning and the uncompressed rotated log is kept.
    $datestamp = Get-Date -Format "yyyyMMdd-HHmmss"

    # Rotate checkpoint log
    if ($script:LOGFILE -and (Test-Path $script:LOGFILE)) {
        $rotated = "$($script:LOGFILE).$($script:JOURNALNUM).$datestamp"
        Move-Item -Force $script:LOGFILE $rotated -ErrorAction SilentlyContinue
    }

    # Rotate server logs with compression
    $logsToRotate = @("log", "p4broker.log", "audit.log", "monitor_metrics.log")
    foreach ($logName in $logsToRotate) {
        $logPath = Join-Path $script:LOGS $logName
        if (-not (Test-Path $logPath)) { continue }

        $rotated = "${logPath}.$($script:JOURNALNUM).$datestamp"
        Move-Item -Force $logPath $rotated -ErrorAction SilentlyContinue
        # The move may have failed silently (e.g. file in use); nothing to compress then.
        if (-not (Test-Path $rotated)) { continue }

        $compressed = "${rotated}.gz"
        try {
            $sourceStream = $null
            $destStream = $null
            $gzipStream = $null
            try {
                $sourceStream = [System.IO.File]::OpenRead($rotated)
                $destStream = [System.IO.File]::Create($compressed)
                $gzipStream = New-Object System.IO.Compression.GZipStream($destStream,
                    [System.IO.Compression.CompressionMode]::Compress)
                $sourceStream.CopyTo($gzipStream)
                # Close explicitly so a failure while flushing the final gzip
                # block surfaces here, before the original is deleted.
                $gzipStream.Close()
            }
            finally {
                # Always release the file handles, even when compression failed,
                # so the files are not left locked for the rest of the run.
                try {
                    if ($gzipStream) { $gzipStream.Dispose() }
                }
                finally {
                    if ($destStream) { $destStream.Dispose() }
                    if ($sourceStream) { $sourceStream.Dispose() }
                }
            }
            # Only reached when the archive was written completely.
            Remove-Item -Force $rotated
        }
        catch {
            Write-Log "Warning: Failed to compress $rotated : $_"
            # Drop a possibly truncated archive, but only while the uncompressed
            # log still exists, so no log data can be lost here.
            if ((Test-Path $rotated) -and (Test-Path $compressed)) {
                Remove-Item -Force $compressed -ErrorAction SilentlyContinue
            }
        }
    }
}
#==============================================================================
# Set the checkpoint counter in Perforce.
#==============================================================================
function Set-CheckpointCounter {
    # Records the completion time of this checkpoint in a Perforce counter.
    #
    # Counter name:  "LastCheckpoint.<SERVERID>" when $script:SERVERID is set,
    #                otherwise "LastCheckpoint".
    # Counter value: current local time as 'yyyy/MM/dd HH:mm:ss zzz'.
    # Target server: $script:P4MASTERPORT when running on an edge or standby
    #                server, otherwise $script:P4PORT.
    #
    # A failure to set the counter is logged as a warning and does not throw.
    Invoke-P4Login

    $counterValue = Get-Date -Format 'yyyy/MM/dd HH:mm:ss zzz'

    $counterName = "LastCheckpoint"
    if ($script:SERVERID) { $counterName = "LastCheckpoint.$($script:SERVERID)" }

    $targetPort = $script:P4PORT
    if ($script:EDGESERVER -eq 1 -or $script:STANDBYSERVER -eq 1) {
        $targetPort = $script:P4MASTERPORT
    }

    # Capture the command output instead of discarding it so that a failed
    # update can be reported rather than passing silently.
    $output = & $script:P4BIN -u $script:P4USER -p $targetPort counter $counterName $counterValue 2>&1
    if ($LASTEXITCODE -ne 0) {
        Write-Log "Warning: Failed to set counter $counterName on $targetPort (exit code $LASTEXITCODE): $output"
    }
}
#==============================================================================
# Main execution
#==============================================================================
# Entry point: runs one complete offline checkpoint cycle by calling each phase
# in a fixed order. The steps inside the try block correspond to the workflow
# listed in the file header (rotate journal, replay journals to the offline
# database, dump checkpoint, recreate offline database, compress journals,
# clean up old files).
# Any error raised inside the try block is logged, Remove-CkpRunning is called,
# and the error is rethrown to the caller.
function Main {
# Time the whole run; the elapsed seconds are reported in the "End" log line.
$totalSw = [System.Diagnostics.Stopwatch]::StartNew()
Initialize-Environment
Test-Vars
Set-ServerVars
# Invoke-RotateLastRunLogs embeds $script:JOURNALNUM in the rotated file names,
# so the journal number is looked up before the logs are rotated.
Get-JournalNum
Invoke-RotateLastRunLogs
Write-Log "Start $($script:P4SERVER) Checkpoint"
Test-Dirs
Test-OfflineDbUsable
# NOTE(review): presumably marks a checkpoint as in progress; the matching
# Remove-CkpRunning is called on both the success and the failure path below.
Set-CkpRunning
try {
Invoke-P4Login
# Journal number is fetched a second time after logging in.
# NOTE(review): presumably to refresh it with an authenticated session - confirm.
Get-JournalNum
Invoke-TruncateJournal
Invoke-ReplayJournalsToOfflineDb
Get-OfflineJournalNum
Invoke-DumpCheckpoint
Invoke-RecreateOfflineDbFiles
Invoke-GzipMvJournals
Remove-OldCheckpointsAndJournals
Remove-OldLogs
$totalSw.Stop()
Write-Log "End $($script:P4SERVER) Checkpoint (Total time: $($totalSw.Elapsed.TotalSeconds) seconds)"
# The counter is set after the "End" line has been logged.
Set-CheckpointCounter
Remove-CkpRunning
} catch {
# Log the failure, clear the running state, then rethrow to the caller.
Write-Log "ERROR: $_"
Remove-CkpRunning
throw
}
}
# Script entry: execute the checkpoint workflow when the file is run.
Main
| # | Change | User | Description | Committed | |
|---|---|---|---|---|---|
| #3 | 32506 | Russell C. Jackson (Rusty) | Changed Invoke-TruncateJournal (line ~501). Before: the function only checked for $script:SERVER_TYPE -eq "p4d_master" and did nothing for other server types, so edge/replica/standby servers would silently skip journal handling entirely. After: the function has three branches. (1) Edge/replica/standby (checked first via $script:EDGESERVER, $script:REPLICASERVER, $script:STANDBYSERVER): skips p4 admin journal rotation entirely, logs that it is waiting, and polls for the already-rotated journal file to appear via replication (same 30-minute timeout as the master path). (2) Master (p4d_master): unchanged behavior; validates that no duplicate files exist, rotates the journal, and waits for the file. (3) Unknown server type (fallback): logs a warning and proceeds with rotation as if it were a master, so the script does not silently fail on unrecognized configurations. The Get-JournalNum function already correctly adjusts the journal number for edge/replica/standby (it subtracts 1 because the counter is already incremented by the replicated rotation), so no changes were needed there. | | |
| #2 | 32505 | Russell C. Jackson (Rusty) | Added support for parallel checkpoints. | | |
| #1 | 32504 | Russell C. Jackson (Rusty) | Windows scripts | | |