#==============================================================================
# daily_checkpoint.ps1 - Offline checkpoint script for Perforce on Windows.
#
# Performs an offline checkpoint of the Perforce server database without
# requiring the SDP or any particular directory layout. All paths are
# supplied as parameters or derived from a configuration file.
#
# Workflow:
#   1. Validate environment and directories
#   2. Rotate the active journal (master only; edge/replica/standby servers
#      skip rotation and wait for the already-rotated journal via replication)
#   3. Replay numbered journals to the offline database
#   4. Dump a new checkpoint from the offline database
#   5. Recreate the offline database from the new checkpoint
#   6. Compress and archive journals
#   7. Clean up old checkpoints, journals, and logs
#
# Usage:
#   .\daily_checkpoint.ps1 -P4ROOT D:\p4data\root -OfflineDB D:\p4data\offline_db `
#       -Checkpoints E:\backups\checkpoints -Journals E:\backups\journals `
#       -Logs D:\p4data\logs -P4D C:\p4\p4d.exe -P4 C:\p4\p4.exe `
#       -P4PORT ssl:1666 -P4USER perforce
#
#   .\daily_checkpoint.ps1 -ConfigFile D:\p4data\checkpoint_config.ps1
#
#   Multi-part parallel checkpoints (p4d 2023.2+, creates numbered part files):
#   .\daily_checkpoint.ps1 -ConfigFile D:\p4data\checkpoint_config.ps1 -MultiPartParallel
#
# Configuration file:
#   Source a .ps1 file that sets the $script: variables listed below.
#   See the parameter descriptions for what each one controls.
#
#==============================================================================

[CmdletBinding()]
param(
    # Path to a .ps1 configuration file that sets $script: variables.
    # When provided, all other parameters become optional overrides.
    [Parameter()]
    [string]$ConfigFile,

    # P4ROOT - the live server database directory (contains db.* files).
    [Parameter()]
    [string]$P4ROOT,

    # Offline database directory (a second copy of db.* used for checkpoints).
    [Parameter()]
    [string]$OfflineDB,

    # Directory where checkpoint files are stored.
    [Parameter()]
    [string]$Checkpoints,

    # Directory where rotated journal files are stored.
    [Parameter()]
    [string]$Journals,

    # Directory for log files.
    [Parameter()]
    [string]$Logs,

    # Full path to p4d.exe.
    [Parameter()]
    [string]$P4D,

    # Full path to p4.exe.
    [Parameter()]
    [string]$P4,

    # Perforce server port (e.g. ssl:1666, 1666).
    [Parameter()]
    [string]$P4PORT,

    # Port of the master/commit server (defaults to P4PORT).
    [Parameter()]
    [string]$P4MASTERPORT,

    # Perforce admin user for journal rotation and counter updates.
    [Parameter()]
    [string]$P4USER,

    # Prefix for checkpoint/journal filenames (e.g. "p4_master"). Defaults to "p4".
    [Parameter()]
    [string]$ServerName,

    # Number of checkpoints to retain. 0 = keep all.
    [Parameter()]
    [int]$KeepCheckpoints = 7,

    # Number of journals to retain. 0 = keep all.
    [Parameter()]
    [int]$KeepJournals = 112,

    # Number of log files to retain. 0 = keep all.
    [Parameter()]
    [int]$KeepLogs = 350,

    # Number of threads for parallel checkpoints (p4d 2022.1+). 0 = disable parallel.
    [Parameter()]
    [int]$ParallelThreads = 32,

    # SMTP server for email notifications. Leave empty to disable.
    [Parameter()]
    [string]$SmtpServer,

    # Email recipient for error notifications.
    [Parameter()]
    [string]$MailTo,

    # Email sender address.
    [Parameter()]
    [string]$MailFrom,

    # Path to a password file (first line = password) for p4 login.
    [Parameter()]
    [string]$PasswordFile,

    # Enable multi-part parallel checkpoints (p4d 2023.2+).
    # Creates numbered part files (e.g. p4.ckp.123.0.gz, p4.ckp.123.1.gz, ...)
    # instead of a single directory. Requires ParallelThreads > 0.
    [Parameter()]
    [switch]$MultiPartParallel
)

Set-StrictMode -Version Latest
$ErrorActionPreference = "Stop"

#==============================================================================
# Script-scope state
#==============================================================================
$script:P4ROOT = $null
$script:OFFLINE_DB = $null
$script:CHECKPOINTS = $null
$script:JOURNALS = $null
$script:LOGS = $null
$script:P4BIN = $null
$script:P4DBIN = $null
$script:P4PORT = $null
$script:P4MASTERPORT = $null
$script:P4USER = $null
$script:P4SERVER = $null
$script:LOGFILE = $null
$script:SERVERID = $null
$script:SERVER_TYPE = "p4d_master"
$script:JOURNALNUM = 0
$script:CHECKPOINTNUM = 0
$script:OFFLINEJNLNUM = 0
$script:EDGESERVER = 0
$script:REPLICASERVER = 0
$script:STANDBYSERVER = 0
$script:DoParallelCheckpoints = 0
$script:DoMultiPartParallel = 0
$script:NumCheckPointThreads = 32
$script:P4D_VERSION = $null
$script:KEEPCKPS = 7
$script:KEEPJNLS = 112
$script:KEEPLOGS = 350
$script:MAILTO = $null
$script:MAILFROM = $null
$script:SMTPSERVER = $null
$script:PASSWORDFILE = $null

# Parameters the caller actually supplied. Functions have their own
# $PSBoundParameters, so the script-level one is captured here. It lets
# Initialize-Environment tell "explicitly passed" apart from "left at default".
$script:BOUND_PARAMS = $PSBoundParameters

#==============================================================================
# Logging
#==============================================================================

# Write a timestamped entry to $script:LOGFILE, or to the host when no log file
# has been configured yet.
function Write-Log {
    param([string]$Message)

    $timestamp = Get-Date -Format "ddd MM/dd/yyyy HH:mm:ss"
    $entry = "$timestamp $($MyInvocation.ScriptName): $Message"
    if ($script:LOGFILE) {
        Add-Content -Path $script:LOGFILE -Value $entry
    } else {
        Write-Host $entry
    }
}

#==============================================================================
# Die - log error, optionally send email, remove semaphore, and exit.
#==============================================================================

# Logs the error, mails the log when MAILTO and SMTPSERVER are set, deletes
# ckp_running.txt from the logs directory, then throws $Message.
function Invoke-Die {
    param([string]$Message)

    Write-Log "ERROR!!! - $env:COMPUTERNAME $script:P4SERVER $($MyInvocation.ScriptName): $Message"

    if ($script:LOGFILE -and $script:MAILTO -and $script:SMTPSERVER) {
        try {
            $from = if ($script:MAILFROM) { $script:MAILFROM } else { "perforce@$env:COMPUTERNAME" }
            # -ErrorAction Stop turns a delivery failure into an exception so
            # the catch below can record it; the failure is still non-fatal.
            Send-MailMessage -To $script:MAILTO -From $from `
                -Subject "ERROR!!! - $env:COMPUTERNAME $script:P4SERVER checkpoint" `
                -Body (Get-Content $script:LOGFILE -Raw) `
                -SmtpServer $script:SMTPSERVER -ErrorAction Stop
        } catch {
            Write-Log "Warning: Failed to send email notification: $_"
        }
    }

    # Remove semaphore
    if ($script:LOGS) {
        $semaphore = Join-Path $script:LOGS "ckp_running.txt"
        if (Test-Path $semaphore) {
            Remove-Item -Force $semaphore
        }
    }

    throw $Message
}

#==============================================================================
# Initialize environment from parameters and optional config file.
#==============================================================================

# Return the trimmed first line of a text file, or $null when the file is empty.
function Get-FirstLineTrimmed {
    param([string]$Path)

    $line = Get-Content -Path $Path -First 1
    if ($null -eq $line) {
        return $null
    }
    return ([string]$line).Trim()
}

# Populates the $script: state from the config file (if any) and then from the
# script parameters. Also reads server.id / server_type.txt from P4ROOT,
# detects the p4d version, and sets the log file path.
function Initialize-Environment {
    # Load config file first (parameters override it below)
    if ($ConfigFile) {
        if (-not (Test-Path $ConfigFile)) {
            Write-Host "Error: Configuration file '$ConfigFile' not found."
            exit 1
        }
        . $ConfigFile
    }

    # Apply parameter overrides - parameters take precedence over config file
    if ($P4ROOT)       { $script:P4ROOT = $P4ROOT }
    if ($OfflineDB)    { $script:OFFLINE_DB = $OfflineDB }
    if ($Checkpoints)  { $script:CHECKPOINTS = $Checkpoints }
    if ($Journals)     { $script:JOURNALS = $Journals }
    if ($Logs)         { $script:LOGS = $Logs }
    if ($P4D)          { $script:P4DBIN = $P4D }
    if ($P4)           { $script:P4BIN = $P4 }
    if ($P4PORT)       { $script:P4PORT = $P4PORT }
    if ($P4MASTERPORT) { $script:P4MASTERPORT = $P4MASTERPORT }
    if ($P4USER)       { $script:P4USER = $P4USER }
    if ($ServerName)   { $script:P4SERVER = $ServerName }
    if ($SmtpServer)   { $script:SMTPSERVER = $SmtpServer }
    if ($MailTo)       { $script:MAILTO = $MailTo }
    if ($MailFrom)     { $script:MAILFROM = $MailFrom }
    if ($PasswordFile) { $script:PASSWORDFILE = $PasswordFile }

    # Numeric parameters always carry a default, so only values the caller
    # explicitly passed may override what the config file set. Without a
    # config file the script-scope defaults already equal the parameter defaults.
    $bound = $script:BOUND_PARAMS
    if ($bound.ContainsKey('KeepCheckpoints')) { $script:KEEPCKPS = $KeepCheckpoints }
    if ($bound.ContainsKey('KeepJournals'))    { $script:KEEPJNLS = $KeepJournals }
    if ($bound.ContainsKey('KeepLogs'))        { $script:KEEPLOGS = $KeepLogs }
    if ($bound.ContainsKey('ParallelThreads')) { $script:NumCheckPointThreads = $ParallelThreads }

    if ($MultiPartParallel) {
        $script:DoMultiPartParallel = 1
    }

    # Default P4MASTERPORT to P4PORT
    if (-not $script:P4MASTERPORT) {
        $script:P4MASTERPORT = $script:P4PORT
    }

    # Default server name
    if (-not $script:P4SERVER) {
        $script:P4SERVER = "p4"
    }

    if ($script:P4ROOT) {
        # Read server.id if present
        $serverIdFile = Join-Path $script:P4ROOT "server.id"
        if (Test-Path $serverIdFile) {
            $script:SERVERID = Get-FirstLineTrimmed -Path $serverIdFile
        }

        # Read server type if present; an empty file keeps the default type
        $serverTypeFile = Join-Path $script:P4ROOT "server_type.txt"
        if (Test-Path $serverTypeFile) {
            $serverType = Get-FirstLineTrimmed -Path $serverTypeFile
            if ($serverType) {
                $script:SERVER_TYPE = $serverType
            }
        }
    }

    # Detect p4d version for parallel checkpoint support
    if ($script:P4DBIN -and (Test-Path $script:P4DBIN)) {
        try {
            $versionOutput = & $script:P4DBIN -V 2>&1
            $versionLine = $versionOutput | Where-Object { $_ -match "Rev\." } | Select-Object -First 1
            if ($versionLine -match "(\d{4}\.\d)") {
                $script:P4D_VERSION = $Matches[1]
            }
        } catch {
            $script:P4D_VERSION = "0"
        }
    }

    # Set log file
    if ($script:LOGS) {
        $script:LOGFILE = Join-Path $script:LOGS "checkpoint.log"
    }
}

#==============================================================================
# Determine server topology (edge, replica, standby).
#==============================================================================

# True when the detected p4d version parses as a version number and is at
# least $Minimum (e.g. "2022.1"). Unparseable values such as "0" yield $false.
function Test-P4dVersionAtLeast {
    param([string]$Minimum)

    if (-not $script:P4D_VERSION) {
        return $false
    }
    $detected = $null
    if (-not [version]::TryParse([string]$script:P4D_VERSION, [ref]$detected)) {
        return $false
    }
    return ($detected -ge [version]$Minimum)
}

# Decides which checkpoint mode to use (single file, parallel directory, or
# multi-part) and sets EDGESERVER / REPLICASERVER / STANDBYSERVER from the
# services field of this server's db.server record.
function Set-ServerVars {
    # Parallel checkpoint support for p4d 2022.1+ (2022.1 itself included)
    if ($script:NumCheckPointThreads -gt 0 -and (Test-P4dVersionAtLeast "2022.1")) {
        $script:DoParallelCheckpoints = 1
    }

    # Multi-part parallel checkpoint support for p4d 2023.2+
    if ($script:DoMultiPartParallel -eq 1) {
        if ($script:NumCheckPointThreads -le 0) {
            Invoke-Die "MultiPartParallel requires ParallelThreads > 0."
        }
        if (-not (Test-P4dVersionAtLeast "2023.2")) {
            Invoke-Die "MultiPartParallel requires p4d 2023.2 or later (detected: $($script:P4D_VERSION))."
        }
        # Multi-part supersedes directory-based parallel
        $script:DoParallelCheckpoints = 0
    }

    # Detect edge/replica/standby from db.server
    if ($script:SERVERID) {
        $servicesData = $null
        try {
            $output = & $script:P4DBIN -r $script:P4ROOT -J off -L NUL -k db.server -jd - 2>&1
            # Escape the id so characters such as '.' are matched literally.
            $recordPattern = "@db\.server@ @" + [regex]::Escape($script:SERVERID) + "@"
            $serverLine = $output |
                ForEach-Object { "$_" } |
                Where-Object { $_ -match $recordPattern } |
                Select-Object -First 1
            if ($serverLine) {
                $fields = @($serverLine -split "@")
                if ($fields.Count -ge 13) {
                    $servicesData = [int]($fields[12].Trim())
                }
            }
        } catch {
            # Best effort: on failure the server is treated as a master, but
            # say so instead of hiding it. Write-Warning cannot itself fail on
            # a missing log directory.
            Write-Warning "Could not read db.server to detect server topology: $_"
        }

        if ($servicesData) {
            $script:EDGESERVER = if ($servicesData -band 4096) { 1 } else { 0 }
            $script:REPLICASERVER = if ($servicesData -eq 2533) { 1 } else { 0 }
            $script:STANDBYSERVER = if ($servicesData -eq 35141 -or $servicesData -eq 35301) { 1 } else { 0 }
        }
    }
}

#==============================================================================
# Validate required variables and disk space.
#==============================================================================

# Preflight check: every required setting is present, both binaries exist, and
# the offline_db volume has at least 2GB free. Missing settings or binaries
# are reported on the host and end the script with exit code 1.
function Test-Vars {
    $requiredVars = @(
        @{ Name = "P4ROOT";      Value = $script:P4ROOT },
        @{ Name = "OfflineDB";   Value = $script:OFFLINE_DB },
        @{ Name = "Checkpoints"; Value = $script:CHECKPOINTS },
        @{ Name = "Journals";    Value = $script:JOURNALS },
        @{ Name = "Logs";        Value = $script:LOGS },
        @{ Name = "P4D";         Value = $script:P4DBIN },
        @{ Name = "P4";          Value = $script:P4BIN },
        @{ Name = "P4PORT";      Value = $script:P4PORT },
        @{ Name = "P4USER";      Value = $script:P4USER }
    )

    $ok = $true
    foreach ($var in $requiredVars) {
        if (-not $var.Value) {
            Write-Host "Error: Required parameter -$($var.Name) is NOT set."
            $ok = $false
        }
    }

    foreach ($bin in @($script:P4BIN, $script:P4DBIN)) {
        if ($bin -and -not (Test-Path $bin)) {
            Write-Host "Error: Binary not found: $bin"
            $ok = $false
        }
    }

    if (-not $ok) {
        Write-Host "Aborting due to errors in preflight checks."
        Write-Host "Supply required values via parameters or a -ConfigFile."
        exit 1
    }

    # Check disk space (~2GB minimum on offline_db volume)
    if (Test-Path $script:OFFLINE_DB) {
        # .Drive is empty for paths that are not on a PSDrive; the check is
        # skipped in that case.
        $drive = (Resolve-Path $script:OFFLINE_DB).Drive
        if ($drive) {
            $freeGB = [math]::Round((Get-PSDrive $drive.Name).Free / 1GB, 2)
            if ($freeGB -lt 2) {
                Invoke-Die "Available space on offline_db volume is ${freeGB}GB, less than the required 2GB."
            }
        }
    }
}

#==============================================================================
# Verify required directories exist and are accessible.
#==============================================================================

# Each of the offline db, checkpoints, journals and logs directories must exist
# as a directory and accept a small test write. All problems are collected and
# reported through a single Invoke-Die.
function Test-Dirs {
    $badDirs = @()

    foreach ($dir in @($script:OFFLINE_DB, $script:CHECKPOINTS, $script:JOURNALS, $script:LOGS)) {
        # -PathType Container: a plain file with that name is not a usable directory.
        if (-not (Test-Path $dir -PathType Container)) {
            Write-Log "Error: Dir $dir does not exist."
            $badDirs += "$dir (missing)"
            continue
        }

        try {
            $testFile = Join-Path $dir ".p4_write_test"
            [IO.File]::WriteAllText($testFile, "test")
            Remove-Item $testFile -Force
        } catch {
            Write-Log "Error: Dir $dir is not writable."
            $badDirs += "$dir (not writable)"
        }
    }

    if ($badDirs.Count -gt 0) {
        Invoke-Die "Directory check failed: $($badDirs -join ', '). Aborting."
    }
}

#==============================================================================
# Check the offline database is in a usable state.
#==============================================================================

# Dies unless the offline db directory holds both the offline_db_usable.txt
# marker and a db.counters file.
function Test-OfflineDbUsable {
    $usableFile = Join-Path $script:OFFLINE_DB "offline_db_usable.txt"
    if (-not (Test-Path $usableFile)) {
        Invoke-Die "Offline database not in a usable state. Expected file $usableFile not found. Rebuild the offline database from a checkpoint first."
    }

    $countersFile = Join-Path $script:OFFLINE_DB "db.counters"
    if (-not (Test-Path $countersFile)) {
        Invoke-Die "Offline database file $countersFile not found. Create it by running a live checkpoint first."
    }
}

#==============================================================================
# Semaphore management - prevent concurrent checkpoint operations.
#==============================================================================

# Creates ckp_running.txt in the logs directory, or dies when it already exists.
function Set-CkpRunning {
    $semaphore = Join-Path $script:LOGS "ckp_running.txt"

    if (Test-Path $semaphore) {
        try {
            Invoke-Die "Last checkpoint not complete. Semaphore file $semaphore exists. Check the backup process or remove this file if the prior run is confirmed to have finished."
        } finally {
            # Invoke-Die deletes ckp_running.txt as part of its cleanup, but
            # here the file belongs to the other run. Put it back so that run
            # stays protected and removal remains an operator decision, as the
            # message says.
            if (-not (Test-Path $semaphore)) {
                "Checkpoint running." | Out-File -FilePath $semaphore -Encoding UTF8
            }
        }
    }

    "Checkpoint running." | Out-File -FilePath $semaphore -Encoding UTF8
}

# Deletes ckp_running.txt from the logs directory if it is there.
function Remove-CkpRunning {
    $semaphore = Join-Path $script:LOGS "ckp_running.txt"
    if (Test-Path $semaphore) {
        Remove-Item -Force $semaphore
    }
}

#==============================================================================
# Get journal number from live db.counters.
#==============================================================================

# Pulls the value of the 'journal' counter out of 'p4d -jd' output for
# db.counters. Returns $null when no such record is present, and an empty
# string when the record is present but has too few '@'-separated fields.
function ConvertTo-JournalCounterValue {
    param([object[]]$DumpOutput)

    # Anchor on the table name so a counter whose VALUE happens to be
    # "journal" is not mistaken for the journal counter itself.
    $record = @($DumpOutput) |
        ForEach-Object { "$_" } |
        Where-Object { $_ -match '@db\.counters@ @journal@ ' } |
        Select-Object -First 1
    if (-not $record) {
        return $null
    }

    $fields = @($record -split "@")
    if ($fields.Count -lt 8) {
        return ""
    }
    return $fields[7]
}

# Sets $script:JOURNALNUM from the live db.counters (0 when the file or the
# counter is missing) and $script:CHECKPOINTNUM to JOURNALNUM + 1.
function Get-JournalNum {
    $script:JOURNALNUM = 0

    $countersFile = Join-Path $script:P4ROOT "db.counters"
    if (Test-Path $countersFile) {
        $output = & $script:P4DBIN -r $script:P4ROOT -k db.counters -jd - 2>&1
        $nextCheckpointNum = ConvertTo-JournalCounterValue -DumpOutput $output
        if ($null -ne $nextCheckpointNum) {
            if ($nextCheckpointNum -match '^\d+$') {
                $script:JOURNALNUM = [int]$nextCheckpointNum
            } else {
                Invoke-Die "The journal counter value [$nextCheckpointNum] is invalid. It must be numeric."
            }
        }
    }

    # Edge/replica/standby: journal already rotated on master
    if ($script:EDGESERVER -eq 1 -or $script:REPLICASERVER -eq 1 -or $script:STANDBYSERVER -eq 1) {
        $script:JOURNALNUM = $script:JOURNALNUM - 1
    }

    $script:CHECKPOINTNUM = $script:JOURNALNUM + 1
}

#==============================================================================
# Get journal number from offline db.counters.
#==============================================================================

# Sets $script:OFFLINEJNLNUM from the offline db.counters and sets
# $script:CHECKPOINTNUM to the same number. Dies when the offline db is not
# usable or the counter cannot be read.
function Get-OfflineJournalNum {
    Test-OfflineDbUsable

    $output = & $script:P4DBIN -r $script:OFFLINE_DB -jd - db.counters 2>&1
    $offlineNum = ConvertTo-JournalCounterValue -DumpOutput $output

    if ($null -eq $offlineNum) {
        Invoke-Die "Cannot get the offline journal number from $($script:OFFLINE_DB)\db.counters."
    }
    if ($offlineNum -notmatch '^\d+$') {
        Invoke-Die "The offline journal counter value [$offlineNum] is invalid."
    }
    $script:OFFLINEJNLNUM = [int]$offlineNum

    Write-Log "Offline journal number is: $($script:OFFLINEJNLNUM)"
    $script:CHECKPOINTNUM = $script:OFFLINEJNLNUM
    Write-Log "Offline checkpoint number is: $($script:CHECKPOINTNUM)"
}

#==============================================================================
# Rotate the active journal via 'p4 admin journal'.
#==============================================================================

# Polls every $PollSeconds seconds until $JournalFile exists. After $MaxWait
# polls it dies with $TimeoutMessage.
function Wait-ForJournalFile {
    param(
        [string]$JournalFile,
        [string]$TimeoutMessage,
        [int]$MaxWait = 360,
        [int]$PollSeconds = 5
    )

    $waitCount = 0
    while (-not (Test-Path $JournalFile)) {
        Start-Sleep -Seconds $PollSeconds
        $waitCount++
        if ($waitCount -ge $MaxWait) {
            Invoke-Die $TimeoutMessage
        }
    }
}

# Rotates the journal with 'p4 admin journal' against P4MASTERPORT and waits
# for the numbered journal file to show up. Dies if the target checkpoint or
# journal file already exists, or if the rotation command fails.
function Invoke-MasterJournalRotation {
    param(
        [string]$CheckpointFile,
        [string]$JournalFile
    )

    if (Test-Path $CheckpointFile) {
        Invoke-Die "Checkpoint $CheckpointFile already exists, check the backup process."
    }
    if (Test-Path $JournalFile) {
        Invoke-Die "Journal $JournalFile already exists, check the backup process."
    }

    Write-Log "Truncating journal..."
    Invoke-P4Login

    $sw = [System.Diagnostics.Stopwatch]::StartNew()
    # Run as the same user Invoke-P4Login authenticated; without -u the
    # command would run as whatever user the environment defaults to.
    $result = & $script:P4BIN -u $script:P4USER -p $script:P4MASTERPORT admin journal 2>&1
    $rotateExit = $LASTEXITCODE
    $sw.Stop()

    if ($rotateExit -ne 0) {
        Write-Log ($result | Out-String)
        Invoke-Die "Journal rotation on $($script:P4MASTERPORT) failed."
    }
    Write-Log "Journal rotation completed in $($sw.Elapsed.TotalSeconds) seconds."

    # Wait for the rotated journal file to appear
    $maxWait = 360
    Wait-ForJournalFile -JournalFile $JournalFile -MaxWait $maxWait `
        -TimeoutMessage "Timed out waiting for journal file $JournalFile after $($maxWait * 5) seconds."

    Invoke-P4Login
}

# Masters (and unrecognized server types, with a warning) rotate the journal.
# Edge/replica/standby servers only wait for the numbered journal file to
# appear in the journals directory.
function Invoke-TruncateJournal {
    $checkpointFile = Join-Path $script:CHECKPOINTS "$($script:P4SERVER).ckp.$($script:CHECKPOINTNUM).gz"
    $journalFile = Join-Path $script:JOURNALS "$($script:P4SERVER).jnl.$($script:JOURNALNUM)"

    if ($script:EDGESERVER -eq 1 -or $script:REPLICASERVER -eq 1 -or $script:STANDBYSERVER -eq 1) {
        # Edge/replica/standby servers do not rotate the journal themselves.
        # The master rotates it and the numbered journal arrives via replication.
        Write-Log "This is an edge/replica/standby server (SERVER_TYPE=$($script:SERVER_TYPE)). Skipping journal rotation."
        Write-Log "Waiting for rotated journal file $journalFile to appear via replication..."

        $maxWait = 360
        Wait-ForJournalFile -JournalFile $journalFile -MaxWait $maxWait `
            -TimeoutMessage "Timed out waiting for replicated journal file $journalFile after $($maxWait * 5) seconds. Ensure the master has rotated its journal."

        Write-Log "Rotated journal file $journalFile found."
        return
    }

    if ($script:SERVER_TYPE -ne "p4d_master") {
        Write-Log "Warning: Unrecognized server type '$($script:SERVER_TYPE)'. Proceeding with journal rotation as master."
    }
    Invoke-MasterJournalRotation -CheckpointFile $checkpointFile -JournalFile $journalFile
}

#==============================================================================
# Login to Perforce.
#==============================================================================

# Pipes the first line of the password file to 'p4 login' for P4USER on P4PORT.
# Does nothing when no password file is configured or the file is missing.
# A failed login is logged as a warning and is not fatal.
function Invoke-P4Login {
    if (-not ($script:PASSWORDFILE -and (Test-Path $script:PASSWORDFILE))) {
        return
    }

    $firstLine = Get-Content $script:PASSWORDFILE -First 1
    if ($null -eq $firstLine) {
        Write-Log "Warning: Password file $($script:PASSWORDFILE) is empty; skipping p4 login."
        return
    }

    $password = ([string]$firstLine).Trim()
    # The password goes over stdin so it never shows up in a process argument list.
    $password | & $script:P4BIN -u $script:P4USER -p $script:P4PORT login 2>&1 | Out-Null
    if ($LASTEXITCODE -ne 0) {
        Write-Log "Warning: p4 login for user $($script:P4USER) on $($script:P4PORT) returned exit code $LASTEXITCODE."
    }
}

#==============================================================================
# Replay numbered journal files to the offline database.
#==============================================================================

# Runs p4d with the given arguments, appends its combined stdout/stderr to the
# checkpoint log, and returns the process exit code.
function Invoke-P4dLogged {
    param([string[]]$Arguments)

    # With 2>&1, native stderr lines arrive as error records. Under a "Stop"
    # preference Windows PowerShell can abort on the first one, so the
    # preference is relaxed for this function only.
    $ErrorActionPreference = "Continue"
    $lines = @(& $script:P4DBIN @Arguments 2>&1 | ForEach-Object { "$_" })
    $exitCode = $LASTEXITCODE

    # Add-Content is what Write-Log uses too; sharing it keeps the log file
    # in a single encoding.
    if ($lines.Count -gt 0) {
        Add-Content -Path $script:LOGFILE -Value $lines -ErrorAction Stop
    }
    return $exitCode
}

# Replays unreplayed numbered journals into the offline db, first from the
# checkpoints directory and then from the journals directory.
function Invoke-ReplayJournalsToOfflineDb {
    Write-Log "Replay any unreplayed journals to the offline database."
    Test-OfflineDbUsable
    Get-OfflineJournalNum

    # Replay from checkpoints directory
    Invoke-ReplayJournalsFromDir -Dir $script:CHECKPOINTS

    Get-OfflineJournalNum

    # Replay from journals directory
    Invoke-ReplayJournalsFromDir -Dir $script:JOURNALS
}

# Replays journals numbered OFFLINEJNLNUM..highest found in $Dir, provided the
# lowest journal number in $Dir is not above OFFLINEJNLNUM. The usable marker
# is removed before each replay and rewritten after it succeeds.
function Invoke-ReplayJournalsFromDir {
    param([string]$Dir)

    # @(...) keeps this an array even for zero or one journal, so .Count and
    # indexing behave under Set-StrictMode.
    $jnlNums = @(
        Get-ChildItem -Path $Dir -Filter "*.jnl.*" -File -ErrorAction SilentlyContinue |
            ForEach-Object {
                if ($_.Name -match '\.jnl\.(\d+)(\.gz)?$') { [int]$Matches[1] }
            } |
            Sort-Object -Unique
    )
    if ($jnlNums.Count -eq 0) {
        return
    }

    $firstNum = $jnlNums[0]
    $lastNum = $jnlNums[-1]

    Write-Log "FIRSTJOURNALNUM=$firstNum"
    Write-Log "LASTJOURNALNUM=$lastNum"
    Write-Log "OFFLINEJNLNUM=$($script:OFFLINEJNLNUM)"

    if ($firstNum -gt $script:OFFLINEJNLNUM) {
        return
    }

    $usableFile = Join-Path $script:OFFLINE_DB "offline_db_usable.txt"

    for ($j = $script:OFFLINEJNLNUM; $j -le $lastNum; $j++) {
        $numberedJournal = Join-Path $Dir "$($script:P4SERVER).jnl.$j"
        $numberedJournalGz = "${numberedJournal}.gz"

        # Prefer the compressed journal when both forms exist.
        $replayTarget = $null
        if (Test-Path $numberedJournalGz) {
            $replayTarget = $numberedJournalGz
        } elseif (Test-Path $numberedJournal) {
            $replayTarget = $numberedJournal
        } else {
            continue
        }

        Write-Log "Replay journal $replayTarget to offline db."
        Remove-Item -Force $usableFile -ErrorAction SilentlyContinue

        $sw = [System.Diagnostics.Stopwatch]::StartNew()
        $exitCode = Invoke-P4dLogged -Arguments @("-r", $script:OFFLINE_DB, "-jr", "-f", $replayTarget)
        $sw.Stop()

        if ($exitCode -ne 0) {
            Invoke-Die "Offline journal replay of $replayTarget to $($script:OFFLINE_DB) failed."
        }

        Write-Log "Journal replay completed in $($sw.Elapsed.TotalSeconds) seconds."
        "Offline journal files restored successfully." | Out-File -FilePath $usableFile -Encoding UTF8
    }
}

#==============================================================================
# Dump a new checkpoint from the offline database.
#==============================================================================

# Dumps checkpoint number CHECKPOINTNUM from the offline db in the configured
# mode (multi-part, parallel directory, or single .gz file). Skips the dump
# when the single-file checkpoint and its .md5 both already exist.
function Invoke-DumpCheckpoint {
    $rootDir = $script:OFFLINE_DB
    Write-Log "Dump out new checkpoint from db files in $rootDir."

    $checkpointBase = Join-Path $script:CHECKPOINTS "$($script:P4SERVER).ckp.$($script:CHECKPOINTNUM)"
    $newCheckpoint = "${checkpointBase}.gz"
    $newCheckpointMD5 = "${newCheckpoint}.md5"

    # Skip if checkpoint already exists with MD5
    if ((Test-Path $newCheckpoint) -and (Test-Path $newCheckpointMD5)) {
        Write-Log "Warning: Skipping generation of existing checkpoint $newCheckpoint. MD5 file already exists."
        return
    }

    $threads = "$($script:NumCheckPointThreads)"
    if ($script:DoMultiPartParallel -eq 1) {
        $checkpointCmd = "$($script:P4DBIN) -r `"$rootDir`" --multipart -z -jd -N $threads `"$checkpointBase`""
        $p4dArgs = @("-r", $rootDir, "--multipart", "-z", "-jd", "-N", $threads, $checkpointBase)
    } elseif ($script:DoParallelCheckpoints -eq 1) {
        $checkpointCmd = "$($script:P4DBIN) -r `"$rootDir`" -z -jdpm -N $threads `"$checkpointBase`""
        $p4dArgs = @("-r", $rootDir, "-z", "-jdpm", "-N", $threads, $checkpointBase)
    } else {
        $checkpointCmd = "$($script:P4DBIN) -r `"$rootDir`" -jd -z `"$newCheckpoint`""
        $p4dArgs = @("-r", $rootDir, "-jd", "-z", $newCheckpoint)
    }

    Write-Log "Running: $checkpointCmd"
    $sw = [System.Diagnostics.Stopwatch]::StartNew()
    $exitCode = Invoke-P4dLogged -Arguments $p4dArgs
    $sw.Stop()

    if ($exitCode -ne 0) {
        Invoke-Die "New checkpoint dump from $rootDir FAILED. Command was: $checkpointCmd"
    }

    Write-Log "New checkpoint dump succeeded in $($sw.Elapsed.TotalSeconds) seconds."
}

#==============================================================================
# Recreate the offline database from the latest checkpoint.
#==============================================================================

# Rebuilds the offline db from the checkpoint whose .md5 file was written most
# recently. The restore source is resolved and verified before any existing
# db.* file is deleted.
function Invoke-RecreateOfflineDbFiles {
    $checkpointsDir = $script:CHECKPOINTS
    $threads = "$($script:NumCheckPointThreads)"

    # Find the latest checkpoint. Multi-part parallel creates files like
    # p4.ckp.123.0.gz, p4.ckp.123.1.gz, ... and a corresponding .md5 file for
    # the set. All modes look only at this server's own checkpoint prefix.
    $md5Files = @(
        Get-ChildItem -Path $checkpointsDir -Filter "$($script:P4SERVER).ckp.*.md5" -File -ErrorAction SilentlyContinue |
            Sort-Object LastWriteTime -Descending
    )
    if ($md5Files.Count -eq 0) {
        Remove-CkpRunning
        if ($script:DoMultiPartParallel -eq 1) {
            Invoke-Die "No multi-part parallel checkpoints found in $checkpointsDir. Run a live checkpoint first."
        } elseif ($script:DoParallelCheckpoints -eq 1) {
            Invoke-Die "No parallel checkpoint dirs found in $checkpointsDir. Run a live checkpoint first."
        } else {
            Invoke-Die "No checkpoints found in $checkpointsDir with prefix $($script:P4SERVER). Run a live checkpoint first."
        }
    }
    $latestMD5 = $md5Files[0].FullName
    $restoreBase = $latestMD5 -replace '\.md5$', ''

    # Resolve the restore source before touching the offline db, so a missing
    # checkpoint cannot leave the offline db wiped with nothing to restore from.
    if ($script:DoMultiPartParallel -eq 1) {
        $restoreMessage = "Recovering from last multi-part parallel checkpoint, $restoreBase."
        $restoreCmd = "$($script:P4DBIN) -r `"$($script:OFFLINE_DB)`" --multipart -z -jr -N $threads `"$restoreBase`""
        $p4dArgs = @("-r", $script:OFFLINE_DB, "--multipart", "-z", "-jr", "-N", $threads, $restoreBase)
    } elseif ($script:DoParallelCheckpoints -eq 1) {
        if (-not (Test-Path $restoreBase -PathType Container)) {
            Invoke-Die "Missing last checkpoint directory: $restoreBase. Abort!"
        }
        $restoreMessage = "Recovering from last parallel checkpoint dir, $restoreBase."
        $restoreCmd = "$($script:P4DBIN) -r `"$($script:OFFLINE_DB)`" -z -jrp -N $threads `"$restoreBase`""
        $p4dArgs = @("-r", $script:OFFLINE_DB, "-z", "-jrp", "-N", $threads, $restoreBase)
    } else {
        if ($latestMD5 -match '\.gz\.md5$') {
            $lastCheckpoint = $restoreBase
        } else {
            $lastCheckpoint = $restoreBase + ".gz"
        }
        if (-not (Test-Path $lastCheckpoint)) {
            Invoke-Die "Missing last checkpoint file: $lastCheckpoint. Abort!"
        }
        $restoreMessage = "Recovering from last full checkpoint, $lastCheckpoint."
        $restoreCmd = "$($script:P4DBIN) -r `"$($script:OFFLINE_DB)`" -jr -z `"$lastCheckpoint`""
        $p4dArgs = @("-r", $script:OFFLINE_DB, "-jr", "-z", $lastCheckpoint)
    }

    # Remove old offline db files
    $usableFile = Join-Path $script:OFFLINE_DB "offline_db_usable.txt"
    Remove-Item -Force $usableFile -ErrorAction SilentlyContinue
    Get-ChildItem -Path $script:OFFLINE_DB -Filter "db.*" -File -ErrorAction SilentlyContinue | Remove-Item -Force
    $saveDir = Join-Path $script:OFFLINE_DB "save"
    if (Test-Path $saveDir) {
        Get-ChildItem -Path $saveDir -Filter "db.*" -File -ErrorAction SilentlyContinue | Remove-Item -Force
    }

    # Restore from checkpoint
    Write-Log $restoreMessage
    Write-Log "Running: $restoreCmd"
    $sw = [System.Diagnostics.Stopwatch]::StartNew()
    $exitCode = Invoke-P4dLogged -Arguments $p4dArgs
    $sw.Stop()

    if ($exitCode -ne 0) {
        Invoke-Die "Restore of checkpoint to $($script:OFFLINE_DB) failed!"
    }

    Write-Log "Offline db restored in $($sw.Elapsed.TotalSeconds) seconds."
    "Offline db file restored successfully." | Out-File -FilePath $usableFile -Encoding UTF8
}

#==============================================================================
# Compress and move old journals to the checkpoints directory.
#==============================================================================

# Gzip-compresses $SourceFile into $DestFile. All streams are disposed even
# when the copy fails; any exception is passed on to the caller.
function Compress-FileToGzip {
    param(
        [string]$SourceFile,
        [string]$DestFile
    )

    $sourceStream = $null
    $destStream = $null
    $gzipStream = $null
    try {
        $sourceStream = [System.IO.File]::OpenRead($SourceFile)
        $destStream = [System.IO.File]::Create($DestFile)
        $gzipStream = New-Object System.IO.Compression.GZipStream($destStream, [System.IO.Compression.CompressionMode]::Compress)
        $sourceStream.CopyTo($gzipStream)
    } finally {
        # The gzip stream goes first so the compressed data is finalized
        # before the underlying file stream is closed.
        if ($gzipStream)   { $gzipStream.Dispose() }
        if ($destStream)   { $destStream.Dispose() }
        if ($sourceStream) { $sourceStream.Dispose() }
    }
}

# Removes the items in $Dir named exactly $BaseName or starting with
# "$BaseName." (e.g. p4.ckp.12, p4.ckp.12.gz, p4.ckp.12.gz.md5). The dot
# boundary keeps p4.ckp.120.gz out of the removal of checkpoint 12.
function Remove-NumberedArtifact {
    param(
        [string]$Dir,
        [string]$BaseName,
        [switch]$FilesOnly
    )

    Get-ChildItem -Path $Dir -Filter "${BaseName}*" -File:$FilesOnly -ErrorAction SilentlyContinue |
        Where-Object {
            $_.Name -eq $BaseName -or
            $_.Name.StartsWith("${BaseName}.", [System.StringComparison]::OrdinalIgnoreCase)
        } |
        Remove-Item -Force -Recurse
}

# Compresses every uncompressed numbered journal except the newest one into
# the checkpoints directory and deletes the original. Does nothing when two or
# fewer uncompressed journals are present.
function Invoke-GzipMvJournals {
    Write-Log "Compress journals and move to checkpoints volume."

    # @(...) keeps this an array for any number of matches, so .Count and
    # indexing behave under Set-StrictMode.
    $jnlFiles = @(
        Get-ChildItem -Path $script:JOURNALS -Filter "*.jnl.*" -File -ErrorAction SilentlyContinue |
            Where-Object { $_.Name -notmatch '\.gz$' }
    )
    if ($jnlFiles.Count -le 2) {
        return
    }

    $jnlNums = @(
        $jnlFiles |
            ForEach-Object { if ($_.Name -match '\.jnl\.(\d+)$') { [int]$Matches[1] } } |
            Sort-Object
    )
    if ($jnlNums.Count -le 2) {
        return
    }

    # Compress all but the latest
    $firstNum = $jnlNums[0]
    $lastNum = $jnlNums[-2]

    Write-Log "FIRSTJOURNALNUM=$firstNum"
    Write-Log "LASTJOURNALNUM=$lastNum"

    for ($j = $firstNum; $j -le $lastNum; $j++) {
        $numberedJournal = Join-Path $script:JOURNALS "$($script:P4SERVER).jnl.$j"
        if (-not (Test-Path $numberedJournal)) {
            continue
        }

        Write-Log "Compressing $numberedJournal"
        $sw = [System.Diagnostics.Stopwatch]::StartNew()
        $destFile = Join-Path $script:CHECKPOINTS "$($script:P4SERVER).jnl.${j}.gz"

        try {
            Compress-FileToGzip -SourceFile $numberedJournal -DestFile $destFile
            # The original is deleted only after compression succeeded.
            Remove-Item -Force $numberedJournal
        } catch {
            $failure = $_
            # A partial .gz must not be mistaken for a good archive later.
            Remove-Item -Force $destFile -ErrorAction SilentlyContinue
            Invoke-Die "Compression of $numberedJournal failed: $failure"
        }

        $sw.Stop()
        Write-Log "Compressed in $($sw.Elapsed.TotalSeconds) seconds."
    }
}

#==============================================================================
# Remove old checkpoints and journals based on retention settings.
#==============================================================================

# Keeps the newest KEEPCKPS checkpoints of each kind (multi-part, parallel
# directory, single file) and the newest KEEPJNLS journal numbers in the
# checkpoints directory. KEEPCKPS = 0 skips the whole cleanup, journals
# included; KEEPJNLS = 0 skips only the journal cleanup.
function Remove-OldCheckpointsAndJournals {
    if ($script:KEEPCKPS -eq 0) {
        Write-Log "Skipping cleanup of old checkpoints because KeepCheckpoints is set to 0."
        return
    }

    Write-Log "Deleting obsolete checkpoints and journals. Keeping latest $($script:KEEPCKPS)."

    $checkpointsDir = $script:CHECKPOINTS
    if (-not (Test-Path $checkpointsDir)) {
        return
    }

    $ckpPrefix = "$($script:P4SERVER).ckp"
    $jnlPrefix = "$($script:P4SERVER).jnl"

    # Clean up multi-part parallel checkpoint files (e.g. p4.ckp.123.0.gz, p4.ckp.123.1.gz, ...)
    $multiPartNums = @(
        Get-ChildItem -Path $checkpointsDir -Filter "${ckpPrefix}.*" -File -ErrorAction SilentlyContinue |
            ForEach-Object { if ($_.Name -match '\.ckp\.(\d+)\.\d+\.gz(\.md5)?$') { [int]$Matches[1] } } |
            Sort-Object -Unique -Descending
    )
    foreach ($num in @($multiPartNums | Select-Object -Skip $script:KEEPCKPS)) {
        $pattern = "${ckpPrefix}.${num}.*"
        Write-Log "Removing multi-part checkpoint $pattern"
        Get-ChildItem -Path $checkpointsDir -Filter $pattern -File -ErrorAction SilentlyContinue | Remove-Item -Force
    }

    # Clean up parallel checkpoint directories
    $parallelNums = @(
        Get-ChildItem -Path $checkpointsDir -Directory -Filter "${ckpPrefix}.*" -ErrorAction SilentlyContinue |
            ForEach-Object { if ($_.Name -match '\.ckp\.(\d+)$') { [int]$Matches[1] } } |
            Sort-Object -Unique -Descending
    )
    foreach ($num in @($parallelNums | Select-Object -Skip $script:KEEPCKPS)) {
        $base = "${ckpPrefix}.${num}"
        Write-Log "Removing $base and $base.*"
        Remove-NumberedArtifact -Dir $checkpointsDir -BaseName $base
    }

    # Clean up non-parallel checkpoint files
    $fileNums = @(
        Get-ChildItem -Path $checkpointsDir -Filter "${ckpPrefix}.*.gz" -File -ErrorAction SilentlyContinue |
            ForEach-Object { if ($_.Name -match '\.ckp\.(\d+)\.gz$') { [int]$Matches[1] } } |
            Sort-Object -Unique -Descending
    )
    foreach ($num in @($fileNums | Select-Object -Skip $script:KEEPCKPS)) {
        $base = "${ckpPrefix}.${num}"
        Write-Log "Removing $base and $base.*"
        Remove-NumberedArtifact -Dir $checkpointsDir -BaseName $base -FilesOnly
    }

    # Clean up old journal files
    if ($script:KEEPJNLS -gt 0) {
        # Retention counts journal numbers, not files, so a journal present
        # both as .N and .N.gz takes a single slot.
        $jnlNums = @(
            Get-ChildItem -Path $checkpointsDir -Filter "${jnlPrefix}.*" -File -ErrorAction SilentlyContinue |
                ForEach-Object { if ($_.Name -match '\.jnl\.(\d+)') { [int]$Matches[1] } } |
                Sort-Object -Unique -Descending
        )
        foreach ($num in @($jnlNums | Select-Object -Skip $script:KEEPJNLS)) {
            $base = "${jnlPrefix}.${num}"
            Write-Log "Removing $base and $base.*"
            Remove-NumberedArtifact -Dir $checkpointsDir -BaseName $base -FilesOnly
        }
    }
}

#==============================================================================
# Remove old log files based on retention settings.
#==============================================================================

# Trims checkpoint logs to the newest KEEPJNLS files and each known server log
# family to the newest KEEPLOGS files. A value of 0 disables the respective
# cleanup.
function Remove-OldLogs {
    if ($script:KEEPJNLS -gt 0) {
        Write-Log "Deleting old checkpoint logs. Keeping latest $($script:KEEPJNLS)."
        Remove-OldLogFiles -Prefix "checkpoint.log" -Keep $script:KEEPJNLS
    }

    if ($script:KEEPLOGS -eq 0) {
        Write-Log "Skipping cleanup of old server logs because KeepLogs is set to 0."
        return
    }

    Write-Log "Deleting old server logs. Keeping latest $($script:KEEPLOGS)."

    $logPrefixes = @(
        "log", "p4broker.log", "broker_rotate.log", "audit.log",
        "sync_replica.log", "replica_status.log", "replica_cleanup.log",
        "upgrade.log", "p4verify.log", "monitor_metrics.log"
    )

    foreach ($prefix in $logPrefixes) {
        Remove-OldLogFiles -Prefix $prefix -Keep $script:KEEPLOGS
    }
}

# Deletes all but the $Keep most recently written files of one log family in
# the logs directory. A family is the file named $Prefix plus every file named
# "$Prefix.<something>".
function Remove-OldLogFiles {
    param(
        [string]$Prefix,
        [int]$Keep
    )

    # The dot boundary stops a short prefix such as "log" from also matching
    # unrelated files like "login.txt".
    $files = Get-ChildItem -Path $script:LOGS -Filter "${Prefix}*" -File -ErrorAction SilentlyContinue |
        Where-Object {
            $_.Name -eq $Prefix -or
            $_.Name.StartsWith("${Prefix}.", [System.StringComparison]::OrdinalIgnoreCase)
        } |
        Sort-Object LastWriteTime -Descending |
        Select-Object -Skip $Keep

    foreach ($f in $files) {
        Write-Log "rm $($f.FullName)"
        Remove-Item -Force $f.FullName
    }
}

#==============================================================================
# Rotate last run log files.
#==============================================================================

# Renames the checkpoint log and the known server logs to
# <name>.<JOURNALNUM>.<timestamp>; the server logs are additionally
# gzip-compressed. A failed rename or compression is non-fatal.
function Invoke-RotateLastRunLogs {
    $datestamp = Get-Date -Format "yyyyMMdd-HHmmss"

    # Rotate checkpoint log
    if ($script:LOGFILE -and (Test-Path $script:LOGFILE)) {
        $rotated = "$($script:LOGFILE).$($script:JOURNALNUM).$datestamp"
        Move-Item -Force $script:LOGFILE $rotated -ErrorAction SilentlyContinue
    }

    # Rotate server logs with compression
    $logsToRotate = @("log", "p4broker.log", "audit.log", "monitor_metrics.log")
    foreach ($logName in $logsToRotate) {
        $logPath = Join-Path $script:LOGS $logName
        if (-not (Test-Path $logPath)) {
            continue
        }

        $rotated = "${logPath}.$($script:JOURNALNUM).$datestamp"
        Move-Item -Force $logPath $rotated -ErrorAction SilentlyContinue
        # If the rename did not happen there is nothing to compress.
        if (-not (Test-Path $rotated)) {
            continue
        }

        try {
            Compress-FileToGzip -SourceFile $rotated -DestFile "${rotated}.gz"
            Remove-Item -Force $rotated
        } catch {
            $failure = $_
            # Keep the uncompressed rotated log; drop only the partial archive.
            Remove-Item -Force "${rotated}.gz" -ErrorAction SilentlyContinue
            Write-Log "Warning: Failed to compress $rotated : $failure"
        }
    }
}

#==============================================================================
# Set
the checkpoint counter in Perforce. #============================================================================== function Set-CheckpointCounter { Invoke-P4Login $counterValue = "$(Get-Date -Format 'yyyy/MM/dd HH:mm:ss zzz')" $counterName = "LastCheckpoint" if ($script:SERVERID) { $counterName = "LastCheckpoint.$($script:SERVERID)" } $targetPort = $script:P4PORT if ($script:EDGESERVER -eq 1 -or $script:STANDBYSERVER -eq 1) { $targetPort = $script:P4MASTERPORT } & $script:P4BIN -u $script:P4USER -p $targetPort counter $counterName $counterValue 2>&1 | Out-Null } #============================================================================== # Main execution #============================================================================== function Main { $totalSw = [System.Diagnostics.Stopwatch]::StartNew() Initialize-Environment Test-Vars Set-ServerVars Get-JournalNum Invoke-RotateLastRunLogs Write-Log "Start $($script:P4SERVER) Checkpoint" Test-Dirs Test-OfflineDbUsable Set-CkpRunning try { Invoke-P4Login Get-JournalNum Invoke-TruncateJournal Invoke-ReplayJournalsToOfflineDb Get-OfflineJournalNum Invoke-DumpCheckpoint Invoke-RecreateOfflineDbFiles Invoke-GzipMvJournals Remove-OldCheckpointsAndJournals Remove-OldLogs $totalSw.Stop() Write-Log "End $($script:P4SERVER) Checkpoint (Total time: $($totalSw.Elapsed.TotalSeconds) seconds)" Set-CheckpointCounter Remove-CkpRunning } catch { Write-Log "ERROR: $_" Remove-CkpRunning throw } } # Run Main