#!/bin/perl #!c:\bin\perl\bin\perl # ====================================================================== # cache_clean.pl # Script to monitor and control space usage by a P4Proxy cache # # usage: cache_clean.pl -c cache_dir -l limit_size -o output_file [-m max_empty] [-i] # see usage section in script below # # $Id: //esi/perforce/scripts/tools/cleanup/cache_clean.pl#2 $ # $Author: sstevens $ # $Date: 2007/05/23 $ # ====================================================================== # Get system modules. use strict; use Getopt::Long; use Time::Local; my $LimitSize = 0; my $CacheDir = ""; my $OutputFile = ""; my $MaxEmpty = 0; my $DoNothing = 0; my $Help = 0; my $Total = 0; my $Used = 0; my $Avail = 0; my $CurSize = 0; # If access date is before most recent file deleted last run, it is due to a Perforce bug. Some files written # to the cache do not have the correct access time set, it can be anywhere in the Unix epoch. # But if the access date is earlier than the last file cleaned out last time cache_clean.pl was run, # it's bogus, reset it to the current date. Each time the script is run, the last date is saved to file. # The date below is used if no date is saved so far. # This may be a Solaris only bug # my $start_date = "1/1/2005"; # edit this path as appropriate, it is where to store the files saving dates my $date_file_folder = "/perforce/scripts"; # # main routine starts here # GetOptions ("help|?" => \$Help, "c=s" => \$CacheDir, "l=i" => \$LimitSize, "o:s" => \$OutputFile, "m:i" => \$MaxEmpty, "i" => \$DoNothing, ); if($Help || ($CacheDir eq "") || ($LimitSize == 0)) { print "# usage: cache_clean.pl -c cache_dir -l limit_size [-m max_empty] [-n min_empty] -c cache_dir - where cache files are stored, such as /perforce/1710/cache it can also use a wildcard, like /perforce/*/cache, and will draw from all folders that match But - all these folders must be on the same partition. And you may have to use \* rather than *, in Unix shells. -l limit_size - maximum size in MB of the area monitored, delete files by access time to get here -m max_empty If this number of MB is already free in the partition, skip most of the script This is a speed optimization. For example: if set to 50000, and 50 GB are free, no need to trim cache, don't bother with du command -i do nothing, just list what would be deleted -o output_file - print output to this file, rather than stdout NOTE: cygwin is required for Windows machines, this script may need to be modified to use the correct path to cygwin commands.\n"; exit(1); } # take care of output if($OutputFile) { if (!open(OUT, ">>$OutputFile")) { print "Unable to open $OutputFile for appending, using STDOUT\n"; } else { select(OUT); } } # look for a saved date for the oldest files deleted, use to fix Perforce access date errors my $cachepath = $CacheDir; $cachepath =~ s/\//\./g; $cachepath =~ s/\*/\./g; my $DateFile = "$date_file_folder/$cachepath.cache_clean_date.keep"; if (-e $DateFile) { if (open(DATEFILE, "<$DateFile")) { while () { if(/Cutoff Date: (\S+)/) { $start_date = $1; } } close (DATEFILE); } else { print "cache_clean.pl: Unable to open $DateFile to get cutoff date, though it exists. Aborting.\n"; exit (1); } } else { print "Using default cutoff date of $start_date, no saved cutoff date.\n"; } # set to the most recent date deleted, to be picked up next time the script is run my $last_date = ""; my ($m, $d, $y) = split (/\//, $start_date); my $cutoff_time = timelocal(0, 0, 0, $d, $m-1, $y); my $current_time = time; if($MaxEmpty) { print "Will do nothing if more than $MaxEmpty MB is free\n"; } if($DoNothing) { print "Reporting on what would be cleaned from cache folder(s) $CacheDir\n"; `date`; print "Space usage would be reduced to $LimitSize MB.\n"; } else { print "Cleaning cache folder(s) $CacheDir\n"; `date`; print "Space usage will be reduced to $LimitSize MB.\n"; } # convert megabytes to kilobytes. $MaxEmpty = $MaxEmpty * 1000; $LimitSize = $LimitSize * 1000; my @Results = ""; my $Command = ""; # Deal with platform specific stuff. I'm sure there's a better way, but this works for many cases # Cygwin must be installed: http://www.cygwin.com/ my $ucmd_path = ""; my $uname_results = `uname -a`; if($uname_results =~ /CYGWIN/) { $ucmd_path = "c:\\cygwin\\bin\\"; } # do a df, if more space available than MaxEmpty, we're done, exit $Command = "df -k $CacheDir"; @Results = `$Command`; chomp(@Results); if ($#Results == -1) { print "Unable to run df to check partition size, check cache dir path\n"; die; } shift @Results; # first line is just a header foreach my $Line (@Results) { $Line =~ /\S+\s+(\w+)\s+(\w+)\s+(\w+).*/; $Total = $1; $Used = $2; $Avail = $3; last; } if($MaxEmpty && ($Avail > $MaxEmpty)) { # plenty of room, get out my $AvailMB = $Avail / 1000; my $MaxMB = $MaxEmpty / 1000; print "Done: $AvailMB MB available, more than $MaxMB MB, plenty of room, skipping rest of check\n"; exit(0); } # may be a limited space situation, use du to get current cache size $Command = "du -Lks $CacheDir"; my @Results = `$Command`; if ($#Results == -1) { print "Unable to run du to check cache size, check cache dir path\n"; die; } foreach my $Line (@Results) { print $Line; $Line =~ /(\w+).*/; $CurSize = $CurSize + $1; } print "Total cache size is currently $CurSize KB\n"; my $DelSize = 0; # determine if cache size must be reduced if($LimitSize < $CurSize) { # yes, must reduce space. $DelSize = $CurSize - $LimitSize; } # do we have DelSize indicating that we should do something? if($DelSize == 0) { print "Done: No need to trim cache for its size or to ensure minimum free space\n"; exit(0); } # if we're here, both Limit and DelSize refer to how to delete from the cache # gather a list of all files, in a hash by access time $Command = $ucmd_path . "find $CacheDir -type f -follow"; @Results = `$Command`; if ($#Results == -1) { print "Unable to run find to check each file, check cache dir path\n"; die; } chomp(@Results); my %Fdata; foreach my $Line (@Results) { $Fdata{$Line} = (stat($Line))[8]; if(!$DoNothing && ($Fdata{$Line} < $cutoff_time)) { # reset the access time to now, it was bogus my ($s, $mi, $h, $d, $m, $y) = localtime($Fdata{$Line}); printf ("Fixing bad date: %02d/%02d/%d %02d:%02d, $Line\n", $m + 1, $d, $y + 1900, $h, $mi); `touch \"$Line\"`; $Fdata{$Line} = $current_time; } } # for the following, work with bytes $DelSize = $DelSize * 1000; my $i = 0; my $FileSize; foreach my $File (sort {$Fdata{$a} <=> $Fdata{$b} } keys %Fdata) { my ($s, $mi, $h, $d, $m, $y) = localtime($Fdata{$File}); $FileSize = (stat($File))[7]; printf ("%02d/%02d/%d %02d:%02d, %d K - $File\n", $m + 1, $d, $y + 1900, $h, $mi, int($FileSize)/1000); if(!$DoNothing) { my $Results = unlink($File); if($Results != 1) { print "unable to rm file $File\n"; } } $DelSize = $DelSize - $FileSize; $i++; if($DelSize <= 0) { # enough has been deleted $last_date = sprintf("%s/%s/%s", $m + 1, $d, $y + 1900); last; } } if($DelSize > 0) { print "Error, unable to free up space\n"; exit (1); } my $LimitMB = $LimitSize / 1000; if($DoNothing) { print "Files accessed before $last_date would be deleted.\n"; print "\nWould reduce cache size to $LimitMB MB, $i files would be deleted.\n\n\n\n"; } else { if (open(DATEFILE, ">$DateFile")) { print DATEFILE "Cutoff Date: $last_date"; close (DATEFILE); } else { print "cache_clean.pl: Unable to open $DateFile to set cutoff date.\n"; } print "Files accessed before $last_date were deleted.\n"; print "Reduced cache size to $LimitMB MB, $i files deleted, done.\n\n"; } exit (0);