#!/usr/bin/python
# This script can be used to manage the cache of your perforce proxy. Without further intervention,
# the Perforce proxy will only ever add files to the cache it maintains on the proxy machine. If
# you have a lot of data, this can quickly fill up the harddisk on your proxy, at which time the proxy
# will start to give connection errors when you attempt to sync through it. While you can manually throw
# away the entire cache without much problems, this is not an ideal solution; it means you have to keep
# an eye on the disk usage, and manually intervene when it fills up. Also, deleting the entire cache
# means that the cache will then need to be refilled from the main server.
#
# This script makes it easy to manage the diskspace of the Perforce proxy. It works on both Windows
# (requires win32file module, XP & server 2003 tested, but others should be fine) and Linux (2.6/ext3
# tested, but should be fine anywhere statvfs works). Other *nix variants are probably also fine, though
# there is a reported bug in Solaris that means files in the proxy cache will not have their atime set.
# This might interfere with the script, but I haven't tested this. The script works on Python 2.4 and 2.5,
# at least. The code deliberately sticks to constructs that are valid on both Python 2.4+ and Python 3
# (no print statements, no 'with', no 'except ... as').
#
# Use the script as follows:
#   p4p_clean -l low -h high -t address -m mailserver cache_path
# The script will examine the disk the cache resides on, and check if the percentage of free diskspace
# is below 'low'. If not, the script does nothing. If it is, the script will sort all the files in the
# cache on atime and proceed to delete the oldest files (by atime) until the free diskspace is once again
# above 'high'. When done, it will mail a short report to the specified address, using the specified
# mailserver. If an error is encountered, this will be present in the mail. See usage below for details.
#
# Install a crontab or a Windows scheduled task to run the script (make sure the account you install it
# under has write access to the cache files). As the script may use quite a bit of disk/cpu resources,
# it should preferably be run at night. I usually run it with a low threshold of 20% and a high one of
# 30%. We did occasionally still see the cache fill up during the day, so I added a backup task with a
# low threshold of 10% that runs every hour for emergency cleaning, as per the following crontab file:
#
#   0 4 * * * /usr/local/bin/p4p_clean.py -l 20 -h 30 -t p4support -m mail /data/depot
#   30 * * * * /usr/local/bin/p4p_clean.py -l 10 -h 30 -t p4support -m mail /data/depot
#
# Please send any questions or comments to me: frank@compagner.com
#
# Frank Compagner
# September 17, 2007.

import sys
import os
import time
import getopt
import traceback
import smtplib
import socket

free_space_low_threshold = 20   # min % of free space on cache drive before script takes action
free_space_high_threshold = 30  # min % of free space on cache drive after script is done
log = ""                        # everything passed to Write(); becomes the body of the report mail
mailserver = ""
to_address = ""
always_mail = False

# Possible results of script
no_action, cleaned, error = range(3)
result = no_action

####################################################################################################

def FormatExceptionInfo(maxTBlevel=5):
    """Describe the exception that is currently being handled.

    Returns a tuple (name, args, traceback_lines): the exception class name, the
    exception's ``args`` (or "" if it has none) and at most `maxTBlevel` formatted
    traceback entries.
    """
    cla, exc, trbk = sys.exc_info()
    excName = getattr(cla, "__name__", str(cla))
    # Read the attribute instead of exc.__dict__["args"]: new-style exceptions do not
    # keep 'args' in their instance dict, so the old lookup always reported "".
    excArgs = getattr(exc, "args", "")
    excTb = traceback.format_tb(trbk, maxTBlevel)
    return (excName, excArgs, excTb)

####################################################################################################

msg_template = (
    "from: %FROM%\n"
    "To: %TO%\n"
    "Subject: %SUBJECT%\n"
    "\n"
)

def SendMail():
    """Mail the accumulated log to `to_address` through `mailserver`.

    Nothing is sent when no mail server or recipient was configured, or when the run
    took no action and `always_mail` is not set. SMTP errors propagate to the caller.
    """
    if not (mailserver and to_address):
        return
    if result == no_action and not always_mail:
        return

    subject = "[p4p_clean] Result of running perforce proxy clean script."
    from_name = "p4p_clean@" + socket.getfqdn()
    # Tolerate "a@x, b@y" style lists: strip blanks and drop empty entries.
    to_addresses = [address.strip() for address in to_address.split(',') if address.strip()]

    msg = msg_template.replace("%SUBJECT%", subject)
    msg = msg.replace("%FROM%", from_name)
    msg = msg.replace("%TO%", ", ".join(to_addresses))
    msg = msg + log

    server = smtplib.SMTP(mailserver)
    try:
        server.set_debuglevel(0)
        server.sendmail(from_name, to_addresses, msg)
        server.quit()
    finally:
        # Always release the socket, also when sendmail() raised; harmless after quit().
        server.close()

####################################################################################################

def Write(string):
    """Print `string` prefixed with the current HH:MM time and append it to the global log."""
    global log
    line = time.strftime("%H:%M", time.localtime(time.time())) + " " + string.rstrip()
    log = log + line + "\n"
    print(line)

##################################################################################################

def GetDiskInfo(path):
    """Return (total_bytes, free_bytes, free_percentage) for the disk that holds `path`.

    The percentage is an integer, rounded down. On Windows the win32file module is
    required. Raises ValueError when no drive can be derived from `path` on Windows,
    and exits with status 3 on platforms that are neither 'nt' nor 'posix'.
    """
    if os.name == 'nt':
        # Resolve relative paths first so that a drive letter is available.
        drive, tail = os.path.splitdrive(os.path.abspath(path))
        if not drive:
            raise ValueError("Cannot determine the drive of " + repr(path))
        import win32file
        sectorsPerCluster, bytesPerSector, numFreeClusters, totalNumClusters = \
            win32file.GetDiskFreeSpace(drive + "\\")
        block_size = sectorsPerCluster * bytesPerSector
        total = totalNumClusters
        free = numFreeClusters
    elif os.name == 'posix':
        st = os.statvfs(path)
        block_size = st.f_frsize
        total = st.f_blocks
        # NOTE(review): f_bfree includes blocks reserved for root; f_bavail may be the
        # better figure when the proxy runs unprivileged - confirm before changing.
        free = st.f_bfree
    else:
        sys.exit(3)

    free_factor = (100 * free) // total
    return total * block_size, free * block_size, free_factor

##################################################################################################

def cmp_atime(f1, f2):
    """Old-style comparison of two (atime, size, pathname) tuples on whole-second atime."""
    return int(f1[0]) - int(f2[0])

def AtimeKey(file_info):
    """Sort key equivalent to cmp_atime: the access time truncated to whole seconds."""
    return int(file_info[0])

##################################################################################################

def GetMB(size):
    """Format a byte count as whole megabytes, e.g. "12 MB"."""
    return str(size // (1024 * 1024)) + " MB"

##################################################################################################

usage_string = (
    "Script to clean a Perforce proxy cache when diskspace gets low\n\n"
    "Usage:\n"
    "    p4p_clean.py [-?] [-l low_threshold] [-h high_threshold]\n"
    "                 [-t to_address] [-m mailserver] [-a] cache_path\n\n"
    "    -?                 display this message\n"
    "    -l low_threshold   min % of free space on cache drive before cleanup starts\n"
    "    -h high_threshold  min % of free space on cache drive after script is done\n"
    "    -t to_address      comma separated list of email addresses to send results to\n"
    "    -m mailserver      name of smtp server to use\n"
    "    -a                 always send mail, even if no action was taken\n"
    "    cache_path         path to the root of the proxy cache"
)

def ParseCommandLine(argv):
    """Apply the command line options in `argv` to the module settings.

    Returns the cache root path. Prints the usage text and exits with status 1 on a
    malformed command line (unknown option, non-numeric threshold, or not exactly one
    cache path) and with status 0 when -? is given.
    """
    global free_space_low_threshold, free_space_high_threshold
    global to_address, mailserver, always_mail

    try:
        opts, args = getopt.getopt(argv, "?l:h:t:m:av")
    except getopt.GetoptError:
        print(usage_string)
        sys.exit(1)

    if len(args) != 1:
        print(usage_string)
        sys.exit(1)

    try:
        for opt, opt_arg in opts:
            if opt == "-?":
                print(usage_string)
                sys.exit(0)
            elif opt == "-l":
                free_space_low_threshold = int(opt_arg)
            elif opt == "-h":
                free_space_high_threshold = int(opt_arg)
            elif opt == "-t":
                to_address = opt_arg
            elif opt == "-m":
                mailserver = opt_arg
            elif opt == "-a":
                always_mail = True
    except ValueError:
        # A threshold that is not a number is a usage error, not a crash.
        print(usage_string)
        sys.exit(1)

    return args[0]

##################################################################################################

def ReportDiskSpace(heading, total, free, free_factor):
    """Log a disk usage summary under `heading`."""
    Write(heading)
    Write(" Total space = " + GetMB(total))
    Write(" Free space = " + GetMB(free))
    Write(" Free perc. = " + str(free_factor) + "%.")

def CollectFiles(cache_root):
    """Walk `cache_root` and gather an (atime, size, pathname) tuple for every file.

    Returns (files, skipped), where `skipped` counts the files that could not be
    examined, for instance because they disappeared while the tree was being walked.
    """
    files = []
    skipped = 0
    for root, dirnames, filenames in os.walk(cache_root):
        for filename in filenames:
            pathname = os.path.join(root, filename)
            try:
                st = os.stat(pathname)
            except OSError:
                skipped = skipped + 1
            else:
                files.append((st.st_atime, st.st_size, pathname))
    return files, skipped

def RemoveOldestFiles(files, size_target):
    """Delete files in list order until at least `size_target` bytes have been removed.

    `files` holds (atime, size, pathname) tuples, oldest first. A file that cannot be
    deleted is logged and skipped, so a single failure can never stall the clean-up.
    Returns (number_of_files_removed, bytes_removed).
    """
    removed_count = 0
    removed_size = 0
    for atime, size, pathname in files:
        if removed_size >= size_target:
            break
        try:
            os.remove(pathname)
        except OSError:
            Write(" Error deleting " + pathname)
        else:
            removed_count = removed_count + 1
            removed_size = removed_size + size
    return removed_count, removed_size

def CleanCache(cache_root):
    """Free up space under `cache_root` if the disk is below the low threshold.

    Returns `cleaned` when files were found and a clean-up pass was made, `no_action`
    otherwise.
    """
    disk_total, disk_free, disk_free_factor = GetDiskInfo(cache_root)
    ReportDiskSpace(" Current disk space :", disk_total, disk_free, disk_free_factor)

    if disk_free_factor >= free_space_low_threshold:
        Write(" Free disk space is above low threshold of " + str(free_space_low_threshold) +"%, nothing to do.")
        return no_action

    Write(" Free percentage lower than min threshold of " + str(free_space_low_threshold) + "%")
    Write(" Processing files in " + cache_root)
    files, skipped = CollectFiles(cache_root)
    Write(" Found " + str(len(files)) + " files.")
    if skipped:
        Write(" Skipped " + str(skipped) + " files that could not be examined.")
    if not files:
        return no_action

    Write(" Sorting files by access time")
    files.sort(key=AtimeKey)

    # Number of bytes to delete to get the free space back up to the high threshold.
    disk_free_target = free_space_high_threshold * (float(disk_total) / 100)
    size_target = disk_free_target - disk_free
    Write(" Starting cache cleanup, trying to remove " + GetMB(int(size_target)) + ".")
    removed_count, removed_size = RemoveOldestFiles(files, size_target)
    Write(" Removed " + str(removed_count) + " files for a total of " + GetMB(removed_size) + ".")

    disk_total, disk_free, disk_free_factor = GetDiskInfo(cache_root)
    ReportDiskSpace(" New disk space :", disk_total, disk_free, disk_free_factor)
    return cleaned

def main(argv=None):
    """Script entry point: parse `argv` (default sys.argv[1:]), clean the cache, mail the report."""
    global result
    if argv is None:
        argv = sys.argv[1:]
    cache_root = ParseCommandLine(argv)

    # Command line parsed, now get to work
    try:
        Write("Clean Cache operation started.")
        result = CleanCache(cache_root)
        Write("Clean Cache operation done.")
    except:
        # Deliberate catch-all at the top-level boundary: whatever goes wrong must end
        # up in the mailed report. (BaseException does not exist on Python 2.4.)
        Write(" Exception " + repr(FormatExceptionInfo()) + " occured, aborting.")
        result = error

    # We're done, send the mail and exit
    SendMail()

if __name__ == "__main__":
    main()