#!/usr/bin/python
# This script can be used to manage the cache of your perforce proxy. Without further intervention,
# the Perforce proxy will only ever add files to the cache it maintains on the proxy machine. If
# you have a lot of data, this can quickly fill up the hard disk on your proxy, at which point the proxy
# will start to give connection errors when you attempt to sync through it. While you can manually throw
# away the entire cache without much trouble, this is not an ideal solution; it means you have to keep
# an eye on the disk usage, and manually intervene when it fills up. Also, deleting the entire cache
# means that the cache will then need to be refilled from the main server.
#
# This script makes it easy to manage the diskspace of the Perforce proxy. It works on both Windows
# (requires win32file module, XP & server 2003 tested, but others should be fine) and Linux (2.6/ext3
# tested, but should be fine anywhere statvfs works). Other *nix variants are probably also fine, though
# there is a reported bug in Solaris that means files in the proxy cache will not have their atime set.
# This might interfere with the script, but I haven't tested this. The script works on Python 2.4 and 2.5,
# at least.
#
# Use the script as follows:
# p4p_clean -l low -h high -t address -m mailserver cache_path
# The script will examine the disk the cache resides on, and check if the percentage of free diskspace
# is below 'low'. If not, the script does nothing. If it is, the script will sort all the files in the
# cache on atime and proceed to delete the oldest files (by atime) until the free diskspace is once again
# above 'high'. When done, it will mail a short report to the specified address, using the specified
# mailserver. If an error is encountered, this will be present in the mail. See usage below for details.
#
# Install a crontab or a Windows scheduled task to run the script (make sure the account you install it
# under has write access to the cache files). As the script may use quite a bit of disk/cpu resources,
# it should preferably be run at night. I usually run it with a low threshold of 20% and a high one of
# 30%. We did occasionally still see the cache fill up during the day, so I added a backup task with a
# low threshold of 10% that runs every hour for emergency cleaning, as per the following crontab file:
#
# 0 4 * * * /usr/local/bin/p4p_clean.py -l 20 -h 30 -t p4support -m mail /data/depot
# 30 * * * * /usr/local/bin/p4p_clean.py -l 10 -h 30 -t p4support -m mail /data/depot
#
# Please send any questions or comments to me: frank@compagner.com
#
# Frank Compagner
# September 17, 2007.
import sys
import os
import time
import getopt
import traceback
import smtplib
import socket
# Default thresholds, as a percentage of free space on the cache drive.
# Both can be overridden from the command line (-l and -h).
free_space_low_threshold = 20   # cleanup starts when free space is below this
free_space_high_threshold = 30  # cleanup aims to get free space back up to this

# Mail settings; mail is only sent when both a server and an address are set.
mailserver = ""
to_address = ""
always_mail = False

# Text of everything reported so far; it becomes the body of the mail.
log = ""

# Possible results of script
no_action = 0
cleaned = 1
error = 2
result = no_action
####################################################################################################
def FormatExceptionInfo(maxTBlevel=5):
    """Describe the exception that is currently being handled.

    Intended to be called from inside an ``except`` clause, where
    sys.exc_info() is populated.

    maxTBlevel -- maximum number of traceback entries to format.

    Returns a tuple (excName, excArgs, excTb):
      excName -- name of the exception class
      excArgs -- the exception's ``args``, or "<no args>" if it has none
      excTb   -- list of formatted traceback lines from traceback.format_tb()
    """
    cla, exc, trbk = sys.exc_info()
    # Fall back to the string form for anything that carries no __name__.
    excName = getattr(cla, "__name__", str(cla))
    # Read the attribute instead of exc.__dict__["args"]: built-in exceptions
    # do not keep args in the instance dictionary, so the dictionary lookup
    # reported "<no args>" even when arguments were present.
    excArgs = getattr(exc, "args", "<no args>")
    excTb = traceback.format_tb(trbk, maxTBlevel)
    return (excName, excArgs, excTb)
####################################################################################################
# Header section of the report mail; the %...% markers are filled in by SendMail.
msg_template = (
    "from: %FROM%\n"
    "To: %TO%\n"
    "Subject: %SUBJECT%\n"
    "\n"
)


def SendMail():
    """Mail the accumulated log to the configured recipients.

    Uses the module-level settings mailserver, to_address, always_mail,
    result and log. Nothing is sent unless a mail server and at least one
    recipient are configured, and either the run did something
    (result != no_action) or always_mail is set.

    Errors raised by smtplib while connecting or sending are not caught here.
    """
    if not (mailserver and to_address):
        return
    if result == no_action and not always_mail:
        return
    # Accept blanks around the commas ("a@x, b@y") and ignore empty entries.
    to_addresses = [addr.strip() for addr in to_address.split(',') if addr.strip()]
    if not to_addresses:
        return
    subject = "[p4p_clean] Result of running perforce proxy clean script."
    from_name = "p4p_clean@" + socket.getfqdn()
    msg = msg_template.replace("%SUBJECT%", subject)
    msg = msg.replace("%FROM%", from_name)
    msg = msg.replace("%TO%", ", ".join(to_addresses))
    msg = msg + log
    server = smtplib.SMTP(mailserver)
    try:
        server.set_debuglevel(0)
        server.sendmail(from_name, to_addresses, msg)
        server.quit()
    finally:
        # Release the socket even when sending fails; after a successful
        # quit() the connection is already closed and this does nothing.
        server.close()
####################################################################################################
def Write(string):
    """Report one message: print it and append it to the mail log.

    Trailing whitespace is removed from the message and the current local
    time (HH:MM) is put in front of it. The same line goes to standard
    output and, followed by a newline, to the module-level ``log``.
    """
    global log
    stamped = "%s %s" % (time.strftime("%H:%M", time.localtime()), string.rstrip())
    log += stamped + "\n"
    print(stamped)
##################################################################################################
def GetDiskInfo(path):
    """Report the size and free space of the disk that holds path.

    path -- a path on the disk to examine.

    Returns a tuple (total_bytes, free_bytes, free_factor), where free_factor
    is the free space as a whole percentage of the total, rounded down. A
    disk that reports a total size of zero is given a free_factor of 100.

    On Windows the win32file module is required, and ValueError is raised if
    no drive can be determined for path. On POSIX systems os.statvfs() is
    used. On any other platform the script exits with status 3.
    """
    if os.name == 'nt':
        # Make the path absolute first so that relative paths get a drive too.
        drive = os.path.splitdrive(os.path.abspath(path))[0]
        if not drive:
            raise ValueError("Cannot determine the drive of " + repr(path))
        import win32file
        sectorsPerCluster, bytesPerSector, numFreeClusters, totalNumClusters = \
            win32file.GetDiskFreeSpace(drive + "\\")
        unit_size = sectorsPerCluster * bytesPerSector
        total = totalNumClusters
        free = numFreeClusters
    elif os.name == 'posix':
        st = os.statvfs(path)
        unit_size = st.f_frsize
        total = st.f_blocks
        free = st.f_bfree
    else:
        sys.exit(3)
    # Explicit floor division keeps the percentage a whole number; the guard
    # avoids dividing by zero when the disk reports no blocks at all.
    free_factor = 100
    if total:
        free_factor = (100 * free) // total
    total_bytes = total * unit_size
    free_bytes = free * unit_size
    return total_bytes, free_bytes, free_factor
##################################################################################################
def cmp_atime(f1, f2):
    """Comparison function that orders file records by access time.

    f1, f2 -- sequences whose first element is an access time.

    Both access times are converted with int() before they are subtracted,
    so the result is negative when f1 is older than f2, zero when both fall
    in the same whole second, and positive when f1 is newer.
    """
    first_atime = int(f1[0])
    second_atime = int(f2[0])
    return first_atime - second_atime
##################################################################################################
def GetMB(size):
    """Format a byte count as a whole number of megabytes, e.g. "12 MB".

    size -- number of bytes.

    The value is rounded down. Floor division is spelled out so that the
    result is a whole number whether or not true division is in effect.
    """
    return str(int(size // (1024 * 1024))) + " MB"
##################################################################################################
# Help text, printed for -? and whenever the command line is not valid.
usage_string = (
    "Script to clean a Perforce proxy cache when diskspace gets low\n"
    "\n"
    "Usage:\n"
    "p4p_clean.py [-?] [-l low_threshold] [-h high_threshold]\n"
    "[-t to_address] [-m mailserver] [-a] cache_path\n"
    "\n"
    "-? display this message\n"
    "-l low_threshold min % of free space on cache drive before cleanup starts\n"
    "-h high_threshold min % of free space on cache drive after script is done\n"
    "-t to_address comma separated list of email addresses to send results to\n"
    "-m mailserver name of smtp server to use\n"
    "-a always send mail, even if no action was taken\n"
    "cache_path path to the root of the proxy cache"
)
# Parse the command line. A command line that cannot be used prints the usage
# text and exits with status 1; an explicit request for help (-?) exits with
# status 0. The -v option is accepted but has no effect.
try:
    opts, args = getopt.getopt(sys.argv[1:], "?l:h:t:m:av")
except getopt.GetoptError:
    print(usage_string)
    sys.exit(1)
# Handle -? before looking at the positional argument, so that asking for
# help without giving a cache_path is not treated as an error.
for opt, opt_arg in opts:
    if opt == "-?":
        print(usage_string)
        sys.exit(0)
if len(args) != 1:
    print(usage_string)
    sys.exit(1)
try:
    for opt, opt_arg in opts:
        if opt == "-l":
            free_space_low_threshold = int(opt_arg)
        elif opt == "-h":
            free_space_high_threshold = int(opt_arg)
        elif opt == "-t":
            to_address = opt_arg
        elif opt == "-m":
            mailserver = opt_arg
        elif opt == "-a":
            always_mail = True
except ValueError:
    # A threshold that is not a whole number: explain instead of crashing.
    print(usage_string)
    sys.exit(1)
cache_root = args[0]
# Command line parsed, now get to work
def _CollectCacheFiles(cache_path):
    """Return a list of (atime, size, pathname) tuples, one per file below cache_path."""
    found = []
    for root, dirnames, filenames in os.walk(cache_path):
        for filename in filenames:
            pathname = os.path.join(root, filename)
            try:
                st = os.stat(pathname)
            except OSError:
                # A single file that cannot be examined (for instance because
                # it was removed after the directory was listed) should not
                # abort the whole run.
                Write(" Error examining " + pathname)
                continue
            found.append((st.st_atime, st.st_size, pathname))
    return found


def _RemoveOldestFiles(files, size_target):
    """Delete files in list order until at least size_target bytes are removed.

    files       -- list of (atime, size, pathname) tuples, oldest first.
    size_target -- number of bytes to free.

    Returns a tuple (number of files removed, number of bytes removed).
    """
    removed_count = 0
    removed_size = 0
    for atime, size, pathname in files:
        if removed_size >= size_target:
            break
        try:
            os.remove(pathname)
        except OSError:
            # Report the failure and go on with the next file; trying the
            # same file again would never terminate.
            Write(" Error deleting " + pathname)
            continue
        removed_count = removed_count + 1
        removed_size = removed_size + size
    return removed_count, removed_size


try:
    Write("Clean Cache operation started.")
    disk_total, disk_free, disk_free_factor = GetDiskInfo(cache_root)
    Write(" Current disk space :")
    Write(" Total space = " + GetMB(disk_total))
    Write(" Free space = " + GetMB(disk_free))
    Write(" Free perc. = " + str(disk_free_factor) + "%.")
    if disk_free_factor < free_space_low_threshold:
        Write(" Free percentage lower than min threshold of " + str(free_space_low_threshold) + "%")
        Write(" Processing files in " + cache_root)
        files = _CollectCacheFiles(cache_root)
        Write(" Found " + str(len(files)) + " files.")
        if files:
            Write(" Sorting files by access time")
            # Least recently accessed files first.
            files.sort(key=lambda file_info: file_info[0])
            # Number of bytes that must be freed to reach the high threshold.
            disk_free_target = free_space_high_threshold * (float(disk_total) / 100)
            size_target = disk_free_target - disk_free
            Write(" Starting cache cleanup, trying to remove " + GetMB(int(size_target)) + ".")
            removed_count, removed_size = _RemoveOldestFiles(files, size_target)
            Write(" Removed " + str(removed_count) + " files for a total of " + GetMB(removed_size) + ".")
            disk_total, disk_free, disk_free_factor = GetDiskInfo(cache_root)
            Write(" New disk space :")
            Write(" Total space = " + GetMB(disk_total))
            Write(" Free space = " + GetMB(disk_free))
            Write(" Free perc. = " + str(disk_free_factor) + "%.")
            result = cleaned
    else:
        Write(" Free disk space is above low threshold of " + str(free_space_low_threshold) +"%, nothing to do.")
    Write("Clean Cache operation done.")
except:
    # Deliberately catches everything, including the SystemExit raised by
    # GetDiskInfo on an unsupported platform, so that the failure still ends
    # up in the log and in the mail sent below.
    Write(" Exception " + repr(FormatExceptionInfo()) + " occured, aborting.")
    result = error
# We're done, send the mail and exit
SendMail()