#!/usr/bin/python
#
# This is a beta script!
# The author takes no responsibility for the results of using this tool.
#
# Motivation:
# If you are short of storage space on your Perforce server because of big binary files
# then sooner or later you get to consider using single head revisions.
# Two things I don't like about single head revisions:
# * When you integrate a file with a single head revision then the Perforce server makes
# a physical copy even if the file hasn't changed in the integration.
# So it still wastes storage space if you work with branches.
# It doesn't happen with normal files (with single head revisions not switched on).
# * There is no way to keep particular revisions if you need them (like milestone builds, etc).
# Old revisions always get deleted when new revisions are checked in.
# You could have a branch for each revision that you need to keep (then those revisions
# would be kept because of the behaviour I described in the previous point).
# But that would swell the integration metadata (especially if you've got
# a massive amount of files) and the depot file hierarchy.
# I needed a solution that
# * made it possible to keep
# * labelled revisions (important revisions are labelled anyway)
# * recently checked in modifications and the preceding revision
# So that we avoid removing revisions that would potentially be labelled shortly.
# * without wasting storage space for unneeded revisions and redundant copies.
#
#
# This script is a more sophisticated alternative for using single head revisions.
# Run this script when you want to get rid of unneeded binary revisions, or you just want
# to get information on how much storage space is occupied by unneeded revisions at that moment.
# See the configuration section for details.
#
# Robert Kovacs (rkovacs7878@gmail.com)
#
###############################
#### CONFIGURATION SECTION ####
# Connection settings for the Perforce server this script will prune.
P4PORT = "phoebe:1666"
P4USER = "Robert.Kovacs"
P4PASSWD = "Password1"
# Head revisions are always kept
LABELLED_NEEDED = True # If True then no labelled revisions will be removed
SIZE_LIMIT = 1000 # No revisions of smaller size (in bytes) will be removed
AGE_LIMIT = 7 * 24 * 60 * 60 # As seconds. If greater than 0 then no revisions newer than this age will be removed. The latest one of the older revisions will not be removed either.
MODE = 0 # 0 - Report
# Unneeded revisions will be listed but not removed
# 1 - Bucket (thanks Sven for the great tool!)
# Unneeded revisions will be bucketed using p4bucket (//guest/sven_erik_knop/p4bucket/...)
# Before you use this option:
# * You need to understand p4bucket. Read the documentation!
# * Copy p4bucket.py to the same directory with unused_binary_revisions.py
# * Initialise p4bucket from there
# * Create a bucket for the unneeded binary revisions
# * Uncomment the p4bucket importing below
# * Set the BUCKET value below
# 2 - Obliterate
# Unneeded revisions will be obliterated.
# BE VERY CAREFUL WITH THIS ONE, THERE IS NO WAY TO UNDO IT!
# You need to connect with a user that has permission to obliterate.
#import p4bucket # Uncomment this line if you want to use p4bucket
# Read MODE == 1 for more information
BUCKET = "my_bucket" # Name of the bucket to archive revisions into.
# Only used if MODE == 1
#### ####
###############################
import P4
import socket
import platform
import string
import datetime
import sys
import os
# Global state shared by all the functions below.
p4 = None  # P4.P4 connection object, set by connect_perforce()
server_root = None  # server root path with trailing separator, set by query_server_root()
revisions = { } # key: <p4_path>#<revision_number>, value: dict with 'lbr_filename' (librarian file full path) and 'mod_time' (head change epoch seconds)
lbr_files = { } # key: lbr filename with full path, value is True if this revision is needed
def initialise():
    """Connect to Perforce and locate/validate the server root path.

    Must run before any other function: everything below relies on the
    global ``p4`` connection and ``server_root``.
    """
    connect_perforce()
    query_server_root()
def connect_perforce():
    """Open the global Perforce connection using the configured credentials."""
    print("Connecting Perforce...")
    global p4
    connection = P4.P4()
    connection.port = P4PORT
    connection.user = P4USER
    connection.password = P4PASSWD
    connection.connect()
    p4 = connection
def query_server_root():
    """Record the server root path; abort unless running on the server host.

    The script inspects librarian files directly on disk, so it only works
    when executed on the machine that hosts the Perforce server root.
    Raises a plain Exception otherwise.
    """
    print("Querying server root path...")
    global server_root
    server_info = p4.run_info()[0]
    # Resolve both the server's address and this host's address to IPs.
    server_host = server_info['serverAddress'].split(':')[0]
    server_ip = socket.gethostbyname(server_host)
    local_ip = socket.gethostbyname(platform.uname()[1])
    if server_ip not in ("127.0.0.1", local_ip):
        raise Exception("Run this script on the Perforce server!")
    server_root = server_info["serverRoot"]
    # Normalise to a trailing separator so librarian paths concatenate cleanly.
    if server_root[-1:] not in ('/', '\\'):
        server_root += '/'
def enumerate_files():
    """Scan all local depots and collect every non-delta binary revision.

    Populates the global ``revisions`` dict, mapping
    ``<depot_path>#<revision>`` to a dict with the librarian file path
    ('lbr_filename') and the head modification time ('mod_time').
    Skips non-binary files, +D (delta-stored) files, entries without a
    librarian file, and revisions already archived (attr-archiveDate set,
    e.g. bucketed by p4bucket).
    """
    print("Processing depots...")
    global server_root
    global revisions
    for depot in p4.run_depots():
        if depot['type'] != "local":
            continue
        print(" " + depot['name'])
        # depot['map'] ends with "/..." -- strip the trailing dots to get
        # this depot's physical librarian root directory.
        lbr_path = depot['map'].rstrip('.')
        for file in p4.run_fstat("-Oasfc", "//" + depot['name'] + "/..."):
            # 'in' replaces Python-2-only dict.has_key().
            if ('lbrType' in file
                    and file['lbrType'].startswith("binary")
                    and file['lbrType'].find("D") < 0
                    and 'lbrFile' in file
                    and "attr-archiveDate" not in file):
                revisions[file['depotFile'] + "#" + file['headRev']] = {
                    # lbrFile starts with "//<depot>/"; replace that prefix
                    # (len(name) + 3 chars) with the depot's map path.
                    'lbr_filename': lbr_path + file['lbrFile'][(len(depot['name']) + 3):] + ",d/" + file['lbrRev'] + ".gz",
                    # int() replaces string.atoi(), which was removed in Python 3.
                    'mod_time': int(file['headTime']),
                }
    print(" " + str(len(revisions.keys())) + " binary file revisions found.")
def collect_needed_lbr_files():
    """Initialise the librarian-file map, then apply each keep rule in turn."""
    global lbr_files
    # Start with every librarian file assumed unneeded (False); the
    # mark_* passes below flip entries to True when a rule applies.
    for revision in revisions.values():
        lbr_files[revision['lbr_filename']] = False
    mark_head_revisions()
    mark_labelled_revisions()
    mark_recently_modified_files()
    mark_small_files()
def mark_head_revisions():
    """Mark the head (highest-numbered) revision of every file as needed.

    Head revisions are always kept regardless of any other rule.
    """
    print("Processing head revisions...")
    # Highest revision number seen so far for each depot file.
    p4_files = { }
    for revision in revisions.keys():
        tokens = revision.split('#')
        p4_filename = tokens[0]
        revision_number = int(tokens[1])
        # 'not in' replaces Python-2-only dict.has_key().
        if p4_filename not in p4_files or revision_number > p4_files[p4_filename]:
            p4_files[p4_filename] = revision_number
    rev_count = 0
    for (p4_filename, revision_number) in p4_files.items():
        lbr_files[revisions[p4_filename + "#" + str(revision_number)]['lbr_filename']] = True
        rev_count = rev_count + 1
    print(" " + str(rev_count) + " revisions marked as head revision.")
def mark_labelled_revisions():
    """Mark every labelled binary revision as needed.

    Does nothing when LABELLED_NEEDED is False.  Counts only revisions
    not already marked by an earlier pass.
    """
    if not LABELLED_NEEDED:
        return
    print("Processing labels...")
    rev_count = 0
    labels = p4.run_labels()
    for label in labels:
        print(" " + label['label'])
        labelled_revisions = p4.run_files("//...@" + label['label'])
        for labelled_revision in labelled_revisions:
            key = labelled_revision['depotFile'] + "#" + labelled_revision['rev']
            # 'in' replaces Python-2-only dict.has_key().
            if key in revisions and not lbr_files[revisions[key]['lbr_filename']]:
                lbr_files[revisions[key]['lbr_filename']] = True
                rev_count = rev_count + 1
    print(" Further " + str(rev_count) + " revisions marked as labelled.")
def mark_recently_modified_files():
    """Mark revisions newer than AGE_LIMIT, plus each one's predecessor.

    Keeping the immediately preceding revision avoids deleting something
    that might still get labelled shortly after a fresh check-in (see the
    AGE_LIMIT comment in the configuration section).
    """
    print("Processing recently modified files...")
    # Current time as seconds since the Unix epoch, derived from UTC.
    now = datetime.datetime.utcnow() - datetime.datetime(1970, 1, 1)
    earliest = now.days * 24 * 60 * 60 + now.seconds - AGE_LIMIT
    rev_count = 0
    for (filename, revision) in revisions.items():
        if revision['mod_time'] > earliest:
            if not lbr_files[revision['lbr_filename']]:
                lbr_files[revision['lbr_filename']] = True
                rev_count = rev_count + 1
            # Also keep the revision right before this recent one.
            tokens = filename.split('#')
            previous_revision = tokens[0] + "#" + str(int(tokens[1]) - 1)
            # 'in' replaces Python-2-only dict.has_key().
            if previous_revision in revisions and not lbr_files[revisions[previous_revision]['lbr_filename']]:
                lbr_files[revisions[previous_revision]['lbr_filename']] = True
                rev_count = rev_count + 1
    print(" Further " + str(rev_count) + " revisions marked as recently modified.")
def mark_small_files():
    """Mark librarian files smaller than SIZE_LIMIT bytes as needed.

    Librarian files missing from disk are also marked (so they are never
    reported or removed) and counted separately.  Does nothing when
    SIZE_LIMIT is 0 or negative.
    """
    if SIZE_LIMIT > 0:
        print("Skipping small files...")
        missing_count = 0
        small_count = 0
        for lbr_filename, needed in lbr_files.items():
            if needed:
                continue
            if not os.path.exists(lbr_filename):
                # Not on disk -- keep it marked so later passes don't stat it.
                lbr_files[lbr_filename] = True
                missing_count += 1
            elif os.path.getsize(lbr_filename) < SIZE_LIMIT:
                lbr_files[lbr_filename] = True
                small_count += 1
        print(" Further " + str(small_count) + " revisions marked as smaller than " + str(SIZE_LIMIT) + " bytes.")
        print(" " + str(missing_count) + " librarian files not found.")
def sum_size():
    """Report the count and total on-disk size of all unneeded librarian files."""
    print("Calculating total file size...")
    unneeded = [name for name, needed in lbr_files.items() if not needed]
    total_size = sum(os.path.getsize(name) for name in unneeded)
    print(" Unneeded " + str(total_size) + " bytes in " + str(len(unneeded)) + " files.")
def print_unneeded_revisions():
    """List every revision whose librarian file was not marked as needed."""
    print("Unneeded revisions:")
    for filename, revision in revisions.items():
        if lbr_files[revision['lbr_filename']]:
            continue
        print(" " + filename)
def bucket_unneeded_revisions():
    """Archive every unneeded revision into BUCKET via p4bucket (MODE == 1).

    Requires the p4bucket import in the configuration section to be
    uncommented and the bucket to exist.
    """
    print("Bucketing revisions:")
    bucket = p4bucket.P4Bucket(p4bucket.CONFIG_FILE)
    for filename, revision in revisions.items():
        if lbr_files[revision['lbr_filename']]:
            continue
        print(" " + filename)
        bucket.run("archive", ["-b", BUCKET, "-s", filename])
def obliterate_unneeded_revisions():
    """Permanently obliterate every unneeded revision (MODE == 2).

    Irreversible: p4 obliterate -y removes both metadata and librarian data.
    """
    print("Obliterating revisions:")
    for filename, revision in revisions.items():
        if lbr_files[revision['lbr_filename']]:
            continue
        print(" " + filename)
        p4.run_obliterate("-y", filename)
if __name__ == '__main__':
    initialise()
    enumerate_files()
    collect_needed_lbr_files()
    sum_size()
    # Dispatch on the configured MODE: 0 = report, 1 = bucket, 2 = obliterate.
    actions = {
        0: print_unneeded_revisions,
        1: bucket_unneeded_revisions,
        2: obliterate_unneeded_revisions,
    }
    action = actions.get(MODE)
    if action is not None:
        action()
# Change history (from the Perforce Workshop):
# | # | Change | User | Description |
# |---|--------|------|-------------|
# | #8 | 7699 | Robert Kovacs | More output |
# | #7 | 7636 | Robert Kovacs | unneeded_binary_revisions: not removing bucketed revisions |
# | #6 | 7635 | Robert Kovacs | unneeded_binary_revisions: must run on the server machine |
# | #5 | 7634 | Robert Kovacs | unneeded_binary_revisions: obliterating, p4bucket |
# | #4 | 7633 | Robert Kovacs | unneeded_binary_revisions: AGE_LIMIT modified. Keeps latest older revision as well. |
# | #3 | 7632 | Robert Kovacs | unneeded_binary_revisions: skipping +S and +D files |
# | #2 | 7631 | Robert Kovacs | unneeded_binary_files: size limit added |
# | #1 | 7630 | Robert Kovacs | unneeded_binary_revisions: first development version |