#!/usr/bin/python # # This is a beta script! # The author takes no responsibility for the results of using this tool. # # Motivation: # If you are short of storage space on your Perforce server because of big binary files # then sooner or later you get to consider using single head revisions. # Two things I don't like about single head revisions: # * When you integrate a file with a single head revision then the Perforce server makes # a physical copy even if the file hasn't changed in the integration. # So it still wastes storage space if you work with branches. # It doesn't happen with normal files (with single head revisions not switched on). # * There is no way to keep particular revisions if you need them (like milestone builds, etc). # Old revisions always get deleted when new revisions are checked in. # You could have a branch for each revision that you need to keep (then those revisions # would be kept because of the behaviour I described in the previous point). # But that would swell the integration metadata (especially if you've got # a massive amount of files) and the depot file hierarchy. # I needed a solution that # * made it possible to keep # * labelled revisions (important revisions are labelled anyway) # * recently checked in modifications and the preceding revision # So that we avoid removing revisions that would potentially be labelled shortly. # * without wasting storage space for unneeded revisions and redundant copies. # # # This script is a more sophisticated alternative for using single head revisions. # Run this script when you want to get rid of unneeded binary revisions, or you just want # to get information on how much storage space is occupied by unneeded revisions at that moment. # See the configuration section for details. 
#
# Robert Kovacs (rkovacs7878@gmail.com)
#
###############################
#### CONFIGURATION SECTION ####

# Perforce connection settings.
# NOTE(review): credentials are stored in plain text here - prefer P4TICKETS
# or environment variables over committing a password to source control.
P4PORT = "phoebe:1666"
P4USER = "Robert.Kovacs"
P4PASSWD = "Password1"

# Head revisions are always kept.

# When True, labelled revisions are never removed.
LABELLED_NEEDED = True

# Revisions smaller than this many bytes are never removed.
SIZE_LIMIT = 1000

# Age threshold in seconds. When greater than 0, revisions newer than this
# are never removed, and neither is the latest revision older than it.
AGE_LIMIT = 7 * 24 * 60 * 60

# What to do with unneeded revisions:
#   0 - Report:      list them, remove nothing.
#   1 - Bucket:      archive them with p4bucket
#                    (//guest/sven_erik_knop/p4bucket/...) - thanks Sven for
#                    the great tool! Before using this option:
#                      * understand p4bucket - read its documentation,
#                      * copy p4bucket.py next to this script,
#                      * initialise p4bucket from there,
#                      * create a bucket for the unneeded revisions,
#                      * uncomment the p4bucket import below,
#                      * set BUCKET below.
#   2 - Obliterate:  remove them permanently.
#                    BE VERY CAREFUL - THERE IS NO WAY TO UNDO THIS, and the
#                    connecting user needs permission to obliterate.
MODE = 0

#import p4bucket # Uncomment this line if you want to use p4bucket (MODE == 1).

# Name of the bucket to archive revisions into.
BUCKET = "my_bucket"
# Only used if MODE == 1
####                       ####
###############################

import P4
import socket
import platform
import string
import datetime
import sys
import os

p4 = None           # P4.P4 connection, set up by connect_perforce()
server_root = None  # server root path with a trailing separator

# key:   "<depot_path>#<revision_number>"
# value: {'lbr_filename': librarian file with full path,
#         'mod_time':     head revision time as epoch seconds}
revisions = { }
# key:   librarian filename with full path
# value: True if that librarian file is still needed
lbr_files = { }


def initialise():
    """Connect to Perforce and locate the server's storage root."""
    connect_perforce()
    query_server_root()


def connect_perforce():
    """Open the P4 connection using the credentials from the configuration section."""
    print("Connecting Perforce...")
    global p4
    p4 = P4.P4()
    p4.port = P4PORT
    p4.user = P4USER
    p4.password = P4PASSWD
    p4.connect()


def query_server_root():
    """Determine the server root path and verify we run on the server machine.

    The script examines librarian files directly on disk, so it must execute
    on the host that runs the Perforce server.

    Raises:
        Exception: if the resolved server address is neither localhost nor
            this machine's own address.
    """
    print("Querying server root path...")
    server_info = p4.run_info()[0]
    server_host = server_info['serverAddress'].split(':')[0]
    server_ip = socket.gethostbyname(server_host)
    host_name = platform.uname()[1]
    host_ip = socket.gethostbyname(host_name)
    if server_ip != "127.0.0.1" and server_ip != host_ip:
        raise Exception("Run this script on the Perforce server!")
    global server_root
    server_root = server_info["serverRoot"]
    # Normalise to a trailing separator so path concatenation below is safe.
    if not server_root.endswith('/') and not server_root.endswith('\\'):
        server_root += '/'


def enumerate_files():
    """Collect the head revision record of every binary file in local depots.

    Fills the global 'revisions' map. Skips files stored as deltas (+D type
    modifier) and revisions already archived by p4bucket (recognised by the
    "archiveDate" attribute).
    """
    print("Processing depots...")
    global server_root
    global revisions
    for depot in p4.run_depots():
        if depot['type'] == "local":
            print(" " + depot['name'])
            lbr_path = depot['map'].rstrip('.')
#            if not lbr_path.startswith(server_root):
#                lbr_path = server_root + lbr_path
            # 'fstat' instead of 'file': avoid shadowing the builtin.
            for fstat in p4.run_fstat("-Oasfc", "//" + depot['name'] + "/..."):
                # dict.has_key() was removed in Python 3; 'in' works in 2 and 3.
                # A "D" in lbrType marks delta storage - those are skipped.
                if ('lbrType' in fstat
                        and fstat['lbrType'].startswith("binary")
                        and fstat['lbrType'].find("D") < 0
                        and 'lbrFile' in fstat
                        and "attr-archiveDate" not in fstat):
                    revisions[fstat['depotFile'] + "#" + fstat['headRev']] = {
                        # Librarian path layout: <depot map><path>,d/<rev>.gz
                        'lbr_filename': lbr_path + fstat['lbrFile'][(len(depot['name']) + 3):] + ",d/" + fstat['lbrRev'] + ".gz",
                        # string.atoi() was removed in Python 3; int() is equivalent.
                        'mod_time': int(fstat['headTime'])
                    }
    print(" " + str(len(revisions.keys())) + " binary file revisions found.")


def collect_needed_lbr_files():
    """Decide which librarian files must be kept.

    Initialises every entry of 'lbr_files' to False (unneeded), then lets the
    mark_* passes flip the ones that must be retained to True.
    """
    global lbr_files
    for (filename, revision) in revisions.items():
        lbr_files[revision['lbr_filename']] = False
    mark_head_revisions()
    mark_labelled_revisions()
    mark_recently_modified_files()
    mark_small_files()


def mark_head_revisions():
    """Mark the highest-numbered revision of each depot file as needed."""
    print("Processing head revisions...")
    p4_files = { }  # key: depot path, value: highest revision number seen
    for revision in revisions.keys():
        tokens = revision.split('#')
        p4_filename = tokens[0]
        revision_number = int(tokens[1])
        if p4_filename not in p4_files or revision_number > p4_files[p4_filename]:
            p4_files[p4_filename] = revision_number
    rev_count = 0
    for (p4_filename, revision_number) in p4_files.items():
        lbr_files[revisions[p4_filename + "#" + str(revision_number)]['lbr_filename']] = True
        rev_count = rev_count + 1
    print(" " + str(rev_count) + " revisions marked as head revision.")


def mark_labelled_revisions():
    """Mark every revision referenced by any label as needed (if LABELLED_NEEDED)."""
    if LABELLED_NEEDED:
        print("Processing labels...")
        rev_count = 0
        labels = p4.run_labels()
        for label in labels:
            print(" " + label['label'])
            labelled_revisions = p4.run_files("//...@" + label['label'])
            for labelled_revision in labelled_revisions:
                key = labelled_revision['depotFile'] + "#" + labelled_revision['rev']
                # Only count revisions not already marked by an earlier pass.
                if key in revisions and not lbr_files[revisions[key]['lbr_filename']]:
                    lbr_files[revisions[key]['lbr_filename']] = True
                    rev_count = rev_count + 1
        print(" Further " + str(rev_count) + " revisions marked as labelled.")


def mark_recently_modified_files():
    """Mark revisions newer than AGE_LIMIT, plus each one's predecessor.

    Keeping the preceding revision ensures that the newest revision older
    than the age limit survives as well.
    """
    print("Processing recently modified files...")
    # Current time as seconds since the Unix epoch (UTC).
    now = datetime.datetime.utcnow() - datetime.datetime(1970, 1, 1)
    earliest = now.days * 24 * 60 * 60 + now.seconds - AGE_LIMIT
    rev_count = 0
    for (filename, revision) in revisions.items():
        if revision['mod_time'] > earliest:
            if not lbr_files[revision['lbr_filename']]:
                lbr_files[revision['lbr_filename']] = True
                rev_count = rev_count + 1
            tokens = filename.split('#')
            previous_revision = tokens[0] + "#" + str(int(tokens[1]) - 1)
            if previous_revision in revisions and not lbr_files[revisions[previous_revision]['lbr_filename']]:
                lbr_files[revisions[previous_revision]['lbr_filename']] = True
                rev_count = rev_count + 1
    print(" Further " + str(rev_count) + " revisions marked as recently modified.")


def mark_small_files():
    """Mark librarian files below SIZE_LIMIT (and missing ones) as kept."""
    if SIZE_LIMIT > 0:
        print("Skipping small files...")
        missing_count = 0
        small_count = 0
        for (lbr_filename, needed) in lbr_files.items():
            if not needed:
                if not os.path.exists(lbr_filename):
                    # Librarian file not on disk - leave it alone, just report.
                    lbr_files[lbr_filename] = True
                    missing_count = missing_count + 1
                elif os.path.getsize(lbr_filename) < SIZE_LIMIT:
                    lbr_files[lbr_filename] = True
                    small_count = small_count + 1
        print(" Further " + str(small_count) + " revisions marked as smaller than " + str(SIZE_LIMIT) + " bytes.")
        print(" " + str(missing_count) + " librarian files not found.")


def sum_size():
    """Report the total disk size of the unneeded librarian files."""
    print("Calculating total file size...")
    file_count = 0
    total_size = 0
    for (lbr_filename, needed) in lbr_files.items():
        if not needed:
            file_count += 1
            total_size += os.path.getsize(lbr_filename)
    print(" Unneeded " + str(total_size) + " bytes in " + str(file_count) + " files.")


def print_unneeded_revisions():
    """MODE 0: list the unneeded revisions without touching them."""
    print("Unneeded revisions:")
    for (filename, revision) in revisions.items():
        if not lbr_files[revision['lbr_filename']]:
            print(" " + filename)


def bucket_unneeded_revisions():
    """MODE 1: archive the unneeded revisions into the configured p4bucket.

    Requires the p4bucket import in the configuration section to be
    uncommented and BUCKET to name an existing bucket.
    """
    print("Bucketing revisions:")
    bucket = p4bucket.P4Bucket(p4bucket.CONFIG_FILE)
    for (filename, revision) in revisions.items():
        if not lbr_files[revision['lbr_filename']]:
            print(" " + filename)
            bucket.run("archive", ["-b", BUCKET, "-s", filename])


def obliterate_unneeded_revisions():
    """MODE 2: obliterate the unneeded revisions. IRREVERSIBLE."""
    print("Obliterating revisions:")
    for (filename, revision) in revisions.items():
        if not lbr_files[revision['lbr_filename']]:
            print(" " + filename)
            p4.run_obliterate("-y", filename)


if __name__ == '__main__':
    initialise()
    enumerate_files()
    collect_needed_lbr_files()
    sum_size()
    if MODE == 0:
        print_unneeded_revisions()
    elif MODE == 1:
        bucket_unneeded_revisions()
    elif MODE == 2:
        obliterate_unneeded_revisions()
# | Change | User | Description | Committed | |
---|---|---|---|---|---|
#8 | 7699 | Robert Kovacs | More output | ||
#7 | 7636 | Robert Kovacs | unneeded_binary_revisions: not removing bucketed revisions | ||
#6 | 7635 | Robert Kovacs | unneeded_binary_revisions: must run on the server machine | ||
#5 | 7634 | Robert Kovacs | unneeded_binary_revisions: obliterating, p4bucket | ||
#4 | 7633 | Robert Kovacs |
unneeded_binary_revisions: AGE_LIMIT modified. Keeps latest older revision as well. |
||
#3 | 7632 | Robert Kovacs | unneeded_binary_revisions: skipping +S and +D files | ||
#2 | 7631 | Robert Kovacs | unneeded_binary_files: size limit added | ||
#1 | 7630 | Robert Kovacs | unneeded_binary_revisions: first development version |