#!/usr/bin/env python3
################################################################################
#
# Copyright (c) 2017, Perforce Software, Inc. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# 1. Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#
# 2. Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED. IN NO EVENT SHALL PERFORCE SOFTWARE, INC. BE LIABLE FOR ANY
# DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
# THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#
# = Date
#
#   $Date: 2017/07/28 $
#
# = Description
#
#   Collect information about workspaces, task streams, labels and output them
#   in csv format:
#     Client,name,owner,last_update_date,last_access_date,nb_synced_files,build?
#     Task,name,owner,last_update_date,last_access_date,nb_of_tmp_files
#     Label,name,owner,last_update_date,last_access_date,nb_of_tagged_files
#
#   The script will create a journal patch (jnl.patch.gz) to remove db.have
#   records for nonexistent workspace specs and db.label records for
#   nonexistent label specs.
#
# = Usage
#
#   oldClientsTasksLabels.py a_checkpoint|a_checkpoint.gz
#
################################################################################

from __future__ import print_function

import re
import time
import sys
import gzip
import mimetypes
import os
import math

# Timestamp layout used for every date column of the report.
_TIME_FORMAT = "%Y/%m/%d %H:%M:%S"
# Owner reported for entries that were only seen through their records.
_UNKNOWN_OWNER = b"unknown"
# Journal patch written to the current working directory.
_PATCH_FILENAME = "jnl.patch.gz"

# The checkpoint is read in binary mode, so every pattern is a bytes pattern.
# In the db.domain patterns 99, 115 and 108 are the ASCII codes of 'c', 's'
# and 'l'; the patterns use them to select client, stream and label specs.
# Groups 2, 3 and 4 of each spec pattern are owner, update time, access time.
_CLIENT_SPEC_RE = re.compile(b'@pv@ [0-9]* @db.domain@ @(.*?)@ 99 @.*?@ @.*?@ @.*?@ @.*?@ @(.*?)@ ([0-9]*) ([0-9]*) [0-9]* @(.*)')
_HAVE_RE = re.compile(b'@pv@ [0-9]* @db.have@ @//(.*?)/.*')
_HAVE_RP_RE = re.compile(b'@pv@ [0-9]* @db.have.rp@ @//(.*?)/.*')
_STREAM_RE = re.compile(b'@pv@ [0-9]* @db.stream@ @(//.*?/.*?)@ @.*@ @.*@ ([0-9]) ')
_STREAM_SPEC_RE = re.compile(b'@pv@ [0-9]* @db.domain@ @(//.*?/.*?)@ 115 @@ @@ @@ @@ @(.*?)@ ([0-9]*) ([0-9]*) ')
_TASK_RE = re.compile(b'@pv@ [0-9]* @db.revtx@ @(//.*?/.*?)/.*?@')
_LABEL_SPEC_RE = re.compile(b'@pv@ [0-9]* @db.domain@ @(.*?)@ 108 @@ @@ @@ @@ @(.*?)@ ([0-9]*) ([0-9]*) ')
_LABEL_RE = re.compile(b'@pv@ [0-9]* @db.label@ @(.*?)@.*')
_STOP_RE = re.compile(b'.*@db.revcx@.*')


def cmdUsage():
    """Exit the program with the command-line usage message."""
    sys.exit("Usage: oldClientsTasksLabels.py a_checkpoint|a_checkpoint.gz")


def _formatTime(seconds):
    """Return *seconds* since the epoch as a local-time report timestamp."""
    return time.strftime(_TIME_FORMAT, time.localtime(float(seconds)))


def _getEntry(table, name, template):
    """Return table[name], first creating it as a copy of *template*."""
    entry = table.get(name)
    if entry is None:
        entry = table[name] = dict(template)
    return entry


def _applySpec(entry, match):
    """Copy owner and dates from a spec regex *match* into *entry*."""
    entry["owner"] = match.group(2)
    entry["update"] = _formatTime(match.group(3))
    entry["access"] = _formatTime(match.group(4))
    entry["hasSpec"] = True


class _JournalPatch(object):
    """Gzip journal patch that is only created once a record is added."""

    def __init__(self, path):
        self._path = path
        self._out = None

    def addDelete(self, line):
        """Append checkpoint record *line* rewritten as a delete record."""
        if self._out is None:
            self._out = gzip.open(self._path, "wb")
        # Only the leading operation marker is rewritten.  A global replace
        # would also alter data: a file name containing "@pv@" is stored as
        # "@@pv@@" and would be corrupted.
        self._out.write(line.replace(b"@pv@", b"@dv@", 1))

    def close(self):
        """Close the patch file if it was ever opened."""
        if self._out is not None:
            self._out.close()
            self._out = None


def _scanCheckpoint(ckp, patch):
    """Read checkpoint lines from *ckp* and tally clients, streams, labels.

    db.have / db.have.rp / db.label records whose client or label has no
    spec (yet) are added to *patch* as delete records.  This relies on the
    db.domain records preceding those records in the checkpoint.
    Scanning stops at the first db.revcx record.

    Returns the tuple (clients, streams, labels); each maps a name (bytes)
    to a dict with "owner", "update", "access" and a counter ("have", "rev"
    or "tag").
    """
    epoch = _formatTime(0.0)
    placeholder = {"owner": _UNKNOWN_OWNER, "update": epoch, "access": epoch,
                   "hasSpec": False}
    clientTemplate = dict(placeholder, **{"have": 0, "db.have.rp": "False"})
    streamTemplate = dict(placeholder, rev=0)
    labelTemplate = dict(placeholder, tag=0)

    clients = {}
    streams = {}
    labels = {}

    for line in ckp:
        match = _CLIENT_SPEC_RE.search(line)
        if match:
            _applySpec(_getEntry(clients, match.group(1), clientTemplate), match)
            continue

        match = _HAVE_RE.search(line)
        replicaHave = False
        if not match:
            match = _HAVE_RP_RE.search(line)
            replicaHave = match is not None
        if match:
            entry = _getEntry(clients, match.group(1), clientTemplate)
            if replicaHave:
                entry["db.have.rp"] = "True"
            # Test the explicit flag rather than the owner name, so a real
            # user called "unknown" does not get his records deleted.
            if not entry["hasSpec"]:
                patch.addDelete(line)
            entry["have"] += 1
            continue

        match = _STREAM_RE.search(line)
        if match:
            _getEntry(streams, match.group(1), streamTemplate)["type"] = match.group(2)
            continue

        match = _STREAM_SPEC_RE.search(line)
        if match:
            _applySpec(_getEntry(streams, match.group(1), streamTemplate), match)
            continue

        match = _TASK_RE.search(line)
        if match:
            _getEntry(streams, match.group(1), streamTemplate)["rev"] += 1
            continue

        match = _LABEL_SPEC_RE.search(line)
        if match:
            _applySpec(_getEntry(labels, match.group(1), labelTemplate), match)
            continue

        match = _LABEL_RE.search(line)
        if match:
            entry = _getEntry(labels, match.group(1), labelTemplate)
            if not entry["hasSpec"]:
                patch.addDelete(line)
            entry["tag"] += 1
            continue

        if _STOP_RE.search(line):
            # stop, no need to look further
            break

    return clients, streams, labels


def _writeReport(clients, streams, labels):
    """Write the csv report for the collected tables to standard output."""
    # In order to output bytes to stdout, sys.stdout.buffer must be used in
    # Python 3 but this is not compatible with Python 2, so to make it
    # compatible:
    output = getattr(sys.stdout, 'buffer', sys.stdout)

    def emit(kind, name, entry, extra):
        # Names and owners are raw bytes from the checkpoint; the remaining
        # columns are text.  The flush keeps the two layers in order.
        output.write(kind + b"," + name + b"," + entry["owner"])
        sys.stdout.write("," + entry["update"] + "," + entry["access"] + ","
                         + ",".join(extra) + "\n")
        sys.stdout.flush()

    for (name, entry) in clients.items():
        emit(b"Client", name, entry, [str(entry["have"]), entry["db.have.rp"]])

    for (name, entry) in streams.items():
        # Only streams whose db.stream type field is 4 are reported as Task.
        if entry.get("type") == b"4":
            emit(b"Task", name, entry, [str(entry["rev"])])

    for (name, entry) in labels.items():
        emit(b"Label", name, entry, [str(entry["tag"])])


def main():
    """Analyse the checkpoint named by sys.argv[1] and print the csv report.

    The checkpoint is opened through gzip when its name looks gzip-encoded.
    Output lines are:
      Client,name,owner,update,access,nb_have_records,has_db.have.rp_records
      Task,name,owner,update,access,nb_db.revtx_records
      Label,name,owner,update,access,nb_db.label_records
    A "jnl.patch.gz" file is created in the current directory when records
    of clients or labels without a spec are found.  Exits with the usage
    message when no checkpoint argument is given.
    """
    if len(sys.argv) < 2:
        cmdUsage()

    filename = sys.argv[1]
    if mimetypes.guess_type(filename)[1] == 'gzip':
        opener = gzip.open
    else:
        opener = open

    with opener(filename, "rb") as ckp:
        print("Processing, it may take several hours...\n", end='', file=sys.stderr)
        sys.stderr.flush()

        patch = _JournalPatch(_PATCH_FILENAME)
        try:
            clients, streams, labels = _scanCheckpoint(ckp, patch)
        finally:
            # Close the patch even when the scan fails part-way.
            patch.close()

    _writeReport(clients, streams, labels)


if __name__ == '__main__':
    main()