#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# ==============================================================================
# Copyright and license info is available in the LICENSE file included with
# the Server Deployment Package (SDP), and also available online:
# https://swarm.workshop.perforce.com/projects/perforce-software-sdp/view/main/LICENSE
# ------------------------------------------------------------------------------
"""
NAME:
depot_verify_chunks.py
DESCRIPTION:
This script prints out a list of directories to verify that don't exceed a specific
size limit, e.g. 100M or 1G.
It is intended as an optional input to p4verify.py script or to be run using GNU parallel.
It walks down the tree of specified paths using the p4 dirs/p4 sizes -az commands to identify
appropriate sub-trees.
The output is a list of depot paths:
//depot/...
//big_depot/subdir1/...
//big_depot/subdir2/...
//big_depot/subdir3/...
//big_depot/*
You can run:
depot_verify_chunks.py -m 1G //some/path/... //another_depot/...
With parallel (see https://www.gnu.org/software/parallel/):
depot_verify_chunks.py -v ERROR -m 1G //some/path/... | parallel p4 verify -qzt {} >> verify.out
nohup depot_verify_chunks.py -v ERROR -m 1G //some/path/... | parallel p4 verify -qzt {} >> verify.out 2>&1 &
"""
# Python 2.7/3.3 compatibility.
from __future__ import print_function
import sys
import os
import textwrap
import argparse
import logging
import P4
import re
from collections import OrderedDict
# Base name of this script (directory and extension removed); used to build
# the default log file name.
script_name = os.path.basename(os.path.splitext(__file__)[0])

# Directory for log files: taken from $LOGS, else the '/p4/1/logs' default.
LOGDIR = os.getenv('LOGS', '/p4/1/logs')

# Fall back to a "log-" prefixed relative file name unless LOGDIR is a real
# directory.  isdir() rather than exists(): a regular file at that path would
# otherwise yield an unusable "<file>/<script>.log" log location.
DEFAULT_LOG_FILE = "log-%s.log" % script_name
if os.path.isdir(LOGDIR):
    DEFAULT_LOG_FILE = os.path.join(LOGDIR, "%s.log" % script_name)

# Defaults used by the command-line parser and the logger set-up.
DEFAULT_VERBOSITY = 'DEBUG'
LOGGER_NAME = 'verify_dir_list'
def parse_human_fmt(opt):
    """Parse a human readable size such as 1, 1K, 2M or 3.1G and return an int.

    The value is a decimal number optionally followed by exactly one unit
    letter (K, M, G or T, case-insensitive).  Units are powers of 1024.

    :param opt: size string, e.g. "100M".
    :return: the size as an int (fractions are truncated).
    :raises TypeError: if opt is not a string or is not a valid size, e.g.
        None, "", "K", "1KK" or "abc".
    """
    message = "Argument must be a human readable size number, e.g. 1, 1K, 2M, 3.1G"
    lower = getattr(opt, "lower", None)
    if lower is None:
        # None (option not supplied) or some other non-string value.
        raise TypeError(message)
    # Require at least one digit and at most one unit letter; the previous
    # pattern accepted "" and "1km", which then failed or mis-scaled.
    m = re.match(r"^([0-9]+\.?[0-9]*|\.[0-9]+)([kmgt]?)$", lower())
    if not m:
        raise TypeError(message)
    num = float(m.group(1))
    unit = m.group(2)
    if unit:
        # k -> 1024**1, m -> 1024**2, g -> 1024**3, t -> 1024**4
        num *= 1024.0 ** ("kmgt".index(unit) + 1)
    return int(num)
class depot_verify_chunks(object):
    """Split depot paths into sub-trees that do not exceed a size limit.

    See module doc string for details.  Construct with command-line style
    arguments, then call run() to print and return the list of chunk paths.
    """

    def __init__(self, *args, **kwargs):
        """Parse the arguments and set up logging.

        :param args: argument strings, as in sys.argv[1:].
        :param kwargs: accepted but not used.
        """
        self.parse_args(__doc__, args)

    def parse_args(self, doc, args):
        """Common parsing and setting up of args.

        Sets self.options (with max_size converted to an int number of
        bytes) and initialises self.logger.

        :param doc: text shown as the parser description (may be None).
        :param args: sequence of argument strings to parse.
        :raises TypeError: if --max-size is not a valid human readable size.
        """
        desc = textwrap.dedent(doc or "")
        parser = argparse.ArgumentParser(
            formatter_class=argparse.RawDescriptionHelpFormatter,
            description=desc,
            epilog="Copyright (c) 2008-2018 Perforce Software, Inc."
        )
        self.add_parse_args(parser)
        self.options = parser.parse_args(args=list(args))
        self.init_logger()
        self.logger.debug("Command Line Options: %s\n", self.options)
        self.options.max_size = parse_human_fmt(self.options.max_size)  # May raise error

    def add_parse_args(self, parser, default_log_file=None, default_verbosity=None):
        """Register this script's command-line arguments on parser.

        :param parser: argparse.ArgumentParser to add the arguments to.
        :param default_log_file: log file default; DEFAULT_LOG_FILE if not given.
        :param default_verbosity: verbosity default; DEFAULT_VERBOSITY if not given.
        """
        if not default_log_file:
            default_log_file = DEFAULT_LOG_FILE
        if not default_verbosity:
            default_verbosity = DEFAULT_VERBOSITY
        parser.add_argument('-p', '--port', default=None,
                            help="Perforce server port. Default: $P4PORT")
        parser.add_argument('-u', '--user', default=None, help="Perforce user. Default: $P4USER")
        parser.add_argument('-L', '--log', default=default_log_file, help="Default: " + default_log_file)
        # Required: there is no sensible default, and a missing value used to
        # crash later when the size was parsed.
        parser.add_argument('-m', '--max-size', required=True,
                            help="Max size of each depot chunk. Can be specified in K/M/G/T, e.g. 1G")
        parser.add_argument('path', nargs='*', help="Perforce depot path(s)")
        parser.add_argument('-v', '--verbosity',
                            nargs='?',
                            const="INFO",
                            default=default_verbosity,
                            choices=('DEBUG', 'WARNING', 'INFO', 'ERROR', 'FATAL'),
                            help="Output verbosity level. Default is: " + default_verbosity)

    def init_logger(self, logger_name=None):
        """Create self.logger, logging to the log file and to stderr.

        The file (self.options.log) is configured through logging.basicConfig
        with a detailed format at the chosen verbosity; the stderr handler
        shows the bare message for INFO and above.

        :param logger_name: logger name; LOGGER_NAME if not given.
        """
        if not logger_name:
            logger_name = LOGGER_NAME
        self.logger = logging.getLogger(logger_name)
        self.logger.setLevel(self.options.verbosity)
        logformat = '%(levelname)s %(asctime)s %(filename)s %(lineno)d: %(message)s'
        logging.basicConfig(format=logformat, filename=self.options.log, level=self.options.verbosity)
        # Loggers are process-wide singletons: only attach the stderr handler
        # once, otherwise a second instance would print every message twice.
        if not self.logger.handlers:
            formatter = logging.Formatter('%(message)s')
            ch = logging.StreamHandler(sys.stderr)
            ch.setLevel(logging.INFO)
            ch.setFormatter(formatter)
            self.logger.addHandler(ch)

    def get_chunks(self, results, startdir, max_size):
        """Record in results the chunk paths that cover startdir.

        If everything under startdir fits in max_size, the single path
        "startdir/..." is recorded.  Otherwise each sub-directory is processed
        recursively and "startdir/*" is recorded last, for the files directly
        in startdir.

        :param results: mapping updated in place; keys are chunk paths
            (insertion order is the output order when it is ordered).
        :param startdir: depot directory, with or without a trailing "/...".
        :param max_size: size limit in bytes.
        """
        if startdir.endswith("/..."):
            startdir = startdir[:-4]
        tree = "%s/..." % startdir
        sizes = self.p4.run_sizes("-az", tree)
        # Add up every record instead of trusting sizes[0]: this is right for
        # a single summary record and for one record per revision, and an
        # empty result counts as 0 rather than raising IndexError.
        # NOTE(review): presumably 'p4 sizes' without -s returns one record
        # per revision - consider adding -s so the server does the summing.
        total = sum(int(rec['fileSize']) for rec in sizes
                    if isinstance(rec, dict) and 'fileSize' in rec)
        if total <= max_size:
            results[tree] = 1
            return
        for d in self.p4.run_dirs('-D', "%s/*" % startdir):
            child = d['dir']
            if child == startdir:
                # Never recurse into the directory we are already splitting.
                continue
            self.get_chunks(results, child, max_size)
        # Files directly in startdir, listed once after all sub-directories.
        results["%s/*" % startdir] = 1

    def run(self):
        """Runs script.

        Connects to the server, computes the chunks for every requested path,
        prints them one per line and returns them.

        :return: list of chunk paths in output order.
        """
        self.p4 = P4.P4()
        self.p4.logger = self.logger
        if self.options.port:
            self.p4.port = self.options.port
        if self.options.user:
            self.p4.user = self.options.user
        self.p4.connect()
        results = OrderedDict()
        try:
            for path in self.options.path:
                self.get_chunks(results, path, int(self.options.max_size))
        finally:
            # Always release the server connection, even if a command failed.
            self.p4.disconnect()
        chunks = list(results)
        if chunks:
            # Avoid printing a lone blank line, which a consumer such as
            # 'parallel' would treat as an (empty) path.
            print("\n".join(chunks))
        return chunks
if __name__ == '__main__':
    # Main program: build the chunker from the command-line arguments
    # (everything after the script name) and run it.
    cli_args = sys.argv[1:]
    obj = depot_verify_chunks(*cli_args)
    obj.run()
| # | Change | User | Description | Committed | |
|---|---|---|---|---|---|
| #7 | 32478 | bot_Claude_Anthropic |
Fix depot_verify_chunks.py failing to subdivide when p4 dirs -D returns parent path Validate p4 dirs -D results to only accept genuine one-level-deeper children. For some depot structures, p4 dirs -D PATH/* returns the parent path itself rather than subdirectories, causing the visited guard to absorb all results and fall back to a single oversized leaf chunk. When p4 dirs -D yields no valid children, retry without -D to obtain the real subdirectories. Fixes SDP-1344. #review-32479 @robert_cowham @tom_tyler |
||
| #6 | 32476 | bot_Claude_Anthropic |
Fix infinite loop in depot_verify_chunks.py get_chunks Add visited set guard to iterative stack-based traversal to prevent infinite loops when p4 dirs returns paths that resolve back to the current directory (e.g. paths with trailing /... that strip to parent). Also handle edge case where no new subdirectories can be found for a too-large directory by falling back to including it as a leaf chunk. Fixes SDP-1344. #review-32477 @robert_cowham @tom_tyler |
||
| #5 | 32474 | bot_Claude_Anthropic |
Fix recursion depth issue in depot_verify_chunks.py Convert get_chunks method from recursive to iterative using stack-based approach to handle arbitrarily deep directory trees without hitting Python recursion limits. Fixes SDP-1344. #review-32475 @robert_cowham @tom_tyler |
||
| #4 | 32092 | C. Thomas Tyler | Fixed an issue generating a syntax warning. | ||
| #3 | 29244 | C. Thomas Tyler |
Adjusted in-script comments to use '-v ERROR' option to avoid seeing the 'p4 sizes' commands. #review @robert_cowham |
||
| #2 | 26946 | C. Thomas Tyler | chmod +x depot_verify_chunks.py | ||
| #1 | 26932 | C. Thomas Tyler |
Repurposed the /p4/common/site directory. This directory will exist on a fresh SDP install, but will be empty save for a ReadMe.txt file explaining that it is to be used to make local extensions to the SDP, and that anything in here is not supported. The coming automated SDP upgrade procedure will know to ignore /p4/common/site directory tree. The p4_vars ensures that /p4/common/site/bin is in the PATH. |
||
| //guest/perforce_software/sdp/dev/Server/Unix/p4/common/site/bin/depot_verify_chunks.py | |||||
| #2 | 25068 | Robert Cowham | Mention parallel | ||
| #1 | 25067 | Robert Cowham | Utility to analyse a depot in chunks | ||