pverify #1

  • //
  • guest/
  • michael_mirman/
  • conference2011/
  • pverify
  • View
  • Commits
  • Open Download .zip Download (7 KB)
#!/usr/local/bin/perl

=head1 NAME

pverify

=head1 SYNOPSIS

pverify -p [host:]port [-n] [-head] [-z] [-m M] [-nproc N]
        [-d //path1[,//path2,...]] [-l log] [-maxerr N] [-v]

=head1 DESCRIPTION

Run several verify processes in parallel to increase CPU activity and decrease
the time we keep the database locked.

Options:
 -d path[,...] piece(s) of the repository to verify
               Default: all directories from //* except //tmw, which
               will also be split up in pieces
               Example: -d //bat/...,//private/...
 -head         Verify only the head revision. Convinient to verify
               that the latest revision is present in the archive.
 -l log        If this option is used, the verify output is redirected to
               the specified log file.
 -m N          verify only N revisions of every file
 -p port       Perforce port (required)
 -maxerr N     max number of errors displayed (default: 10)
 -nproc N      max number of processes (default: 30)
 -n            dont run verify - just tell what would happen
 -v            verbose mode
 -z            use the -z option for p4 verify

=head1 EXAMPLES

 pverify -nproc 4 -p 1666 -z -d //sandbox/batscm/...

=cut

use strict;
use warnings;

use Cwd            qw(abs_path);
use File::Basename qw(dirname fileparse);
use Getopt::Long   qw(GetOptions);
use IO::Handle;
use Parallel::ForkManager;
use Pod::Usage     qw(pod2usage);

my ($Myname, $Mydir);
BEGIN {
    ($Myname, $Mydir) = fileparse($0);
    $Myname =~ s{/$}{};
    $Mydir = abs_path($Mydir);
    unshift @INC, dirname($Mydir) . "/lib";
}
use MW::Util::StackStdout ();
use MW::Util::Util ();  # duration()
use MW::P4::FixEnv ();  # fix %ENV

# no buffering
*STDOUT->autoflush();
*STDERR->autoflush();

my $start = time;
my ($dont, $head_only, $log, $max_err, $MAX_PROCESSES, $max_verified_revs);
my (@p4pieces, $port, $verbose, $zopt);
GetOptions(
           'd=s'      => sub { push @p4pieces, split /,/, $_[1] },
           'head'     => \$head_only,
           'help'     => sub { pod2usage(-verbose => 2, -exit => 0) },
           'l=s'      => \$log,
           'm=i'      => \$max_verified_revs,
           'maxerr=i' => \$max_err,
           'n'        => \$dont,
           'nproc=i'  => \$MAX_PROCESSES,
           'p=s'      => \$port,
           'v'        => \$verbose,
           'z'        => \$zopt,
          )
    or die "Error parsing arguments\n";

push @p4pieces, @ARGV;

print "\n     * * * * *\n";

die "Specify -p port\n" unless $port;

$max_err       ||= 10; # max number of errors to display
$MAX_PROCESSES ||= 30; # this provides reasonable but not overwhelming load

open my $DATA, '<', "$Mydir/$Myname.data"
    or die "ERROR: Cannot open $Mydir/$Myname.data: $!\n";
my %expected_time;
while ( <$DATA> ) {
    my ($path, $time) = m{^ (\S+) \s+ (\d+) \s* $ }x
      or next;
    # in case if we have dups in the data file, take the greater value
    $expected_time{$path} = $time
        if (! $expected_time{$path} || $expected_time{$path} < $time);
}
close $DATA;
my $PIECE_TIME_LIMIT = 3000;
my %time4sort;

if ( ! @p4pieces ) {
    my @dirs = `p4 -p '$port' dirs '//*'`;
    chomp(@dirs);
    while ( @dirs ) {
        my $piece = shift @dirs;

# unclear whether this is a good thing or a bad thing
#        if ( $piece =~ m{^//sandbox/(\w+)/} ) {
#            my $user = $1;
#            if ( ! getpwnam($user) ) {
#                msg("Skipping $piece because $user is not a valid user\n");
#                next;
#            }
#        }

        # If the $piece is 0 we must split it; no comparison is needed.
        # If the $piece is defined with a positive number, compare it
        # with $PIECE_TIME_LIMIT.
        # If the $piece is not defined we won't split it.
        if ( $expected_time{$piece}
             && $expected_time{$piece} <= $PIECE_TIME_LIMIT ) {
            $time4sort{"$piece/..."} = $expected_time{$piece};
            push @p4pieces, "$piece/...";
        }
        elsif ( ! defined $expected_time{$piece} ) {
            $time4sort{"$piece/..."} = 5000; # arbitrary number
            push @p4pieces, "$piece/...";
        }
        else { # split it
            push @p4pieces, "$piece/*";
            $time4sort{"$piece/*"} = 1;
            push @dirs, map { chomp;
                              /no such file\(s\)\./ ? () : $_ }
                            `p4 -p '$port' dirs '$piece/\*' 2>&1`;
        }
    }
}

msg("Verifying " . @p4pieces . " parts of the $port repository\n");
if ( $log ) {
    MW::Util::StackStdout::push_stdout($log)
        or die "Failed to redirect output:\n    $@\n";
}

my @err;
# Run a child process and analyze the output from that child process
if ( my $pid = open my $CHILD, '-|' ) { # parent can read from the child
    while ( <$CHILD> ) {
        if ( /\b(MISSING|BAD)\b/
             || /^(RCS no such revision \S+)/
             || m{Done: //.* \(rc=[1-9]} ) {
            push @err, $_;
            s/\n/ ***ERROR***\n/;
        }
        print;
    }
    waitpid $pid, 0;
    if ( $? ) {
        my $msg = "Top child process failed with code $? ***ERROR***\n";
        push @err, $msg;
        print $msg;
    }
}
elsif ( defined $pid ) { # child process will perform verify's
    do_verify();
    exit;
}
else {
    die "Failed to fork the main child process: $!\n";
}

MW::Util::StackStdout::pop_stdout() if $log;
msg("Verification is done. Errors: ". @err . ". Duration: ",
    MW::Util::Util::duration(time - $start), " sec\n",
    (@err
     ? "Search for ***ERROR*** in the log. Here "
     . (@err > $max_err
        ? join('', "are the first 10 errors:\n", @err[0..9])
        : join('', "they are:\n", @err))
     : '')
   );
exit @err;

#
# Actual verify's
#
sub do_verify {
    open STDERR, '>>&STDOUT'
      or warn "$$: Cannot redirect STDERR to STDOUT: $!\n";
    # Sort it the way so the longest appear first
    my @sorted = sort { $time4sort{$b} <=> $time4sort{$a} } @p4pieces;

    my $pm = new Parallel::ForkManager($MAX_PROCESSES);
    foreach ( @sorted ) {
        # Forks and returns the pid for the child:
        my $pid = $pm->start and next;

        my $start = time;
        my $piece = $_ . ($head_only ? '#head' : '');
	$piece = "'$piece'"
	    if $piece =~ /\s/;
        # verify $piece in the child process
        my $cmd = "p4 -p '$port' verify"
          . ($max_verified_revs ? " -m $max_verified_revs" : '')
	    # Note: It's OK to generate missing digests for the meta depot
          . ( # we cannot use -u on replicas. either add logic
              # to see whether $port is a replica or not, or always
              # use -q (which is obviously easier)
              # $piece =~ m{ ^ //meta/ }smx ? ' -qu ' :
              $zopt                     ? ' -qz '
              :                           ' -q ' )
            . $piece;
        msg("Starting: $cmd\n");
        my $rc = $dont ? 0 : system($cmd);
        $rc >>= 8 if $rc>255;
        msg("Done: $piece (rc=$rc; duration: ", time - $start, " sec)\n");
        $pm->finish($rc); # Terminates the child process
    }
    $pm->wait_all_children;
    return;
} # do_verify

sub msg {
    my ($sec,$min,$hour,$mday,$mon,$year) = localtime;
    printf "%4d-%02d-%02d %02d:%02d:%02d ",
           $year+1900, $mon+1, $mday, $hour, $min, $sec;
    print @_;
} # msg
# Change User Description Committed
#1 7899 Michael Mirman example how verify for the whole repository can be split up
and be done in parallel processes