#!/usr/bin/perl -w # # locks - shows p4 process locks # # notes: # - must run on Perforce server machine # - requires user to enter password for "sudo" unless run as same user # as Perforce server. # - unless you run with the -d for demo flag, there must be a Perforce # server running on port 1999 for "p4 monitor show" # # Dan Bloch - dbloch@google.com # # Copyright (C) 2006 Google Inc. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or # implied. # See the License for the specific language governing permissions and # limitations under the License. # #---------- CONSTANTS ---------- # # The Unix user account under which p4d is being run. lsof must be run # either as this user or as root in order to see files opened by p4d. # This script will attempt to use sudo to run the command if it is run # by another user. # $UNIX_USER = "p4"; # # The P4PORT value for which the "p4 monitor show" command will be # run against. Since p4 monitor show may hang if the server is hanging, # it is strongly recommended that a server be run just for this purpose, # on the same machine but a different port than the server being # monitored; with its db.monitor a symlink to the main server's. # $P4_MONITOR_SHOW_PORT = "localhost:1999"; # # Locations of executables # $LSOF = "/usr/sbin/lsof"; # this is the standard Linux location # $P4 = "p4"; # look for p4 on $PATH. Can specify absolute path. #---------------------------------------------------------------------- $options = "dvk"; ($prog = $0) =~ s#.*[/\\]##; $usage = "usage: $prog [-$options] -d demo mode--doesn't require separate server running -v verbose -k kill long-running \"p4 integrated\" and \"p4 changes -i\" processes "; die $usage if @ARGV && $ARGV[0] =~ /^(-?help|-\?+)$/i; use Getopt::Std; &getopts($options) || die $usage; $DEMO_MODE = 1 if defined $opt_d; $VERBOSE = 1 if defined $opt_v; $KILL = 1 if defined $opt_k; our($opt_d, $opt_v, $opt_k); die "must be user \"$UNIX_USER\" to use -k flag\n" if $opt_k && $ENV{USER} ne $UNIX_USER; #---------------------------------------------------------------------- # find locks #---------------------------------------------------------------------- # notes: # - -bw skips calls which could block and doesn't report warnings. Otherwise, # lsof can hang on unavailable remote-mounted volumes. $command = "$LSOF -bw -c p4d"; $command = "sudo $command" if $ENV{USER} ne $UNIX_USER; date(), print "running $command\n" if $VERBOSE; @lsof=`$command`; date(), print "\n"; # whether verbose or not # COMMAND PID USER FD TYPE DEVICE SIZE NODE NAME # p4d 18974 p4 9uR REG 8,1 82403328 4767758 /p4/root/db.domain shift @lsof; for (@lsof) { my($command, $pid, $user, $fd, $type, $device, $size, $node, $file) = split(" "); $file =~ s#/.*/##; next if $file !~ /^db\.\w+$/; # fd always says "u", meaning open for read/write access. # It has a lock if there's a capital letter (R or W) following. ($lock = $fd) =~ s/\d+u//; # R or W if ($lock) { $pids{$pid}++; if ($lock eq "R") { $read_locks{$pid} .= ", $file"; } elsif ($lock eq "W") { $write_locks{$pid} .= ", $file"; } else { print "unexpected lock: $pid: $lock: $file\n"; } } } #---------------------------------------------------------------------- # correlate with p4 processes and print #---------------------------------------------------------------------- if (%pids) { # get list of p4 processes, sorted from oldest to most recent $port_spec = "-p $P4_MONITOR_SHOW_PORT"; $user_spec = "-u p4"; # set in setup.sh $user_spec = $port_spec = "" if $DEMO_MODE; $command = "$P4 $port_spec $user_spec monitor show -l | sort -rb +3"; date(), print "running $command\n" if $VERBOSE; # There's a p4d bug (call #1385833) which causes p4d to take a lock # on db.monitor and spin, hanging Perforce forever and also hanging # the port 1999 server. # Code to run a command with a timeout is from # http://www.perl.com/doc/manual/html/pod/perlfunc/alarm.html eval { local $SIG{ALRM} = sub { die "alarm\n"; }; alarm 60; # 60 seconds @processes = `$command`; # run command alarm 0; }; if ($@) { die "unexpected: $@" if $@ ne "alarm\n"; $P4_MONITOR_SHOW_TIMED_OUT = 1; } if (!@processes && !$P4_MONITOR_SHOW_TIMED_OUT) { # p4 command must have failed, since we should always see ourselves. # Error will have been printed out to standard error, so just exit. print STDERR "Try '$prog -d' for a mode which doesn't require server support.\n" unless $DEMO_MODE; exit 1; } # first pass: remove all but first fstat, kill selected commands if -k for (@processes) { # 15165 R pratikm 08:11:17 resolve //depot/google3/... my($pid) = /^\s*(\d+)/; next if ! $read_locks{$pid} && ! $write_locks{$pid}; # ------------------------------------------------------------ # remove all but first fstat # ------------------------------------------------------------ if (/ fstat /) { if ($fstat_pid) { # if we already have one # XXX: saw use of uninitialized value in string eq at line 112. if ($read_locks{$pid} eq $fstat_locks) { # 112 delete $read_locks{$pid}; $nskipped++; next; } else { # (unlikely) It's an fstat with with _different_ locks, # in which case fall through and print it. } } else { # first fstat we've seen $fstat_pid = $pid; $fstat_locks = $read_locks{$pid}; s/ fstat .*/ fstat [args]/; } } # ------------------------------------------------------------ # kill "p4 integrated", "p4 changes -i", etc. # ------------------------------------------------------------ # Obviously, killing users' commands is serious thing to do. # We are doing this because these commands have caused 5-10 # minutes of server downtime on repeated occasions, and we # aren't always around and don't react quickly enough to kill # the commands (which is what we would do in any case). # # The script kills these commands under the following conditions: # - script has been run with the -k flag # - script was run by "p4" user (required to use -k) # - the command is holding locks # - the locks are read locks only, no write locks # - the command has been running for more than a minute # # When we kill a command, the output seen by the user will be # Perforce client error: # Partner exited unexpectedly. # ------------------------------------------------------------ if ($KILL && ! $write_locks{$pid}) { # to do: change this to a commented regexp if ( # "p4 integrated", "p4 changes -i", "filelog ..." /[^0]:\d\d (integrated|changes( -.*)? -i) / # accidental space between ... and @, hangs 10+ minutes || /[^0]:\d\d integrate .*\.\.\. \@/ ) { kill TERM, $pid; # SIGTERM s/$/ - KILLED/; # label for output } } push(@lock_holders, $_); } if ($P4_MONITOR_SHOW_TIMED_OUT) { print "P4 MONITOR SHOW TIMED OUT\n"; print "Server is probably hung by a lock on db.monitor\n"; print "Run $prog in non-demo mode with support server to avoid this\n" if $DEMO_MODE; print "\n"; @processes = sort keys %write_locks; } # second pass: print locks for (@processes) { # 15165 R pratikm 08:11:17 resolve //depot/google3/... my($pid) = /^\s*(\d+)/; # print locks for processes that are still active only next if ! $read_locks{$pid} && ! $write_locks{$pid}; printf("%5d: ", $pid); my $has_read_locks = 0; if ($read_locks{$pid}) { print_locks($read_locks{$pid}, "read"); $has_read_locks = 1; } if ($write_locks{$pid}) { print " " if $has_read_locks; print_locks($write_locks{$pid}, "WRITE"); } if ($nskipped && $pid == $fstat_pid) { print " [$nskipped additional fstats omitted]\n"; } } # now print processes if (@lock_holders) { for (@lock_holders) { s/ R /: / } # remove unhelpful "R" print "\n", @lock_holders; } else { print "processes not found--maybe they exited\n" unless $P4_MONITOR_SHOW_TIMED_OUT; } } else { print("no locks found\n"); } # # print_locks($locks, $tag) # $locks is a comma-separated list of files, with a leading ", " # $tag is typically "read" or "WRITE" # sub print_locks { my($locks, $tag) = @_; $locks =~ s/^, //; my $s = ($locks =~ /,/) ? "s" : ""; print "$tag lock$s: $locks\n"; } sub date { system("date"); }