#!/usr/bin/perl -w # # lockers - report transactions with significant time spent holding # (or optionally waiting for) locks # # Dan Bloch - dbloch@google.com # # Copyright (C) 2007 Google Inc. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or # implied. # See the License for the specific language governing permissions and # limitations under the License. # #---------- CONSTANTS ---------- # # The location of today's server log. Used if no log is specified on the # command line or with the -z (tail) flag. # $TODAYS_LOG = "/p4/logs/errorlog"; # # The location of yesterday's server log. Used with the -y flag. # Comment out this line to remove the -y flag if you don't rotate your log. # $YESTERDAYS_LOG = "/p4/logs/old/errorlog.1.gz"; # # The number of lines at the end of the error log to process in -z (tail) # mode. This can be tuned depending on your server's activity and how much # tail information you want to see. # $TAIL_SIZE = 100000; # # The lock time threshold in seconds above which commands will be # reported. Can be overridden with -t flag. # $THRESHOLD = 15; #---------------------------------------------------------------------- $options = "t:yzkKw"; $options =~ s/y// if ! $YESTERDAYS_LOG; ($_options = $options) =~ s/://g; ($prog = $0) =~ s#.*[/\\]##; $usage = "usage: $prog [-$_options] [ ...] -t - lock times greater than seconds only (default=$THRESHOLD) -y use yesterday's errorlog -z use last $TAIL_SIZE lines of today's log -k only processes killed by the server (MaxScanRows, etc.) -K exclude processes killed by administrator (signal 15) -w include processes with wait time but no time holding locks "; $usage =~ s/^.*yesterday.*\n$//m if ! $YESTERDAYS_LOG; die $usage if @ARGV && $ARGV[0] =~ /^(-?help|-\?+)$/i; use Getopt::Std; &getopts($options) || die $usage; $SHOW_KILLED_BY_SERVER = 1, if defined $opt_k; $IGNORE_KILLED_BY_SIGNAL = 1 if defined $opt_K; $PRINT_WAITERS = 1 if defined $opt_w; $THRESHOLD = $opt_t if defined $opt_t; $YESTERDAY = 1 if defined $opt_y; $TAIL = 1 if defined $opt_z; our($opt_k, $opt_K, $opt_w, $opt_y, $opt_z); push(@INC, $1) if ($0 =~ /(.*)\//); $THRESHOLD *= 1000; # seconds -> milliseconds $IGNORE_KILLED_BY_SIGNAL = 1 if $SHOW_KILLED_BY_SERVER; $record = ""; while (log_open()) { while () { # process start # 2005/12/19 10:03:55 pid 32356 foo@bar 10.253.8.73 [p4v] 'user-client if (/^\t\d\d\d\d\/\d\d\/\d\d (\d\d:\d\d:\d\d).* '(user|dm)-/) { printRecord($record) if $print_record; ($record = $_) =~ s/^\t/\n/; undef $print_record; } elsif (/^--- killed /) { $record .= $_; } elsif (/^--- (db.*)/) { # --- db.have $db = $1; } elsif (/^--- *locks wait.held read.write (\d+)ms\+(\d+)ms\/(\d+)ms\+(\d+)ms/) { # --- locks wait+held read/write 0ms+414ms/5665ms+26ms ($readwait, $read, $writewait, $write) = ($1, $2, $3, $4); $wait = $readwait + $writewait; $print_record = 1 if $read >= $THRESHOLD || $write >= $THRESHOLD || ($wait >= $THRESHOLD && $PRINT_WAITERS); $record .= " $db:"; $record .= " read: " . $read/1000 if $read > 1000; # 1 sec $record .= " WRITE: " . $write/1000 if $write > 1000; # $record .= " (wait: " . $wait/1000 . ")" if $wait > 1000; # $record .= "\n"; # TO DO: more info would be better } elsif (/Process (\d+) exited on a signal \d+!/ && ! $IGNORE_KILLED_BY_SIGNAL) { print "\n$_"; } } } printRecord($record) if $print_record; if ($total_locked) { $total_locked = int $total_locked/1000; # milliseconds -> seconds print "\n$nlockers commands holding locks for a total of " . "$total_locked seconds\n"; # read locks can overlap, but it's uncommon } else { $THRESHOLD /= 1000; # milliseconds -> seconds print "[no commands holding locks > $THRESHOLD seconds]\n"; } exit 0; # # printRecord($record) - print record if it qualifies # sub BEGIN { # block for static variables my $last_printed = ""; sub printRecord { my($record) = @_; return if ! $record; # called with empty arg first time return if $record eq $last_printed; # log sometimes repeats info return if $SHOW_KILLED_BY_SERVER && $record !~ /^--- killed/m; print($record), $last_printed = $record, return if $PRINT_WAITERS; # Process may have read/write lock time but really be waiting on # another lock during that time. Try to only display processes # with significant non-wait lock time (heuristic). # look at non-wait time on last lock. Line will be one of # db.rev: read: 17.691 # db.rev: WRITE: 57.365 (wait: 5.054) # db.rev: (wait: 57.2) # return if there isn't any non-wait time on last lock $record =~ /db.\w+: \w+: ([\d.]+).*$/ || return; # or if it's less than a third of the total time (five seconds for # the default fifteen-second threshold). my $last_non_wait = $1; $last_non_wait *= 1000; # seconds -> milliseconds return if $last_non_wait < $THRESHOLD/3; $nlockers++; $total_locked += $last_non_wait; print $record; $last_printed = $record; } } # end block for static variables # # log_open() - figure out log files based on @ARGV. Open on filehandle # LOGFILE and return filename. Each call to log_open opens # next file on list. Last call returns undef. # sub BEGIN { # block for static variables my $first_time = 1; my @argv; sub log_open { if ($first_time) { $first_time = 0; @argv = @ARGV; # Handle request to tail log. Note return from within block. # This is a bit of a hack. The second time through, @argv is # empty so function will just return. if ($TAIL) { die "-z cannot be used if filenames are specified\n" if @ARGV; open(LOGFILE, "tail -$TAIL_SIZE $TODAYS_LOG|") || die "couldn't tail $TODAYS_LOG: $!\n"; print_log_time(); return "tail $TODAYS_LOG"; } # handle request for yesterday's log if ($YESTERDAY) { die "-y cannot be used if filenames are specified\n" if @ARGV; @argv = ($YESTERDAYS_LOG); } if (! @argv) { if (-t) { # not a pipe, use today's log @argv = ($TODAYS_LOG); } else { # pipe *LOGFILE= *STDIN; return "stdin"; } } } else { close LOGFILE; } my $file; if ($file = shift @argv) { open(LOGFILE, $file) || die "couldn't open $file: $!\n"; if ($file =~ /\.gz$/) { # previous open is useful to report errors open(LOGFILE, "zcat $file|") || die "couldn't zcat $file: $!\n"; } } $file; } } # end block for static variables sub print_log_time { my($yymmdd, $hhmmss); while () { ($yymmdd, $hhmmss) = ($1, $2), last if /^\t([\d\/]+) ([\d:]+) /; } die("no date found\n") if ! $yymmdd; print "[starting at: $yymmdd $hhmmss]\n"; }