p4clog.py #2

#!/usr/bin/python

# p4clog.py [ options ] < p4d-log-file > tables.csv
#
# $Id: //guest/perforce_software/utils/log_analyzer/p4clog.py#2 $
#
# -s - submit only
# -c - show locking by command
# -d - show delay analysis
# -h - show lock held histogram
# -i - interval for histogram instead of 600
# -w - show wait vs held analysis
# -v - log page I/Os, wait/hold more than 100ms, delays
#

import sys, re, time, getopt, math

# logParse - parse a line of p4d log tracking

class logParse:

    def __init__( p ):

	# what we're parsing:


	# 2013/01/17 22:01:11 pid 28322 u@c 10.0.50.39/10.0.0.40 [p4tter/v72 (brokered)] 'command args'
	# --- db.rev
	# ---   pages in+out+cached 222520+0+96
	# ---   locks read/write 1/0 rows get+pos+scan put+del 0+1+7372595 0+0
	# ---   total lock wait+held read/write 0ms+9578ms/0ms+0ms
	# --- db.change
	# ---   total lock wait+held read/write 0ms+9583ms/0ms+0ms

	p.pa = re.compile( "^\t(..../../.. ..:..:..) pid [^']*'([^ ']*)([^']*)" )
	p.pb = re.compile( "^--- (db.*)$" )
	p.pc = re.compile( "^---   pages in\+out\+cached ([^+]*)\+([^+]*)\+(.*)$" )
	p.pd = re.compile( "^---   locks .* put\+del ([^+]*)\+([^+]*)\+([^ ]*) ([^+]*)\+(.*)$" )
	p.pe = re.compile( "^---   total .* read/write ([^m]*)ms\+([^m]*)ms/([^m]*)ms\+([^m]*)ms$" )
	p.pf = re.compile( "^$" )

	p.reset()

    def reset( p ):

	p.date = p.command = p.args = p.table = ""

    # parse a single line of log

    def parse( p, line ):

	p.lineType = "none"

	ll = len( line )

	if( line[0] == "\n" ):
	    p.lineType = "end"
	    return

	if( ll > 5 and line[5] == "/" ):
	    m = p.pa.search( line )
	    if( m ): 
		p.date,p.command,p.args = m.group(1,2,3)
		p.lineType = "command"
	    return

	if( ll > 4 and line[4] == "d" ):
	    m = p.pb.search( line )
	    if( m ): p.table = m.group(1)
	    return

	if( ll > 6 and line[6] == "t" ):
	    m = p.pe.search( line )
	    if( m ): 
		p.lrw,p.lrh,p.lww,p.lwh = map(int,m.group(1,2,3,4))
		p.lineType = "locks"
	    return

	if( ll > 6 and line[6] == "p" ):
	    m = p.pc.search( line )
	    if( m ): 
		p.pgi,p.pgo,p.pgc = map(int,m.group(1,2,3))
		p.lineType = "pages"
		p.isPages = True
	    return

	return

	# don't use these yet

	if( ll > 6 and line[6] == "l" ):
	    m = p.pd.search( line )
	    if( m ): rgt,rpo,rsc,rpt,rdl = map(int,m.group(1,2,3,4,5))
	    return

# tableCounts - read/write wait/held time counter

class tableCounts:

    def __init__( s, lrw, lrh, lww, lwh ):
	s.readWaitCount = lrw > 0; s.readWaitTime = lrw
	s.readHeldCount = lrh > 0; s.readHeldTime = lrh
	s.writeWaitCount = lww > 0; s.writeWaitTime = lww
	s.writeHeldCount = lwh > 0; s.writeHeldTime = lwh

    def __iadd__( s, o ):

	s.readHeldCount += o.readHeldCount
	s.readHeldTime += o.readHeldTime
	s.readWaitCount += o.readWaitCount
	s.readWaitTime += o.readWaitTime

	s.writeHeldCount += o.writeHeldCount
	s.writeHeldTime += o.writeHeldTime
	s.writeWaitCount += o.writeWaitCount
	s.writeWaitTime += o.writeWaitTime

	return s

    def __ior__( s, o ):

	# add waits, but max of held

	s.readWaitTime += o.readWaitTime
	s.writeWaitTime += o.writeWaitTime

	if( o.readHeldTime > s.readHeldTime ):
	    s.readHeldTime = o.readHeldTime
	if( o.writeHeldTime > s.writeHeldTime ):
	    s.writeHeldTime = o.writeHeldTime

	return s

# Spanner - track how long locks are held for one db file

class Spanner:

    # per interval result

    class Result:

	def __init__( r, time, secsHeld, secsFree, longestHeld ):

	    r.time = time
	    r.secsHeld = secsHeld
	    r.secsFree = secsFree
	    r.longestHeld = longestHeld

    # when the log file begins

    epoch = 0

    def __init__( s ):

	# number of locks held in each second 
	# since log file begins (gets long!)

	s.byTheSeconds = []
	s.byTheInterval = []

    # translate log file date into seconds since epoch

    def secs( s, date ):

	try:
	    t = int( time.mktime( time.strptime( date, "%Y/%m/%d %H:%M:%S" ) ) )
	    if( Spanner.epoch == 0 ): Spanner.epoch = t
	    return t - Spanner.epoch

	except ValueError:
	    print >> sys.stderr, "bad date %s\n" % date,
	    return 0

    # process a single lock hold

    def span( s, start, held ):

	# translate start/held to start/end in second since epoch

	heldStart = s.secs( start )
	heldEnd = heldStart + ( held + 500 ) / 1000

	# sometime the log file is out of order

	if( heldStart < 0 or heldStart == heldEnd ):
	    return

	# dynamically extend array

	if( len( s.byTheSeconds ) <= heldEnd ):
	    s.byTheSeconds.extend( [ 0 ] * ( heldEnd + 1 - len( s.byTheSeconds ) ) )

	# count this as an instance this lock was held this second

	for x in range( heldStart, heldEnd ):
	    s.byTheSeconds[ x ] += 1

    # condense into intervals

    def condense( s, interval ):

	lastFree = -1
	longestHeld = 0

	# for each 'interval' seconds
	# find the time held/free/longest
	# and dump out a single line

	for x in range( 0, len( s.byTheSeconds ), interval ):

	    secsHeld = 0
	    secsFree = interval

	    for y in range( x, min( x + interval, len( s.byTheSeconds ) ) ):

		sex = s.byTheSeconds[ y ]

		secsHeld += sex
		secsFree -= sex == 0

		if( sex == 0 ):
		    lastFree = y
		elif( y - lastFree > longestHeld ):
		    longestHeld = y - lastFree

	    s.byTheInterval.append( s.Result( Spanner.epoch + x, secsHeld, secsFree, longestHeld ) )

	    longestHeld = 0

    # dump one table's locks

    def dump( s, t, interval ):

	s.condense( interval )

	if( len( s.byTheInterval ) == 0 ): return

	print ">>> %s saturation each %d seconds" % ( t, interval )
	print "%s,%s,%s,%s" % ( "time", "not free", "held", "longest held" )

	# for each 'interval' seconds
	# find the time held/free/longest
	# and dump out a single line

	for x in s.byTheInterval:

	    print "%s,%d,%d,%d" % ( 
		time.ctime( x.time ),
		x.secsFree,
		x.secsHeld,
		x.longestHeld )

# LocksByTime: one spanner for each table

class LocksByTime:

    def __init__( s ):

	s.tableSpanner = {}

    def tally( s, commandTables, date ):

	for t in commandTables:

	    if( not t in s.tableSpanner ):
		s.tableSpanner[ t ] = Spanner()

	    s.tableSpanner[ t ].span( date,
		    commandTables[ t ].writeHeldTime +
		    commandTables[ t ].readHeldTime )

    def dump( s, spannerInterval ):

	for t in s.tableSpanner:
	    s.tableSpanner[ t ].dump( t, spannerInterval )

#
# WaitVsHeld -- calculate how long locks are held waiting for others
#
#	.tally() - given lock+held times for all tables used by a command's
#		processing, calculate which tables are holding up others
#
#	.dump() - dump out all wait vs held times for tables held more than
#		1 seconds total
#

class WaitVsHeld:

    def __init__( s ):

	s.table = {}

    def tally( s, commandTables, showWaitVsHeld, count, command ):

	for t in commandTables:

	    # wait vs held

	    held = commandTables[ t ].writeHeldTime

	    if( held == 0 ): continue

	    for t2 in commandTables:

		wait = commandTables[ t2 ].writeWaitTime

		if( t == t2 or wait > held or wait <= held / 2 ):
		    continue

		if( not t in s.table ):
		    s.table[ t ] = {}
		if( not t2 in s.table[ t ] ):
		    s.table[ t ][ t2 ] = tableCounts( 0, 0, 0, 0 )

		s.table[ t ][ t2 ] += commandTables[ t2 ]

		# for interest sake

		if( showWaitVsHeld ):
		    print count, "held", t, held, "wait", t2, wait, command

    def dump( s, totalTables ):

	print ">>> Wait times causing hold times"

	print "%s,%s,%s,%s,%s,%s" % (
        	"table", 
                "write-wait-Time", 
                "read-wait-Time", 
		"table",
                "write-held-Time",
		"read-held-Time" )

	for t in s.table:
	    tc = totalTables[ t ]
	    for t2 in s.table[ t ]:
		tg = s.table[ t ][ t2 ]

		if( tg.writeWaitTime > 1000 and tc.writeHeldTime > 1000 or
		    tg.readWaitTime > 1000 and tc.readHeldTime > 1000 ):
		    print "%s,%d,%d,%s,%d,%d" % (
			    t2, 
			    tg.writeWaitTime / 1000,
			    tg.readWaitTime / 1000,
			    t, 
			    tc.writeHeldTime / 1000,
			    tc.readHeldTime / 1000 )

# 
# TimesByCommand -- calculate read+write hold time for each command
#
#	.tally() - add a single read+write hold to a command's total
#	.dump() - dump out each command's total
#

class TimesByCommand:

    def __init__( s ):

	s.byCommand = {}

    def tally( s, table, command, tc ):

	# by table then command 

	if( not table in s.byCommand ):
	    s.byCommand[ table ] = {}
	if( not command in s.byCommand[ table ] ):
	    s.byCommand[ table ][ command ] = tableCounts( 0, 0, 0, 0 )

	s.byCommand[ table ][ command ] += tc

    def dump( s ):

	print ">>> Table hold times by command"
	print "%s,%s,%s,%s" % (
        	"table", 
		"command",
                "read-held-Time", 
                "write-held-Time" )

	for t in s.byCommand:
	    for c in s.byCommand[ t ]:
		tt = s.byCommand[ t ][ c ]
		if( tt.readHeldTime >= 1000 or tt.writeHeldTime >= 1000 ):
		    print "%s,%s,%d,%d" % (
			t, c, 
			tt.readHeldTime/1000,
			tt.writeHeldTime/1000 )

#
# TimesByTable -- calculate read+write/wait+hold time for each table
#
#	.tally() - add a single read+write/wait+hold to a table's total
#	.dump() - dump out each table's tally, using a grandTotal for %age
#

class TimesByTable:

    def __init__( s ):

    	s.byTable = {}

    def tally( s, table, tc ):

	if( not table in s.byTable ):
	    s.byTable[ table ] = tableCounts( 0, 0, 0, 0 )

	s.byTable[ table ] += tc

    def dump( s, grandTotal ):

	grandTotal += tableCounts( 1, 1, 1, 1 ) # no divide by zero errors

	print ">>> Hold and wait times per table"

	print "%s,%s,%s,%s,%s,%s,%s" % (
        	"table", 
                "read-wait-Time", 
		"%-of-Total",
                "read-held-Time", 
                "write-wait-Time", 
		"%-of-Total",
                "write-held-Time" )

	# sort descending by readWaitTime

	for t,tc in sorted( s.byTable.iteritems(), key= lambda (k,v): -v.readWaitTime ):

                print "%s,%d,%d%%,%d,%d,%d%%,%d" % (
                        t, 
                        tc.readWaitTime/1000, 
			tc.readWaitTime * 100 / grandTotal.readWaitTime,
                        tc.readHeldTime/1000,
                        tc.writeWaitTime/1000,
                        tc.writeWaitTime * 100 / grandTotal.writeWaitTime,
                        tc.writeHeldTime/1000 )

#
# WaitDelays - how much commands are delayed by wait times
#

class WaitDelays:

    def __init__( s ):

	s.maxSlots = 10
	s.power = 2
	s.delaySlots = [ 0 ] * s.maxSlots

    def tally( s, commandTotal, logDelays, count, command ):

	wait = commandTotal.readWaitTime + commandTotal.writeWaitTime 
	held = commandTotal.readHeldTime + commandTotal.writeHeldTime

	if( wait < 1000 or wait < held / 10 ):
	    return

	if( logDelays ):
	    print count, "commandWait", wait, "commandHeld", held, command

	slot = int( math.log( wait / 1000, s.power ) )

	if( slot >= s.maxSlots ):
	    slot = s.maxSlots-1

	s.delaySlots[ slot ] += 1

    def dump( s ):

	print ">>> Commands delayed"
	print "%s,%s" % ( "Seconds of delay", "Commands affected" )

	for x in range( 0, s.maxSlots ):
	    print "%d,%d" % ( s.power**x, s.delaySlots[ x ] )

def main():

	# process args

	showByCommand = False
	showDelays = False
	showWaitVsHeld = False
	showHisto = False
	showTally = False
	spannerInterval = 600
	submitOnly = False
	verboseLogging = False

	try:
	    opts, args = getopt.getopt( sys.argv[1:], "acdhi:stwv" )

	except getopt.error, msg:
		print msg
		print "Usage: p4clog.py [ -v ] [ -acdhswt ] [ -i interval ] < log-file"
		sys.exit(2)

	for o, a in opts:
	    if o == '-h': showHisto = True
	    elif o == '-s': submitOnly = True
	    elif o == '-i': spannerInterval = int(a)
	    elif o == '-c': showByCommand = True
	    elif o == '-d': showDelays = True
	    elif o == '-t': showTally = True
	    elif o == '-a': showHisto = showByCommand = showDelays = showTally = showWaitVsHeld = True
	    elif o == '-w': showWaitVsHeld = True
	    elif o == '-v': verboseLogging = True

	# per table counts of held and wait times

	lineCount = 0
	commandCount = 0
	pagesIn = 0
	pagesOut = 0
	firstDate = 0
	lastDate = 0

	p = logParse()

	commandTables = {}

	timesByTable = TimesByTable()
	timesByCommand = TimesByCommand()
	waitVsHeld = WaitVsHeld()
	locksByTime = LocksByTime()
	waitDelays = WaitDelays()

	commandTotal = tableCounts( 0, 0, 0, 0 )
	grandTotal = tableCounts( 0, 0, 0, 0 )

	for line in sys.stdin:

		lineCount += 1

		if( lineCount % 1000000 == 0 ):
		    print >> sys.stderr, "%s\n" % lineCount,

		# match one of the lines

		p.parse( line )

		# per table tallying

		if( submitOnly and p.command != "dm-CommitSubmit" ):
		    continue

		# command count

		if( p.lineType == "command" ):
		    commandCount += 1
		    if( firstDate == 0 ): firstDate = p.date
		    lastDate = p.date

		# pages

		if( p.lineType == "pages" ):

		    pagesIn += p.pgi
		    pagesOut += p.pgo

		    if( verboseLogging ):
		       if( p.pgi + p.pgo > 0 ):
			    print lineCount, "pages", p.table, p.pgi + p.pgo, p.command, p.args

		    continue

		# locks

		if( p.lineType == "locks"  ):

		    # for interest sake

		    if( verboseLogging ):
			if( p.lrh > 100 ): print lineCount, "readheld", p.table, p.lrh, p.command, p.args 
			if( p.lwh > 100 ): print lineCount, "writeheld", p.table, p.lwh, p.command, p.args 
			if( p.lrw > 100 ): print lineCount, "readwait", p.table, p.lrw, p.command, p.args 
			if( p.lww > 100 ): print lineCount, "writewait", p.table, p.lww, p.command, p.args

		    # remember count for this table for this command
		    # and add it to the grand total

		    tc = tableCounts( p.lrw, p.lrh, p.lww, p.lwh )
		    commandTables[ p.table ] = tc
		    commandTotal |= tc
		    grandTotal += tc

		    # tally by table and by command name

		    timesByTable.tally( p.table, tc )
		    timesByCommand.tally( p.table, p.command, tc )

		# per command tallying

		if( p.lineType == "end" ):

		    if( showHisto ):
			locksByTime.tally( commandTables, p.date )

		    if( showWaitVsHeld ):
			waitVsHeld.tally( commandTables, verboseLogging, lineCount, p.command )

		    if( showDelays ):
			waitDelays.tally( commandTotal, verboseLogging, lineCount, p.command )

		    commandTotal = tableCounts( 0, 0, 0, 0 )
		    commandTables = {}

		    p.reset()

	# Dump it all out

	if( showTally ):
	    print ">>> Log file summary"
	    print "Lines,Commands,Pages Read,Pages Written,First Date,Last Date"
	    print "%d,%d,%d,%d,%s,%s" % ( lineCount, commandCount, pagesIn, pagesOut, firstDate, lastDate )

	if( showDelays ):
	    waitDelays.dump()

	timesByTable.dump( grandTotal )

	if( showWaitVsHeld ):
	    waitVsHeld.dump( timesByTable.byTable )

	if( showByCommand ):
	    timesByCommand.dump()

	if( showHisto ):
	    locksByTime.dump( spannerInterval )

main()
#	Change	User	Description
#3	25220	Robert Cowham	Moved project files to new location: //guest/perforce_software/log-analyzer/psla/... Required by Swarm project structure for workshop.
#2	9735	Lester Cheung	Updating p4clog.py with our latest version
#1	8280	Matt Attaway	Add our new log analyszer, p4clog.