#!/usr/bin/python
# p4clog.py [ options ] < p4d-log-file > tables.csv
#
# $Id: //guest/lester_cheung/log_analyzer/p4clog.py#1 $
#
# -s - submit only
# -c - show locking by command
# -d - show delay analysis
# -h - show lock held histogram
# -i - interval for histogram instead of 600
# -w - show wait vs held analysis
# -v - log page I/Os, wait/hold more than 100ms, delays
#
import sys, re, time, getopt, math
# logParse - parse a line of p4d log tracking
class logParse:
def __init__( p ):
# what we're parsing:
# 2013/01/17 22:01:11 pid 28322 u@c 10.0.50.39/10.0.0.40 [p4tter/v72 (brokered)] 'command args'
# --- db.rev
# --- pages in+out+cached 222520+0+96
# --- locks read/write 1/0 rows get+pos+scan put+del 0+1+7372595 0+0
# --- total lock wait+held read/write 0ms+9578ms/0ms+0ms
# --- db.change
# --- total lock wait+held read/write 0ms+9583ms/0ms+0ms
p.pa = re.compile( "^\t(..../../.. ..:..:..) pid [^']*'([^ ']*)([^']*)" )
p.pb = re.compile( "^--- (db.*)$" )
p.pc = re.compile( "^--- pages in\+out\+cached ([^+]*)\+([^+]*)\+(.*)$" )
p.pd = re.compile( "^--- locks .* put\+del ([^+]*)\+([^+]*)\+([^ ]*) ([^+]*)\+(.*)$" )
p.pe = re.compile( "^--- total .* read/write ([^m]*)ms\+([^m]*)ms/([^m]*)ms\+([^m]*)ms$" )
p.pf = re.compile( "^$" )
p.reset()
def reset( p ):
p.date = p.command = p.args = p.table = ""
# parse a single line of log
def parse( p, line ):
p.lineType = "none"
ll = len( line )
if( line[0] == "\n" ):
p.lineType = "end"
return
if( ll > 5 and line[5] == "/" ):
m = p.pa.search( line )
if( m ):
p.date,p.command,p.args = m.group(1,2,3)
p.lineType = "command"
return
if( ll > 4 and line[4] == "d" ):
m = p.pb.search( line )
if( m ): p.table = m.group(1)
return
if( ll > 6 and line[6] == "t" ):
m = p.pe.search( line )
if( m ):
p.lrw,p.lrh,p.lww,p.lwh = map(int,m.group(1,2,3,4))
p.lineType = "locks"
return
if( ll > 6 and line[6] == "p" ):
m = p.pc.search( line )
if( m ):
p.pgi,p.pgo,p.pgc = map(int,m.group(1,2,3))
p.lineType = "pages"
p.isPages = True
return
return
# don't use these yet
if( ll > 6 and line[6] == "l" ):
m = p.pd.search( line )
if( m ): rgt,rpo,rsc,rpt,rdl = map(int,m.group(1,2,3,4,5))
return
# tableCounts - read/write wait/held time counter
class tableCounts:
def __init__( s, lrw, lrh, lww, lwh ):
s.readWaitCount = lrw > 0; s.readWaitTime = lrw
s.readHeldCount = lrh > 0; s.readHeldTime = lrh
s.writeWaitCount = lww > 0; s.writeWaitTime = lww
s.writeHeldCount = lwh > 0; s.writeHeldTime = lwh
def __iadd__( s, o ):
s.readHeldCount += o.readHeldCount
s.readHeldTime += o.readHeldTime
s.readWaitCount += o.readWaitCount
s.readWaitTime += o.readWaitTime
s.writeHeldCount += o.writeHeldCount
s.writeHeldTime += o.writeHeldTime
s.writeWaitCount += o.writeWaitCount
s.writeWaitTime += o.writeWaitTime
return s
def __ior__( s, o ):
# add waits, but max of held
s.readWaitTime += o.readWaitTime
s.writeWaitTime += o.writeWaitTime
if( o.readHeldTime > s.readHeldTime ):
s.readHeldTime = o.readHeldTime
if( o.writeHeldTime > s.writeHeldTime ):
s.writeHeldTime = o.writeHeldTime
return s
# Spanner - track how long locks are held for one db file
class Spanner:
# per interval result
class Result:
def __init__( r, time, secsHeld, secsFree, longestHeld ):
r.time = time
r.secsHeld = secsHeld
r.secsFree = secsFree
r.longestHeld = longestHeld
# when the log file begins
epoch = 0
def __init__( s ):
# number of locks held in each second
# since log file begins (gets long!)
s.byTheSeconds = []
s.byTheInterval = []
# translate log file date into seconds since epoch
def secs( s, date ):
try:
t = int( time.mktime( time.strptime( date, "%Y/%m/%d %H:%M:%S" ) ) )
if( Spanner.epoch == 0 ): Spanner.epoch = t
return t - Spanner.epoch
except ValueError:
print >> sys.stderr, "bad date %s\n" % date,
return 0
# process a single lock hold
def span( s, start, held ):
# translate start/held to start/end in second since epoch
heldStart = s.secs( start )
heldEnd = heldStart + ( held + 500 ) / 1000
# sometime the log file is out of order
if( heldStart < 0 or heldStart == heldEnd ):
return
# dynamically extend array
if( len( s.byTheSeconds ) <= heldEnd ):
s.byTheSeconds.extend( [ 0 ] * ( heldEnd + 1 - len( s.byTheSeconds ) ) )
# count this as an instance this lock was held this second
for x in range( heldStart, heldEnd ):
s.byTheSeconds[ x ] += 1
# condense into intervals
def condense( s, interval ):
lastFree = -1
longestHeld = 0
# for each 'interval' seconds
# find the time held/free/longest
# and dump out a single line
for x in range( 0, len( s.byTheSeconds ), interval ):
secsHeld = 0
secsFree = interval
for y in range( x, min( x + interval, len( s.byTheSeconds ) ) ):
sex = s.byTheSeconds[ y ]
secsHeld += sex
secsFree -= sex == 0
if( sex == 0 ):
lastFree = y
elif( y - lastFree > longestHeld ):
longestHeld = y - lastFree
s.byTheInterval.append( s.Result( Spanner.epoch + x, secsHeld, secsFree, longestHeld ) )
longestHeld = 0
# dump one table's locks
def dump( s, t, interval ):
s.condense( interval )
if( len( s.byTheInterval ) == 0 ): return
print ">>> %s saturation each %d seconds" % ( t, interval )
print "%s,%s,%s,%s" % ( "time", "not free", "held", "longest held" )
# for each 'interval' seconds
# find the time held/free/longest
# and dump out a single line
for x in s.byTheInterval:
print "%s,%d,%d,%d" % (
time.ctime( x.time ),
x.secsFree,
x.secsHeld,
x.longestHeld )
# LocksByTime: one spanner for each table
class LocksByTime:
def __init__( s ):
s.tableSpanner = {}
def tally( s, commandTables, date ):
for t in commandTables:
if( not t in s.tableSpanner ):
s.tableSpanner[ t ] = Spanner()
s.tableSpanner[ t ].span( date,
commandTables[ t ].writeHeldTime +
commandTables[ t ].readHeldTime )
def dump( s, spannerInterval ):
for t in s.tableSpanner:
s.tableSpanner[ t ].dump( t, spannerInterval )
#
# WaitVsHeld -- calculate how long locks are held waiting for others
#
# .tally() - given lock+held times for all tables used by a command's
# processing, calculate which tables are holding up others
#
# .dump() - dump out all wait vs held times for tables held more than
# 1 seconds total
#
class WaitVsHeld:
def __init__( s ):
s.table = {}
def tally( s, commandTables, showWaitVsHeld, count, command ):
for t in commandTables:
# wait vs held
held = commandTables[ t ].writeHeldTime
if( held == 0 ): continue
for t2 in commandTables:
wait = commandTables[ t2 ].writeWaitTime
if( t == t2 or wait > held or wait <= held / 2 ):
continue
if( not t in s.table ):
s.table[ t ] = {}
if( not t2 in s.table[ t ] ):
s.table[ t ][ t2 ] = tableCounts( 0, 0, 0, 0 )
s.table[ t ][ t2 ] += commandTables[ t2 ]
# for interest sake
if( showWaitVsHeld ):
print count, "held", t, held, "wait", t2, wait, command
def dump( s, totalTables ):
print ">>> Wait times causing hold times"
print "%s,%s,%s,%s,%s,%s" % (
"table",
"write-wait-Time",
"read-wait-Time",
"table",
"write-held-Time",
"read-held-Time" )
for t in s.table:
tc = totalTables[ t ]
for t2 in s.table[ t ]:
tg = s.table[ t ][ t2 ]
if( tg.writeWaitTime > 1000 and tc.writeHeldTime > 1000 or
tg.readWaitTime > 1000 and tc.readHeldTime > 1000 ):
print "%s,%d,%d,%s,%d,%d" % (
t2,
tg.writeWaitTime / 1000,
tg.readWaitTime / 1000,
t,
tc.writeHeldTime / 1000,
tc.readHeldTime / 1000 )
#
# TimesByCommand -- calculate read+write hold time for each command
#
# .tally() - add a single read+write hold to a command's total
# .dump() - dump out each command's total
#
class TimesByCommand:
def __init__( s ):
s.byCommand = {}
def tally( s, table, command, tc ):
# by table then command
if( not table in s.byCommand ):
s.byCommand[ table ] = {}
if( not command in s.byCommand[ table ] ):
s.byCommand[ table ][ command ] = tableCounts( 0, 0, 0, 0 )
s.byCommand[ table ][ command ] += tc
def dump( s ):
print ">>> Table hold times by command"
print "%s,%s,%s,%s" % (
"table",
"command",
"read-held-Time",
"write-held-Time" )
for t in s.byCommand:
for c in s.byCommand[ t ]:
tt = s.byCommand[ t ][ c ]
if( tt.readHeldTime >= 1000 or tt.writeHeldTime >= 1000 ):
print "%s,%s,%d,%d" % (
t, c,
tt.readHeldTime/1000,
tt.writeHeldTime/1000 )
#
# TimesByTable -- calculate read+write/wait+hold time for each table
#
# .tally() - add a single read+write/wait+hold to a table's total
# .dump() - dump out each table's tally, using a grandTotal for %age
#
class TimesByTable:
def __init__( s ):
s.byTable = {}
def tally( s, table, tc ):
if( not table in s.byTable ):
s.byTable[ table ] = tableCounts( 0, 0, 0, 0 )
s.byTable[ table ] += tc
def dump( s, grandTotal ):
grandTotal += tableCounts( 1, 1, 1, 1 ) # no divide by zero errors
print ">>> Hold and wait times per table"
print "%s,%s,%s,%s,%s,%s,%s" % (
"table",
"read-wait-Time",
"%-of-Total",
"read-held-Time",
"write-wait-Time",
"%-of-Total",
"write-held-Time" )
# sort descending by readWaitTime
for t,tc in sorted( s.byTable.iteritems(), key= lambda (k,v): -v.readWaitTime ):
print "%s,%d,%d%%,%d,%d,%d%%,%d" % (
t,
tc.readWaitTime/1000,
tc.readWaitTime * 100 / grandTotal.readWaitTime,
tc.readHeldTime/1000,
tc.writeWaitTime/1000,
tc.writeWaitTime * 100 / grandTotal.writeWaitTime,
tc.writeHeldTime/1000 )
#
# WaitDelays - how much commands are delayed by wait times
#
class WaitDelays:
def __init__( s ):
s.maxSlots = 10
s.power = 2
s.delaySlots = [ 0 ] * s.maxSlots
def tally( s, commandTotal, logDelays, count, command ):
wait = commandTotal.readWaitTime + commandTotal.writeWaitTime
held = commandTotal.readHeldTime + commandTotal.writeHeldTime
if( wait < 1000 or wait < held / 10 ):
return
if( logDelays ):
print count, "commandWait", wait, "commandHeld", held, command
slot = int( math.log( wait / 1000, s.power ) )
if( slot >= s.maxSlots ):
slot = s.maxSlots-1
s.delaySlots[ slot ] += 1
def dump( s ):
print ">>> Commands delayed"
print "%s,%s" % ( "Seconds of delay", "Commands affected" )
for x in range( 0, s.maxSlots ):
print "%d,%d" % ( s.power**x, s.delaySlots[ x ] )
def main():
# process args
showByCommand = False
showDelays = False
showWaitVsHeld = False
showHisto = False
showTally = False
spannerInterval = 600
submitOnly = False
verboseLogging = False
try:
opts, args = getopt.getopt( sys.argv[1:], "acdhi:stwv" )
except getopt.error, msg:
print msg
print "Usage: p4clog.py [ -v ] [ -acdhswt ] [ -i interval ] < log-file"
sys.exit(2)
for o, a in opts:
if o == '-h': showHisto = True
elif o == '-s': submitOnly = True
elif o == '-i': spannerInterval = int(a)
elif o == '-c': showByCommand = True
elif o == '-d': showDelays = True
elif o == '-t': showTally = True
elif o == '-a': showHisto = showByCommand = showDelays = showTally = showWaitVsHeld = True
elif o == '-w': showWaitVsHeld = True
elif o == '-v': verboseLogging = True
# per table counts of held and wait times
lineCount = 0
commandCount = 0
pagesIn = 0
pagesOut = 0
firstDate = 0
lastDate = 0
p = logParse()
commandTables = {}
timesByTable = TimesByTable()
timesByCommand = TimesByCommand()
waitVsHeld = WaitVsHeld()
locksByTime = LocksByTime()
waitDelays = WaitDelays()
commandTotal = tableCounts( 0, 0, 0, 0 )
grandTotal = tableCounts( 0, 0, 0, 0 )
for line in sys.stdin:
lineCount += 1
if( lineCount % 1000000 == 0 ):
print >> sys.stderr, "%s\n" % lineCount,
# match one of the lines
p.parse( line )
# per table tallying
if( submitOnly and p.command != "dm-CommitSubmit" ):
continue
# command count
if( p.lineType == "command" ):
commandCount += 1
if( firstDate == 0 ): firstDate = p.date
lastDate = p.date
# pages
if( p.lineType == "pages" ):
pagesIn += p.pgi
pagesOut += p.pgo
if( verboseLogging ):
if( p.pgi + p.pgo > 0 ):
print lineCount, "pages", p.table, p.pgi + p.pgo, p.command, p.args
continue
# locks
if( p.lineType == "locks" ):
# for interest sake
if( verboseLogging ):
if( p.lrh > 100 ): print lineCount, "readheld", p.table, p.lrh, p.command, p.args
if( p.lwh > 100 ): print lineCount, "writeheld", p.table, p.lwh, p.command, p.args
if( p.lrw > 100 ): print lineCount, "readwait", p.table, p.lrw, p.command, p.args
if( p.lww > 100 ): print lineCount, "writewait", p.table, p.lww, p.command, p.args
# remember count for this table for this command
# and add it to the grand total
tc = tableCounts( p.lrw, p.lrh, p.lww, p.lwh )
commandTables[ p.table ] = tc
commandTotal |= tc
grandTotal += tc
# tally by table and by command name
timesByTable.tally( p.table, tc )
timesByCommand.tally( p.table, p.command, tc )
# per command tallying
if( p.lineType == "end" ):
if( showHisto ):
locksByTime.tally( commandTables, p.date )
if( showWaitVsHeld ):
waitVsHeld.tally( commandTables, verboseLogging, lineCount, p.command )
if( showDelays ):
waitDelays.tally( commandTotal, verboseLogging, lineCount, p.command )
commandTotal = tableCounts( 0, 0, 0, 0 )
commandTables = {}
p.reset()
# Dump it all out
if( showTally ):
print ">>> Log file summary"
print "Lines,Commands,Pages Read,Pages Written,First Date,Last Date"
print "%d,%d,%d,%d,%s,%s" % ( lineCount, commandCount, pagesIn, pagesOut, firstDate, lastDate )
if( showDelays ):
waitDelays.dump()
timesByTable.dump( grandTotal )
if( showWaitVsHeld ):
waitVsHeld.dump( timesByTable.byTable )
if( showByCommand ):
timesByCommand.dump()
if( showHisto ):
locksByTime.dump( spannerInterval )
main()