#!/usr/bin/python # p4clog.py [ options ] < p4d-log-file > tables.csv # # $Id: //guest/robert_cowham/perforce/utils/log_analysis/p4clog.py#2 $ # # -s - submit only # -c - show locking by command # -d - show delay analysis # -h - show lock held histogram # -i - interval for histogram instead of 600 # -w - show wait vs held analysis # -v - log page I/Os, wait/hold more than 100ms, delays # from __future__ import print_function import sys, re, time, getopt, math # logParse - parse a line of p4d log tracking class logParse: def __init__(self): # what we're parsing: # 2013/01/17 22:01:11 pid 28322 u@c 10.0.50.39/10.0.0.40 [p4tter/v72 (brokered)] 'command args' # --- db.rev # --- pages in+out+cached 222520+0+96 # --- locks read/write 1/0 rows get+pos+scan put+del 0+1+7372595 0+0 # --- total lock wait+held read/write 0ms+9578ms/0ms+0ms # --- db.change # --- total lock wait+held read/write 0ms+9583ms/0ms+0ms self.pa = re.compile("^\t(..../../.. ..:..:..) pid [^']*'([^ ']*)([^']*)") self.pb = re.compile("^--- (db.*)$") self.pc = re.compile("^--- pages in\+out\+cached ([^+]*)\+([^+]*)\+(.*)$") self.pd = re.compile("^--- locks .* put\+del ([^+]*)\+([^+]*)\+([^ ]*) ([^+]*)\+(.*)$") self.pe = re.compile("^--- total .* read/write ([^m]*)ms\+([^m]*)ms/([^m]*)ms\+([^m]*)ms$") self.pf = re.compile("^$") self.reset() def reset(self): self.date = self.command = self.args = self.table = "" # parse a single line of log def parse(self, line): self.lineType = "none" ll = len(line) if (line[0] == "\n"): self.lineType = "end" return if (ll > 5 and line[5] == "/"): m = self.pa.search(line) if (m): self.date, self.command, self.args = m.group(1, 2, 3) self.lineType = "command" return if (ll > 4 and line[4] == "d"): m = self.pb.search(line) if (m): self.table = m.group(1) return if (ll > 6 and line[6] == "t"): m = self.pe.search(line) if (m): self.lrw, self.lrh, self.lww, self.lwh = map(int, m.group(1, 2, 3, 4)) self.lineType = "locks" return if (ll > 6 and line[6] == "p"): m = self.pc.search(line) if (m): self.pgi, self.pgo, self.pgc = map(int, m.group(1, 2, 3)) self.lineType = "pages" self.isPages = True return return # don't use these yet # if (ll > 6 and line[6] == "l"): # m = self.pd.search(line) # if (m): rgt, rpo, rsc, rpt, rdl = map(int, m.group(1, 2, 3, 4, 5)) # return # tableCounts - read/write wait/held time counter class tableCounts: def __init__(self, lrw, lrh, lww, lwh): self.readWaitCount = lrw > 0; self.readWaitTime = lrw self.readHeldCount = lrh > 0; self.readHeldTime = lrh self.writeWaitCount = lww > 0; self.writeWaitTime = lww self.writeHeldCount = lwh > 0; self.writeHeldTime = lwh def __iadd__(self, o): self.readHeldCount += o.readHeldCount self.readHeldTime += o.readHeldTime self.readWaitCount += o.readWaitCount self.readWaitTime += o.readWaitTime self.writeHeldCount += o.writeHeldCount self.writeHeldTime += o.writeHeldTime self.writeWaitCount += o.writeWaitCount self.writeWaitTime += o.writeWaitTime return self def __ior__(self, o): # add waits, but max of held self.readWaitTime += o.readWaitTime self.writeWaitTime += o.writeWaitTime if (o.readHeldTime > self.readHeldTime): self.readHeldTime = o.readHeldTime if (o.writeHeldTime > self.writeHeldTime): self.writeHeldTime = o.writeHeldTime return self # Spanner - track how long locks are held for one db file class Spanner: # per interval result class Result: def __init__(self, time, secsHeld, secsFree, longestHeld): self.time = time self.secsHeld = secsHeld self.secsFree = secsFree self.longestHeld = longestHeld # when the log file begins epoch = 0 def __init__(self): # number of locks held in each second # since log file begins (gets long!) self.byTheSeconds = [] self.byTheInterval = [] # translate log file date into seconds since epoch def secs(self, date): try: t = int(time.mktime(time.strptime(date, "%Y/%m/%d %H:%M:%S"))) if (Spanner.epoch == 0): Spanner.epoch = t return t - Spanner.epoch except ValueError: print("bad date %s" % date, file=sys.stderr) return 0 # process a single lock hold def span(self, start, held): # translate start/held to start/end in second since epoch heldStart = self.secs(start) heldEnd = int(heldStart + (held + 500) / 1000) # sometime the log file is out of order if (heldStart < 0 or heldStart == heldEnd): return # dynamically extend array if (len(self.byTheSeconds) <= heldEnd): self.byTheSeconds.extend([0] * (heldEnd + 1 - len(self.byTheSeconds))) # count this as an instance this lock was held this second for x in range(heldStart, heldEnd): self.byTheSeconds[x] += 1 # condense into intervals def condense(self, interval): lastFree = -1 longestHeld = 0 # for each 'interval' seconds # find the time held/free/longest # and dump out a single line for x in range(0, len(self.byTheSeconds), interval): secsHeld = 0 secsFree = interval for y in range(x, min(x + interval, len(self.byTheSeconds))): sex = self.byTheSeconds[y] secsHeld += sex secsFree -= sex == 0 if (sex == 0): lastFree = y elif (y - lastFree > longestHeld): longestHeld = y - lastFree self.byTheInterval.append(self.Result(Spanner.epoch + x, secsHeld, secsFree, longestHeld)) longestHeld = 0 # dump one table's locks def dump(self, t, interval): self.condense(interval) if (len(self.byTheInterval) == 0): return print(">>> %s saturation each %d seconds" % (t, interval)) print("%s,%s,%s,%s" % ("time", "not free", "held", "longest held")) # for each 'interval' seconds # find the time held/free/longest # and dump out a single line for x in self.byTheInterval: print("%s,%d,%d,%d" % ( time.ctime(x.time), x.secsFree, x.secsHeld, x.longestHeld)) # LocksByTime: one spanner for each table class LocksByTime: def __init__(self): self.tableSpanner = {} def tally(self, commandTables, date): for t in commandTables: if (not t in self.tableSpanner): self.tableSpanner[t] = Spanner() self.tableSpanner[t].span(date, commandTables[t].writeHeldTime + commandTables[t].readHeldTime) def dump(s, spannerInterval): for t in s.tableSpanner: s.tableSpanner[t].dump(t, spannerInterval) # # WaitVsHeld -- calculate how long locks are held waiting for others # # .tally() - given lock+held times for all tables used by a command's # processing, calculate which tables are holding up others # # .dump() - dump out all wait vs held times for tables held more than # 1 seconds total # class WaitVsHeld: def __init__(self): self.table = {} def tally(self, commandTables, showWaitVsHeld, count, command): for t in commandTables: # wait vs held held = commandTables[t].writeHeldTime if (held == 0): continue for t2 in commandTables: wait = commandTables[t2].writeWaitTime if (t == t2 or wait > held or wait <= held / 2): continue if (not t in self.table): self.table[t] = {} if (not t2 in self.table[t]): self.table[t][t2] = tableCounts(0, 0, 0, 0) self.table[t][t2] += commandTables[t2] # for interest sake if (showWaitVsHeld): print(count, "held", t, held, "wait", t2, wait, command) def dump(self, totalTables): print(">>> Wait times causing hold times") print("%s,%s,%s,%s,%s,%s" % ( "table", "write-wait-Time", "read-wait-Time", "table", "write-held-Time", "read-held-Time")) for t in self.table: tc = totalTables[t] for t2 in self.table[t]: tg = self.table[t][t2] if (tg.writeWaitTime > 1000 and tc.writeHeldTime > 1000 or tg.readWaitTime > 1000 and tc.readHeldTime > 1000): print("%s,%d,%d,%s,%d,%d" % ( t2, tg.writeWaitTime / 1000, tg.readWaitTime / 1000, t, tc.writeHeldTime / 1000, tc.readHeldTime / 1000)) # # TimesByCommand -- calculate read+write hold time for each command # # .tally() - add a single read+write hold to a command's total # .dump() - dump out each command's total # class TimesByCommand: def __init__(self): self.byCommand = {} def tally(self, table, command, tc): # by table then command if (not table in self.byCommand): self.byCommand[table] = {} if (not command in self.byCommand[table]): self.byCommand[table][command] = tableCounts(0, 0, 0, 0) self.byCommand[table][command] += tc def dump(self): print(">>> Table hold times by command") print("%s,%s,%s,%s" % ( "table", "command", "read-held-Time", "write-held-Time")) for t in self.byCommand: for c in self.byCommand[t]: tt = self.byCommand[t][c] if (tt.readHeldTime >= 1000 or tt.writeHeldTime >= 1000): print("%s,%s,%d,%d" % ( t, c, tt.readHeldTime / 1000, tt.writeHeldTime / 1000)) # # TimesByTable -- calculate read+write/wait+hold time for each table # # .tally() - add a single read+write/wait+hold to a table's total # .dump() - dump out each table's tally, using a grandTotal for %age # class TimesByTable: def __init__(self): self.byTable = {} def tally(self, table, tc): if (not table in self.byTable): self.byTable[table] = tableCounts(0, 0, 0, 0) self.byTable[table] += tc def dump(self, grandTotal): grandTotal += tableCounts(1, 1, 1, 1) # no divide by zero errors print(">>> Hold and wait times per table") print("%s,%s,%s,%s,%s,%s,%s" % ( "table", "read-wait-Time", "%-of-Total", "read-held-Time", "write-wait-Time", "%-of-Total", "write-held-Time")) # sort descending by readWaitTime # for t,tc in s.byTable.items(): for t, tc in sorted(self.byTable.items(), key=lambda v: 0 - (v[1].readWaitTime + v[1].writeWaitTime)): print("%s,%d,%d%%,%d,%d,%d%%,%d" % ( t, tc.readWaitTime / 1000, tc.readWaitTime * 100 / grandTotal.readWaitTime, tc.readHeldTime / 1000, tc.writeWaitTime / 1000, tc.writeWaitTime * 100 / grandTotal.writeWaitTime, tc.writeHeldTime / 1000)) # # WaitDelays - how much commands are delayed by wait times # class WaitDelays: def __init__(self): self.maxSlots = 10 self.power = 2 self.delaySlots = [0] * self.maxSlots def tally(self, commandTotal, logDelays, count, command): wait = commandTotal.readWaitTime + commandTotal.writeWaitTime held = commandTotal.readHeldTime + commandTotal.writeHeldTime if (wait < 1000 or wait < held / 10): return if (logDelays): print(count, "commandWait", wait, "commandHeld", held, command) slot = int(math.log(wait / 1000, self.power)) if (slot >= self.maxSlots): slot = self.maxSlots - 1 self.delaySlots[slot] += 1 def dump(self): print(">>> Commands delayed") print("%s,%s" % ("Seconds of delay", "Commands affected")) for x in range(0, self.maxSlots): print("%d,%d" % (self.power ** x, self.delaySlots[x])) def main(): # process args showByCommand = False showDelays = False showWaitVsHeld = False showHisto = False showTally = False spannerInterval = 600 submitOnly = False verboseLogging = False try: opts, args = getopt.getopt(sys.argv[1:], "acdhi:stwv") except getopt.error as msg: print(msg) print("Usage: p4clog.py [ -v ] [ -acdhswt ] [ -i interval ] < log-file") sys.exit(2) for o, a in opts: if o == '-h': showHisto = True elif o == '-s': submitOnly = True elif o == '-i': spannerInterval = int(a) elif o == '-c': showByCommand = True elif o == '-d': showDelays = True elif o == '-t': showTally = True elif o == '-a': showHisto = showByCommand = showDelays = showTally = showWaitVsHeld = True elif o == '-w': showWaitVsHeld = True elif o == '-v': verboseLogging = True # per table counts of held and wait times lineCount = 0 commandCount = 0 pagesIn = 0 pagesOut = 0 firstDate = 0 lastDate = 0 p = logParse() commandTables = {} timesByTable = TimesByTable() timesByCommand = TimesByCommand() waitVsHeld = WaitVsHeld() locksByTime = LocksByTime() waitDelays = WaitDelays() commandTotal = tableCounts(0, 0, 0, 0) grandTotal = tableCounts(0, 0, 0, 0) for line in sys.stdin: lineCount += 1 if (lineCount % 1000000 == 0): print("%s" % lineCount, file=sys.stderr) # match one of the lines p.parse(line) # per table tallying if (submitOnly and p.command != "dm-CommitSubmit"): continue # command count if (p.lineType == "command"): commandCount += 1 if (firstDate == 0): firstDate = p.date lastDate = p.date # pages if (p.lineType == "pages"): pagesIn += p.pgi pagesOut += p.pgo if (verboseLogging): if (p.pgi + p.pgo > 0): print(lineCount, "pages", p.table, p.pgi + p.pgo, p.command, p.args) continue # locks if (p.lineType == "locks"): # for interest sake if (verboseLogging): if (p.lrh > 100): print(lineCount, "readheld", p.table, p.lrh, p.command, p.args) if (p.lwh > 100): print(lineCount, "writeheld", p.table, p.lwh, p.command, p.args) if (p.lrw > 100): print(lineCount, "readwait", p.table, p.lrw, p.command, p.args) if (p.lww > 100): print(lineCount, "writewait", p.table, p.lww, p.command, p.args) # remember count for this table for this command # and add it to the grand total tc = tableCounts(p.lrw, p.lrh, p.lww, p.lwh) commandTables[p.table] = tc commandTotal |= tc grandTotal += tc # tally by table and by command name timesByTable.tally(p.table, tc) timesByCommand.tally(p.table, p.command, tc) # per command tallying if (p.lineType == "end"): if (showHisto): locksByTime.tally(commandTables, p.date) if (showWaitVsHeld): waitVsHeld.tally(commandTables, verboseLogging, lineCount, p.command) if (showDelays): waitDelays.tally(commandTotal, verboseLogging, lineCount, p.command) commandTotal = tableCounts(0, 0, 0, 0) commandTables = {} p.reset() # Dump it all out if (showTally): print(">>> Log file summary") print("Lines,Commands,Pages Read,Pages Written,First Date,Last Date") print("%d,%d,%d,%d,%s,%s" % (lineCount, commandCount, pagesIn, pagesOut, firstDate, lastDate)) if (showDelays): waitDelays.dump() timesByTable.dump(grandTotal) if (showWaitVsHeld): waitVsHeld.dump(timesByTable.byTable) if (showByCommand): timesByCommand.dump() if (showHisto): locksByTime.dump(spannerInterval) main()