#!/usr/bin/env python3 # -*- encoding: UTF8 -*- ############################################################################## # # Copyright (c) 2008,2016 Perforce Software, Inc. All rights reserved. # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions are met: # # 1. Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. # # 2. Redistributions in binary form must reproduce the above copyright # notice, this list of conditions and the following disclaimer in the # documentation and/or other materials provided with the distribution. # # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE # ARE DISCLAIMED. IN NO EVENT SHALL PERFORCE SOFTWARE, INC. BE LIABLE FOR ANY # DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES # (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; # LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND # ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF # THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. # # = Description # # Output SQL queries generated by parsing commands logged in a Perforce # server log file. # The output can be imported directly in any SQL database. # # log2sql populates the following tables: # # +----------------+----------+------+-----+ # | process | # +----------------+----------+------+-----+ # | Field | Type | Null | Key | # +----------------+----------+------+-----+ # | processKey | char(32) | NO | PRI | # | lineNumber | int | NO | PRI | # | startTime | date | NO | | # | endTime | date | YES | | # | computedLapse | float | YES | | # | completedLapse | float | YES | | # | pid | int | NO | | # | user | text | NO | | # | workspace | text | NO | | # | ip | text | NO | | # | app | text | NO | | # | cmd | text | NO | | # | args | text | YES | | # | uCpu | int | YES | | # | sCpu | int | YES | | # | diskIn | int | YES | | # | diskOut | int | YES | | # | ipcIn | int | YES | | # | ipcOut | int | YES | | # | maxRss | int | YES | | # | pageFaults | int | YES | | # | rpcMsgsIn | int | YES | | # | rpcMsgsOut | int | YES | | # | rpcSizeIn | int | YES | | # | rpcSizeOut | int | YES | | # | rpcHimarkFwd | int | YES | | # | rpcHimarkRev | int | YES | | # | rpcSnd | float | YES | | # | rpcRcv | float | YES | | # | running | int | NO | | # | error | text | YES | | # +----------------+----------+------+-----+ # # +----------------+--------------+------+-----+ # | tableUse | # +----------------+--------------+------+-----+ # | Field | Type | Null | Key | # +----------------+--------------+------+-----+ # | processKey | char(32) | NO | PRI | # | lineNumber | int | NO | PRI | # | tableName | varchar(255) | NO | PRI | # | pagesIn | int | YES | | # | pagesOut | int | YES | | # | pagesCached | int | YES | | # | readLocks | int | YES | | # | writeLocks | int | YES | | # | getRows | int | YES | | # | posRows | int | YES | | # | scanRows | int | YES | | # | putRows | int | YES | | # | delRows | int | YES | | # | totalReadWait | int | YES | | # | totalReadHeld | int | YES | | # | totalWriteWait | int | YES | | # | totalWriteHeld | int | YES | | # | maxReadWait | int | YES | | # | maxReadHeld | int | YES | | # | maxWriteWait | int | YES | | # | maxWriteHeld | int | YES | | # +----------------+--------------+------+-----+ # # = Usage # # log2sql.rb -d <database_name> log_file # # = Requirements # # Ruby: http://www.ruby-lang.org # ############################################################################## from __future__ import print_function import re import sys import os import math import argparse import datetime import hashlib import sqlite3 python3 = sys.version_info[0] >= 3 RowsPerTransaction = 1000 # Commit every so many rows if in SQL mode def escapeChar(str): str = str.replace("\\", "\\\\") str = str.replace("\"", "\\\"") return str def parseDateTime(str): # \d+/\d+/\d+ \d+:\d+:\d+ - much faster just to do str.replace() return "\"%s\"" % str.replace("/", "-") def dateAdd(str, seconds): "Add specified seconds to date in string format" str = str.replace('"', '') date = datetime.datetime.strptime(str, "%Y-%m-%d %H:%M:%S") date = date + datetime.timedelta(seconds=seconds) return date.strftime("\"%Y-%m-%d %H:%M:%S\"") def nullValue(str): "For SQL inserts" if str == "NULL": return None return str class Command: processKey = None pid = 0 completed = False # Set when a completed record is found startTime = None endTime = None computedLapse = 0.0 completedLapse = 0.0 user = None workspace = None ip = None app = None name = None args = None uCpu = None sCpu = None diskIn = None diskOut = None ipcIn = None ipcOut = None maxRss = None pageFaults = None rpcMsgsIn = None rpcMsgsOut = None rpcSizeIn = None rpcSizeOut = None rpcHimarkFwd = None rpcHimarkRev = None rpcSnd = None rpcRcv = None running = 0 lineNumber = 0 error = None def __init__(self, processKey, lineNumber, pid, startTime, user, workspace, ip, app, name, args): self.processKey = processKey self.lineNumber = lineNumber self.pid = pid self.startTime = startTime self.user = escapeChar(user) self.workspace = escapeChar(workspace) self.ip = ip self.app = app self.name = name if args: self.args = escapeChar(args) else: self.args = "NULL" self.endTime = self.computedLapse = self.completedLapse = self.uCpu = "NULL" self.sCpu = self.diskIn = self.diskOut = self.ipcIn = self.ipcOut = self.maxRss = "NULL" self.pageFaults = self.rpcMsgsIn = self.rpcMsgsOut = self.rpcSizeOut = "NULL" self.rpcSizeIn = self.rpcHimarkFwd = self.rpcHimarkRev = self.error = "NULL" self.rpcSnd = self.rpcRcv = "NULL" self.running = 0 def updateFrom(self, other): "Update any potentiall null fields - see __init__" for f in ["endTime", "computedLapse", "completedLapse", "uCpu", "sCpu", "diskIn", "diskOut", "ipcIn", "ipcOut", "maxRss", "pageFaults", "rpcMsgsIn", "rpcMsgsOut", "rpcSizeOut", "rpcSizeIn", "rpcHimarkFwd", "rpcHimarkRev", "error", "rpcSnd", "rpcRcv"]: if getattr(other, f) != "NULL" and getattr(self, f) == "NULL": setattr(self, f, getattr(other, f)) def setUsage(self, uCpu, sCpu, diskIn, diskOut, ipcIn, ipcOut, maxRss, pageFaults): self.uCpu = uCpu self.sCpu = sCpu self.diskIn = diskIn self.diskOut = diskOut self.ipcIn = ipcIn self.ipcOut = ipcOut self.maxRss = maxRss self.pageFaults = pageFaults def setRpc(self, rpcMsgsIn, rpcMsgsOut, rpcSizeIn, rpcSizeOut, rpcHimarkFwd, rpcHimarkRev, rpcSnd="NULL", rpcRcv="NULL"): self.rpcMsgsIn = rpcMsgsIn self.rpcMsgsOut = rpcMsgsOut self.rpcSizeIn = rpcSizeIn self.rpcSizeOut = rpcSizeOut self.rpcHimarkFwd = rpcHimarkFwd self.rpcHimarkRev = rpcHimarkRev self.rpcSnd = rpcSnd self.rpcRcv = rpcRcv class Table: processKey = None lineNumber = 0 tableName = None pagesIn = None pagesOut = None pagesCached = None readLocks = None writeLocks = None getRows = None posRows = None scanRows = None putRows = None delRow = None totalReadWait = None totalReadHeld = None totalWriteWait = None totalWriteHeld = None maxReadWait = None maxReadHeld = None maxWriteWait = None maxWriteHeld = None def __init__(self, processKey, lineNumber, tableName): self.processKey = processKey self.lineNumber = lineNumber self.tableName = tableName self.pagesIn = self.pagesOut = self.pagesCached = "NULL" self.readLocks = self.writeLocks = self.getRows = self.posRows = self.scanRows = "NULL" self.putRows = self.delRow = self.totalReadWait = self.totalReadHeld = "NULL" self.totalWriteWait = self.totalWriteHeld = self.maxReadWait = "NULL" self.maxReadHeld = self.maxWriteWait = self.maxWriteHeld = "NULL" def setPages(self, pagesIn, pagesOut, pagesCached): self.pagesIn = pagesIn self.pagesOut = pagesOut self.pagesCached = pagesCached def setLocksRows(self, readLocks, writeLocks, getRows, posRows, scanRows, putRows, delRow): self.readLocks = readLocks self.writeLocks = writeLocks self.getRows = getRows self.posRows = posRows self.scanRows = scanRows self.putRows = putRows self.delRow = delRow def setTotalLock(self, totalReadWait, totalReadHeld, totalWriteWait, totalWriteHeld): self.totalReadWait = totalReadWait self.totalReadHeld = totalReadHeld self.totalWriteWait = totalWriteWait self.totalWriteHeld = totalWriteHeld def setMaxLock(self, maxReadWait, maxReadHeld, maxWriteWait, maxWriteHeld): self.maxReadWait = maxReadWait self.maxReadHeld = maxReadHeld self.maxWriteWait = maxWriteWait self.maxWriteHeld = maxWriteHeld class Block: def __init__(self): self.lines = [] self.lineNo = 0 def addLine(self, line, lineNo): self.lines.append(line) # Only at start of block if not self.lineNo: self.lineNo = lineNo class Log2sql: logname = None dbname = None logfile = None ckpSize = None readBytes = None mod = None cmds = None lineNo = 0 def __init__(self, options, instream=None, outstream=None): self.dbname = options.dbname self.options = options self.outstream = outstream # For testing if outstream is None: if options.output: if options.output == "-": self.outstream = sys.stdout else: self.outstream = open(options.output, "w") else: self.outstream = None if instream is None: self.logfile = open(options.logfile[0], "r") self.ckpSize = os.stat(options.logfile[0]).st_size else: self.logfile = instream self.ckpSize = 500 if self.options.sql: self.conn = sqlite3.connect("%s.db" % self.dbname) self.conn.text_factory = str self.cursor = self.conn.cursor() self.cursor.execute("PRAGMA synchronous = OFF") self.cursor.execute("PRAGMA journal_mode = MEMORY") self.cmds = {} self.tables = {} self.readBytes = 0.0 self.mod = 10.0 self.running = 0 self.rowCount = 0 self.db_create_database() self.db_create_process() self.db_create_tableUse() query = "BEGIN TRANSACTION;" self.output(query) if self.options.sql: self.conn.execute(query) self.output("Processing %s: 0%%" % self.logname, file=sys.stderr) def outputRequired(self, file=sys.stdout): return self.outstream or file == sys.stderr def output(self, text, file=sys.stdout): if (file == sys.stderr): print(text, file=file) elif self.outstream: self.outstream.write("%s\n" % text) def terminate(self): self.flush_output() self.db_commit(True) if self.options.sql: self.conn.commit() self.conn.close() def readLogfile(self): line = self.logfile.readline() if line: self.lineNo += 1 # TODO # if line.respond_to?(:encode) # line = line.encode("utf-8", 'binary', :invalid => :replace, :undef => :replace) #line = line.decode('utf-8', errors='ignore') self.readBytes += len(line) if math.floor(((self.readBytes / self.ckpSize) * 100.0) / self.mod) == 1.0: self.output("...%d%%" % self.mod, file=sys.stderr) # $stderr.flush self.mod += 10 return line def getLineNumber(self): return self.lineNo def db_commit_updates(self): self.rowCount += 1 if self.rowCount % RowsPerTransaction == 0: query = "COMMIT;" if self.outputRequired(): self.output(query) if self.options.sql: self.conn.commit() query = "BEGIN TRANSACTION;" if self.outputRequired(): self.output(query) if self.options.sql: self.conn.execute(query) def db_commit(self, state): if (state): query = "COMMIT;" if self.outputRequired(): self.output(query) if self.options.sql: self.conn.commit() query = "BEGIN TRANSACTION;" if self.outputRequired(): self.output(query) if self.options.sql: self.conn.execute(query) else: query = "SET autocommit=0;" if self.outputRequired(): self.output(query) def db_create_database(self): query = "CREATE DATABASE IF NOT EXISTS " + self.dbname + ";" if self.outputRequired(): self.output(query) query = "USE " + self.dbname + ";" if self.outputRequired(): self.output(query) def db_create_process(self): query = "DROP TABLE IF EXISTS process;" if self.outputRequired(): self.output(query) query = "CREATE TABLE process (processkey CHAR(32) NOT NULL, lineNumber INT NOT NULL, pid INT NOT NULL, " \ "startTime DATETIME NOT NULL,endTime DATETIME NULL, computedLapse FLOAT NULL,completedLapse FLOAT NULL, " \ "user TEXT NOT NULL, workspace TEXT NOT NULL, ip TEXT NOT NULL, app TEXT NOT NULL, cmd TEXT NOT NULL, " \ "args TEXT NULL, uCpu INT NULL, sCpu INT NULL, diskIn INT NULL, diskOut INT NULL, ipcIn INT NULL, " \ "ipcOut INT NULL, maxRss INT NULL, pageFaults INT NULL, rpcMsgsIn INT NULL, rpcMsgsOut INT NULL, " \ "rpcSizeIn INT NULL, rpcSizeOut INT NULL, rpcHimarkFwd INT NULL, rpcHimarkRev INT NULL, " \ "rpcSnd FLOAT NULL, rpcRcv FLOAT NULL, running INT NULL, " \ "error TEXT NULL, PRIMARY KEY (processkey, lineNumber));" if self.outputRequired(): self.output(query) if self.options.sql: try: self.cursor.execute(query) except: pass def db_create_tableUse(self): query = "DROP TABLE IF EXISTS tableUse;" if self.outputRequired(): self.output(query) if self.options.sql: self.cursor.execute(query) query = "CREATE TABLE tableUse (processkey CHAR(32) NOT NULL, lineNumber INT NOT NULL, " \ "tableName VARCHAR(255) NOT NULL, pagesIn INT NULL, pagesOut INT NULL, pagesCached INT NULL, " \ "readLocks INT NULL, writeLocks INT NULL, getRows INT NULL, posRows INT NULL, scanRows INT NULL, " \ "putRows int NULL, delRows INT NULL, totalReadWait INT NULL, totalReadHeld INT NULL, " \ "totalWriteWait INT NULL, totalWriteHeld INT NULL, maxReadWait INT NULL, maxReadHeld INT NULL, " \ "maxWriteWait INT NULL, maxWriteHeld INT NULL, PRIMARY KEY (processkey, lineNumber, tableName));" if self.outputRequired(): self.output(query) if self.options.sql: try: self.cursor.execute(query) except: pass def addCommand(self, cmd, vtrack): cmd.running = self.running # If we are re-using pid then process previous command # Or process 'rmt-FileFetch' because they don't have 'completed' records if cmd.pid in self.cmds: if cmd.processKey != self.cmds[cmd.pid].processKey: self.sql_process_insert(self.cmds[cmd.pid]) self.cmds[cmd.pid] = cmd # replace elif cmd.name == 'rmt-FileFetch' and not vtrack: self.sql_process_insert(self.cmds[cmd.pid]) self.cmds[cmd.pid] = cmd # replace else: self.cmds[cmd.pid].updateFrom(cmd) else: self.cmds[cmd.pid] = cmd self.running += 1 self.processCompletedCommands() def processCompletedCommands(self): """Remove any commands with completed state - should only be called by addCommand when we are adding a new command - at that point we know there are no new records to come for any previous ones as even track output will have been processed""" for pid in self.cmds.keys(): if self.cmds[pid].completed: self.sql_process_insert(self.cmds[pid]) del self.cmds[pid] self.running -= 1 def updateComputedTime(self, pid, computedLapse): if pid in self.cmds: # sum all the compute values of a same command if self.cmds[pid].computedLapse == "NULL": sum = 0.0 else: sum = float(self.cmds[pid].computedLapse) sum += float(computedLapse) self.cmds[pid].computedLapse = str(sum) def updateCompletionTime(self, pid, endTime, completedLapse): if pid in self.cmds: self.cmds[pid].completed = True self.cmds[pid].endTime = endTime self.cmds[pid].completedLapse = str(float(completedLapse)) def processTrackRecords(self, cmd, lines): self.tables = {} tableName = None rpcMsgsIn = rpcMsgsOut = rpcSizeIn = rpcSizeOut = rpcSnd = rpcRcv = 0 rpcHimarkFwd = rpcHimarRev = uCpu = 0 sCpu = diskIn = diskOut = ipcIn = ipcOut = maxRss = pageFaults = 0 readLocks = writeLocks = getRows = posRows = scanRows = 0 putRows = delRow = totalReadWait = totalReadHeld = 0 totalWriteWait = totalWriteHeld = maxReadWait = 0 maxReadHeld = maxWriteWait = maxWriteHeld = 0 # Use line number from original cmd if appropriate if cmd.pid in self.cmds and cmd.processKey == self.cmds[cmd.pid].processKey: cmd.lineNumber = self.cmds[cmd.pid].lineNumber for line in lines: if not RE_TRACK.match(line): break gotMatch = False match = RE_TRACK_LAPSE.match(line) if match: gotMatch = True cmd.completedLapse = match.group(1) if not gotMatch: match = RE_FAILED_AUTH.match(line) if match: cmd.error = "\"" + match.group(1) + "\"" if not gotMatch: match = RE_KILLED_BY.match(line) if match: gotMatch = True cmd.error = "\"" + match.group(1) + "\"" if not gotMatch: match = RE_EXITED.match(line) if match: gotMatch = True cmd.error = "\"" + match.group(1) + "\"" if not gotMatch: match = RE_TRACK_USAGE.match(line) if match: gotMatch = True cmd.setUsage(match.group(1), match.group(2), match.group(3), match.group(4), match.group(5), match.group(6), match.group(7), match.group(8)) if not gotMatch: match = RE_TRACK_RPC2.match(line) if match: gotMatch = True cmd.setRpc(match.group(1), match.group(2), match.group(3), match.group(4), match.group(5), match.group(6), match.group(7), match.group(8)) if not gotMatch: match = RE_TRACK_RPC.match(line) if match: gotMatch = True cmd.setRpc(match.group(1), match.group(2), match.group(3), match.group(4), match.group(5), match.group(6)) if not gotMatch: match = RE_TRACK_TABLE.match(line) if match: gotMatch = True tableName = match.group(1) self.tables[tableName] = Table(cmd.processKey, cmd.lineNumber, tableName) if not gotMatch: match = RE_TRACK_PAGES.match(line) if match: gotMatch = True if tableName in self.tables: self.tables[tableName].setPages(match.group(1), match.group(2), match.group(3)) if not gotMatch: match = RE_TRACK_LOCKS_ROWS.match(line) if match: gotMatch = True if tableName in self.tables: self.tables[tableName].setLocksRows(match.group(1), match.group(2), match.group(3), match.group(4), match.group(5), match.group(6), match.group(7)) if not gotMatch: match = RE_TRACK_TOTAL_LOCK.match(line) if match: gotMatch = True if tableName in self.tables: self.tables[tableName].setTotalLock(match.group(1), match.group(2), match.group(3), match.group(4)) # The max is only reported if there is more than one # lock taken on the table if (self.tables[tableName].readLocks == "NULL" or int(self.tables[tableName].readLocks) < 2) and \ (self.tables[tableName].writeLocks == "NULL" or int(self.tables[tableName].writeLocks) < 2): self.tables[tableName].setMaxLock(match.group(1), match.group(2), match.group(3), match.group(4)) if not gotMatch: match = RE_TRACK_MAX_LOCK.match(line) if match: gotMatch = True if tableName in self.tables: self.tables[tableName].setMaxLock(match.group(1), match.group(2), match.group(3), match.group(4)) if cmd.completedLapse != "NULL": cmd.endTime = dateAdd(cmd.startTime, int(cmd.completedLapse)) else: cmd.endTime = cmd.startTime self.addCommand(cmd, True) try: if python3: for k, v in self.tables.items(): self.sql_tableUse_insert(v) else: for table in self.tables.values(): self.sql_tableUse_insert(table) except Exception as e: print("%s, lineNo %d, cmd lineNo %d, %s, %s, %s" % (str(e), self.lineNo, cmd.lineNumber, cmd.processKey, cmd.name, cmd.args)) def flush_output(self): if python3: for p, v in self.cmds.items(): self.sql_process_insert(v) else: for p, v in self.cmds.iteritems(): self.sql_process_insert(v) self.output("", file=sys.stderr) def sql_process_insert(self, cmd): if self.outputRequired(): query = 'INSERT IGNORE INTO process VALUES ("%s",%d,%d,%s,%s,%s,%s,"%s","%s","%s","%s","%s","%s",%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%d,%s);' % \ (cmd.processKey, cmd.lineNumber, cmd.pid, cmd.startTime, cmd.endTime, cmd.computedLapse, cmd.completedLapse, cmd.user, cmd.workspace, cmd.ip, cmd.app, cmd.name, cmd.args, cmd.rpcMsgsIn, cmd.rpcMsgsOut, cmd.rpcSizeIn, cmd.rpcSizeOut, cmd.rpcHimarkFwd, cmd.rpcHimarkRev, cmd.rpcSnd, cmd.rpcRcv, cmd.uCpu, cmd.sCpu, cmd.diskIn, cmd.diskOut, cmd.ipcIn, cmd.ipcOut, cmd.maxRss, cmd.pageFaults, cmd.running, cmd.error) self.output(query) if self.options.sql: try: self.cursor.execute('INSERT INTO process VALUES (?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?)', (cmd.processKey, cmd.lineNumber, cmd.pid, cmd.startTime, nullValue(cmd.endTime), nullValue(cmd.computedLapse), nullValue(cmd.completedLapse), nullValue(cmd.user), nullValue(cmd.workspace), nullValue(cmd.ip), nullValue(cmd.app), nullValue(cmd.name), nullValue(cmd.args), nullValue(cmd.rpcMsgsIn), nullValue(cmd.rpcMsgsOut), nullValue(cmd.rpcSizeIn), nullValue(cmd.rpcSizeOut), nullValue(cmd.rpcHimarkFwd), nullValue(cmd.rpcHimarkRev), nullValue(cmd.rpcSnd), nullValue(cmd.rpcRcv), nullValue(cmd.uCpu), nullValue(cmd.sCpu), nullValue(cmd.diskIn), nullValue(cmd.diskOut), nullValue(cmd.ipcIn), nullValue(cmd.ipcOut), nullValue(cmd.maxRss), nullValue(cmd.pageFaults), nullValue(cmd.running), nullValue(cmd.error))) except Exception as e: raise Exception("%s: %s, %d, %d, %s" % (str(e), cmd.processKey, cmd.lineNumber, cmd.pid, cmd.name)) self.db_commit_updates() def sql_tableUse_insert(self, tab): if self.outputRequired(): query = 'INSERT IGNORE INTO tableUse VALUES ("%s",%d,"%s",%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,' \ '%s,%s,%s,%s);' % \ (tab.processKey, tab.lineNumber, tab.tableName, tab.pagesIn, tab.pagesOut, tab.pagesCached, tab.readLocks, tab.writeLocks, tab.getRows, tab.posRows, tab.scanRows, tab.putRows, tab.delRow, tab.totalReadWait, tab.totalReadHeld, tab.totalWriteWait, tab.totalWriteHeld, tab.maxReadWait, tab.maxReadHeld, tab.maxWriteWait, tab.maxWriteHeld) self.output(query) if self.options.sql: try: self.cursor.execute('INSERT INTO tableUse VALUES (?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?)', (tab.processKey, tab.lineNumber, tab.tableName, nullValue(tab.pagesIn), nullValue(tab.pagesOut), nullValue(tab.pagesCached), nullValue(tab.readLocks), nullValue(tab.writeLocks), nullValue(tab.getRows), nullValue(tab.posRows), nullValue(tab.scanRows), nullValue(tab.putRows), nullValue(tab.delRow), nullValue(tab.totalReadWait), nullValue(tab.totalReadHeld), nullValue(tab.totalWriteWait), nullValue(tab.totalWriteHeld), nullValue(tab.maxReadWait), nullValue(tab.maxReadHeld), nullValue(tab.maxWriteWait), nullValue(tab.maxWriteHeld))) except Exception as e: raise Exception("%s: %s, %d, %s" % (str(e), tab.processKey, tab.lineNumber, tab.tableName)) self.db_commit_updates() def processInfoBlock(self, block): cmd = None i = 0 # First line of block is info line - process the rest for line in block.lines[1:]: i += 1 # Check for track lines and once we have found one, process them all and finish if cmd: match = RE_TRACK.match(line) if match: self.processTrackRecords(cmd, block.lines[i:]) break # Block has been processed match = RE_CMD_NOARG.match(line) if not match: match = RE_CMD.match(line) if match: pid = int(match.group(2)) startTime = parseDateTime(match.group(1)) user = match.group(3) workspace = match.group(4) ip = match.group(5) # following gsub required due to a 2009.2 P4V bug app = match.group(6).replace("\x00", "/") name = match.group(7) cmdArgs = None if len(match.groups()) == 8: cmdArgs = match.group(8) # Strip Swarm/Git Fusion commands with lots of json smatch = re.search(RE_JSON_CMDARGS, cmdArgs) if smatch: cmdArgs = smatch.group(1) m = hashlib.md5() line = re.sub(r'[^\x00-\x7F]', '?', line) # Do this so we can compare results with ruby version if python3: m.update(line.encode()) else: m.update(line) processKey = m.hexdigest() cmd = Command(processKey, block.lineNo, pid, startTime, user, workspace, ip, app, name, cmdArgs) self.addCommand(cmd, False) continue # Pattern matching a completed line match = RE_COMPLETED.match(line) if match: pid = int(match.group(2)) endTime = parseDateTime(match.group(1)) completedLapse = match.group(3) self.updateCompletionTime(pid, endTime, completedLapse) continue # Pattern matching a compute line match = RE_COMPUTE.match(line) if match: pid = int(match.group(2)) computedLapse = float(match.group(3)) self.updateComputedTime(pid, computedLapse) continue def blockEnd(self, line): "Blank line or one of terminators" terminators = ["Perforce server info:", "Perforce server error:" "locks acquired by blocking after", "Rpc himark:"] if line == "" or line == "\n" or line == "\r\n": return True for t in terminators: if line[:len(t)] == t: return True return False def blockInfo(self, line): t = "Perforce server info:" if line[:len(t)] == t: return True return False def processLog(self): block = Block() line = self.readLogfile() while line: if self.blockEnd(line): if block.lines: if self.blockInfo(block.lines[0]): self.processInfoBlock(block) # TODO: process errors etc block = Block() block.addLine(line, self.lineNo) else: block.addLine(line, self.lineNo) line = self.readLogfile() if block.lines: if self.blockInfo(block.lines[0]): self.processInfoBlock(block) self.terminate() ###################### # START OF MAIN SCRIPT ###################### RE_INFO = re.compile('^Perforce server info:') RE_EMPTY = re.compile('^\s*$') RE_JSON_CMDARGS = re.compile('^(.*) \{.*\}$') RE_CMD = re.compile('^\t(\d+/\d+/\d+ \d+:\d+:\d+) pid (\d+) (.*)@(.*) (.*) \[(.*)\] \'(\w+-\w+) (.*)\'.*') RE_CMD_NOARG = re.compile('^\t(\d+/\d+/\d+ \d+:\d+:\d+) pid (\d+) (.*)@(.*) (.*) \[(.*)\] \'(\w+-\w+)\'.*') RE_COMPUTE = re.compile('^\t(\d+/\d+/\d+ \d+:\d+:\d+) pid (\d+) compute end ([0-9]+|[0-9]+\.[0-9]+|\.[0-9]+)s.*') RE_COMPLETED = re.compile('^\t(\d+/\d+/\d+ \d+:\d+:\d+) pid (\d+) completed ([0-9]+|[0-9]+\.[0-9]+|\.[0-9]+)s.*') RE_TRACK = re.compile('^---|^locks acquired by blocking after 3 non-blocking attempts') RE_TRACK_LAPSE = re.compile('^--- lapse (\d+)') RE_TRACK_RPC = re.compile('^--- rpc msgs/size in\+out (\d+)\+(\d+)/(\d+)mb\+(\d+)mb himarks (\d+)/(\d+)') RE_TRACK_RPC2 = re.compile('^--- rpc msgs/size in\+out (\d+)\+(\d+)/(\d+)mb\+(\d+)mb himarks (\d+)/(\d+) snd/rcv ([0-9]+|[0-9]+\.[0-9]+|\.[0-9]+)s/([0-9]+|[0-9]+\.[0-9]+|\.[0-9]+)s') RE_TRACK_USAGE = re.compile('^--- usage (\d+)\+(\d+)us (\d+)\+(\d+)io (\d+)\+(\d+)net (\d+)k (\d+)pf') RE_FAILED_AUTH = re.compile('^--- (failed authentication check)') RE_KILLED_BY = re.compile('^--- (killed by .*)') RE_EXITED = re.compile('^--- (exited on fatal server error)') RE_TRACK_TABLE = re.compile('^--- db.([a-zA-Z]*)') RE_TRACK_PAGES = re.compile('^--- pages in\+out\+cached (\d+)\+(\d+)\+(\d+)') RE_TRACK_LOCKS_ROWS = re.compile( '^--- locks read/write (\d+)/(\d+) rows get\+pos\+scan put\+del (\d+)\+(\d+)\+(\d+) (\d+)\+(\d+)') RE_TRACK_TOTAL_LOCK = re.compile('^--- total lock wait\+held read/write (\d+)ms\+(\d+)ms/(\d+)ms\+(\d+)ms') RE_TRACK_MAX_LOCK = re.compile( '^--- max lock wait\+held read/write (\d+)ms\+(\d+)ms/(\d+)ms\+(\d+)ms|--- locks wait+held read/write (\d+)ms\+(\d+)ms/(\d+)ms\+(\d+)ms') def main(): parser = argparse.ArgumentParser(add_help=True) parser.add_argument('logfile', nargs=1, help='log file to process') parser.add_argument('-d', '--dbname', help="Database name to use", default=None) parser.add_argument('-o', '--output', help="Name of file to print SQL to (if not specified then no output)", default=None) parser.add_argument('-s', '--sql', help="Use local SQL Lite database", action='store_true', default=False) try: options = parser.parse_args() except: parser.print_help() sys.exit(1) if not options.output and not options.sql: print("Please specify either an output file or -s/--sql") parser.print_help() sys.exit(1) log2sql = Log2sql(options) log2sql.processLog() if __name__ == '__main__': main()
# | Change | User | Description | Committed | |
---|---|---|---|---|---|
#5 | 23765 | Robert Cowham | Moved things down one level to psla dir to make it easier to see what belongs | ||
#4 | 23719 | Robert Cowham | Log warnings for insert exceptions | ||
#3 | 23683 | Robert Cowham | Bring in latest changes - performance improvements | ||
#2 | 23636 | Robert Cowham |
Merge in latests changes to log2sql.py - much faster - better at detecting completed commands and out of order vtrack - full test harness |
||
#1 | 22465 | Robert Cowham |
Added tests for r99.2 created depot (no filesize/digest stored) Fixed problem with preview failing comparison Add --p4d option to tests to use different p4d version |
||
//guest/robert_cowham/perforce/utils/log_analysis/log2sql.py | |||||
#22 | 21796 | Robert Cowham | Handle rmt-FileFetch records which may be duplicates as they don't have 'completed' entries. | ||
#21 | 21780 | Robert Cowham |
Don't drop tables when creating sqlite db Performance tweaks after profiling - particularly date parsing. |
||
#20 | 21713 | Robert Cowham | Ignore temporary error inserting into trackuse table | ||
#19 | 21712 | Robert Cowham | Check for correct line number when looking to update for track entries | ||
#18 | 21708 | Robert Cowham |
Tweak parameters to improve error messages. Make output to stdout optional |
||
#17 | 21705 | Robert Cowham |
Minor refactoring for clarity. Process all completed records as we go. |
||
#16 | 21702 | Robert Cowham |
Rework algorithm to process blocks at a time, and to avoid the SQL update statements. Risk of increased memory vs improved speed. |
||
#15 | 21697 | Robert Cowham | Parse rcpSnd/rpcRcv and add to database. | ||
#14 | 21694 | Robert Cowham | Handle null values properly | ||
#13 | 21672 | Robert Cowham | Improve performance when inserting rows | ||
#12 | 21660 | Robert Cowham | Handle utf8 in python2 strings | ||
#11 | 21656 | Robert Cowham | Avoid SQL quoting issues with alternative insert statement. | ||
#10 | 21643 | Robert Cowham | Handle git-fusion entries (strip the json) - for --sql option | ||
#9 | 21402 | Robert Cowham |
Filter out swarm commands Test for db content in a basic fashion. |
||
#8 | 21393 | Robert Cowham |
Add a swarm test - large record. Improve performance by skipping easy matches early. |
||
#7 | 21378 | Robert Cowham | Batch updates and make SQL date arithmetic work with SQLite | ||
#6 | 21374 | Robert Cowham | Write to sqllite db | ||
#5 | 21368 | Robert Cowham | Make tests work under python3 | ||
#4 | 21367 | Robert Cowham | Restructured for unit tests, and first unit tests. | ||
#3 | 21366 | Robert Cowham | Fix problems with NULL values and int | ||
#2 | 21365 | Robert Cowham | Equivalent of Ruby version on smaller sets | ||
#1 | 21361 | Robert Cowham | Sort of works... |