#!/usr/bin/env python3 # -*- encoding: UTF8 -*- ############################################################################## # # Copyright (c) 2008,2016 Perforce Software, Inc. All rights reserved. # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions are met: # # 1. Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. # # 2. Redistributions in binary form must reproduce the above copyright # notice, this list of conditions and the following disclaimer in the # documentation and/or other materials provided with the distribution. # # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE # ARE DISCLAIMED. IN NO EVENT SHALL PERFORCE SOFTWARE, INC. BE LIABLE FOR ANY # DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES # (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; # LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND # ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF # THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. # # = Description # # Output SQL queries generated by parsing commands logged in a Perforce # server log file. # The output can be imported directly in any SQL database. # # log2sql populates the following tables: # # +----------------+----------+------+-----+ # | process | # +----------------+----------+------+-----+ # | Field | Type | Null | Key | # +----------------+----------+------+-----+ # | processKey | char(32) | NO | PRI | # | lineNumber | int | NO | PRI | # | startTime | date | NO | | # | endTime | date | YES | | # | computedLapse | float | YES | | # | completedLapse | float | YES | | # | pid | int | NO | | # | user | text | NO | | # | workspace | text | NO | | # | ip | text | NO | | # | app | text | NO | | # | cmd | text | NO | | # | args | text | YES | | # | uCpu | int | YES | | # | sCpu | int | YES | | # | diskIn | int | YES | | # | diskOut | int | YES | | # | ipcIn | int | YES | | # | ipcOut | int | YES | | # | maxRss | int | YES | | # | pageFaults | int | YES | | # | rpcMsgsIn | int | YES | | # | rpcMsgsOut | int | YES | | # | rpcSizeIn | int | YES | | # | rpcSizeOut | int | YES | | # | rpcHimarkFwd | int | YES | | # | rpcHimarkRev | int | YES | | # | running | int | NO | | # | error | text | YES | | # +----------------+----------+------+-----+ # # +----------------+--------------+------+-----+ # | tableUse | # +----------------+--------------+------+-----+ # | Field | Type | Null | Key | # +----------------+--------------+------+-----+ # | processKey | char(32) | NO | PRI | # | lineNumber | int | NO | PRI | # | tableName | varchar(255) | NO | PRI | # | pagesIn | int | YES | | # | pagesOut | int | YES | | # | pagesCached | int | YES | | # | readLocks | int | YES | | # | writeLocks | int | YES | | # | getRows | int | YES | | # | posRows | int | YES | | # | scanRows | int | YES | | # | putRows | int | YES | | # | delRows | int | YES | | # | totalReadWait | int | YES | | # | totalReadHeld | int | YES | | # | totalWriteWait | int | YES | | # | totalWriteHeld | int | YES | | # | maxReadWait | int | YES | | # | maxReadHeld | int | YES | | # | maxWriteWait | int | YES | | # | maxWriteHeld | int | YES | | # +----------------+--------------+------+-----+ # # = Usage # # log2sql.rb -d <database_name> log_file # # = Requirements # # Ruby: http://www.ruby-lang.org # ############################################################################## from __future__ import print_function import re import sys import os import math import argparse import fileinput import datetime import hashlib import sqlite3 from collections import OrderedDict python3 = sys.version_info[0] >= 3 RowsPerTransaction = 1000 # Commit every so many rows if in SQL mode def escapeChar(str): str = str.replace("\\", "\\\\") str = str.replace("\"", "\\\"") return str def parseDateTime(str): # \d+/\d+/\d+ \d+:\d+:\d+ date = datetime.datetime.strptime(str, "%Y/%m/%d %H:%M:%S") return date.strftime("\"%Y-%m-%d %H:%M:%S\"") def nullValue(str): if str == "NULL": return None return str class Command: processKey = None pid = 0 startTime = None endTime = None computedLapse = 0.0 completedLapse = 0.0 user = None workspace = None ip = None app = None name = None args = None uCpu = None sCpu = None diskIn = None diskOut = None ipcIn = None ipcOut = None maxRss = None pageFaults = None rpcMsgsIn = None rpcMsgsOut = None rpcSizeIn = None rpcSizeOut = None rpcHimarkFwd = None rpcHimarkRev = None running = 0 lineNumber = 0 error = None def __init__(self, processKey, lineNumber, pid, startTime, user, workspace, ip, app, name, args): self.processKey = processKey self.lineNumber = lineNumber self.pid = pid self.startTime = startTime self.user = escapeChar(user) self.workspace = escapeChar(workspace) self.ip = ip self.app = app self.name = name if args: self.args = escapeChar(args) else: self.args = "NULL" self.endTime = self.computedLapse = self.completedLapse = self.uCpu = "NULL" self.sCpu = self.diskIn = self.diskOut = self.ipcIn = self.ipcOut = self.maxRss = "NULL" self.pageFaults = self.rpcMsgsIn = self.rpcMsgsOut = self.rpcSizeOut = "NULL" self.rpcSizeIn = self.rpcHimarkFwd = self.rpcHimarkRev = self.error = "NULL" self.running = 0 def setUsage(self, uCpu, sCpu, diskIn, diskOut, ipcIn, ipcOut, maxRss, pageFaults): self.uCpu = uCpu self.sCpu = sCpu self.diskIn = diskIn self.diskOut = diskOut self.ipcIn = ipcIn self.ipcOut = ipcOut self.maxRss = maxRss self.pageFaults = pageFaults def setRpc(self, rpcMsgsIn, rpcMsgsOut, rpcSizeIn, rpcSizeOut, rpcHimarkFwd, rpcHimarkRev): self.rpcMsgsIn = rpcMsgsIn self.rpcMsgsOut = rpcMsgsOut self.rpcSizeIn = rpcSizeIn self.rpcSizeOut = rpcSizeOut self.rpcHimarkFwd = rpcHimarkFwd self.rpcHimarkRev = rpcHimarkRev class Table: processKey = None lineNumber = 0 tableName = None pagesIn = None pagesOut = None pagesCached = None readLocks = None writeLocks = None getRows = None posRows = None scanRows = None putRows = None delRow = None totalReadWait = None totalReadHeld = None totalWriteWait = None totalWriteHeld = None maxReadWait = None maxReadHeld = None maxWriteWait = None maxWriteHeld = None def __init__(self, processKey, lineNumber, tableName): self.processKey = processKey self.lineNumber = lineNumber self.tableName = tableName self.pagesIn = self.pagesOut = self.pagesCached = "NULL" self.readLocks = self.writeLocks = self.getRows = self.posRows = self.scanRows = "NULL" self.putRows = self.delRow = self.totalReadWait = self.totalReadHeld = "NULL" self.totalWriteWait = self.totalWriteHeld = self.maxReadWait = "NULL" self.maxReadHeld = self.maxWriteWait = self.maxWriteHeld = "NULL" def setPages(self, pagesIn, pagesOut, pagesCached): self.pagesIn = pagesIn self.pagesOut = pagesOut self.pagesCached = pagesCached def setLocksRows(self, readLocks, writeLocks, getRows, posRows, scanRows, putRows, delRow): self.readLocks = readLocks self.writeLocks = writeLocks self.getRows = getRows self.posRows = posRows self.scanRows = scanRows self.putRows = putRows self.delRow = delRow def setTotalLock(self, totalReadWait, totalReadHeld, totalWriteWait, totalWriteHeld): self.totalReadWait = totalReadWait self.totalReadHeld = totalReadHeld self.totalWriteWait = totalWriteWait self.totalWriteHeld = totalWriteHeld def setMaxLock(self, maxReadWait, maxReadHeld, maxWriteWait, maxWriteHeld): self.maxReadWait = maxReadWait self.maxReadHeld = maxReadHeld self.maxWriteWait = maxWriteWait self.maxWriteHeld = maxWriteHeld class FifoList: max = None list = None def __init__(self, max): self.list = [] self.max = max def add(self, cmd): if (len(self.list) + 1) > self.max: del self.list[0] self.list.append(cmd) def remove(self, cmd): for i, val in enumerate(self.list): if (self.list[i].processKey == cmd.processKey): del self.list[i] break def getLineNumber(self, cmd): lineNumber = 0 for p in self.list: if (p.processKey == cmd.processKey): lineNumber = p.lineNumber break return lineNumber class Log2sql: logname = None dbname = None logfile = None ckpSize = None readBytes = None mod = None cmds = None lastCompletedCmds = None lineNo = 0 def __init__(self, options, instream=None, outstream=None): self.logname = options.logname self.dbname = options.dbname self.options = options self.outstream = outstream # For testing if outstream is None: self.outstream = sys.stdout if instream is None: self.logfile = open(self.logname, "r") self.ckpSize = os.stat(self.logname).st_size else: self.logfile = instream self.ckpSize = 500 if self.options.sql: self.conn = sqlite3.connect("%s.db" % self.dbname) self.conn.text_factory = str self.cursor = self.conn.cursor() self.cursor.execute("PRAGMA synchronous = OFF") self.cursor.execute("PRAGMA journal_mode = MEMORY") self.cmds = {} self.tables = {} self.lastCompletedCmds = FifoList(1000) self.readBytes = 0.0 self.mod = 10.0 self.running = 0 self.rowCount = 0 self.db_create_database() self.db_create_process() self.db_create_tableUse() #self.db_commit(False) # if self.options.sql: # if self.options.sql: # self.conn.isolation_level = 'DEFERRED' query = "BEGIN TRANSACTION;" self.output(query) if self.options.sql: self.conn.execute(query) self.output("Processing %s: 0%%" % self.logname, file=sys.stderr) def output(self, text, file=sys.stdout): if (file == sys.stderr): print(text, file=file) # elif not self.options.sql: else: self.outstream.write("%s\n" % text) def terminate(self): self.flush_output() self.db_commit(True) if self.options.sql: self.conn.commit() self.conn.close() def readLogfile(self): line = self.logfile.readline() if line: self.lineNo += 1 # TODO # if line.respond_to?(:encode) # line = line.encode("utf-8", 'binary', :invalid => :replace, :undef => :replace) #line = line.decode('utf-8', errors='ignore') self.readBytes += len(line) if math.floor(((self.readBytes / self.ckpSize) * 100.0) / self.mod) == 1.0: self.output("...%d%%" % self.mod, file=sys.stderr) # $stderr.flush self.mod += 10 return line def getLineNumber(self): return self.lineNo def db_commit_updates(self): self.rowCount += 1 if self.rowCount % RowsPerTransaction == 0: query = "COMMIT;" self.output(query) if self.options.sql: self.conn.commit() query = "BEGIN TRANSACTION;" self.output(query) if self.options.sql: self.conn.execute(query) def db_commit(self, state): if (state): query = "COMMIT;" self.output(query) if self.options.sql: self.conn.commit() query = "BEGIN TRANSACTION;" self.output(query) if self.options.sql: self.conn.execute(query) else: query = "SET autocommit=0;" self.output(query) def db_create_database(self): query = "CREATE DATABASE IF NOT EXISTS " + self.dbname + ";" self.output(query) query = "USE " + self.dbname + ";" self.output(query) def db_create_process(self): query = "DROP TABLE IF EXISTS process;" self.output(query) if self.options.sql: self.cursor.execute(query) query = "CREATE TABLE process (processkey CHAR(32) NOT NULL, lineNumber INT NOT NULL, pid INT NOT NULL, " \ "startTime DATETIME NOT NULL,endTime DATETIME NULL, computedLapse FLOAT NULL,completedLapse FLOAT NULL, " \ "user TEXT NOT NULL, workspace TEXT NOT NULL, ip TEXT NOT NULL, app TEXT NOT NULL, cmd TEXT NOT NULL, " \ "args TEXT NULL, uCpu INT NULL, sCpu INT NULL, diskIn INT NULL, diskOut INT NULL, ipcIn INT NULL, " \ "ipcOut INT NULL, maxRss INT NULL, pageFaults INT NULL, rpcMsgsIn INT NULL, rpcMsgsOut INT NULL, " \ "rpcSizeIn INT NULL, rpcSizeOut INT NULL, rpcHimarkFwd INT NULL, rpcHimarkRev INT NULL, running INT NULL, " \ "error TEXT NULL, PRIMARY KEY (processkey, lineNumber));" self.output(query) if self.options.sql: self.cursor.execute(query) def db_create_tableUse(self): query = "DROP TABLE IF EXISTS tableUse;" self.output(query) if self.options.sql: self.cursor.execute(query) query = "CREATE TABLE tableUse (processkey CHAR(32) NOT NULL, lineNumber INT NOT NULL, " \ "tableName VARCHAR(255) NOT NULL, pagesIn INT NULL, pagesOut INT NULL, pagesCached INT NULL, " \ "readLocks INT NULL, writeLocks INT NULL, getRows INT NULL, posRows INT NULL, scanRows INT NULL, " \ "putRows int NULL, delRows INT NULL, totalReadWait INT NULL, totalReadHeld INT NULL, " \ "totalWriteWait INT NULL, totalWriteHeld INT NULL, maxReadWait INT NULL, maxReadHeld INT NULL, " \ "maxWriteWait INT NULL, maxWriteHeld INT NULL, PRIMARY KEY (processkey, lineNumber, tableName));" self.output(query) if self.options.sql: self.cursor.execute(query) def addCommand(self, cmd, vtrack): cmd.running = self.running self.cmds[cmd.pid] = cmd if not vtrack: self.running += 1 def deleteCommand(self, cmd, vtrack): if cmd.pid in self.cmds: self.sql_process_insert(self.cmds[cmd.pid]) del self.cmds[cmd.pid] if not vtrack: self.running -= 1 def computed(self, pid, computedLapse): if pid in self.cmds: # sum all the compute values of a same command if self.cmds[pid].computedLapse == "NULL": sum = 0.0 else: sum = float(self.cmds[pid].computedLapse) sum += float(computedLapse) self.cmds[pid].computedLapse = str(sum) def completed(self, pid, endTime, completedLapse): if pid in self.cmds: self.cmds[pid].endTime = endTime self.cmds[pid].completedLapse = str(float(completedLapse)) self.lastCompletedCmds.add(self.cmds[pid]) self.deleteCommand(self.cmds[pid], False) def track(self, cmd, line): self.tables = {} tableName = None rpcMsgsIn = rpcMsgsOut = rpcSizeIn = rpcSizeOut = 0 rpcHimarkFwd = rpcHimarRev = uCpu = 0 sCpu = diskIn = diskOut = ipcIn = ipcOut = maxRss = pageFaults = 0 readLocks = writeLocks = getRows = posRows = scanRows = 0 putRows = delRow = totalReadWait = totalReadHeld = 0 totalWriteWait = totalWriteHeld = maxReadWait = 0 maxReadHeld = maxWriteWait = maxWriteHeld = 0 lineNumber = self.lastCompletedCmds.getLineNumber(cmd) if lineNumber > 0: cmd.lineNumber = lineNumber self.lastCompletedCmds.remove(cmd) else: self.addCommand(cmd, True) while line and RE_TRACK.match(line): gotMatch = False match = RE_TRACK_LAPSE.match(line) if match: gotMatch = True cmd.completedLapse = match.group(1) if not gotMatch: match = RE_FAILED_AUTH.match(line) if match: cmd.error = "\"" + match.group(1) + "\"" if not gotMatch: match = RE_KILLED_BY.match(line) if match: gotMatch = True cmd.error = "\"" + match.group(1) + "\"" if not gotMatch: match = RE_EXITED.match(line) if match: gotMatch = True cmd.error = "\"" + match.group(1) + "\"" if not gotMatch: match = RE_TRACK_USAGE.match(line) if match: gotMatch = True cmd.setUsage(match.group(1), match.group(2), match.group(3), match.group(4), match.group(5), match.group(6), match.group(7), match.group(8)) if not gotMatch: match = RE_TRACK_RPC.match(line) if match: gotMatch = True cmd.setRpc(match.group(1), match.group(2), match.group(3), match.group(4), match.group(5), match.group(6)) if not gotMatch: match = RE_TRACK_TABLE.match(line) if match: gotMatch = True tableName = match.group(1) self.tables[tableName] = Table(cmd.processKey, cmd.lineNumber, tableName) if not gotMatch: match = RE_TRACK_PAGES.match(line) if match: gotMatch = True if tableName in self.tables: self.tables[tableName].setPages(match.group(1), match.group(2), match.group(3)) if not gotMatch: match = RE_TRACK_LOCKS_ROWS.match(line) if match: gotMatch = True if tableName in self.tables: self.tables[tableName].setLocksRows(match.group(1), match.group(2), match.group(3), match.group(4), match.group(5), match.group(6), match.group(7)) if not gotMatch: match = RE_TRACK_TOTAL_LOCK.match(line) if match: gotMatch = True if tableName in self.tables: self.tables[tableName].setTotalLock(match.group(1), match.group(2), match.group(3), match.group(4)) # The max is only reported if there is more than one # lock taken on the table if (self.tables[tableName].readLocks == "NULL" or int(self.tables[tableName].readLocks) < 2) and \ (self.tables[tableName].writeLocks == "NULL" or int(self.tables[tableName].writeLocks) < 2): self.tables[tableName].setMaxLock(match.group(1), match.group(2), match.group(3), match.group(4)) if not gotMatch: match = RE_TRACK_MAX_LOCK.match(line) if match: gotMatch = True if tableName in self.tables: self.tables[tableName].setMaxLock(match.group(1), match.group(2), match.group(3), match.group(4)) line = self.readLogfile() self.deleteCommand(cmd, True) self.sql_process_update(cmd) if python3: for k, v in self.tables.items(): self.sql_tableUse_insert(v) else: for table in self.tables.values(): self.sql_tableUse_insert(table) def flush_output(self): if python3: for p, v in self.cmds.items(): self.sql_process_insert(v) else: for p, v in self.cmds.iteritems(): self.sql_process_insert(v) self.output("", file=sys.stderr) def sql_process_insert(self, cmd): query = 'INSERT IGNORE INTO process VALUES ("%s",%d,%d,%s,%s,%s,%s,"%s","%s","%s","%s","%s","%s",%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%d,%s);' % \ (cmd.processKey, cmd.lineNumber, cmd.pid, cmd.startTime, cmd.endTime, cmd.computedLapse, cmd.completedLapse, cmd.user, cmd.workspace, cmd.ip, cmd.app, cmd.name, cmd.args, cmd.rpcMsgsIn, cmd.rpcMsgsOut, cmd.rpcSizeIn, cmd.rpcSizeOut, cmd.rpcHimarkFwd, cmd.rpcHimarkRev, cmd.uCpu, cmd.sCpu, cmd.diskIn, cmd.diskOut, cmd.ipcIn, cmd.ipcOut, cmd.maxRss, cmd.pageFaults, cmd.running, cmd.error) self.output(query) if self.options.sql: self.cursor.execute('INSERT INTO process VALUES (?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?)', (cmd.processKey, cmd.lineNumber, cmd.pid, cmd.startTime, nullValue(cmd.endTime), nullValue(cmd.computedLapse), nullValue(cmd.completedLapse), nullValue(cmd.user), nullValue(cmd.workspace), nullValue(cmd.ip), nullValue(cmd.app), nullValue(cmd.name), nullValue(cmd.args), nullValue(cmd.rpcMsgsIn), nullValue(cmd.rpcMsgsOut), nullValue(cmd.rpcSizeIn), nullValue(cmd.rpcSizeOut), nullValue(cmd.rpcHimarkFwd), nullValue(cmd.rpcHimarkRev), nullValue(cmd.uCpu), nullValue(cmd.sCpu), nullValue(cmd.diskIn), nullValue(cmd.diskOut), nullValue(cmd.ipcIn), nullValue(cmd.ipcOut), nullValue(cmd.maxRss), nullValue(cmd.pageFaults), nullValue(cmd.running), nullValue(cmd.error))) self.db_commit_updates() def sql_process_update(self, cmd): query = "UPDATE process SET uCpu=%s,sCpu=%s,diskIn=%s,diskOut=%s,ipcIn=%s,ipcOut=%s,maxRss=%s,pageFaults=%s," \ "rpcMsgsIn=%s,rpcMsgsOut=%s,rpcSizeIn=%s,rpcSizeOut=%s,rpcHimarkFwd=%s,rpcHimarkRev=%s,error=%s " \ "WHERE processKey = \"%s\" and lineNumber = %d;" % \ (cmd.uCpu, cmd.sCpu, cmd.diskIn, cmd.diskOut, cmd.ipcIn, cmd.ipcOut, cmd.maxRss, cmd.pageFaults, cmd.rpcMsgsIn, cmd.rpcMsgsOut, cmd.rpcSizeIn, cmd.rpcSizeOut, cmd.rpcHimarkFwd, cmd.rpcHimarkRev, cmd.error, cmd.processKey, cmd.lineNumber) self.output(query) if self.options.sql: self.cursor.execute(query) if cmd.completedLapse != "NULL": dateFunction = "DATE_ADD(startTime,INTERVAL %d SECOND)" % int(cmd.completedLapse) if self.options.sql: dateFunction = "DATETIME(startTime,'+%d SECONDs')" % int(cmd.completedLapse) query = 'UPDATE process SET endTime=%s WHERE processKey = "%s" and ' \ 'lineNumber = %d and endTime IS NULL;' % \ (dateFunction, cmd.processKey, cmd.lineNumber) else: query = 'UPDATE process SET endTime=startTime WHERE processKey = "%s" and lineNumber = %d and endTime IS NULL;' % \ (cmd.processKey, cmd.lineNumber) self.output(query) if self.options.sql: self.cursor.execute(query) self.db_commit_updates() def sql_tableUse_insert(self, tab): query = 'INSERT IGNORE INTO tableUse VALUES ("%s",%d,"%s",%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,' \ '%s,%s,%s,%s);' % \ (tab.processKey, tab.lineNumber, tab.tableName, tab.pagesIn, tab.pagesOut, tab.pagesCached, tab.readLocks, tab.writeLocks, tab.getRows, tab.posRows, tab.scanRows, tab.putRows, tab.delRow, tab.totalReadWait, tab.totalReadHeld, tab.totalWriteWait, tab.totalWriteHeld, tab.maxReadWait, tab.maxReadHeld, tab.maxWriteWait, tab.maxWriteHeld) self.output(query) if self.options.sql: self.cursor.execute('INSERT INTO tableUse VALUES (?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?)', (tab.processKey, tab.lineNumber, tab.tableName, nullValue(tab.pagesIn), nullValue(tab.pagesOut), nullValue(tab.pagesCached), nullValue(tab.readLocks), nullValue(tab.writeLocks), nullValue(tab.getRows), nullValue(tab.posRows), nullValue(tab.scanRows), nullValue(tab.putRows), nullValue(tab.delRow), nullValue(tab.totalReadWait), nullValue(tab.totalReadHeld), nullValue(tab.totalWriteWait), nullValue(tab.totalWriteHeld), nullValue(tab.maxReadWait), nullValue(tab.maxReadHeld), nullValue(tab.maxWriteWait), nullValue(tab.maxWriteHeld))) self.db_commit_updates() def processLog(self): line = self.readLogfile() while line: nextLine = None if RE_EMPTY.match(line) or RE_INFO.match(line): line = self.readLogfile() continue # # Pattern matching a command # gotMatch = False match = RE_CMD.match(line) if not match: match = RE_CMD_NOARG.match(line) if match: gotMatch = True pid = int(match.group(2)) startTime = parseDateTime(match.group(1)) user = match.group(3) workspace = match.group(4) ip = match.group(5) # following gsub required due to a 2009.2 P4V bug app = match.group(6).replace("\x00", "/") name = match.group(7) cmdArgs = None if len(match.groups()) == 8: cmdArgs = match.group(8) # Strip Swarm/Git Fusion commands with lots of json smatch = re.search(RE_JSON_CMDARGS, cmdArgs) if smatch: cmdArgs = smatch.group(1) m = hashlib.md5() line = re.sub(r'[^\x00-\x7F]', '?', line) # Do this so we can compare results with ruby version if python3: m.update(line.encode()) else: m.update(line) processKey = m.hexdigest() cmd = Command(processKey, self.getLineNumber(), pid, startTime, user, workspace, ip, app, name, cmdArgs) nextLine = self.readLogfile() if nextLine: match = RE_TRACK.match(nextLine) if match: self.track(cmd, nextLine) else: self.addCommand(cmd, False) else: self.addCommand(cmd, False) # # Pattern matching a compute line # if not gotMatch: match = RE_COMPUTE.match(line) if match: gotMatch = True pid = int(match.group(2)) computedLapse = float(match.group(3)) self.computed(pid, computedLapse) # # Pattern matching a completed line # if not gotMatch: match = RE_COMPLETED.match(line) if match: gotMatch = True pid = int(match.group(2)) endTime = parseDateTime(match.group(1)) completedLapse = match.group(3) self.completed(pid, endTime, completedLapse) if nextLine: line = nextLine else: line = self.readLogfile() self.terminate() def croakusage(): print("Usage: log2sql.py -d <database_name> [ --sql ] log_filename") sys.exit(1) ###################### # START OF MAIN SCRIPT ###################### RE_INFO = re.compile('^Perforce server info:') RE_EMPTY = re.compile('^\s*$') RE_JSON_CMDARGS = re.compile('^(.*) \{.*\}$') RE_CMD = re.compile('^\t(\d+/\d+/\d+ \d+:\d+:\d+) pid (\d+) (.*)@(.*) (.*) \[(.*)\] \'(\w+-\w+) (.*)\'.*') RE_CMD_NOARG = re.compile('^\t(\d+/\d+/\d+ \d+:\d+:\d+) pid (\d+) (.*)@(.*) (.*) \[(.*)\] \'(\w+-\w+)\'.*') RE_COMPUTE = re.compile('^\t(\d+/\d+/\d+ \d+:\d+:\d+) pid (\d+) compute end ([0-9]+|[0-9]+\.[0-9]+|\.[0-9]+)s .*') RE_COMPLETED = re.compile('^\t(\d+/\d+/\d+ \d+:\d+:\d+) pid (\d+) completed ([0-9]+|[0-9]+\.[0-9]+|\.[0-9]+)s .*') RE_TRACK = re.compile('^---|^locks acquired by blocking after 3 non-blocking attempts') RE_TRACK_LAPSE = re.compile('^--- lapse (\d+)') RE_TRACK_RPC = re.compile('^--- rpc msgs/size in\+out (\d+)\+(\d+)/(\d+)mb\+(\d+)mb himarks (\d+)/(\d+)') RE_TRACK_USAGE = re.compile('^--- usage (\d+)\+(\d+)us (\d+)\+(\d+)io (\d+)\+(\d+)net (\d+)k (\d+)pf') RE_FAILED_AUTH = re.compile('^--- (failed authentication check)') RE_KILLED_BY = re.compile('^--- (killed by .*)') RE_EXITED = re.compile('^--- (exited on fatal server error)') RE_TRACK_TABLE = re.compile('^--- db.([a-zA-Z]*)') RE_TRACK_PAGES = re.compile('^--- pages in\+out\+cached (\d+)\+(\d+)\+(\d+)') RE_TRACK_LOCKS_ROWS = re.compile( '^--- locks read/write (\d+)/(\d+) rows get\+pos\+scan put\+del (\d+)\+(\d+)\+(\d+) (\d+)\+(\d+)') RE_TRACK_TOTAL_LOCK = re.compile('^--- total lock wait\+held read/write (\d+)ms\+(\d+)ms/(\d+)ms\+(\d+)ms') RE_TRACK_MAX_LOCK = re.compile( '^--- max lock wait\+held read/write (\d+)ms\+(\d+)ms/(\d+)ms\+(\d+)ms|--- locks wait+held read/write (\d+)ms\+(\d+)ms/(\d+)ms\+(\d+)ms') def main(): parser = argparse.ArgumentParser(add_help=True) parser.add_argument('-d', '--dbname', help="Database name to use", default=None) parser.add_argument('-s', '--sql', help="Use local SQL Lite database", action='store_true', default=False) options, args = parser.parse_known_args() if len(args) < 1: croakusage() options.logname = args[0] if not options.logname or not options.dbname: croakusage() log2sql = Log2sql(options) log2sql.processLog() if __name__ == '__main__': main()
# | Change | User | Description | Committed | |
---|---|---|---|---|---|
#41 | 23674 | Robert Cowham | Reduce time taken by 30% (improve performance) - use epoch times. | ||
#40 | 23673 | Robert Cowham |
Ues None instead of "NULL" - more pythonic and we insert more frequently than we output SQL statements. |
||
#39 | 23672 | Robert Cowham | Remove unnecessary quoting of strings | ||
#38 | 23631 | Robert Cowham |
Improve detection of completed commands Should significantly reduce the number of "running" commands recorded. |
||
#37 | 23591 | Robert Cowham |
Handle meta/db and similar records which may occur multiple times for a command with vtrack output. |
||
#36 | 23493 | Robert Cowham |
Process peek track records Process meta/client/change lock track records Check for unrecognised track records (debug output if found) |
||
#35 | 23447 | Robert Cowham | Refactor duplicate key creation | ||
#34 | 23446 | Robert Cowham | Duplicate pids for same command within same second now handled correctly! | ||
#33 | 23445 | Robert Cowham | Fixed testChanges - if a cmd has track info then it is completed. | ||
#32 | 23444 | Robert Cowham | Refactored tests | ||
#31 | 23443 | Robert Cowham |
Avoid processing cmds until time changes as this means out of order track entries are processed appropriately. (Normally a 'completed' is immediately followed by a track entry for the original cmd - but sometimes the track entry comes a bit later after some other unrelated cmds are logged. Previously this would lead to being treated as extra cmds and ones which would never be completed. This in turn lead to lots of performance loss in processCompletedCommands() function) |
||
#30 | 23438 | Robert Cowham | Add some logging to track down performance issues | ||
#29 | 23402 | Robert Cowham | Tweaked regexes for slight performance improvement | ||
#28 | 23400 | Robert Cowham | Iterate through lines in file - faster than readline() | ||
#27 | 23399 | Robert Cowham | Handle utf encoding errors | ||
#26 | 23369 | Robert Cowham | Remove quotes around datetime in sqlite | ||
#25 | 22906 | Robert Cowham | Check for log file not existing. | ||
#24 | 21858 | Robert Cowham | Fix problem with keys as iterable in py2/py3 | ||
#23 | 21829 | Robert Cowham | Fix problem with changes -o commands. | ||
#22 | 21796 | Robert Cowham | Handle rmt-FileFetch records which may be duplicates as they don't have 'completed' entries. | ||
#21 | 21780 | Robert Cowham |
Don't drop tables when creating sqlite db Performance tweaks after profiling - particularly date parsing. |
||
#20 | 21713 | Robert Cowham | Ignore temporary error inserting into trackuse table | ||
#19 | 21712 | Robert Cowham | Check for correct line number when looking to update for track entries | ||
#18 | 21708 | Robert Cowham |
Tweak parameters to improve error messages. Make output to stdout optional |
||
#17 | 21705 | Robert Cowham |
Minor refactoring for clarity. Process all completed records as we go. |
||
#16 | 21702 | Robert Cowham |
Rework algorithm to process blocks at a time, and to avoid the SQL update statements. Risk of increased memory vs improved speed. |
||
#15 | 21697 | Robert Cowham | Parse rcpSnd/rpcRcv and add to database. | ||
#14 | 21694 | Robert Cowham | Handle null values properly | ||
#13 | 21672 | Robert Cowham | Improve performance when inserting rows | ||
#12 | 21660 | Robert Cowham | Handle utf8 in python2 strings | ||
#11 | 21656 | Robert Cowham | Avoid SQL quoting issues with alternative insert statement. | ||
#10 | 21643 | Robert Cowham | Handle git-fusion entries (strip the json) - for --sql option | ||
#9 | 21402 | Robert Cowham |
Filter out swarm commands Test for db content in a basic fashion. |
||
#8 | 21393 | Robert Cowham |
Add a swarm test - large record. Improve performance by skipping easy matches early. |
||
#7 | 21378 | Robert Cowham | Batch updates and make SQL date arithmetic work with SQLite | ||
#6 | 21374 | Robert Cowham | Write to sqllite db | ||
#5 | 21368 | Robert Cowham | Make tests work under python3 | ||
#4 | 21367 | Robert Cowham | Restructured for unit tests, and first unit tests. | ||
#3 | 21366 | Robert Cowham | Fix problems with NULL values and int | ||
#2 | 21365 | Robert Cowham | Equivalent of Ruby version on smaller sets | ||
#1 | 21361 | Robert Cowham | Sort of works... |