log2sql.py #16

  • //
  • guest/
  • robert_cowham/
  • perforce/
  • utils/
  • log_analysis/
  • log2sql.py
  • View
  • Commits
  • Open Download .zip Download (34 KB)
#!/usr/bin/env python3
# -*- encoding: UTF8 -*-

##############################################################################
#
# Copyright (c) 2008,2016 Perforce Software, Inc.  All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# 1.  Redistributions of source code must retain the above copyright
#     notice, this list of conditions and the following disclaimer.
#
# 2.  Redistributions in binary form must reproduce the above copyright
#     notice, this list of conditions and the following disclaimer in the
#     documentation and/or other materials provided with the distribution.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED. IN NO EVENT SHALL PERFORCE SOFTWARE, INC. BE LIABLE FOR ANY
# DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
# THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#
# = Description
#
#   Output SQL queries generated by parsing commands logged in a Perforce
#   server log file.
#   The output can be imported directly in any SQL database.
#
#   log2sql populates the following tables:
#
#   +----------------+----------+------+-----+
#   |                 process                |
#   +----------------+----------+------+-----+
#   | Field          | Type     | Null | Key |
#   +----------------+----------+------+-----+
#   | processKey     | char(32) | NO   | PRI |
#   | lineNumber     | int      | NO   | PRI |
#   | startTime      | date     | NO   |     |
#   | endTime        | date     | YES  |     |
#   | computedLapse  | float    | YES  |     |
#   | completedLapse | float    | YES  |     |
#   | pid            | int      | NO   |     |
#   | user           | text     | NO   |     |
#   | workspace      | text     | NO   |     |
#   | ip             | text     | NO   |     |
#   | app            | text     | NO   |     |
#   | cmd            | text     | NO   |     |
#   | args           | text     | YES  |     |
#   | uCpu           | int      | YES  |     |
#   | sCpu           | int      | YES  |     |
#   | diskIn         | int      | YES  |     |
#   | diskOut        | int      | YES  |     |
#   | ipcIn          | int      | YES  |     |
#   | ipcOut         | int      | YES  |     |
#   | maxRss         | int      | YES  |     |
#   | pageFaults     | int      | YES  |     |
#   | rpcMsgsIn      | int      | YES  |     |
#   | rpcMsgsOut     | int      | YES  |     |
#   | rpcSizeIn      | int      | YES  |     |
#   | rpcSizeOut     | int      | YES  |     |
#   | rpcHimarkFwd   | int      | YES  |     |
#   | rpcHimarkRev   | int      | YES  |     |
#   | rpcSnd         | float    | YES  |     |
#   | rpcRcv         | float    | YES  |     |
#   | running        | int      | NO   |     |
#   | error          | text     | YES  |     |
#   +----------------+----------+------+-----+
#
#   +----------------+--------------+------+-----+
#   |                  tableUse                  |
#   +----------------+--------------+------+-----+
#   | Field          | Type         | Null | Key |
#   +----------------+--------------+------+-----+
#   | processKey     | char(32)     | NO   | PRI |
#   | lineNumber     | int          | NO   | PRI |
#   | tableName      | varchar(255) | NO   | PRI |
#   | pagesIn        | int          | YES  |     |
#   | pagesOut       | int          | YES  |     |
#   | pagesCached    | int          | YES  |     |
#   | readLocks      | int          | YES  |     |
#   | writeLocks     | int          | YES  |     |
#   | getRows        | int          | YES  |     |
#   | posRows        | int          | YES  |     |
#   | scanRows       | int          | YES  |     |
#   | putRows        | int          | YES  |     |
#   | delRows        | int          | YES  |     |
#   | totalReadWait  | int          | YES  |     |
#   | totalReadHeld  | int          | YES  |     |
#   | totalWriteWait | int          | YES  |     |
#   | totalWriteHeld | int          | YES  |     |
#   | maxReadWait    | int          | YES  |     |
#   | maxReadHeld    | int          | YES  |     |
#   | maxWriteWait   | int          | YES  |     |
#   | maxWriteHeld   | int          | YES  |     |
#   +----------------+--------------+------+-----+
#
# = Usage
#
#   log2sql.rb -d <database_name> log_file
#
# = Requirements
#
#   Ruby: http://www.ruby-lang.org
#
##############################################################################

from __future__ import print_function

import re
import sys
import os
import math
import argparse
import fileinput
import datetime
import hashlib
import sqlite3
from collections import OrderedDict

python3 = sys.version_info[0] >= 3

RowsPerTransaction = 1000   # Commit every so many rows if in SQL mode

def escapeChar(str):
    str = str.replace("\\", "\\\\")
    str = str.replace("\"", "\\\"")
    return str

def parseDateTime(str):
    # \d+/\d+/\d+ \d+:\d+:\d+
    date = datetime.datetime.strptime(str, "%Y/%m/%d %H:%M:%S")
    return date.strftime("\"%Y-%m-%d %H:%M:%S\"")

def dateAdd(str, seconds):
    # \d+/\d+/\d+ \d+:\d+:\d+
    date = datetime.datetime.strptime(str, "%Y/%m/%d %H:%M:%S")
    date = date + datetime.timedelta(seconds=seconds)
    return date.strftime("\"%Y-%m-%d %H:%M:%S\"")

def nullValue(str):
    if str == "NULL":
        return None
    return str

class Command:
    processKey = None
    pid = 0
    startTime = None
    endTime = None
    computedLapse = 0.0
    completedLapse = 0.0
    user = None
    workspace = None
    ip = None
    app = None
    name = None
    args = None
    uCpu = None
    sCpu = None
    diskIn = None
    diskOut = None
    ipcIn = None
    ipcOut = None
    maxRss = None
    pageFaults = None
    rpcMsgsIn = None
    rpcMsgsOut = None
    rpcSizeIn = None
    rpcSizeOut = None
    rpcHimarkFwd = None
    rpcHimarkRev = None
    rpcSnd = None
    rpcRcv = None
    running = 0
    lineNumber = 0
    error = None

    def __init__(self, processKey, lineNumber, pid, startTime,
                 user, workspace, ip, app, name, args):
        self.processKey = processKey
        self.lineNumber = lineNumber
        self.pid = pid
        self.startTime = startTime
        self.user = escapeChar(user)
        self.workspace = escapeChar(workspace)
        self.ip = ip
        self.app = app
        self.name = name
        if args:
            self.args = escapeChar(args)
        else:
            self.args = "NULL"
        self.endTime = self.computedLapse = self.completedLapse = self.uCpu = "NULL"
        self.sCpu = self.diskIn = self.diskOut = self.ipcIn = self.ipcOut = self.maxRss = "NULL"
        self.pageFaults = self.rpcMsgsIn = self.rpcMsgsOut = self.rpcSizeOut = "NULL"
        self.rpcSizeIn = self.rpcHimarkFwd = self.rpcHimarkRev = self.error = "NULL"
        self.rpcSnd = self.rpcRcv = "NULL"
        self.running = 0

    def updateFrom(self, other):
        "Update any potentiall null fields - see __init__"
        for f in ["endTime", "computedLapse", "completedLapse", "uCpu",
               "sCpu", "diskIn", "diskOut", "ipcIn", "ipcOut", "maxRss",
               "pageFaults", "rpcMsgsIn", "rpcMsgsOut", "rpcSizeOut",
                "rpcSizeIn", "rpcHimarkFwd", "rpcHimarkRev", "error",
                "rpcSnd", "rpcRcv"]:
            if getattr(other, f) != "NULL" and getattr(self, f) == "NULL":
                setattr(self, f, getattr(other, f))

    def setUsage(self, uCpu, sCpu, diskIn, diskOut, ipcIn, ipcOut, maxRss, pageFaults):
        self.uCpu = uCpu
        self.sCpu = sCpu
        self.diskIn = diskIn
        self.diskOut = diskOut
        self.ipcIn = ipcIn
        self.ipcOut = ipcOut
        self.maxRss = maxRss
        self.pageFaults = pageFaults

    def setRpc(self, rpcMsgsIn, rpcMsgsOut, rpcSizeIn,
               rpcSizeOut, rpcHimarkFwd, rpcHimarkRev, rpcSnd="NULL", rpcRcv="NULL"):
        self.rpcMsgsIn = rpcMsgsIn
        self.rpcMsgsOut = rpcMsgsOut
        self.rpcSizeIn = rpcSizeIn
        self.rpcSizeOut = rpcSizeOut
        self.rpcHimarkFwd = rpcHimarkFwd
        self.rpcHimarkRev = rpcHimarkRev
        self.rpcSnd = rpcSnd
        self.rpcRcv = rpcRcv

class Table:
    processKey = None
    lineNumber = 0
    tableName = None
    pagesIn = None
    pagesOut = None
    pagesCached = None
    readLocks = None
    writeLocks = None
    getRows = None
    posRows = None
    scanRows = None
    putRows = None
    delRow = None
    totalReadWait = None
    totalReadHeld = None
    totalWriteWait = None
    totalWriteHeld = None
    maxReadWait = None
    maxReadHeld = None
    maxWriteWait = None
    maxWriteHeld = None

    def __init__(self, processKey, lineNumber, tableName):
        self.processKey = processKey
        self.lineNumber = lineNumber
        self.tableName = tableName
        self.pagesIn = self.pagesOut = self.pagesCached = "NULL"
        self.readLocks = self.writeLocks = self.getRows = self.posRows = self.scanRows = "NULL"
        self.putRows = self.delRow = self.totalReadWait = self.totalReadHeld = "NULL"
        self.totalWriteWait = self.totalWriteHeld = self.maxReadWait = "NULL"
        self.maxReadHeld = self.maxWriteWait = self.maxWriteHeld = "NULL"

    def setPages(self, pagesIn, pagesOut, pagesCached):
        self.pagesIn = pagesIn
        self.pagesOut = pagesOut
        self.pagesCached = pagesCached

    def setLocksRows(self, readLocks, writeLocks, getRows, posRows,
                     scanRows, putRows, delRow):
        self.readLocks = readLocks
        self.writeLocks = writeLocks
        self.getRows = getRows
        self.posRows = posRows
        self.scanRows = scanRows
        self.putRows = putRows
        self.delRow = delRow

    def setTotalLock(self, totalReadWait, totalReadHeld,
                     totalWriteWait, totalWriteHeld):
        self.totalReadWait = totalReadWait
        self.totalReadHeld = totalReadHeld
        self.totalWriteWait = totalWriteWait
        self.totalWriteHeld = totalWriteHeld

    def setMaxLock(self, maxReadWait, maxReadHeld, maxWriteWait, maxWriteHeld):
        self.maxReadWait = maxReadWait
        self.maxReadHeld = maxReadHeld
        self.maxWriteWait = maxWriteWait
        self.maxWriteHeld = maxWriteHeld

class FifoList:
    max = None
    list = None

    def __init__(self, max):
        self.list = []
        self.max = max

    def add(self, cmd):
        if (len(self.list) + 1) > self.max:
            del self.list[0]
        self.list.append(cmd)

    def remove(self, cmd):
        for i, val in enumerate(self.list):
            if (self.list[i].processKey == cmd.processKey):
                del self.list[i]
                break

    def getLineNumber(self, cmd):
        lineNumber = 0
        for p in self.list:
            if (p.processKey == cmd.processKey):
                lineNumber = p.lineNumber
                break
        return lineNumber

class Block:
    def __init__(self):
        self.lines = []
        self.lineNo = 0

    def addLine(self, line, lineNo):
        self.lines.append(line)
        # Only at start of block
        if not self.lineNo:
            self.lineNo = lineNo

class Log2sql:
    logname = None
    dbname = None
    logfile = None
    ckpSize = None
    readBytes = None
    mod = None
    cmds = None
    lastCompletedCmds = None
    lineNo = 0

    def __init__(self, options, instream=None, outstream=None):
        self.logname = options.logname
        self.dbname = options.dbname
        self.options = options
        self.outstream = outstream    # For testing
        if outstream is None:
            self.outstream = sys.stdout
        if instream is None:
            self.logfile = open(self.logname, "r")
            self.ckpSize = os.stat(self.logname).st_size
        else:
            self.logfile = instream
            self.ckpSize = 500
        if self.options.sql:
            self.conn = sqlite3.connect("%s.db" % self.dbname)
            self.conn.text_factory = str
            self.cursor = self.conn.cursor()
            self.cursor.execute("PRAGMA synchronous = OFF")
            self.cursor.execute("PRAGMA journal_mode = MEMORY")
        self.cmds = {}
        self.tables = {}
        self.lastCompletedCmds = FifoList(1000)
        self.readBytes = 0.0
        self.mod = 10.0
        self.running = 0
        self.rowCount = 0
        self.db_create_database()
        self.db_create_process()
        self.db_create_tableUse()
        #self.db_commit(False)
        # if self.options.sql:
        # if self.options.sql:
        #     self.conn.isolation_level = 'DEFERRED'
        query = "BEGIN TRANSACTION;"
        self.output(query)
        if self.options.sql:
            self.conn.execute(query)
        self.output("Processing %s: 0%%" % self.logname, file=sys.stderr)

    def output(self, text, file=sys.stdout):
        if (file == sys.stderr):
            print(text, file=file)
        # elif not self.options.sql:
        else:
            self.outstream.write("%s\n" % text)

    def terminate(self):
        self.flush_output()
        self.db_commit(True)
        if self.options.sql:
            self.conn.commit()
            self.conn.close()

    def readLogfile(self):
        line = self.logfile.readline()
        if line:
            self.lineNo += 1
            # TODO
            # if line.respond_to?(:encode)
            #    line = line.encode("utf-8", 'binary', :invalid => :replace, :undef => :replace)
            #line = line.decode('utf-8', errors='ignore')
            self.readBytes += len(line)
            if math.floor(((self.readBytes / self.ckpSize) * 100.0) / self.mod) == 1.0:
                self.output("...%d%%" % self.mod, file=sys.stderr)
                # $stderr.flush
                self.mod += 10
        return line

    def getLineNumber(self):
        return self.lineNo

    def db_commit_updates(self):
        self.rowCount += 1
        if self.rowCount % RowsPerTransaction == 0:
            query = "COMMIT;"
            self.output(query)
            if self.options.sql:
                self.conn.commit()
            query = "BEGIN TRANSACTION;"
            self.output(query)
            if self.options.sql:
                self.conn.execute(query)

    def db_commit(self, state):
        if (state):
            query = "COMMIT;"
            self.output(query)
            if self.options.sql:
                self.conn.commit()
            query = "BEGIN TRANSACTION;"
            self.output(query)
            if self.options.sql:
                self.conn.execute(query)
        else:
            query = "SET autocommit=0;"
            self.output(query)

    def db_create_database(self):
        query = "CREATE DATABASE IF NOT EXISTS " + self.dbname + ";"
        self.output(query)
        query = "USE " + self.dbname + ";"
        self.output(query)

    def db_create_process(self):
        query = "DROP TABLE IF EXISTS process;"
        self.output(query)
        if self.options.sql:
            self.cursor.execute(query)
        query = "CREATE TABLE process (processkey CHAR(32) NOT NULL, lineNumber INT NOT NULL, pid INT NOT NULL, " \
                "startTime DATETIME NOT NULL,endTime DATETIME NULL, computedLapse FLOAT NULL,completedLapse FLOAT NULL, " \
                "user TEXT NOT NULL, workspace TEXT NOT NULL, ip TEXT NOT NULL, app TEXT NOT NULL, cmd TEXT NOT NULL, " \
                "args TEXT NULL, uCpu INT NULL, sCpu INT NULL, diskIn INT NULL, diskOut INT NULL, ipcIn INT NULL, " \
                "ipcOut INT NULL, maxRss INT NULL, pageFaults INT NULL, rpcMsgsIn INT NULL, rpcMsgsOut INT NULL, " \
                "rpcSizeIn INT NULL, rpcSizeOut INT NULL, rpcHimarkFwd INT NULL, rpcHimarkRev INT NULL, " \
                "rpcSnd FLOAT NULL, rpcRcv FLOAT NULL, running INT NULL, " \
                "error TEXT NULL, PRIMARY KEY (processkey, lineNumber));"
        self.output(query)
        if self.options.sql:
            self.cursor.execute(query)

    def db_create_tableUse(self):
        query = "DROP TABLE IF EXISTS tableUse;"
        self.output(query)
        if self.options.sql:
            self.cursor.execute(query)
        query = "CREATE TABLE tableUse (processkey CHAR(32) NOT NULL, lineNumber INT NOT NULL, " \
                "tableName VARCHAR(255) NOT NULL, pagesIn INT NULL, pagesOut INT NULL, pagesCached INT NULL, " \
                "readLocks INT NULL, writeLocks INT NULL, getRows INT NULL, posRows INT NULL, scanRows INT NULL, " \
                "putRows int NULL, delRows INT NULL, totalReadWait INT NULL, totalReadHeld INT NULL, " \
                "totalWriteWait INT NULL, totalWriteHeld INT NULL, maxReadWait INT NULL, maxReadHeld INT NULL, " \
                "maxWriteWait INT NULL, maxWriteHeld INT NULL, PRIMARY KEY (processkey, lineNumber, tableName));"
        self.output(query)
        if self.options.sql:
            self.cursor.execute(query)

    def addCommand(self, cmd, vtrack):
        cmd.running = self.running
        # If we are re-using pid then process previous command
        if cmd.pid in self.cmds:
            if cmd.processKey != self.cmds[cmd.pid].processKey:
                self.sql_process_insert(self.cmds[cmd.pid])
                self.cmds[cmd.pid] = cmd    # replace
            else:
                self.cmds[cmd.pid].updateFrom(cmd)
        else:
            self.cmds[cmd.pid] = cmd
        if not vtrack:
            self.running += 1

    def deleteCommand(self, cmd, vtrack):
        if cmd.pid in self.cmds:
            del self.cmds[cmd.pid]
            if not vtrack:
                self.running -= 1

    def computed(self, pid, computedLapse):
        if pid in self.cmds:
            # sum all the compute values of a same command
            if self.cmds[pid].computedLapse == "NULL":
                sum = 0.0
            else:
                sum = float(self.cmds[pid].computedLapse)
            sum += float(computedLapse)
            self.cmds[pid].computedLapse = str(sum)

    def completed(self, pid, endTime, completedLapse):
        if pid in self.cmds:
            self.cmds[pid].endTime = endTime
            self.cmds[pid].completedLapse = str(float(completedLapse))
            self.lastCompletedCmds.add(self.cmds[pid])
            #TODO: self.deleteCommand(self.cmds[pid], False)

    def track(self, cmd, lines):
        self.tables = {}
        tableName = None
        rpcMsgsIn = rpcMsgsOut = rpcSizeIn = rpcSizeOut = rpcSnd = rpcRcv = 0
        rpcHimarkFwd = rpcHimarRev = uCpu = 0
        sCpu = diskIn = diskOut = ipcIn = ipcOut = maxRss = pageFaults = 0
        readLocks = writeLocks = getRows = posRows = scanRows = 0
        putRows = delRow = totalReadWait = totalReadHeld = 0
        totalWriteWait = totalWriteHeld = maxReadWait = 0
        maxReadHeld = maxWriteWait = maxWriteHeld = 0
        if cmd.pid in self.cmds:
            cmd.lineNumber = self.cmds[cmd.pid].lineNumber
        for line in lines:
            if not RE_TRACK.match(line):
                break
            gotMatch = False
            match = RE_TRACK_LAPSE.match(line)
            if match:
                gotMatch = True
                cmd.completedLapse = match.group(1)
            if not gotMatch:
                match = RE_FAILED_AUTH.match(line)
                if match:
                    cmd.error = "\"" + match.group(1) + "\""
            if not gotMatch:
                match = RE_KILLED_BY.match(line)
                if match:
                    gotMatch = True
                    cmd.error = "\"" + match.group(1) + "\""
            if not gotMatch:
                match = RE_EXITED.match(line)
                if match:
                    gotMatch = True
                    cmd.error = "\"" + match.group(1) + "\""
            if not gotMatch:
                match = RE_TRACK_USAGE.match(line)
                if match:
                    gotMatch = True
                    cmd.setUsage(match.group(1), match.group(2), match.group(3), match.group(4),
                                 match.group(5), match.group(6), match.group(7), match.group(8))
            if not gotMatch:
                match = RE_TRACK_RPC2.match(line)
                if match:
                    gotMatch = True
                    cmd.setRpc(match.group(1), match.group(2), match.group(3),
                               match.group(4), match.group(5), match.group(6),
                               match.group(7), match.group(8))
            if not gotMatch:
                match = RE_TRACK_RPC.match(line)
                if match:
                    gotMatch = True
                    cmd.setRpc(match.group(1), match.group(2), match.group(3),
                               match.group(4), match.group(5), match.group(6))
            if not gotMatch:
                match = RE_TRACK_TABLE.match(line)
                if match:
                    gotMatch = True
                    tableName = match.group(1)
                    self.tables[tableName] = Table(cmd.processKey,
                                                   cmd.lineNumber, tableName)
            if not gotMatch:
                match = RE_TRACK_PAGES.match(line)
                if match:
                    gotMatch = True
                    if tableName in self.tables:
                        self.tables[tableName].setPages(match.group(1), match.group(2), match.group(3))
            if not gotMatch:
                match = RE_TRACK_LOCKS_ROWS.match(line)
                if match:
                    gotMatch = True
                    if tableName in self.tables:
                        self.tables[tableName].setLocksRows(match.group(1), match.group(2),
                                                            match.group(3), match.group(4),
                                                            match.group(5), match.group(6),
                                                            match.group(7))
            if not gotMatch:
                match = RE_TRACK_TOTAL_LOCK.match(line)
                if match:
                    gotMatch = True
                    if tableName in self.tables:
                        self.tables[tableName].setTotalLock(match.group(1), match.group(2),
                                                            match.group(3), match.group(4))
                        # The max is only reported if there is more than one
                        # lock taken on the table
                        if (self.tables[tableName].readLocks == "NULL" or
                            int(self.tables[tableName].readLocks) < 2) and \
                            (self.tables[tableName].writeLocks == "NULL" or
                            int(self.tables[tableName].writeLocks) < 2):
                            self.tables[tableName].setMaxLock(match.group(1), match.group(2),
                                                              match.group(3), match.group(4))
            if not gotMatch:
                match = RE_TRACK_MAX_LOCK.match(line)
                if match:
                    gotMatch = True
                    if tableName in self.tables:
                        self.tables[tableName].setMaxLock(match.group(1), match.group(2),
                                                          match.group(3), match.group(4))
        self.sql_process_update(cmd)
        self.addCommand(cmd, True)
        if python3:
            for k, v in self.tables.items():
                self.sql_tableUse_insert(v)
        else:
            for table in self.tables.values():
                self.sql_tableUse_insert(table)

    def flush_output(self):
        if python3:
            for p, v in self.cmds.items():
                self.sql_process_insert(v)
        else:
            for p, v in self.cmds.iteritems():
                self.sql_process_insert(v)
        self.output("", file=sys.stderr)

    def sql_process_insert(self, cmd):
        query = 'INSERT IGNORE INTO process VALUES ("%s",%d,%d,%s,%s,%s,%s,"%s","%s","%s","%s","%s","%s",%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%d,%s);' % \
                (cmd.processKey, cmd.lineNumber, cmd.pid,
                 cmd.startTime, cmd.endTime, cmd.computedLapse,
                 cmd.completedLapse, cmd.user, cmd.workspace,
                 cmd.ip, cmd.app, cmd.name, cmd.args, cmd.rpcMsgsIn,
                 cmd.rpcMsgsOut, cmd.rpcSizeIn, cmd.rpcSizeOut,
                 cmd.rpcHimarkFwd, cmd.rpcHimarkRev, cmd.rpcSnd, cmd.rpcRcv, cmd.uCpu,
                 cmd.sCpu, cmd.diskIn, cmd.diskOut, cmd.ipcIn, cmd.ipcOut,
                 cmd.maxRss, cmd.pageFaults, cmd.running, cmd.error)
        self.output(query)
        if self.options.sql:
            self.cursor.execute('INSERT INTO process VALUES (?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?)',
                                (cmd.processKey, cmd.lineNumber, cmd.pid,
                                 cmd.startTime, nullValue(cmd.endTime), nullValue(cmd.computedLapse),
                                 nullValue(cmd.completedLapse), nullValue(cmd.user), nullValue(cmd.workspace),
                                 nullValue(cmd.ip), nullValue(cmd.app), nullValue(cmd.name), nullValue(cmd.args),
                                 nullValue(cmd.rpcMsgsIn),
                                 nullValue(cmd.rpcMsgsOut), nullValue(cmd.rpcSizeIn), nullValue(cmd.rpcSizeOut),
                                 nullValue(cmd.rpcHimarkFwd), nullValue(cmd.rpcHimarkRev),
                                 nullValue(cmd.rpcSnd), nullValue(cmd.rpcRcv), nullValue(cmd.uCpu),
                                 nullValue(cmd.sCpu), nullValue(cmd.diskIn), nullValue(cmd.diskOut),
                                 nullValue(cmd.ipcIn), nullValue(cmd.ipcOut),
                                 nullValue(cmd.maxRss), nullValue(cmd.pageFaults), nullValue(cmd.running),
                                 nullValue(cmd.error)))
        self.db_commit_updates()

    def sql_process_update(self, cmd):
        if cmd.completedLapse != "NULL":
            cmd.endTime = dateAdd(cmd.startTime, int(cmd.completedLapse))
        else:
            cmd.endTime = cmd.startTime

    def sql_tableUse_insert(self, tab):
        query = 'INSERT IGNORE INTO tableUse VALUES ("%s",%d,"%s",%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,' \
                '%s,%s,%s,%s);' % \
                (tab.processKey, tab.lineNumber, tab.tableName, tab.pagesIn,
                 tab.pagesOut, tab.pagesCached, tab.readLocks,
                 tab.writeLocks, tab.getRows, tab.posRows, tab.scanRows,
                 tab.putRows, tab.delRow,
                 tab.totalReadWait, tab.totalReadHeld,
                 tab.totalWriteWait, tab.totalWriteHeld,
                 tab.maxReadWait, tab.maxReadHeld,
                 tab.maxWriteWait, tab.maxWriteHeld)
        self.output(query)
        if self.options.sql:
            self.cursor.execute('INSERT INTO tableUse VALUES (?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?)',
                (tab.processKey, tab.lineNumber, tab.tableName,
                 nullValue(tab.pagesIn), nullValue(tab.pagesOut), nullValue(tab.pagesCached),
                 nullValue(tab.readLocks), nullValue(tab.writeLocks), nullValue(tab.getRows), nullValue(tab.posRows),
                 nullValue(tab.scanRows), nullValue(tab.putRows), nullValue(tab.delRow),
                 nullValue(tab.totalReadWait), nullValue(tab.totalReadHeld),
                 nullValue(tab.totalWriteWait), nullValue(tab.totalWriteHeld),
                 nullValue(tab.maxReadWait), nullValue(tab.maxReadHeld),
                 nullValue(tab.maxWriteWait), nullValue(tab.maxWriteHeld)))
        self.db_commit_updates()

    def processInfoBlock(self, block):
        cmd = None
        i = 0
        # First line of block is info line - process the rest
        for line in block.lines[1:]:
            i += 1
            # Check for track lines and once we have found one, process them all
            if cmd:
                match = RE_TRACK.match(line)
                if match:
                    self.track(cmd, block.lines[i:])
                    break   # Block has been processed

            gotMatch = False
            match = RE_CMD_NOARG.match(line)
            if not match:
                match = RE_CMD.match(line)
            if match:
                gotMatch = True
                pid = int(match.group(2))
                startTime = parseDateTime(match.group(1))
                user = match.group(3)
                workspace = match.group(4)
                ip = match.group(5)
                # following gsub required due to a 2009.2 P4V bug
                app = match.group(6).replace("\x00", "/")
                name = match.group(7)
                cmdArgs = None
                if len(match.groups()) == 8:
                    cmdArgs = match.group(8)
                    # Strip Swarm/Git Fusion commands with lots of json
                    smatch = re.search(RE_JSON_CMDARGS, cmdArgs)
                    if smatch:
                        cmdArgs = smatch.group(1)
                m = hashlib.md5()
                line = re.sub(r'[^\x00-\x7F]', '?', line)   # Do this so we can compare results with ruby version
                if python3:
                    m.update(line.encode())
                else:
                    m.update(line)
                processKey = m.hexdigest()
                cmd = Command(processKey, block.lineNo, pid, startTime,
                              user, workspace, ip, app, name, cmdArgs)
                self.addCommand(cmd, False)

            # Pattern matching a compute line
            if not gotMatch:
                match = RE_COMPUTE.match(line)
                if match:
                    gotMatch = True
                    pid = int(match.group(2))
                    computedLapse = float(match.group(3))
                    self.computed(pid, computedLapse)
            # Pattern matching a completed line
            if not gotMatch:
                match = RE_COMPLETED.match(line)
                if match:
                    gotMatch = True
                    pid = int(match.group(2))
                    endTime = parseDateTime(match.group(1))
                    completedLapse = match.group(3)
                    self.completed(pid, endTime, completedLapse)

    def blockEnd(self, line):
        "Blank line or one of terminators"
        terminators = ["Perforce server info:",
                       "Perforce server error:"
                       "locks acquired by blocking after",
                       "Rpc himark:"]
        if line == "" or line == "\n" or line == "\r\n":
            return True
        for t in terminators:
            if line[:len(t)] == t:
                return True
        return False

    def blockInfo(self, line):
        t = "Perforce server info:"
        if line[:len(t)] == t:
            return True
        return False

    def processLog(self):
        block = Block()
        line = self.readLogfile()
        while line:
            if self.blockEnd(line):
                if block.lines:
                    if self.blockInfo(block.lines[0]):
                        self.processInfoBlock(block)
                    # TODO: process errors etc
                    block = Block()
                block.addLine(line, self.lineNo)
            else:
                block.addLine(line, self.lineNo)
            line = self.readLogfile()
        if block.lines:
            if self.blockInfo(block.lines[0]):
                self.processInfoBlock(block)
        self.terminate()

def croakusage():
    print("Usage: log2sql.py -d <database_name> [ --sql ] log_filename")
    sys.exit(1)

######################
# START OF MAIN SCRIPT
######################

RE_INFO = re.compile('^Perforce server info:')
RE_EMPTY = re.compile('^\s*$')
RE_JSON_CMDARGS = re.compile('^(.*) \{.*\}$')
RE_CMD = re.compile('^\t(\d+/\d+/\d+ \d+:\d+:\d+) pid (\d+) (.*)@(.*) (.*) \[(.*)\] \'(\w+-\w+) (.*)\'.*')
RE_CMD_NOARG = re.compile('^\t(\d+/\d+/\d+ \d+:\d+:\d+) pid (\d+) (.*)@(.*) (.*) \[(.*)\] \'(\w+-\w+)\'.*')
RE_COMPUTE = re.compile('^\t(\d+/\d+/\d+ \d+:\d+:\d+) pid (\d+) compute end ([0-9]+|[0-9]+\.[0-9]+|\.[0-9]+)s.*')
RE_COMPLETED = re.compile('^\t(\d+/\d+/\d+ \d+:\d+:\d+) pid (\d+) completed ([0-9]+|[0-9]+\.[0-9]+|\.[0-9]+)s.*')
RE_TRACK = re.compile('^---|^locks acquired by blocking after 3 non-blocking attempts')
RE_TRACK_LAPSE = re.compile('^--- lapse (\d+)')
RE_TRACK_RPC = re.compile('^--- rpc msgs/size in\+out (\d+)\+(\d+)/(\d+)mb\+(\d+)mb himarks (\d+)/(\d+)')
RE_TRACK_RPC2 = re.compile('^--- rpc msgs/size in\+out (\d+)\+(\d+)/(\d+)mb\+(\d+)mb himarks (\d+)/(\d+) snd/rcv ([0-9]+|[0-9]+\.[0-9]+|\.[0-9]+)s/([0-9]+|[0-9]+\.[0-9]+|\.[0-9]+)s')
RE_TRACK_USAGE = re.compile('^--- usage (\d+)\+(\d+)us (\d+)\+(\d+)io (\d+)\+(\d+)net (\d+)k (\d+)pf')
RE_FAILED_AUTH = re.compile('^--- (failed authentication check)')
RE_KILLED_BY = re.compile('^--- (killed by .*)')
RE_EXITED = re.compile('^--- (exited on fatal server error)')
RE_TRACK_TABLE = re.compile('^--- db.([a-zA-Z]*)')
RE_TRACK_PAGES = re.compile('^---   pages in\+out\+cached (\d+)\+(\d+)\+(\d+)')
RE_TRACK_LOCKS_ROWS = re.compile(
    '^---   locks read/write (\d+)/(\d+) rows get\+pos\+scan put\+del (\d+)\+(\d+)\+(\d+) (\d+)\+(\d+)')
RE_TRACK_TOTAL_LOCK = re.compile('^---   total lock wait\+held read/write (\d+)ms\+(\d+)ms/(\d+)ms\+(\d+)ms')
RE_TRACK_MAX_LOCK = re.compile(
    '^---   max lock wait\+held read/write (\d+)ms\+(\d+)ms/(\d+)ms\+(\d+)ms|---   locks wait+held read/write (\d+)ms\+(\d+)ms/(\d+)ms\+(\d+)ms')


def main():
    parser = argparse.ArgumentParser(add_help=True)
    parser.add_argument('-d', '--dbname', help="Database name to use", default=None)
    parser.add_argument('-s', '--sql', help="Use local SQL Lite database", action='store_true', default=False)
    options, args = parser.parse_known_args()

    if len(args) < 1:
        croakusage()
    options.logname = args[0]
    if not options.logname or not options.dbname:
        croakusage()

    log2sql = Log2sql(options)
    log2sql.processLog()

if __name__ == '__main__':
    main()
# Change User Description Committed
#41 23674 Robert Cowham Reduce time taken by 30% (improve performance) - use epoch times.
#40 23673 Robert Cowham Ues None instead of "NULL" - more pythonic and we insert more frequently than we output
SQL statements.
#39 23672 Robert Cowham Remove unnecessary quoting of strings
#38 23631 Robert Cowham Improve detection of completed commands
Should significantly reduce the number of "running" commands recorded.
#37 23591 Robert Cowham Handle meta/db and similar records which may occur multiple times for a command with
vtrack output.
#36 23493 Robert Cowham Process peek track records
Process meta/client/change lock track records
Check for unrecognised track records (debug output if found)
#35 23447 Robert Cowham Refactor duplicate key creation
#34 23446 Robert Cowham Duplicate pids for same command within same second now handled correctly!
#33 23445 Robert Cowham Fixed testChanges - if a cmd has track info then it is completed.
#32 23444 Robert Cowham Refactored tests
#31 23443 Robert Cowham Avoid processing cmds until time changes as this means out of order track
entries are processed appropriately.
(Normally a 'completed' is immediately followed by a track entry for the original
cmd - but sometimes the track entry comes a bit later after some other
unrelated cmds are logged. Previously this would lead to being treated as extra
cmds and ones which would never be completed. This in turn lead to lots of
performance loss in processCompletedCommands() function)
#30 23438 Robert Cowham Add some logging to track down performance issues
#29 23402 Robert Cowham Tweaked regexes for slight performance improvement
#28 23400 Robert Cowham Iterate through lines in file - faster than readline()
#27 23399 Robert Cowham Handle utf encoding errors
#26 23369 Robert Cowham Remove quotes around datetime in sqlite
#25 22906 Robert Cowham Check for log file not existing.
#24 21858 Robert Cowham Fix problem with keys as iterable in py2/py3
#23 21829 Robert Cowham Fix problem with changes -o commands.
#22 21796 Robert Cowham Handle rmt-FileFetch records which may be duplicates as they don't have 'completed' entries.
#21 21780 Robert Cowham Don't drop tables when creating sqlite db
Performance tweaks after profiling - particularly date parsing.
#20 21713 Robert Cowham Ignore temporary error inserting into trackuse table
#19 21712 Robert Cowham Check for correct line number when looking to update for track entries
#18 21708 Robert Cowham Tweak parameters to improve error messages.
Make output to stdout optional
#17 21705 Robert Cowham Minor refactoring for clarity.
Process all completed records as we go.
#16 21702 Robert Cowham Rework algorithm to process blocks at a time, and to avoid the SQL update statements.
Risk of increased memory vs improved speed.
#15 21697 Robert Cowham Parse rcpSnd/rpcRcv and add to database.
#14 21694 Robert Cowham Handle null values properly
#13 21672 Robert Cowham Improve performance when inserting rows
#12 21660 Robert Cowham Handle utf8 in python2 strings
#11 21656 Robert Cowham Avoid SQL quoting issues with alternative insert statement.
#10 21643 Robert Cowham Handle git-fusion entries (strip the json) - for --sql option
#9 21402 Robert Cowham Filter out swarm commands
Test for db content in a basic fashion.
#8 21393 Robert Cowham Add a swarm test - large record.
Improve performance by skipping easy matches early.
#7 21378 Robert Cowham Batch updates and make SQL date arithmetic work with SQLite
#6 21374 Robert Cowham Write to sqllite db
#5 21368 Robert Cowham Make tests work under python3
#4 21367 Robert Cowham Restructured for unit tests, and first unit tests.
#3 21366 Robert Cowham Fix problems with NULL values and int
#2 21365 Robert Cowham Equivalent of Ruby version on smaller sets
#1 21361 Robert Cowham Sort of works...