filter.py #2

#!/usr/bin/env python3
'''Cut down the volume of P4LOG entries by removing all track
outputs without any lock wait+held timings.

This is for administrators who always log with track=1 and find
the log files too big to process with tools like TRACK2SQL and friends.

Usage
=====

Reading from files. They can be optionally compressed with Gzip,
Bzip2, XZ or LZMA:

  python3 filter.py log log.gz log.bz2 log.xz

Or from STDIN:

  zcat log.gz | python3 filter.py

'''

# from .helper import *
import argparse
import bz2
import gzip
import lzma                     # new in Python 3.3
import re
import sys

def gen_file_handles(fnames, mode='rt', encoding='utf8', errors='surrogateescape'):
    '''Yield an open handle for each name in fnames, one at a time.

    The opener is chosen from the name: a ``.gz`` suffix uses gzip,
    ``.xz`` or ``.lzma`` use lzma, ``.bz2`` uses bz2, the special name
    ``-`` re-wraps standard input, and anything else goes through the
    builtin open().

    mode, encoding and errors are handed to the opener.  For ``-`` only
    encoding and errors apply, since stdin is already open.

    Each handle is closed as soon as the consumer asks for the next one
    (or abandons the generator), so read a handle completely before
    advancing.  This keeps a long list of inputs from accumulating open
    file descriptors.
    '''
    for f in fnames:
        if f.endswith('.gz'):
            fd = gzip.open(f, mode=mode, encoding=encoding, errors=errors)
        elif f.endswith(('.xz', '.lzma')):
            fd = lzma.open(f, mode=mode, encoding=encoding, errors=errors)
        elif f.endswith('.bz2'):
            fd = bz2.open(f, mode=mode, encoding=encoding, errors=errors)
        elif f == '-':
            # Detach the binary stream so stdin can be re-decoded with the
            # caller's encoding and error handler instead of the defaults.
            import io
            fd = io.TextIOWrapper(sys.stdin.detach(), encoding=encoding, errors=errors)
        else:
            fd = open(f, mode=mode, encoding=encoding, errors=errors)
        try:
            yield fd
        finally:
            # Runs when the consumer moves on or the generator is closed.
            fd.close()


def gen_lines(file_handles):
    '''Yield every line of each handle in turn, minus trailing whitespace.

    file_handles is any iterable of line-iterable objects; they are
    consumed lazily, one after the other.
    '''
    for handle in file_handles:
        # rstrip() drops the newline along with any other trailing blanks.
        yield from (raw.rstrip() for raw in handle)


def gen_log_entries(lines):
    '''Group a stream of log lines into entries.

    A new entry begins at every line that starts with 'Perforce server '
    and ends with ':'.  Each entry is yielded as a list of its lines, the
    header line first.  Lines seen before the first header are yielded
    together as one leading entry.

    Nothing is yielded for empty input, so callers never receive an
    empty list.
    '''
    entry = []
    for line in lines:
        if line.startswith('Perforce server ') and line.endswith(':'):
            if entry:
                yield entry
            entry = []
        entry.append(line)
    # Flush the last entry at end of input, but only if there is one.
    if entry:
        yield entry

# Patterns for the three "---" track lines whose numbers myfilter() compares
# against the limits table.  They are applied with .match(), so each must
# match from the very start of the line.
#
# Lock and row counts.  Groups, in order: read locks, write locks,
# get rows, pos rows, scan rows, put rows, del rows.
re_locks_read_write = re.compile(r'---   locks read/write (\d+)/(\d+) rows get\+pos\+scan put\+del (\d+)\+(\d+)\+(\d+) (\d+)\+(\d+)')
# Total lock times in ms.  Groups, in order: read wait, read held,
# write wait, write held.
re_tot_lock_wait_held = re.compile(r'---   total lock wait\+held read/write (\d+)ms\+(\d+)ms/(\d+)ms\+(\d+)ms')
# Maximum lock times in ms, same group order as the total pattern.
re_max_lock_wait_held = re.compile(  r'---   max lock wait\+held read/write (\d+)ms\+(\d+)ms/(\d+)ms\+(\d+)ms')

# Thresholds used by myfilter().  Each tuple is indexed by the track level
# (0-5, the -t/--tracklevel option); a measured value counts as a hit when
# it is strictly greater than the threshold at that level.
# DB_LOCKS and DB_ROWS_* are compared against counts, the *_WAIT / *_HELD
# rows against the millisecond values parsed from the log.
# NOTE(review): DB_READ_HELD and DB_WRITE_HELD do not increase between
# levels 2 and 3, and DB_WRITE_HELD tops out at 500 where the other timing
# rows reach 5000 - confirm these values are intentional.
L = {                           # LIMITS
    'DB_LOCKS'      : ( 1, 1, 1000,   10000,   100000,   100000),
    'DB_ROWS_IN'    : ( 1, 1, 10000,  100000,  1000000,  10000000),
    'DB_ROWS_OUT'   : ( 1, 1, 1000,   10000,   100000,   100000),
    'DB_READ_WAIT'  : ( 1, 1, 100,    1000,    5000,     5000),
    'DB_WRITE_WAIT' : ( 1, 1, 100,    1000,    5000,     5000),
    'DB_READ_HELD'  : ( 1, 1, 100,    100,     5000,     5000),
    'DB_WRITE_HELD' : ( 1, 1, 100,    100,     500,      500),
}

def myfilter(entry, lvl):
    '''
    Tell whether a log entry should be kept.

    entry is the list of lines making up one log entry; lvl selects which
    column of the limits table L the measured values are compared with.

    Returns True as soon as a lock count, a get+scan or put+del row
    count, or a total/max lock wait or held time is above its limit.
    Also returns True when the entry has no track output ("--- " lines)
    at all.  Returns False - skip the entry - when track output is
    present but nothing in it is above a limit.
    '''
    has_track = False
    for text in entry:
        if text.startswith('--- '):
            has_track = True

        found = re_locks_read_write.match(text)
        if found:
            # The "pos" row count is parsed but takes no part in the checks.
            rlocks, wlocks, got, _pos, scanned, put, deleted = [
                int(group) for group in found.groups()
            ]
            lock_cap = L['DB_LOCKS'][lvl]
            too_many_locks = rlocks > lock_cap or wlocks > lock_cap
            if too_many_locks \
               or got + scanned > L['DB_ROWS_IN'][lvl] \
               or put + deleted > L['DB_ROWS_OUT'][lvl]:
                return True

        # Total and max timings share one layout and one set of limits.
        for pattern in (re_tot_lock_wait_held, re_max_lock_wait_held):
            found = pattern.match(text)
            if not found:
                continue
            rwait, rheld, wwait, wheld = [int(group) for group in found.groups()]
            measured = (
                (rwait, 'DB_READ_WAIT'),
                (rheld, 'DB_READ_HELD'),
                (wwait, 'DB_WRITE_WAIT'),
                (wheld, 'DB_WRITE_HELD'),
            )
            if any(value > L[key][lvl] for value, key in measured):
                return True

    # Nothing exceeded a limit: keep only entries without track output.
    return not has_track


if __name__ == '__main__':
    # Command-line entry point: read the named inputs (or stdin when none
    # are given), split them into log entries and print the entries that
    # myfilter() keeps.
    ap = argparse.ArgumentParser(description='A filter for P4LOG')
    ap.add_argument('-t', '--tracklevel', type=int, default=5, choices=[0,1,2,3,4,5],
            help='column of the limits table to compare against (default: %(default)s)')
    ap.add_argument('-e', '--error-handler', choices='strict ignore replace surrogateescape'.split(),
            default='surrogateescape', help='decode error handler')
    ap.add_argument('inputs', nargs='*', default=['-'])
    args = ap.parse_args()
    # stdout carries the filtered log, so the parsed options go to stderr
    # to keep them out of the data stream.
    print(args, file=sys.stderr)

    for e in gen_log_entries(gen_lines(gen_file_handles(args.inputs, errors=args.error_handler))):
        if myfilter(e, args.tracklevel):
            print( '\n'.join(e) )
#	Change	User	Description	Committed
#2	10097	Lester Cheung	Using argparse to parse command-line arguments and handle unicode decode error from stdin.
#1	10057	Lester Cheung	Filter script to reduce number of entries in a -vtrack=1 log. This is for administrators who always log with track=1 and find the log files too big to process - like TRACK2SQL and friends.