#!/usr/bin/env python3.3
'''
/*
 * Copyright (c) 2015, Charles McLouth
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 *     * Redistributions of source code must retain the above copyright
 *       notice, this list of conditions and the following disclaimer.
 *
 *     * Redistributions in binary form must reproduce the above copyright
 *       notice, this list of conditions and the following disclaimer in the
 *       documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL STEWART LORD BE LIABLE FOR ANY
 * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

auditconverter2 - This python script converts audit logs for ingestion into
the Helix Threat Detection analytics engine.
It converts all input files to be encoded as utf-8.
It converts all structured log formats to standard P4AUDIT format.
It compresses output files

see auditconverter2 -h for usage

$Id: //guest/cmclouth/projects/auditconverter/src/auditconverter2.py#5 $
*/
'''

import logging
import sys
import os
import argparse
import datetime
#import operator
import auditconverter

scriptversion = "1.0"
scriptname = os.path.basename(sys.argv[0])

# standard format: '%s %s@%s %s %s %s#%s'
# structured format: '6,,,%s,,,%s,%s,,%s,,,,%s,%s,%s'
# (self.f_date, self.f_user, self.f_client, self.f_host, self.f_action, self.f_file, self.f_rev)
P4AUDIT_RECORDFORMAT = '%s %s@%s %s %s %s#%s'
STRUCTURED_RECORDFORMAT = '6,,,%s,,,%s,%s,,%s,,,,%s,%s,%s'


def isDEBUG(record):
    ''' logging filter: accept only DEBUG records '''
    return record.levelname == 'DEBUG'


def isINFO(record):
    ''' logging filter: accept only INFO records '''
    return record.levelname == 'INFO'


def isWARN(record):
    ''' logging filter: accept only WARNING records '''
    return record.levelname == 'WARNING'


def isERROR(record):
    ''' logging filter: accept only ERROR and CRITICAL records '''
    return record.levelname in ['ERROR', 'CRITICAL']


def processInputParams(pargs=None):
    '''
    parse and validate the command line arguments.

    pargs -- list of argument strings WITHOUT the program name.  When None
             (the default) argparse falls back to sys.argv[1:].

    Returns the argparse namespace with inputFile, output, logformat and
    compress set.  On a missing or invalid argument the help text is printed
    and gParser.error() terminates the process (SystemExit, status 2).
    '''
    gParser = argparse.ArgumentParser()
    gParser.description = (
        "This python script converts audit logs for ingesting into the Helix Threat Detection analytics engine.\n"
        "It converts all input files to be encoded as utf-8, converts all structured log formats to standard P4AUDIT format."
        "Optionally it will anonymize the data in the output log files and compress them.")
    gParser.add_argument('-V', '--version', action='version',
                         version='%(prog)s ' + scriptversion)
    gParser.add_argument('-i', '--inputFile', dest='inputFile', metavar='inputFile',
                         help='audit log to convert.')
    gParser.add_argument('-o', '--output', dest='output', metavar='output',
                         help='a directory to write converted log files to.')
    gParser.add_argument('-f', '--format', dest='logformat', metavar='logformat',
                         type=int, default=0,
                         help='Output record format. 0 (zero) for P4AUDIT 1 (one) for Structured Audit Log.')
    gParser.add_argument('-c', '--compress', dest='compress', action='store_true',
                         help='compress output log files with gzip compatible compression.')

    args = gParser.parse_args(pargs)

    # both -i and -o are mandatory; argparse always sets the attributes, so
    # a None value is the only "missing" state that has to be detected.
    if args.inputFile is None:
        gParser.print_help()
        gParser.error('inputFile (-i) are required.')
    if args.output is None:
        gParser.print_help()
        gParser.error('output (-o) is required.')

    # validate inputs
    if not os.path.isfile(args.inputFile):
        gParser.print_help()
        gParser.error("invalid inputFile (-i) '%s'" % (args.inputFile))
    if not os.path.isdir(args.output):
        gParser.print_help()
        gParser.error("invalid output directory (-o) '%s'" % (args.output))

    return args


def _buildLogger(name):
    ''' create the script logger: one stream handler per severity band '''
    log = logging.getLogger(name)
    log.propagate = False
    log.setLevel(logging.INFO)

    # (stream, level, format, filter); a None stream means the
    # logging.StreamHandler default, sys.stderr.  Only INFO goes to stdout.
    handlerSpecs = (
        (None, logging.DEBUG,
         '%(levelname)s:%(filename)s:%(lineno)d:%(funcName)s:%(message)s', isDEBUG),
        (sys.stdout, logging.INFO, '%(message)s', isINFO),
        (None, logging.WARNING, '%(message)s', isWARN),
        (None, logging.ERROR, '%(message)s', isERROR),
    )
    for stream, level, fmt, recordFilter in handlerSpecs:
        handler = logging.StreamHandler(stream)
        handler.setLevel(level)
        handler.setFormatter(logging.Formatter(fmt))
        handler.addFilter(recordFilter)
        log.addHandler(handler)
    return log


def _outputFileName(inputFile, outputDir, compress):
    ''' derive the output path: <outputDir>/<input name minus .gz>.utf8[.gz] '''
    baseName = os.path.basename(inputFile)
    if inputFile.endswith('.gz'):
        baseName = baseName[:-3]
    baseName += '.utf8'
    if compress:
        baseName += '.gz'
    return os.path.join(outputDir, baseName)


def main(argv=None):
    '''
    convert one audit log as directed by the command line.

    argv -- argument strings without the program name; None means
            sys.argv[1:].

    Record level auditconverter.AuditException errors are logged and the
    conversion continues with the next record.  Any other Exception is
    logged with its traceback and ends the conversion.  Both files are
    always closed.
    '''
    logger = _buildLogger(scriptname)
    # the library module logs through the logger configured here
    auditconverter.logger = logger

    args = processInputParams(sys.argv[1:] if argv is None else argv)
    if args.logformat == 1:
        outputRecordFormat = STRUCTURED_RECORDFORMAT
    else:
        outputRecordFormat = P4AUDIT_RECORDFORMAT

    utf8converter = auditconverter.UTF8Converter()
    maxWriteSize = None
    fileName = args.inputFile
    fRead = None
    fWrite = None
    dtStart = datetime.datetime.now()
    try:
        fileNameOut = _outputFileName(fileName, args.output, args.compress)
        fRead = auditconverter.AuditFileIO(fileName, True, False, utf8converter, None)
        fWrite = auditconverter.AuditFileIO(fileNameOut, False, False, utf8converter, maxWriteSize)
        logger.info('Processing file: %s converting to: %s' % (fRead.fileName, fWrite.fileName))
        while True:
            try:
                aRecord = fRead.readRecord(True)
                # eof
                if aRecord is None:
                    break
                # write output
                fWrite.writeLine(aRecord.getLine(outputRecordFormat))
            except auditconverter.AuditException as e:
                # NOTE(review): e.args appears to carry (errCode, fileName,
                # lineNo, ...); only the formatted message is reported, and
                # the bad record is skipped.
                logger.error(str(e))
            if fRead.linesRead % 100000 == 0:
                logger.info('Progress: %s converting to: %s with %d lines.'
                            % (fRead.fileName, fWrite.fileName, fRead.linesRead))

        # total_seconds() keeps whole days that timedelta.seconds would drop;
        # clamp to 1 so the rate below never divides by zero.
        seconds = int((datetime.datetime.now() - dtStart).total_seconds())
        if seconds == 0:
            seconds = 1
        logger.info('Completed: %s converting to: %s with %d lines in %d seconds (%d lines/second.)'
                    % (fRead.fileName, fWrite.fileName, fRead.linesRead,
                       seconds, int(fRead.linesRead / seconds)))
    except Exception:
        # KeyboardInterrupt / SystemExit are deliberately not caught here
        logger.exception('unknown exception processing input file: %s' % (fileName))
    finally:
        # close the writer even when closing the reader fails
        try:
            if fRead is not None:
                fRead.close()
        finally:
            if fWrite is not None:
                fWrite.close()


if __name__ == '__main__':
    main()