#!c:/program files/python/python.exe

# Below are the settings that you will have to modify to reflect your setup.

# Number of seconds to wait after each iteration. Performance impact on both client and server should be
# minimal, so keep this at 0 if you want quick refresh. Increase this to reduce load (if any) or if you want
# more time to study the output from a single snapshot.
refresh_delay = 0

# Name of an account that has Perforce admin access.
p4_admin_name = "admin"

# Server details. Change these (all of them!) to the correct values for your environment.

# Name of the Perforce server process. Should be "p4s" if you installed Perforce as a service, or "p4d" if you
# run it as a standalone application.
p4_process_name = "p4s"
# Number of CPUs in the server.
server_cpu_count = 4
# Address of the server.
p4_port = "p4:1665"
# Windows networking name of the server.
server_name = "p4"
# Name of the typeperf counter for the server network interface.
server_network_name = "Intel[R] Advanced Network Services Virtual Adapter"
# Name of the typeperf counter for the server database disk interface.
server_db_disk_name = "8 D:"
# Name of the typeperf counter for the server depot disk interface.
server_depot_disk_name = "5 F:"
# Name of the typeperf counter for the server swap disk interface.
server_swap_disk_name = "0 C:"
# If you use extra global options to connect to the server (say -C winansi), add them here.
p4_extra_options = ""

# Proxy details. If you want, you can add a number of Perforce proxies here. Again, proxies should be Windows
# machines (XP or later) and the current user should have admin rights on them. The script will then monitor
# cpu, network and disk usage on those machines. Add more below by appending to the list, or leave the list
# empty. Each entry of proxy_list is a tuple consisting of the following items:
#   - Windows networking name of the proxy
#   - Number of CPUs in the proxy
#   - Name of the typeperf counter for the proxy network interface
#   - Name of the typeperf counter for the proxy cache disk interface
proxy_list = [
    #("p4proxy1", 4, "Intel[R] PRO_1000 MT Network Connection _2", "1 D:"),
    #("p4proxy2", 4, "Intel[R] PRO_1000 MT Network Connection _2", "1 D:"),
]

# Client details. If you want, you can also track the stats of a client. This is useful when profiling,
# so you can see where the bottlenecks are. Add details below, using the same tuple layout as proxy_list.
client_list = [
    #("client", 2, "Realtek RTL8169_8110 Family PCI Gigabit Ethernet NIC [NDIS 6.0]", "1 D:"),
]

# This is the number of history pages to keep; they'll usually be fairly small (~25 KB), so a value of 10000
# will give you about a day of history (assuming a 10 second refresh rate) for roughly 250 MB of disk space
# (10000 pages x ~25 KB). Set to 0 to disable history.
history_size = 10000

# Lastly, specify some threshold values that will determine the color of the various output fields.
# A value less than the first threshold value will give a green background, anything between the two values
# will result in yellow, while a value higher than the last threshold will give a red background.
# NOTE: the spelling of "cpu_threshholds" / "disk_threshholds" is historical; the rest of the script refers
# to these exact names.

# cpu values are percentages from 0 - 100%
cpu_threshholds = [50, 75]
# disk values are percentages from 0 - 100%
disk_threshholds = [50, 75]
# network values are in MBps (that's bytes), so adjust these to your network bandwidth
net_thresholds = [40, 60]
# runtime is measured in minutes
runtime_thresholds = [30, 60]
# Memory is measured in megabytes.
# Available memory decreases with increased usage, so the value measured is actually negated first before
# the threshold is applied. Threshold values are therefore negative as well.
available_mem_thresholds = [-1000, -500]
# Virtual memory thresholds; finding good values for these takes a bit of experimentation.
p4s_virtual_mem_thresholds = [6000, 8000]
# Working set should be safely below actually available memory.
p4s_working_mem_thresholds = [1500, 3000]

# End of configuration section. There is of course more you might want to customize to suit your particular
# environment, but you'll need to dig into the script below for that.

########################################################################################################################
import re
import sys
import traceback
import time
import os
import subprocess
import string
import socket
import platform
import getpass
import getopt
import smtplib

####################################################################################################
def FormatExceptionInfo(maxTBlevel=5):
    """Describe the exception currently being handled as a CRLF-separated text report.

    Must be called from inside an ``except`` block; the information is taken from
    ``sys.exc_info()``.

    maxTBlevel -- maximum number of traceback entries to include.

    Returns a string holding the exception class name, its arguments and the
    formatted traceback entries. If no exception is being handled, a short
    explanatory line is returned instead of raising.
    """
    cla, exc, trbk = sys.exc_info()
    if cla is None:
        return "No exception is currently being handled.\r\n"

    excName = cla.__name__
    # Use getattr (not exc.__dict__) so arguments of built-in exceptions are found,
    # and convert to text explicitly: 'args' is normally a tuple, which cannot be
    # concatenated to a string directly.
    excArgs = str(getattr(exc, "args", ""))

    output = "Exception " + excName + " occurred, arguments = " + excArgs + "\r\n"
    output = output + "Traceback = \r\n"
    for tb in traceback.format_tb(trbk, maxTBlevel):
        output = output + " " + tb + "\r\n"
    return output

####################################################################################################
# Header of the notification mail; the %...% markers are substituted when a mail is composed.
msg_template = (
    "To: %TO%\r\n"
    "From: %FROM%\r\n"
    "Subject: %SUBJECT%\r\n"
    "\r\n"
)

# Sender address and subject line used for the notification mails.
sender = "p4_profiler_win@" + socket.getfqdn()
subject = "[p4_profiler_win] Exception thrown, script will continue."
def SendMail(recipient, mailserver, error):
    """Mail the text *error* to *recipient* through the SMTP server *mailserver*.

    Nothing is sent when either *recipient* or *mailserver* is empty. The SMTP
    session is always closed, even when sending fails.
    """
    if not (mailserver and recipient):
        return
    session = smtplib.SMTP(mailserver)
    try:
        session.set_debuglevel(0)
        msg = msg_template.replace("%TO%", recipient)
        msg = msg.replace("%FROM%", sender)
        msg = msg.replace("%SUBJECT%", subject)
        msg = msg + error
        session.sendmail(sender, recipient, msg)
    finally:
        session.quit()

########################################################################################################################
def GetTime(time):
    """Convert a "hours:minutes:seconds[.msecs]" string to a number of milliseconds.

    Returns -1 when *time* does not consist of exactly three ':'-separated parts.
    The digits after the decimal point are taken as a millisecond count as-is
    (presumably always three digits in the tool output -- TODO confirm).
    Raises ValueError when a part is not numeric.
    """
    # NOTE: the parameter name shadows the 'time' module inside this function only.
    time_info = time.split(":")
    if len(time_info) != 3:
        return -1
    hours = int(time_info[0])
    minutes = int(time_info[1])
    seconds_info = time_info[2].split(".")
    seconds = int(seconds_info[0])
    msecs = 0
    if len(seconds_info) > 1 and seconds_info[1]:
        msecs = int(seconds_info[1])
    return msecs + 1000 * (seconds + 60 * (minutes + 60 * hours))

########################################################################################################################
def _RunCommand(command, stdin_text=None):
    """Run *command* and return (list of stdout lines, stderr text).

    communicate() drains stdout and stderr together, so the child can never block
    on a full pipe that nobody reads. *stdin_text*, when given, is fed to the
    child's standard input.
    """
    stdin = None
    if stdin_text is not None:
        stdin = subprocess.PIPE
    process = subprocess.Popen(command, stdin=stdin, stdout=subprocess.PIPE, stderr=subprocess.PIPE,
                               universal_newlines=True)
    out, err = process.communicate(stdin_text)
    return out.splitlines(), err

def _Percent(part, total):
    """Return *part* as a rounded integer percentage of *total* (0 when *total* is not positive)."""
    if total <= 0:
        return 0
    return int(100.0 * part / total + 0.5)

def _ParseCounter(field):
    """Return the number inside one quoted typeperf CSV field, or -1.0 if it is not a number."""
    try:
        return float(field.strip()[1:-1])
    except ValueError:
        return -1.0

def _CounterPath(machine, counter):
    """Return the quoted typeperf counter path for *counter* on *machine*."""
    return "\"\\\\" + machine + "\\" + counter + "\""

def _EscapeHtml(text):
    """Escape the characters that have a special meaning in HTML."""
    text = text.replace("&", "&amp;")
    text = text.replace("<", "&lt;")
    text = text.replace(">", "&gt;")
    return text.replace("\"", "&quot;")

_template_cache = {}

def _LoadTemplate(filename):
    """Return the contents of *filename*, reading the file only the first time it is requested."""
    if filename not in _template_cache:
        template_file = open(filename, 'r')
        try:
            _template_cache[filename] = template_file.read()
        finally:
            template_file.close()
    return _template_cache[filename]

def _WriteFile(filename, text):
    """Write *text* to *filename*, replacing any previous contents."""
    output_file = open(filename, 'w')
    try:
        output_file.write(text)
    finally:
        output_file.close()

########################################################################################################################
# Per-process times seen by the previous GetProcessInfo call, keyed by the id in column 1.
prev_process_time = {}

def GetProcessInfo():
    """Measure how the server's cpu time was divided since the previous call.

    Runs "pslist" against the server and looks at every output line that has
    exactly 8 whitespace-separated fields, using field 0 as process name, field 1
    as id, field 5 as memory figure and field 6 as a time (presumably the
    accumulated cpu time -- TODO confirm against the pslist column layout).

    Returns (total_time, idle_cpu, p4_server_cpu, other_cpu): the summed time
    delta of all processes seen in both this and the previous call, followed by
    the percentages attributed to id 0, to the Perforce server process (the one
    with the highest memory figure) and to everything else. On the first call
    there is no previous sample, so total_time is 0 and idle and p4 cpu are 0.
    """
    global prev_process_time
    lines, _unused_err = _RunCommand("pslist \\\\" + server_name)

    cur_process_time = {}
    cur_process_mem = {}
    cur_process_name = {}
    for line in lines:
        info = line.split()
        if len(info) != 8:
            continue
        try:
            tid = int(info[1])
            mem = int(info[5])
            t = GetTime(info[6])
        except ValueError:
            # an 8-field line that is not a process row (e.g. banner text)
            continue
        cur_process_name[tid] = info[0]
        cur_process_mem[tid] = mem
        cur_process_time[tid] = t

    process_time = {}
    total_time = 0
    p4s_mem_high = 0
    p4s_tid = -1
    for tid, t in cur_process_time.items():
        if tid in prev_process_time:
            process_time[tid] = t - prev_process_time[tid]
            total_time = total_time + process_time[tid]
        # several processes may carry the server name; track the one using most memory
        if cur_process_name[tid] == p4_process_name and cur_process_mem[tid] > p4s_mem_high:
            p4s_tid = tid
            p4s_mem_high = cur_process_mem[tid]
    prev_process_time = cur_process_time

    idle_cpu = 0
    if 0 in process_time:
        idle_cpu = _Percent(process_time[0], total_time)
    p4_server_cpu = 0
    if p4s_tid > 0 and p4s_tid in process_time:
        p4_server_cpu = _Percent(process_time[p4s_tid], total_time)
    other_cpu = 100 - idle_cpu - p4_server_cpu
    return total_time, idle_cpu, p4_server_cpu, other_cpu

########################################################################################################################
# Per-entry times seen by the previous GetP4Info call, keyed by field 0 of the 7-field lines.
prev_p4_process_time = {}

def GetP4Info():
    """Collect memory figures and per-thread time deltas of the Perforce server process.

    Runs "pslist -e -d -m" for the server process. Lines with 7 fields contribute
    a time (fields 4 and 5 added together) keyed by field 0; lines with 9 fields
    whose first field is the process name contribute fields 2 and 3 as memory
    figures (presumably virtual memory and working set in KB -- TODO confirm).

    Returns (p4_kb_vm, p4_kb_ws, p4_process_time) where the first two are the
    highest memory figures seen and the last maps each key to the time it gained
    since the previous call (keys without a previous sample are left out).
    """
    global prev_p4_process_time
    lines, _unused_err = _RunCommand("pslist -e -d -m \\\\" + server_name + " " + p4_process_name)

    cur_p4_process_time = {}
    p4_kb_vm = 0
    p4_kb_ws = 0
    for line in lines:
        info = line.split()
        try:
            if len(info) == 7:
                cur_p4_process_time[info[0]] = GetTime(info[4]) + GetTime(info[5])
            if len(info) == 9 and info[0] == p4_process_name:
                vm = int(info[2])
                ws = int(info[3])
                if vm > p4_kb_vm:
                    p4_kb_vm = vm
                if ws > p4_kb_ws:
                    p4_kb_ws = ws
        except ValueError:
            # a line with a matching field count that holds no numbers; ignore it
            continue

    p4_process_time = {}
    for process, t in cur_p4_process_time.items():
        if process in prev_p4_process_time:
            p4_process_time[process] = t - prev_p4_process_time[process]
    prev_p4_process_time = cur_p4_process_time
    return p4_kb_vm, p4_kb_ws, p4_process_time

########################################################################################################################
# Splits one line of "p4 monitor show -a -e -l" into:
# id, tool, ip address, status, user, client, runtime, command.
_MONITOR_REGEX = re.compile(
    r"\s*(\S*)(.*)\s+([0-9]+\.[0-9]+\.[0-9]+\.[0-9]+)\s*(\S)\s*(\S*)\s*(\S*)\s*(\S*)\s*(.*)")

def GetMonitorInfo(admin_ok):
    """Return the commands currently reported by "p4 monitor show".

    admin_ok -- True when the admin account is logged in; the detailed listing
                (-a -e -l) is then requested, otherwise the plain listing.

    Returns a dict mapping the id string of each entry to the tuple
    (tool, connection, status, user, client, runtime in msecs, command).
    In the plain listing tool, connection and client are not available and are
    reported as "??", "unknown" and "unknown". Lines that cannot be parsed are
    skipped.
    """
    if admin_ok:
        shell_command = ("p4 -u " + p4_admin_name + " -p " + p4_port + " " + p4_extra_options
                         + " monitor show -a -e -l")
    else:
        # the extra global options (e.g. a charset) apply to every p4 invocation
        shell_command = "p4 -p " + p4_port + " " + p4_extra_options + " monitor show"
    lines, _unused_err = _RunCommand(shell_command)

    p4_info = {}
    for line in lines:
        tool = "??"
        connection = "unknown"
        client = "unknown"
        if admin_ok:
            match = _MONITOR_REGEX.search(line)
            if not match:
                continue
            tid = match.group(1).strip()
            tool = match.group(2).strip().lower()
            connection = match.group(3).strip().lower()
            status = match.group(4).strip()
            user = match.group(5).strip()
            client = match.group(6).strip()
            t = GetTime(match.group(7).strip())
            command = match.group(8).strip()
        else:
            info = line.split()
            if len(info) < 5:
                continue
            tid = info[0]
            status = info[1]
            user = info[2]
            t = GetTime(info[3])
            command = info[4]
        p4_info[tid] = tool, connection, status, user, client, t, command
    return p4_info

########################################################################################################################
def GetMachineStats(machine_name, cpu_count, network_name, disk_name):
    """Sample network, disk and cpu load of one machine with typeperf.

    machine_name -- Windows networking name of the machine
    cpu_count    -- number of "Processor(n)" counters to query (n = 0 .. cpu_count-1)
    network_name -- typeperf instance name of the network interface
    disk_name    -- typeperf instance name of the physical disk

    Returns (machine_name, ip_address, net_MBps, disk_busy, cpu) where cpu is a
    list with one integer percentage per processor, or None when typeperf
    produced no data line. A value that could not be read is reported as -1.
    Raises a socket error when *machine_name* cannot be resolved.
    """
    ip_address = socket.gethostbyname(machine_name)

    counters = [
        _CounterPath(machine_name, "network interface(" + network_name + ")\\Bytes Total/sec"),
        _CounterPath(machine_name, "physicaldisk(" + disk_name + ")\\% Idle Time"),
    ]
    for i in range(cpu_count):
        counters.append(_CounterPath(machine_name, "Processor(" + repr(i) + ")\\% Processor Time"))
    lines, _unused_err = _RunCommand("typeperf -sc 1 " + " ".join(counters))

    stats = None
    for line in lines:
        info = line.split(',')
        # data lines hold a timestamp plus one field per counter; the CSV header line has the
        # same field count but its first field starts with "( (after the opening quote)
        if len(info) != 3 + cpu_count or info[0][1:2] == '(':
            continue
        net_MBps = -1
        disk_busy = -1
        net_bps = _ParseCounter(info[1])
        if net_bps >= 0:
            net_MBps = net_bps / (1024 * 1024)
        disk_idle = _ParseCounter(info[2])
        if disk_idle >= 0:
            disk_busy = int(100.5 - disk_idle)
        cpu = []
        for i in range(cpu_count):
            cpu_busy = _ParseCounter(info[3 + i])
            if cpu_busy >= 0:
                cpu.append(int(cpu_busy + 0.5))
            else:
                cpu.append(-1)
        stats = machine_name, ip_address, int(net_MBps), disk_busy, cpu
    return stats

########################################################################################################################
def GetServerStats():
    """Sample network, disk and memory counters of the Perforce server with typeperf.

    Returns (net_MBps, db_busy, depot_busy, swap_busy, mem_av): network throughput
    in whole MB per second, the busy percentage of the database, depot and swap
    disks, and the "Available MBytes" memory counter. A value that could not be
    read is reported as -1.
    """
    counters = [
        _CounterPath(server_name, "Network Interface(" + server_network_name + ")\\Bytes Total/sec"),
        _CounterPath(server_name, "physicaldisk(" + server_db_disk_name + ")\\% Idle Time"),
        _CounterPath(server_name, "physicaldisk(" + server_depot_disk_name + ")\\% Idle Time"),
        _CounterPath(server_name, "physicaldisk(" + server_swap_disk_name + ")\\% Idle Time"),
        _CounterPath(server_name, "Memory\\Available MBytes"),
    ]
    lines, _unused_err = _RunCommand("typeperf -sc 1 " + " ".join(counters))

    net_MBps = -1
    db_busy = -1
    depot_busy = -1
    swap_busy = -1
    mem_av = -1
    for line in lines:
        info = line.split(',')
        # skip everything but data lines (see GetMachineStats for the header test)
        if len(info) != 6 or info[0][1:2] == '(':
            continue
        net_bps = _ParseCounter(info[1])
        if net_bps >= 0:
            net_MBps = net_bps / (1024 * 1024)
        db_idle = _ParseCounter(info[2])
        if db_idle >= 0:
            db_busy = int(100.5 - db_idle)
        depot_idle = _ParseCounter(info[3])
        if depot_idle >= 0:
            depot_busy = int(100.5 - depot_idle)
        swap_idle = _ParseCounter(info[4])
        if swap_idle >= 0:
            swap_busy = int(100.5 - swap_idle)
        mem_av = int(_ParseCounter(info[5]))
    return int(net_MBps), db_busy, depot_busy, swap_busy, mem_av

########################################################################################################################
def CheckAdminAccess():
    """Return True when "p4 login -s" for the admin account writes nothing to stderr."""
    _unused_out, err_output = _RunCommand(
        "p4 -u " + p4_admin_name + " -p " + p4_port + " " + p4_extra_options + " login -s")
    # if nothing has gone to stderr, we have admin access
    return not err_output

########################################################################################################################
def Login(password):
    """Log the admin account in, feeding *password* to "p4 login" on its standard input.

    Returns True when the command wrote nothing to stderr.
    """
    _unused_out, err_output = _RunCommand(
        "p4 -u " + p4_admin_name + " -p " + p4_port + " " + p4_extra_options + " login", password)
    return not err_output

########################################################################################################################
# Cache of ip address -> host name, so every address is looked up only once.
hosts = {}

def GetHostName(ip_address):
    """Return the short (first label) host name for *ip_address*.

    When the reverse lookup fails the address itself is returned. Results,
    including failures, are cached in 'hosts'.
    """
    if ip_address in hosts:
        return hosts[ip_address]
    try:
        host = socket.gethostbyaddr(ip_address)[0].split('.')[0]
    except (socket.herror, socket.gaierror):
        host = ip_address
    hosts[ip_address] = host
    return host

########################################################################################################################
# Result of the most recent GetMonitorInfo call; read by TidKey.
cur_threads = {}

def TidKey(tid):
    """Sort key for monitor entries: the runtime (element 5) of entry *tid* in 'cur_threads'."""
    return cur_threads[tid][5]

########################################################################################################################
def GetStyle(value, thresholds):
    """Return the style name for *value*.

    "ok" below thresholds[0], "high" below thresholds[1], "critical" otherwise.
    """
    if value < thresholds[0]:
        return "ok"
    if value < thresholds[1]:
        return "high"
    return "critical"

########################################################################################################################
def OutputNavigation(is_history_page, page_count):
    """Return the page template with the navigation, css, refresh and background fields filled in.

    is_history_page -- True for a page in the history directory, False for the live page
    page_count      -- index of the history page being written
    """
    output = _LoadTemplate("p4mon_template.html")
    if is_history_page:
        # NOTE(review): prev_page is not used by the strings below and "$refresh" is replaced by
        # an empty string in both branches; it looks like the HTML markup that belonged in these
        # literals (prev/next links, refresh tag) has been lost -- confirm against the templates.
        prev_page = (page_count + history_size - 1) % history_size
        navigate = " "
        navigate = navigate + "   "
        navigate = navigate + ""
        output = output.replace("$navigate", navigate)
        output = output.replace("$css", "../p4mon.css")
        output = output.replace("$refresh", "")
        output = output.replace("$background_color", "'#ffffe0'")
    else:
        output = output.replace("$navigate", " ")
        output = output.replace("$css", "p4mon.css")
        output = output.replace("$refresh", "")
        output = output.replace("$background_color", "'#ffffff'")
    return output

########################################################################################################################
def OutputThreadInfo(tids, cur_threads, p4_threads, total_time):
    """Return the html for the monitor entries, one filled-in thread template per entry.

    tids        -- ids of the entries, in output order
    cur_threads -- dict as returned by GetMonitorInfo
    p4_threads  -- dict id -> time delta, as returned (third element) by GetP4Info
    total_time  -- total time delta, as returned (first element) by GetProcessInfo

    Text that originates from Perforce clients is html-escaped before insertion.
    """
    thread_template = _LoadTemplate("p4mon_thread_template.html")
    # the cpu thresholds are for the whole machine; a single entry is judged against one core's share
    core_cpu_thresholds = [cpu_threshholds[0] // server_cpu_count, cpu_threshholds[1] // server_cpu_count]

    threads_output = ""
    for tid in tids:
        tool_info, connection, status, user, client, runtime, command = cur_threads[tid]

        cpu = 0
        if tid in p4_threads:
            cpu = _Percent(p4_threads[tid], total_time)
        connection = GetHostName(connection)

        # the tool field looks like "<tool>/<version> ..."
        tool_info = tool_info.split('/')
        tool = tool_info[0]
        version = "unknown"
        if len(tool_info) > 1:
            version_info = tool_info[1].split()
            if len(version_info) > 0:
                version = version_info[0]

        hours = int(runtime // (60 * 60 * 1000))
        t = runtime % (60 * 60 * 1000)
        minutes = int(t // (60 * 1000))
        t = t % (60 * 1000)
        seconds = int(t // 1000)
        time_string = "%02d:%02d:%02d" % (hours, minutes, seconds)

        split_command = command.split(None, 1)
        command_name = ""
        if split_command:
            command_name = split_command[0]
        parameters = ""
        if len(split_command) > 1:
            parameters = split_command[1]

        thread_output = thread_template
        thread_output = thread_output.replace("$tid", tid)
        thread_output = thread_output.replace("$runtime", time_string)
        thread_output = thread_output.replace("$style_runtime",
                                              GetStyle(runtime // (60 * 1000), runtime_thresholds))
        thread_output = thread_output.replace("$user", _EscapeHtml(user))
        thread_output = thread_output.replace("$cpu", repr(cpu))
        thread_output = thread_output.replace("$style_cpu", GetStyle(cpu, core_cpu_thresholds))
        thread_output = thread_output.replace("$tool", _EscapeHtml(tool))
        thread_output = thread_output.replace("$version", _EscapeHtml(version))
        thread_output = thread_output.replace("$connection", _EscapeHtml(connection))
        thread_output = thread_output.replace("$client", _EscapeHtml(client))
        thread_output = thread_output.replace("$command", _EscapeHtml(command_name))
        thread_output = thread_output.replace("$parameters", _EscapeHtml(parameters))
        threads_output = threads_output + thread_output
    return threads_output

########################################################################################################################
def OutputMachineInfo(machine_stats, machine_list, machine_type):
    """Return the html section describing a group of machines (proxies or clients).

    machine_stats -- list of tuples as returned by GetMachineStats
    machine_list  -- the configuration list (proxy_list / client_list) the stats were taken for
    machine_type  -- caption inserted into the section header

    Returns an empty string when *machine_stats* is empty. The machine template
    offers four cpu fields; fields beyond the machine's cpu count are blanked.
    """
    if not machine_stats:
        return ""
    machine_header = _LoadTemplate("p4mon_machine_header.html")
    machine_template = _LoadTemplate("p4mon_machine_template.html")

    machines_output = ""
    for stats in machine_stats:
        machine_name, machine_address, machine_net_MBps, disk_busy, cpu = stats
        machine_output = machine_template.replace("$machine_name", machine_name)
        machine_output = machine_output.replace("$machine_address", machine_address)

        # look up how many cpus were configured for this machine
        machine_cpu_count = 1
        for machine_settings in machine_list:
            settings_name, settings_cpu_count, settings_network_name, settings_disk_name = machine_settings
            if settings_name == machine_name:
                machine_cpu_count = settings_cpu_count

        for machine_index in range(4):
            field = repr(machine_index + 1)
            if machine_index < machine_cpu_count:
                machine_output = machine_output.replace("$machine_cpu" + field,
                                                        repr(cpu[machine_index]) + "%")
                machine_output = machine_output.replace("$style_machine_cpu" + field,
                                                        GetStyle(cpu[machine_index], cpu_threshholds))
            else:
                machine_output = machine_output.replace("$machine_cpu" + field, " ")
                machine_output = machine_output.replace("$style_machine_cpu" + field,
                                                        GetStyle(0, cpu_threshholds))

        machine_output = machine_output.replace("$machine_net", repr(machine_net_MBps))
        machine_output = machine_output.replace("$style_machine_net",
                                                GetStyle(machine_net_MBps, net_thresholds))
        machine_output = machine_output.replace("$machine_disk", repr(disk_busy))
        machine_output = machine_output.replace("$style_machine_disk",
                                                GetStyle(disk_busy, disk_threshholds))
        machines_output = machines_output + machine_output

    output = machine_header.replace("$machine_type", machine_type)
    return output.replace("$machine_stats", machines_output)

########################################################################################################################
def OutputServerInfo(output, p4_info, process_info, server_stats):
    """Fill the server fields of the page *output* and return the result.

    p4_info      -- tuple as returned by GetP4Info
    process_info -- tuple as returned by GetProcessInfo
    server_stats -- tuple as returned by GetServerStats
    """
    p4_kb_vm, p4_kb_ws, p4_threads = p4_info
    total_time, idle_cpu, p4_server_cpu, other_cpu = process_info
    net_MBps, db_busy, depot_busy, swap_busy, mem_av = server_stats
    p4_mb_vm = p4_kb_vm // 1024
    p4_mb_ws = p4_kb_ws // 1024

    output = output.replace("$server_name", socket.getfqdn(server_name).lower())
    output = output.replace("$p4_process_name", p4_process_name)
    output = output.replace("$cpu_p4s", repr(p4_server_cpu))
    output = output.replace("$style_cpu_p4s", GetStyle(p4_server_cpu, cpu_threshholds))
    output = output.replace("$cpu_idle", repr(idle_cpu))
    # idle time is "good", so it is the busy share that is compared to the thresholds
    output = output.replace("$style_cpu_idle", GetStyle(100 - idle_cpu, cpu_threshholds))
    output = output.replace("$cpu_other", repr(other_cpu))
    output = output.replace("$style_cpu_other", GetStyle(other_cpu, cpu_threshholds))
    output = output.replace("$mem_av", repr(mem_av))
    # available memory is negated because its thresholds are configured as negative numbers
    output = output.replace("$style_mem_av", GetStyle(-mem_av, available_mem_thresholds))
    output = output.replace("$mem_p4s_virtual", repr(p4_mb_vm))
    output = output.replace("$style_mem_p4s_virtual", GetStyle(p4_mb_vm, p4s_virtual_mem_thresholds))
    output = output.replace("$mem_p4s_ws", repr(p4_mb_ws))
    output = output.replace("$style_mem_p4s_ws", GetStyle(p4_mb_ws, p4s_working_mem_thresholds))
    output = output.replace("$net", repr(net_MBps))
    output = output.replace("$style_net", GetStyle(net_MBps, net_thresholds))
    output = output.replace("$disk_swap", repr(swap_busy))
    output = output.replace("$style_disk_swap", GetStyle(swap_busy, disk_threshholds))
    output = output.replace("$disk_db", repr(db_busy))
    output = output.replace("$style_disk_db", GetStyle(db_busy, disk_threshholds))
    output = output.replace("$disk_depot", repr(depot_busy))
    output = output.replace("$style_disk_depot", GetStyle(depot_busy, disk_threshholds))
    output = output.replace("$write_time", repr(time.time() * 1000))
    return output

########################################################################################################################
usage_string = (
    "Script that monitors a Perforce server. Outputs a html page.\n\n"
    "Usage:\n"
    "  p4_profiler_win.py [-?] [-o filename] [-e address] [-m mailserver]\n"
    "    -?             display this message\n"
    "    -o filename    name of html output file (default = p4mon.html)\n"
    "    -e address     email any errors to the specified address\n"
    "    -m mailserver  address of mailserver to use"
)

def _CollectMachineStats(machine_list):
    """Return the GetMachineStats result of every machine in *machine_list* that produced data."""
    collected = []
    for name, cpu_count, network_name, disk_name in machine_list:
        stats = GetMachineStats(name, cpu_count, network_name, disk_name)
        if stats:
            collected.append(stats)
    return collected

def main():
    """Parse the command line, then sample the machines and rewrite the html pages forever.

    Exceptions raised while sampling or writing are reported by mail (at most
    once per hour, and only when both -e and -m were given) and monitoring
    continues. Ctrl+C still stops the script.
    """
    global cur_threads

    # We need to be on at least Windows XP, as the Windows 2000 typeperf command is too limited
    if os.name != "nt" or platform.platform().find("2000") >= 0:
        print("This script needs at least Windows XP to run.")
        sys.exit(1)

    try:
        opts, args = getopt.getopt(sys.argv[1:], "?o:e:m:")
    except getopt.GetoptError:
        print(usage_string)
        sys.exit(1)

    html_filename = "p4mon.html"
    email_address = None
    mailserver = None
    for opt, opt_arg in opts:
        if opt == "-?":
            print(usage_string)
            sys.exit(0)
        if opt == "-o":
            html_filename = opt_arg
        if opt == "-e":
            email_address = opt_arg
        if opt == "-m":
            mailserver = opt_arg

    # If we need to maintain history, make sure the directory is there and empty
    history_dir = None
    if history_size > 0:
        history_dir = os.path.dirname(os.path.abspath(html_filename)) + "/history"
        if os.path.exists(history_dir):
            for f in os.listdir(history_dir):
                os.remove(history_dir + "/" + f)
        else:
            os.mkdir(history_dir)

    # See if we can get admin access for more detailed monitor info
    admin_ok = False
    if CheckAdminAccess():
        admin_ok = True
    else:
        password = getpass.getpass("Admin password = ")
        if password:
            admin_ok = Login(password)

    page_count = 0
    # Wall-clock time of the last error mail; 0 means "never", so the first error is mailed at once.
    last_exception_mail_time = 0
    while True:
        # The entire main loop is inside a try block so we can catch all errors, send out an email
        # (if an address was specified) and continue monitoring
        try:
            # We begin with collecting the stats of all machines
            p4_info = GetP4Info()
            cur_threads = GetMonitorInfo(admin_ok)
            # longest running entries first
            tids = sorted(cur_threads, key=TidKey, reverse=True)
            process_info = GetProcessInfo()
            server_stats = GetServerStats()
            proxy_stats = _CollectMachineStats(proxy_list)
            client_stats = _CollectMachineStats(client_list)

            # Now that we have all the information, we write the output html files
            # We do two passes, during the first we write the history page, the second one will
            # write the live page
            for p in range(2):
                is_history_page = (p == 0)
                if is_history_page and history_size <= 0:
                    continue
                output = OutputNavigation(is_history_page, page_count)
                output = OutputServerInfo(output, p4_info, process_info, server_stats)
                p4_threads = p4_info[2]
                total_time = process_info[0]
                threads_output = OutputThreadInfo(tids, cur_threads, p4_threads, total_time)
                output = output.replace("$p4_threads", threads_output)
                proxy_output = OutputMachineInfo(proxy_stats, proxy_list, "Proxy")
                output = output.replace("$proxy_info", proxy_output)
                client_output = OutputMachineInfo(client_stats, client_list, "Client")
                output = output.replace("$client_info", client_output)
                if is_history_page:
                    # remove the next page, so prev/next links don't cross the boundary
                    next_page = (page_count + 1) % history_size
                    next_page_name = history_dir + "/p4mon-" + repr(next_page) + ".html"
                    if os.path.exists(next_page_name):
                        os.remove(next_page_name)
                    _WriteFile(history_dir + "/p4mon-" + repr(page_count) + ".html", output)
                else:
                    _WriteFile(html_filename, output)

            # without history there are no pages to number (and nothing to take the modulus of)
            if history_size > 0:
                page_count = (page_count + 1) % history_size
            time.sleep(refresh_delay)
        except Exception:
            cur_time = time.time()
            if cur_time - last_exception_mail_time > 3600:  # only one mail / hour, please
                if email_address and mailserver:
                    error_msg = FormatExceptionInfo()
                    try:
                        SendMail(email_address, mailserver, error_msg)
                    except Exception:
                        # a broken mail server must not stop the monitor
                        sys.stderr.write("Could not send the error mail:\n" + error_msg + "\n")
                last_exception_mail_time = cur_time
            # for debugging purposes, it's usually practical to re-raise any thrown exceptions,
            # uncomment the next line to make that happen.
            #raise

if __name__ == "__main__":
    main()