#!/bin/bash
# Network latency monitoring - pings the commit server and records latency
# Managed by Ansible - do not edit manually
#
# Publishes four gauges (reachability, average round-trip time, packet loss
# and the time of the check) in Prometheus text exposition format. The file
# is written under a temporary name and then renamed into place, so a reader
# never sees a partially written metrics file.
METRICS_DIR="{{ p4prometheus_metrics_dir }}"
METRICS_FILE="${METRICS_DIR}/p4_network_latency.prom"
TMP_FILE="${METRICS_FILE}.$$"
TARGET="{{ commit_dns }}"

# Succeeds when $1 is a plain non-negative decimal number (0, 12, 0.052).
is_number() {
  [[ "$1" =~ ^[0-9]+(\.[0-9]+)?$ ]]
}

# Reads ping output on stdin and prints the average round-trip time.
# The summary line has the form "... min/avg/max/mdev = a/b/c/d ms"; the
# average is the SECOND value after "=". The pattern is anchored on "=" so a
# greedy match cannot slide onto the max value. Prints nothing when no
# summary line is present.
parse_avg_ms() {
  sed -nE 's|.*min/avg/max[^=]*= *[0-9.]+/([0-9.]+)/.*|\1|p' | head -n 1
}

# Reads ping output on stdin and prints the number in front of
# "% packet loss" (without the percent sign). Prints nothing when absent.
parse_packet_loss() {
  sed -nE 's|.*[^0-9.]([0-9.]+)% packet loss.*|\1|p' | head -n 1
}

# Prints the metrics document to stdout.
# Arguments: $1 - reachable flag (1/0), $2 - average latency in ms,
#            $3 - packet loss percentage
# Globals:   TARGET (read) - used as the "target" label value
render_metrics() {
  local reachable=$1 avg_ms=$2 packet_loss=$3
  cat <<EOF
# HELP p4_network_target_reachable Whether the commit server is reachable via ping.
# TYPE p4_network_target_reachable gauge
p4_network_target_reachable{target="$TARGET"} $reachable
# HELP p4_network_latency_ms Average ping latency to commit server in milliseconds.
# TYPE p4_network_latency_ms gauge
p4_network_latency_ms{target="$TARGET"} $avg_ms
# HELP p4_network_packet_loss_percent Packet loss percentage to commit server.
# TYPE p4_network_packet_loss_percent gauge
p4_network_packet_loss_percent{target="$TARGET"} $packet_loss
# HELP p4_network_latency_timestamp_seconds Unix timestamp of last latency check.
# TYPE p4_network_latency_timestamp_seconds gauge
p4_network_latency_timestamp_seconds{target="$TARGET"} $(date +%s)
EOF
}

# Pings TARGET, derives the gauges and publishes METRICS_FILE.
# Globals: TARGET, TMP_FILE, METRICS_FILE (read)
# Returns: 0 when the metrics file was published, 1 when it could not be
#          written or renamed (the temporary file is removed in that case).
main() {
  local ping_output ping_rc target_reachable avg_ms packet_loss

  # Send 3 pings with 1 second timeout. LC_ALL=C keeps the summary lines in
  # the untranslated wording the parsers above look for.
  ping_output=$(LC_ALL=C ping -c 3 -W 1 "$TARGET" 2>&1)
  ping_rc=$?

  if (( ping_rc == 0 )); then
    target_reachable=1
    avg_ms=$(parse_avg_ms <<<"$ping_output")
    packet_loss=$(parse_packet_loss <<<"$ping_output")
    # A sample line with a missing value would make the whole file
    # unparsable, so report an unparsable reading as NaN instead.
    is_number "$avg_ms" || avg_ms=NaN
    is_number "$packet_loss" || packet_loss=NaN
  else
    # Any ping failure is reported as unreachable with total loss.
    target_reachable=0
    avg_ms=0
    packet_loss=100
  fi

  # Only publish a file that was written completely.
  if ! render_metrics "$target_reachable" "$avg_ms" "$packet_loss" > "$TMP_FILE"; then
    printf 'p4_network_latency: cannot write %s\n' "$TMP_FILE" >&2
    rm -f -- "$TMP_FILE"
    return 1
  fi
  if ! mv -- "$TMP_FILE" "$METRICS_FILE"; then
    printf 'p4_network_latency: cannot publish %s\n' "$METRICS_FILE" >&2
    rm -f -- "$TMP_FILE"
    return 1
  fi
}

main "$@"
| # | Change | User | Description | Committed | |
|---|---|---|---|---|---|
| #1 | 32507 | Russell C. Jackson (Rusty) | Fix monitoring role bugs and add health check, network latency, and disk space monitoring. - Fix circular symlink (src and dest were identical) - Fix force_apt_get on generic package module (split by OS family) - Add missing become:yes on privileged tasks - Add perforce_location defaults to prevent undefined variable errors - Make case sensitivity check query live server via p4 info - Remove redundant tasks already handled by install_p4prom.sh - Remove unused handlers - Add p4 health check probe (p4 info liveness and response time) - Add network latency monitoring (ping commit server) - Add disk space monitoring with configurable warn/crit thresholds | | |