p4_network_latency.sh.j2 #1

  • //
  • guest/
  • russell_jackson/
  • ansible-sdp/
  • roles/
  • perforce-sdp-monitoring/
  • templates/
  • p4_network_latency.sh.j2
  • View
  • Commits
  • Open Download .zip Download (1 KB)
#!/bin/bash
# Network latency monitoring - pings the commit server and records latency
# Managed by Ansible - do not edit manually
#
# Writes four gauges in Prometheus text format to
# ${METRICS_DIR}/p4_network_latency.prom:
#   p4_network_target_reachable            1 if ping exited 0, else 0
#   p4_network_latency_ms                  average round-trip time (0 if unreachable)
#   p4_network_packet_loss_percent         packet loss (100 if unreachable)
#   p4_network_latency_timestamp_seconds   time of this check
#
# Exit status: 0 when the metrics file was replaced, non-zero otherwise.

# Catch typos in variable names; ping failures are handled explicitly below,
# so "set -e" is intentionally not used.
set -u

METRICS_DIR="{{ p4prometheus_metrics_dir }}"
METRICS_FILE="${METRICS_DIR}/p4_network_latency.prom"
# Written with a plain redirect (not mktemp) so the final file keeps the
# umask-default permissions instead of mktemp's owner-only mode.
TMP_FILE="${METRICS_FILE}.$$"
TARGET="{{ commit_dns }}"

#######################################
# Extract the average round-trip time from ping output.
# Arguments: $1 - complete ping output
# Outputs:   the "avg" field of the "min/avg/max[/mdev] = a/b/c[/d] ms"
#            summary, or NaN when no such summary is present
#######################################
extract_avg_ms() {
    # Anchor on the '=' so the SECOND number (avg) is captured; a greedy
    # ".*/" prefix would land on the third number (max) instead.
    local summary_re='=[[:space:]]*[0-9.]+/([0-9.]+)/[0-9.]+'
    if [[ "$1" =~ $summary_re ]]; then
        printf '%s\n' "${BASH_REMATCH[1]}"
    else
        printf 'NaN\n'
    fi
}

#######################################
# Extract the packet loss percentage from ping output.
# Arguments: $1 - complete ping output
# Outputs:   the number preceding "% packet loss", or NaN when absent
#######################################
extract_packet_loss() {
    local loss_re='([0-9.]+)%[[:space:]]+packet loss'
    if [[ "$1" =~ $loss_re ]]; then
        printf '%s\n' "${BASH_REMATCH[1]}"
    else
        printf 'NaN\n'
    fi
}

#######################################
# Ping TARGET once and atomically replace METRICS_FILE with the results.
# Globals:   METRICS_DIR, METRICS_FILE, TMP_FILE, TARGET (read)
# Returns:   0 on success, 1 if the metrics file could not be written
#######################################
main() {
    local ping_output ping_rc target_reachable avg_ms packet_loss

    # Fail fast: without the directory nothing can be recorded anyway.
    if [[ ! -d "$METRICS_DIR" ]]; then
        printf 'ERROR: metrics directory does not exist: %s\n' "$METRICS_DIR" >&2
        return 1
    fi

    # Send 3 pings with 1 second timeout.
    # LC_ALL=C keeps the summary lines in the untranslated form parsed above.
    ping_output=$(LC_ALL=C ping -c 3 -W 1 "$TARGET" 2>&1)
    ping_rc=$?

    if [[ "$ping_rc" -eq 0 ]]; then
        target_reachable=1
        avg_ms=$(extract_avg_ms "$ping_output")
        packet_loss=$(extract_packet_loss "$ping_output")
    else
        # No replies, or ping itself failed (e.g. name resolution).
        target_reachable=0
        avg_ms=0
        packet_loss=100
    fi

    # Write to a temp file in the same directory, then rename, so a reader
    # never sees a partially written metrics file.
    if ! cat > "$TMP_FILE" <<EOF
# HELP p4_network_target_reachable Whether the commit server is reachable via ping.
# TYPE p4_network_target_reachable gauge
p4_network_target_reachable{target="$TARGET"} $target_reachable
# HELP p4_network_latency_ms Average ping latency to commit server in milliseconds.
# TYPE p4_network_latency_ms gauge
p4_network_latency_ms{target="$TARGET"} $avg_ms
# HELP p4_network_packet_loss_percent Packet loss percentage to commit server.
# TYPE p4_network_packet_loss_percent gauge
p4_network_packet_loss_percent{target="$TARGET"} $packet_loss
# HELP p4_network_latency_timestamp_seconds Unix timestamp of last latency check.
# TYPE p4_network_latency_timestamp_seconds gauge
p4_network_latency_timestamp_seconds{target="$TARGET"} $(date +%s)
EOF
    then
        printf 'ERROR: cannot write %s\n' "$TMP_FILE" >&2
        rm -f -- "$TMP_FILE"
        return 1
    fi

    if ! mv -- "$TMP_FILE" "$METRICS_FILE"; then
        printf 'ERROR: cannot replace %s\n' "$METRICS_FILE" >&2
        rm -f -- "$TMP_FILE"
        return 1
    fi
}

main "$@"
# Change User Description Committed
#1 32507 Russell C. Jackson (Rusty) Fix monitoring role bugs and add health check, network latency, and disk space monitoring.

- Fix circular symlink (src and dest were identical)
- Fix force_apt_get on generic package module (split by OS family)
- Add missing become:yes on privileged tasks
- Add perforce_location defaults to prevent undefined variable errors
- Make case sensitivity check query live server via p4 info
- Remove redundant tasks already handled by install_p4prom.sh
- Remove unused handlers
- Add p4 health check probe (p4 info liveness and response time)
- Add network latency monitoring (ping commit server)
- Add disk space monitoring with configurable warn/crit thresholds