p4_healthcheck.sh.j2 #1

#!/bin/bash
# P4 health check probe - writes Prometheus metrics to textfile directory
# Managed by Ansible - do not edit manually
#
# Runs `p4 -ztag info` once for the configured instance and publishes:
#   p4_up                             1 if the command exited 0, otherwise 0
#   p4_healthcheck_duration_ms        wall-clock duration of the command (ms)
#   p4_healthcheck_timestamp_seconds  Unix time at which the file was written
#
# Environment:
#   P4_HEALTHCHECK_TIMEOUT  seconds to wait for `p4 info` before giving up
#                           and reporting the server as down (default: 30)
#
# Exit status: 0 when the metrics file was published, 1 when it could not be
# written. A failing `p4 info` is an expected outcome (reported as p4_up 0),
# which is why `set -e` is deliberately not enabled here.

METRICS_DIR="{{ p4prometheus_metrics_dir }}"
METRICS_FILE="${METRICS_DIR}/p4_healthcheck.prom"
# Temp file lives next to the target so the final rename stays on one
# filesystem. It is created by plain redirection (not mktemp) so it gets the
# normal umask permissions rather than mktemp's owner-only mode.
TMP_FILE="${METRICS_FILE}.$$"
INSTANCE="{{ perforce_id }}"
P4_VARS_FILE=/p4/common/bin/p4_vars
P4_TIMEOUT_SECONDS="${P4_HEALTHCHECK_TIMEOUT:-30}"

# Print a diagnostic on stderr, prefixed with the probe name.
warn() {
    printf 'p4_healthcheck: %s\n' "$*" >&2
}

# Emit the metrics in Prometheus text exposition format on stdout.
# Reads: INSTANCE, p4_up, duration_ms.
render_metrics() {
    cat <<EOF
# HELP p4_up Whether the Perforce server is responding to p4 info.
# TYPE p4_up gauge
p4_up{instance="$INSTANCE"} $p4_up
# HELP p4_healthcheck_duration_ms Duration of p4 info command in milliseconds.
# TYPE p4_healthcheck_duration_ms gauge
p4_healthcheck_duration_ms{instance="$INSTANCE"} $duration_ms
# HELP p4_healthcheck_timestamp_seconds Unix timestamp of last health check.
# TYPE p4_healthcheck_timestamp_seconds gauge
p4_healthcheck_timestamp_seconds{instance="$INSTANCE"} $(date +%s)
EOF
}

# A missing environment file is only warned about: the probe still runs and
# reports whatever `p4 info` returns, as it did before.
if [[ ! -r "$P4_VARS_FILE" ]]; then
    warn "cannot read ${P4_VARS_FILE}; p4 environment may be incomplete"
fi
# shellcheck source=/dev/null
source "$P4_VARS_FILE" "$INSTANCE"

# Remove the temp file on every exit path; after a successful rename it no
# longer exists and rm -f is a no-op.
trap 'rm -f -- "$TMP_FILE"' EXIT

# Bound the probe so an unresponsive server is reported as down instead of
# hanging this script. Only wrap when p4 resolves to an executable on PATH
# (timeout cannot run shell functions) and timeout itself is installed.
p4_cmd=(p4 -ztag info)
if [[ "$(type -t p4)" == "file" ]] && command -v timeout >/dev/null 2>&1; then
    p4_cmd=(timeout "$P4_TIMEOUT_SECONDS" "${p4_cmd[@]}")
fi

# Run p4 info and capture timing; only the exit status is needed, so the
# command's output is discarded.
start_time=$(date +%s%N)
"${p4_cmd[@]}" >/dev/null 2>&1
p4_rc=$?
end_time=$(date +%s%N)

duration_ms=$(( (end_time - start_time) / 1000000 ))

# Determine if server is up (1) or down (0)
if (( p4_rc == 0 )); then
    p4_up=1
else
    p4_up=0
fi

# Write to the temp file first and publish only a complete file, so readers
# never observe a partially written metrics file.
if ! render_metrics > "$TMP_FILE"; then
    warn "failed to write ${TMP_FILE}"
    exit 1
fi

if ! mv -- "$TMP_FILE" "$METRICS_FILE"; then
    warn "failed to publish ${METRICS_FILE}"
    exit 1
fi

#	Change	User	Description	Committed
#1	32507	Russell C. Jackson (Rusty)	Fix monitoring role bugs and add health check, network latency, and disk space monitoring. - Fix circular symlink (src and dest were identical) - Fix force_apt_get on generic package module (split by OS family) - Add missing become:yes on privileged tasks - Add perforce_location defaults to prevent undefined variable errors - Make case sensitivity check query live server via p4 info - Remove redundant tasks already handled by install_p4prom.sh - Remove unused handlers - Add p4 health check probe (p4 info liveness and response time) - Add network latency monitoring (ping commit server) - Add disk space monitoring with configurable warn/crit thresholds