p4_healthcheck.sh.j2 #1

  • //
  • guest/
  • russell_jackson/
  • ansible-sdp/
  • roles/
  • perforce-sdp-monitoring/
  • templates/
  • p4_healthcheck.sh.j2
  • View
  • Commits
  • Open Download .zip Download (1 KB)
#!/bin/bash
# P4 health check probe - writes Prometheus metrics to textfile directory
# Managed by Ansible - do not edit manually
#
# Runs `p4 -ztag info` once for the configured instance and publishes:
#   p4_up                             1 if the command exited 0, otherwise 0
#   p4_healthcheck_duration_ms        wall-clock duration of the command (ms)
#   p4_healthcheck_timestamp_seconds  Unix time at which the file was written
#
# Exit status: 0 when the metrics file was updated, 1 when the metrics could
# not be written or moved into place.

METRICS_DIR="{{ p4prometheus_metrics_dir }}"
METRICS_FILE="${METRICS_DIR}/p4_healthcheck.prom"
# Temp file lives next to the final file so the final mv is a same-directory
# rename; the PID suffix keeps concurrent runs from sharing a temp file.
TMP_FILE="${METRICS_FILE}.$$"
INSTANCE="{{ perforce_id }}"

# Source p4_vars for this instance. An unreadable file is reported but is not
# fatal: the probe still runs with whatever environment is already set, so a
# p4_up value is published either way.
if [[ -r /p4/common/bin/p4_vars ]]; then
    # shellcheck disable=SC1091
    source /p4/common/bin/p4_vars "$INSTANCE"
else
    echo "p4_healthcheck: cannot read /p4/common/bin/p4_vars" >&2
fi

# Remove the temp file on every exit path so a failed write or move does not
# leave stray files behind in the metrics directory. After a successful mv the
# file no longer exists and rm -f is a no-op.
trap 'rm -f -- "$TMP_FILE"' EXIT

# Run p4 info and capture timing (%N nanoseconds requires GNU date).
# Only the exit status is used, so the command output is discarded.
# NOTE(review): there is no timeout here; a server that accepts the connection
# but never answers would block this probe - consider wrapping in timeout(1).
start_time=$(date +%s%N)
p4 -ztag info >/dev/null 2>&1
p4_rc=$?
end_time=$(date +%s%N)

# Nanoseconds -> milliseconds (integer division).
duration_ms=$(( (end_time - start_time) / 1000000 ))

# Determine if server is up (1) or down (0)
if (( p4_rc == 0 )); then
    p4_up=1
else
    p4_up=0
fi

# Write the complete metrics set to the temp file first; abort (and let the
# EXIT trap clean up) if the directory is missing, unwritable, or full.
{
    cat <<EOF
# HELP p4_up Whether the Perforce server is responding to p4 info.
# TYPE p4_up gauge
p4_up{instance="$INSTANCE"} $p4_up
# HELP p4_healthcheck_duration_ms Duration of p4 info command in milliseconds.
# TYPE p4_healthcheck_duration_ms gauge
p4_healthcheck_duration_ms{instance="$INSTANCE"} $duration_ms
# HELP p4_healthcheck_timestamp_seconds Unix timestamp of last health check.
# TYPE p4_healthcheck_timestamp_seconds gauge
p4_healthcheck_timestamp_seconds{instance="$INSTANCE"} $(date +%s)
EOF
} > "$TMP_FILE" || {
    echo "p4_healthcheck: failed to write $TMP_FILE" >&2
    exit 1
}

# Publish by renaming over the previous file so a reader of the directory
# never sees a partially written metrics file.
if ! mv -- "$TMP_FILE" "$METRICS_FILE"; then
    echo "p4_healthcheck: failed to move $TMP_FILE to $METRICS_FILE" >&2
    exit 1
fi
# Change User Description Committed
#1 32507 Russell C. Jackson (Rusty) Fix monitoring role bugs and add health check, network latency, and disk space monitoring.

- Fix circular symlink (src and dest were identical)
- Fix force_apt_get on generic package module (split by OS family)
- Add missing become:yes on privileged tasks
- Add perforce_location defaults to prevent undefined variable errors
- Make case sensitivity check query live server via p4 info
- Remove redundant tasks already handled by install_p4prom.sh
- Remove unused handlers
- Add p4 health check probe (p4 info liveness and response time)
- Add network latency monitoring (ping commit server)
- Add disk space monitoring with configurable warn/crit thresholds