p4_disk_space.sh.j2 #1

  • //
  • guest/
  • russell_jackson/
  • ansible-sdp/
  • roles/
  • perforce-sdp-monitoring/
  • templates/
  • p4_disk_space.sh.j2
  • View
  • Commits
  • Open Download .zip Download (2 KB)
#!/bin/bash
# Disk space monitoring for SDP volumes - writes Prometheus metrics
# Managed by Ansible - do not edit manually

# Perforce instance identifier; attached to every emitted sample as the
# "instance" label.
INSTANCE="{{ perforce_id }}"

# Usage percentages at or above which a volume is reported as
# critical (alert=2) or warning (alert=1).
CRIT_THRESHOLD={{ p4_disk_crit_percent }}
WARN_THRESHOLD={{ p4_disk_warn_percent }}

# Metrics are first written to a PID-suffixed scratch file next to the
# final .prom file and renamed into place once complete.
METRICS_DIR="{{ p4prometheus_metrics_dir }}"
METRICS_FILE="$METRICS_DIR/p4_disk_space.prom"
TMP_FILE="$METRICS_FILE.$$"

# SDP volume paths to report on, appended in the order the template lists them.
VOLUMES=()
{% for vol in p4_monitor_volumes %}
VOLUMES+=("{{ vol }}")
{% endfor %}

# Build the Prometheus text-format metrics for every monitored volume and
# publish them as $METRICS_FILE via write-to-scratch-then-rename.
# Reads:  VOLUMES, INSTANCE, WARN_THRESHOLD, CRIT_THRESHOLD, TMP_FILE, METRICS_FILE
# Writes: $TMP_FILE (scratch), then renames it over $METRICS_FILE.

# Remove the scratch file on every exit path so a failed run does not leave
# PID-suffixed leftovers behind. After a successful mv this is a no-op.
trap 'rm -f -- "$TMP_FILE"' EXIT

# Escape a string for use inside a double-quoted Prometheus label value:
# backslash, double quote and newline must be backslash-escaped.
# Arguments: $1 - raw label value
# Outputs:   escaped value on stdout (no trailing newline)
escape_label() {
    local value=$1
    value=${value//\\/\\\\}
    value=${value//\"/\\\"}
    value=${value//$'\n'/\\n}
    printf '%s' "$value"
}

instance_label=$(escape_label "$INSTANCE")

# Samples are buffered per metric so that, in the output, all lines of one
# metric form a single group directly after its HELP/TYPE lines.
total_samples=""
used_samples=""
avail_samples=""
percent_samples=""
alert_samples=""

for vol in "${VOLUMES[@]}"; do
    # Volumes that do not exist on this host are skipped silently.
    [ -d "$vol" ] || continue

    # -P forces the POSIX layout (one line per filesystem, even for long
    # device names); -k reports sizes in 1024-byte blocks.
    # df's own stderr is suppressed on purpose: a failure is detected and
    # reported by the field validation below.
    df_line=$(df -Pk -- "$vol" 2>/dev/null | tail -n 1)
    read -r _ total used avail pct _ <<< "$df_line"
    pct_num=${pct%\%}

    # Never emit a sample that is not a plain integer: one malformed line
    # can invalidate the whole metrics file for the scraper.
    if ! [[ $total =~ ^[0-9]+$ && $used =~ ^[0-9]+$ \
            && $avail =~ ^[0-9]+$ && $pct_num =~ ^[0-9]+$ ]]; then
        printf 'p4_disk_space: cannot parse df output for %s, skipping\n' "$vol" >&2
        continue
    fi

    # Alert level: 2 at/above the critical threshold, 1 at/above the
    # warning threshold, otherwise 0.
    if [ "$pct_num" -ge "$CRIT_THRESHOLD" ]; then
        alert=2
    elif [ "$pct_num" -ge "$WARN_THRESHOLD" ]; then
        alert=1
    else
        alert=0
    fi

    labels="instance=\"${instance_label}\",volume=\"$(escape_label "$vol")\""

    # df reported 1 KiB blocks; convert to bytes.
    total_samples+="p4_disk_total_bytes{${labels}} $((total * 1024))"$'\n'
    used_samples+="p4_disk_used_bytes{${labels}} $((used * 1024))"$'\n'
    avail_samples+="p4_disk_avail_bytes{${labels}} $((avail * 1024))"$'\n'
    percent_samples+="p4_disk_used_percent{${labels}} ${pct_num}"$'\n'
    alert_samples+="p4_disk_alert{${labels}} ${alert}"$'\n'
done

{
    echo "# HELP p4_disk_total_bytes Total disk space in bytes."
    echo "# TYPE p4_disk_total_bytes gauge"
    printf '%s' "$total_samples"
    echo "# HELP p4_disk_used_bytes Used disk space in bytes."
    echo "# TYPE p4_disk_used_bytes gauge"
    printf '%s' "$used_samples"
    echo "# HELP p4_disk_avail_bytes Available disk space in bytes."
    echo "# TYPE p4_disk_avail_bytes gauge"
    printf '%s' "$avail_samples"
    echo "# HELP p4_disk_used_percent Percentage of disk space used."
    echo "# TYPE p4_disk_used_percent gauge"
    printf '%s' "$percent_samples"
    echo "# HELP p4_disk_alert Disk space alert level: 0=ok, 1=warning, 2=critical."
    echo "# TYPE p4_disk_alert gauge"
    printf '%s' "$alert_samples"
    echo "# HELP p4_disk_check_timestamp_seconds Unix timestamp of last disk check."
    echo "# TYPE p4_disk_check_timestamp_seconds gauge"
    echo "p4_disk_check_timestamp_seconds{instance=\"${instance_label}\"} $(date +%s)"
} > "$TMP_FILE" || exit 1   # keep the previous metrics file if the write failed

# Rename into place so a reader never sees a half-written metrics file.
mv -f -- "$TMP_FILE" "$METRICS_FILE"
# Change User Description Committed
#1 32507 Russell C. Jackson (Rusty) Fix monitoring role bugs and add health check, network latency, and disk space monitoring.

- Fix circular symlink (src and dest were identical)
- Fix force_apt_get on generic package module (split by OS family)
- Add missing become:yes on privileged tasks
- Add perforce_location defaults to prevent undefined variable errors
- Make case sensitivity check query live server via p4 info
- Remove redundant tasks already handled by install_p4prom.sh
- Remove unused handlers
- Add p4 health check probe (p4 info liveness and response time)
- Add network latency monitoring (ping commit server)
- Add disk space monitoring with configurable warn/crit thresholds