#!/bin/bash
# Generate monitoring metrics for a replica for use with Prometheus (collected via node_exporter)
# If required, put this job into perforce user crontab, e.g. every 10 minutes (to avoid too much load)
#
# */10 * * * * /p4/common/site/bin/monitor_replica_metrics.sh $INSTANCE > /dev/null 2>&1 ||:
#
# Very similar to monitor_metrics.sh but that gets run more frequently.
#
# Please note you need to make sure that the specified directory below (which may be linked)
# can be read by the node_exporter user (and is set up via the --collector.textfile.directory parameter)
#
# Note we use a tempfile for each metric to avoid partial reads. The textfile collector only looks for
# files ending in .prom, so we do a final rename when ready.
# Refuse to run under shells older than Bash 4: print a notice and exit 1.
# BASH_VERSINFO[0] holds the major version; it is empty when the variable
# is not provided at all.
if [[ -z "${BASH_VERSINFO[0]}" ]] || (( BASH_VERSINFO[0] < 4 )); then
    echo "This script requires Bash version >= 4"
    exit 1
fi
# Directory where the .prom files are written.
# This might also be /hxlogs/metrics
metrics_root=/p4/metrics

# Resolve the SDP instance: the first positional argument wins, then any
# SDP_INSTANCE already set in the environment, otherwise the sentinel "Unset".
SDP_INSTANCE=${1:-${SDP_INSTANCE:-Unset}}
if [[ "$SDP_INSTANCE" == "Unset" ]]; then
    echo -e "\\nError: Instance parameter not supplied.\\n"
    echo "You must supply the Perforce SDP instance as a parameter to this script."
    exit 1
fi

# Load SDP controlled shell environment; abort if it cannot be sourced.
# NOTE(review): P4BIN, P4USER and P4PORT used below are presumably defined by
# p4_vars - confirm against that file.
# shellcheck disable=SC1091
if ! source /p4/common/bin/p4_vars "$SDP_INSTANCE"; then
    echo -e "\\nError: Failed to load SDP environment.\\n"
    exit 1
fi

# Base p4 command line. Kept as a plain string because callers expand it
# unquoted ($p4 ...) and rely on word splitting.
p4="$P4BIN -u $P4USER -p $P4PORT"

# Get server id: third whitespace-separated field of the "p4 serverid"
# output, falling back to "unset" when nothing comes back.
SERVER_ID=$($p4 serverid | awk '{print $3}')
: "${SERVER_ID:=unset}"
#######################################
# Publish "p4 pull -l" statistics as Prometheus metrics.
#
# Runs "$p4 pull -l", counts output lines that end in "failed." (reported as
# p4_pull_errors) and all remaining lines (reported as p4_pull_queue), and
# writes both to a .prom file under $metrics_root.
#
# Globals:   p4           (read) command string, expanded unquoted/word-split
#            metrics_root (read) directory receiving the .prom file
#            SDP_INSTANCE (read) used in the file name and "sdpinst" label
#            SERVER_ID    (read) used in the file name and "serverid" label
# Arguments: none
# Outputs:   $metrics_root/p4_pull-$SDP_INSTANCE-$SERVER_ID.prom
# Returns:   1 if the scratch file cannot be created, otherwise the status
#            of the final mv.
#######################################
monitor_pull () {
    # Keep all working variables out of the caller's (global) scope.
    local fname tmpfname pullfile errors queued

    fname="$metrics_root/p4_pull-${SDP_INSTANCE}-${SERVER_ID}.prom"
    # Staging file lives next to the final file and does not end in .prom;
    # it is renamed into place only once fully written. It is created by a
    # normal redirect (not mktemp) so it keeps the usual umask permissions.
    tmpfname="$fname.$$"

    # Private, unpredictable scratch file instead of a fixed name in /tmp,
    # which could collide between instances or be pre-created by another user.
    pullfile=$(mktemp "${TMPDIR:-/tmp}/p4_pull.XXXXXX") || {
        echo "monitor_pull: unable to create temporary file" >&2
        return 1
    }

    # $p4 is deliberately unquoted: it holds the binary plus its options.
    # stderr is discarded, so a failing command simply yields zero counts.
    # shellcheck disable=SC2086
    $p4 pull -l > "$pullfile" 2> /dev/null

    # -a: treat the listing as text even if it contains odd bytes.
    errors=$(grep -cEa 'failed\.$' "$pullfile")
    queued=$(grep -cvEa 'failed\.$' "$pullfile")
    rm -f -- "$pullfile"

    {
        echo "# HELP p4_pull_errors P4 pull transfers failed count"
        echo "# TYPE p4_pull_errors counter"
        echo "p4_pull_errors{serverid=\"$SERVER_ID\",sdpinst=\"$SDP_INSTANCE\"} ${errors:-0}"
        echo "# HELP p4_pull_queue P4 pull files in queue count"
        echo "# TYPE p4_pull_queue counter"
        echo "p4_pull_queue{serverid=\"$SERVER_ID\",sdpinst=\"$SDP_INSTANCE\"} ${queued:-0}"
    } > "$tmpfname"

    mv "$tmpfname" "$fname"
}
# Collect and publish the replica pull metrics.
monitor_pull
# Make sure all readable by node_exporter or other user.
# The directory is quoted so a path containing spaces or glob characters is
# not split or expanded; the *.prom glob stays outside the quotes so it
# still matches the metric files.
chmod 755 "$metrics_root"/*.prom
# Change history:
#
# | #  | Change | User          | Description                                                                                        | Committed |
# |----|--------|---------------|----------------------------------------------------------------------------------------------------|-----------|
# | #2 | 26677  | Robert Cowham | Definitive versions of these files are in the GitHub project github.com/perforce/p4prometheus/demo |           |
# | #1 | 26032  | Robert Cowham | Latest version. Includes one for replicas to monitor pull queue size                               |           |