monitor_replica_metrics.sh #1

  • //
  • guest/
  • perforce_software/
  • sdp/
  • dev/
  • Server/
  • Unix/
  • p4/
  • common/
  • site/
  • bin/
  • monitor_replica_metrics.sh
  • View
  • Commits
  • Open Download .zip Download (2 KB)
#!/bin/bash
# Generate monitoring metrics for a replica for use with Prometheus (collected via node_exporter)
# If required, put this job into perforce user crontab, e.g. every 10 minutes (to avoid too much load)
#
#   */10 * * * * /p4/common/site/bin/monitor_replica_metrics.sh $INSTANCE > /dev/null 2>&1 ||:
#
# Very similar to monitor_metrics.sh but that gets run more frequently.
#
# Please note you need to make sure that the directory specified below (which may be a symlink)
# can be read by the node_exporter user (and is set up via the --collector.textfile.directory parameter)
#
# Note we use a tempfile for each metric to avoid partial reads. The textfile collector only looks for
# files ending in .prom, so we do a final rename when ready

# Require Bash 4 or later. An unset or empty BASH_VERSINFO[0] is treated as
# version 0, so it fails the check just like an old Bash does.
if [[ "${BASH_VERSINFO[0]:-0}" -lt 4 ]]; then
    # Diagnostics belong on stderr so they are not mixed into normal output.
    echo "This script requires Bash version >= 4" >&2
    exit 1
fi

# Directory the .prom files are written to; it must be the directory that
# node_exporter's textfile collector reads.
# This might also be /hxlogs/metrics
metrics_root=/p4/metrics

# The SDP instance comes from the first argument, falling back to an
# SDP_INSTANCE value already present in the environment.
SDP_INSTANCE=${1:-${SDP_INSTANCE:-Unset}}
if [[ "$SDP_INSTANCE" == "Unset" ]]; then
   echo -e "\\nError: Instance parameter not supplied.\\n" >&2
   echo "You must supply the Perforce SDP instance as a parameter to this script." >&2
   exit 1
fi

# Load SDP controlled shell environment.
# (P4BIN, P4USER and P4PORT used below are presumably defined by p4_vars.)
# shellcheck disable=SC1091
source /p4/common/bin/p4_vars "$SDP_INSTANCE" ||\
   { echo -e "\\nError: Failed to load SDP environment.\\n" >&2; exit 1; }

# Kept as a plain string on purpose: callers expand it unquoted ($p4 ...) so
# that it word-splits into the binary plus its connection options.
p4="$P4BIN -u $P4USER -p $P4PORT"

# Get server id: the third whitespace-separated field of "p4 serverid" output,
# or the literal "unset" when the command produced nothing.
SERVER_ID=$($p4 serverid | awk '{print $3}')
SERVER_ID=${SERVER_ID:-unset}


#######################################
# Write "p4 pull -l" metrics for this replica to a Prometheus .prom file.
# Globals (read):
#   p4            - p4 command plus options; expanded unquoted on purpose so
#                   that it word-splits into command and arguments
#   metrics_root  - directory the .prom file is written into
#   SDP_INSTANCE  - used in the file name and as the sdpinst label
#   SERVER_ID     - used in the file name and as the serverid label
# Outputs:
#   $metrics_root/p4_pull-<instance>-<serverid>.prom with two metrics:
#     p4_pull_errors - number of output lines ending in "failed."
#     p4_pull_queue  - number of all other output lines
# Returns:
#   0 on success; non-zero if the scratch file cannot be created or the
#   metrics file cannot be written or renamed into place.
#######################################
monitor_pull () {
    # Keep all working variables local so nothing leaks into the caller.
    local fname tmpfname pullfile failed_count queue_count

    fname="$metrics_root/p4_pull-${SDP_INSTANCE}-${SERVER_ID}.prom"
    # Built next to the target so the final mv is a same-directory rename;
    # the name does not end in .prom, so the collector ignores it meanwhile.
    tmpfname="$fname.$$"

    # Private scratch file instead of a fixed /tmp name, so concurrent runs
    # (e.g. for different instances) cannot clobber each other's output.
    pullfile=$(mktemp "${TMPDIR:-/tmp}/p4_pull.XXXXXX") || return 1

    # Best effort: if the command fails the file stays empty and both counts
    # are reported as 0.
    # shellcheck disable=SC2086
    $p4 pull -l > "$pullfile" 2> /dev/null

    # grep -c always prints a count (0 when nothing matches); -a treats the
    # input as text even if it contains binary bytes.
    failed_count=$(grep -cEa 'failed\.$' "$pullfile")
    queue_count=$(grep -cvEa 'failed\.$' "$pullfile")
    rm -f -- "$pullfile"

    # NOTE(review): both values are point-in-time counts that can go down, so
    # "gauge" looks like the more accurate type; "counter" is kept so the
    # exported metadata is unchanged - confirm before altering.
    {
        echo "# HELP p4_pull_errors P4 pull transfers failed count"
        echo "# TYPE p4_pull_errors counter"
        echo "p4_pull_errors{serverid=\"$SERVER_ID\",sdpinst=\"$SDP_INSTANCE\"} $failed_count"

        echo "# HELP p4_pull_queue P4 pull files in queue count"
        echo "# TYPE p4_pull_queue counter"
        echo "p4_pull_queue{serverid=\"$SERVER_ID\",sdpinst=\"$SDP_INSTANCE\"} $queue_count"
    } > "$tmpfname" || { rm -f -- "$tmpfname"; return 1; }

    # Publish the complete file in one step; do not leave a partial temp file
    # behind if the rename fails.
    mv -- "$tmpfname" "$fname" || { rm -f -- "$tmpfname"; return 1; }
}

monitor_pull

# Make sure all readable by node_exporter or other user.
# The glob is expanded into an array so the directory path stays quoted, and
# chmod is skipped (instead of failing on a literal "*.prom") when no metric
# files exist.
prom_files=("$metrics_root"/*.prom)
if [[ -e "${prom_files[0]}" ]]; then
    chmod 755 -- "${prom_files[@]}"
fi
# Change User Description Committed
#2 26677 Robert Cowham Definitive version of these files are in the github project
github.com/perforce/p4prometheus/demo
#1 26032 Robert Cowham Latest version.
Includes one for replicas to monitor pull queue size