monitor_metrics.sh #1

  • //
  • guest/
  • perforce_software/
  • sdp/
  • main/
  • Server/
  • Unix/
  • p4/
  • common/
  • site/
  • bin/
  • monitor_metrics.sh
  • View
  • Commits
  • Open Download .zip Download (4 KB)
#!/bin/bash
# Generate monitoring metrics for use with Prometheus (collected via node_exporter)
# If required, put this job into perforce user crontab:
#
#   */1 * * * * /p4/common/site/bin/monitor_metrics.sh $INSTANCE > /dev/null 2>&1 ||:
#
# Please note you need to make sure that the specified directory below (which may be linked)
# can be read by the node_exporter user (and is setup via --collector.textfile.directory parameter)
#
# Note we use a tempfile for each metric to avoid partial reads. The textfile collector only looks for files
# ending in .prom, so we do a final rename when ready.

# Directory the metric files are written to.
# This might also be /hxlogs/metrics
metrics_root=/p4/metrics

# Instance selection: a command-line argument takes precedence over a value
# inherited from the environment; "Unset" marks that neither was supplied.
SDP_INSTANCE=${1:-${SDP_INSTANCE:-Unset}}
[[ $SDP_INSTANCE != Unset ]] || {
   echo -e "\\nError: Instance parameter not supplied.\\n"
   echo "You must supply the Perforce SDP instance as a parameter to this script."
   exit 1
}

# Load SDP controlled shell environment; give up if it cannot be sourced.
# shellcheck disable=SC1091
if ! source /p4/common/bin/p4_vars "$SDP_INSTANCE"; then
   echo -e "\\nError: Failed to load SDP environment.\\n"
   exit 1
fi

# Command prefix used for every server query below. It is a plain string,
# so callers expand it unquoted to get the command and its options.
p4="$P4BIN -u $P4USER -p $P4PORT"

# Get server id (third field of the `p4 serverid` output), with a
# placeholder when nothing came back.
SERVER_ID=$($p4 serverid | awk '{print $3}')
[[ -n $SERVER_ID ]] || SERVER_ID=unset

monitor_uptime () {
    # Publish server uptime, in seconds, as the p4_server_uptime metric.
    # The value is parsed from the uptime line of `p4 info`, e.g.:
    #   Server uptime: 168:39:20
    # When no well-formed H:M:S value is found (for example the server did
    # not answer), 0 is reported.
    # Globals read: p4, metrics_root, SDP_INSTANCE, SERVER_ID
    # Writes:       $metrics_root/p4_uptime-<instance>-<serverid>.prom
    local fname tmpfname uptime uptime_secs
    local hms_re='^([0-9]+):([0-9]+):([0-9]+)$'
    fname="$metrics_root/p4_uptime-${SDP_INSTANCE}-${SERVER_ID}.prom"
    # Build the file under a name that does not end in .prom and rename it
    # once complete, so a reader never sees a partial file.
    tmpfname="$fname.$$"
    # $p4 holds the command plus its options, so it must be word-split.
    # shellcheck disable=SC2086
    uptime=$($p4 info 2>&1 | grep uptime | awk '{print $3}')
    if [[ $uptime =~ $hms_re ]]; then
        # 10# forces base 10 so zero-padded fields such as "08" are not
        # rejected as invalid octal numbers.
        uptime_secs=$(( 10#${BASH_REMATCH[1]} * 3600 + 10#${BASH_REMATCH[2]} * 60 + 10#${BASH_REMATCH[3]} ))
    else
        uptime_secs=0
    fi
    {
        echo -e "# HELP p4_server_uptime P4D Server uptime (seconds)\\n"
        echo -e "# TYPE p4_server_uptime counter\\n"
        echo -e "p4_server_uptime{serverid=\"$SERVER_ID\",sdpinst=\"$SDP_INSTANCE\"} $uptime_secs\\n"
    } > "$tmpfname"
    mv "$tmpfname" "$fname"
}

monitor_change () {
    # Publish the latest changelist number as the p4_change_counter metric.
    # The value is the "change" entry of `p4 counters` (lines of the form
    # "name = value"). Only the counter named exactly "change" is used, so
    # other counters whose names merely contain "change" cannot add extra
    # lines to the metric file.
    # When no numeric value is obtained nothing is written and any existing
    # metric file is left as it is.
    # Globals read: p4, metrics_root, SDP_INSTANCE, SERVER_ID
    # Writes:       $metrics_root/p4_change-<instance>-<serverid>.prom
    local fname tmpfname curr_change
    fname="$metrics_root/p4_change-${SDP_INSTANCE}-${SERVER_ID}.prom"
    # Build under a non-.prom name, then rename when complete.
    tmpfname="$fname.$$"
    # $p4 holds the command plus its options, so it must be word-split.
    # shellcheck disable=SC2086
    curr_change=$($p4 counters 2>&1 | awk '$1 == "change" && $2 == "=" {print $3}')
    if [[ $curr_change =~ ^[0-9]+$ ]]; then
        {
            echo -e "# HELP p4_change_counter P4D change counter\\n"
            echo -e "# TYPE p4_change_counter counter\\n"
            echo -e "p4_change_counter{serverid=\"$SERVER_ID\",sdpinst=\"$SDP_INSTANCE\"} $curr_change\\n"
        } > "$tmpfname"
        mv "$tmpfname" "$fname"
    fi
}

monitor_processes () {
    # Publish counts of the processes listed by `p4 monitor show`,
    # summarised by command (field 5) and by user (field 3), as the
    # p4_monitor_by_cmd and p4_monitor_by_user metrics.
    # The sdpinst label carries the plain instance name, matching the other
    # metrics written by this script.
    # NOTE(review): both metrics are declared as "counter" although the
    # values can go down between runs; "gauge" looks more appropriate -
    # confirm against existing dashboards before changing.
    # Globals read: p4, metrics_root, SDP_INSTANCE, SERVER_ID
    # Writes:       $metrics_root/p4_monitor-<instance>-<serverid>.prom
    local fname tmpfname monfile count cmd user
    fname="$metrics_root/p4_monitor-${SDP_INSTANCE}-${SERVER_ID}.prom"
    # Build under a non-.prom name, then rename when complete.
    tmpfname="$fname.$$"
    # Private scratch file: a fixed name in /tmp would be shared between
    # concurrent runs for different instances.
    monfile=$(mktemp "${TMPDIR:-/tmp}/p4_monitor.XXXXXX") || return 1
    # $p4 holds the command plus its options, so it must be word-split.
    # shellcheck disable=SC2086
    $p4 monitor show > "$monfile" 2> /dev/null
    {
        echo -e "# HELP p4_monitor_by_cmd P4 running processes\\n"
        echo -e "# TYPE p4_monitor_by_cmd counter\\n"
        # `uniq -c` emits "<count> <value>" per distinct value.
        awk '{print $5}' "$monfile" | sort | uniq -c | while read -r count cmd
        do
            printf 'p4_monitor_by_cmd{serverid="%s",sdpinst="%s",cmd="%s"} %s\n\n' \
                "$SERVER_ID" "$SDP_INSTANCE" "$cmd" "$count"
        done

        echo -e "# HELP p4_monitor_by_user P4 running processes\\n"
        echo -e "# TYPE p4_monitor_by_user counter\\n"
        awk '{print $3}' "$monfile" | sort | uniq -c | while read -r count user
        do
            printf 'p4_monitor_by_user{serverid="%s",sdpinst="%s",user="%s"} %s\n\n' \
                "$SERVER_ID" "$SDP_INSTANCE" "$user" "$count"
        done
    } > "$tmpfname"
    rm -f -- "$monfile"

    mv "$tmpfname" "$fname"
}

monitor_completed_cmds () {
    # Publish the number of lines containing "completed" in the server log
    # as the p4_completed_cmds_per_day metric. This parses the whole log
    # file, so it might be considered expensive to compute as the log grows.
    # Arguments:
    #   $1 - optional path of the log to scan
    #        (default: /p4/$SDP_INSTANCE/logs/log)
    # When the log cannot be read, 0 is reported so that the metric file
    # still holds a valid sample.
    # Globals read: metrics_root, SDP_INSTANCE, SERVER_ID
    # Writes:       $metrics_root/p4_completed_cmds-<instance>-<serverid>.prom
    local logfile=${1:-/p4/$SDP_INSTANCE/logs/log}
    local fname tmpfname num_cmds
    fname="$metrics_root/p4_completed_cmds-${SDP_INSTANCE}-${SERVER_ID}.prom"
    # Build under a non-.prom name, then rename when complete.
    tmpfname="$fname.$$"
    num_cmds=$(grep -c completed "$logfile")
    # grep prints no count at all when it cannot open the file.
    [[ $num_cmds =~ ^[0-9]+$ ]] || num_cmds=0
    {
        # "# HELP" / "# TYPE" need the space after '#'; without it the
        # lines are ordinary comments and the metadata is ignored.
        echo -e "# HELP p4_completed_cmds_per_day Completed p4 commands\\n"
        echo -e "# TYPE p4_completed_cmds_per_day counter\\n"
        echo -e "p4_completed_cmds_per_day{serverid=\"$SERVER_ID\",sdpinst=\"$SDP_INSTANCE\"} $num_cmds\\n"
    } > "$tmpfname"
    mv "$tmpfname" "$fname"
}

# Generate each metric file in turn.
monitor_uptime
monitor_change
monitor_processes
monitor_completed_cmds
# Make sure all are readable by node_exporter or another user.
# The directory variable is quoted so a path containing spaces is not
# word-split; the glob stays outside the quotes so it still expands.
chmod 755 "$metrics_root"/*.prom
# Change User Description Committed
#3 27331 C. Thomas Tyler Released SDP 2020.1.27325 (2021/01/29).
Copy Up using 'p4 copy -r -b perforce_software-sdp-dev'.
#2 26161 C. Thomas Tyler Released SDP 2019.3.26159 (2019/11/06).
Copy Up using 'p4 copy -r -b perforce_software-sdp-dev'.
#1 25933 C. Thomas Tyler Released SDP 2019.2.25923 (2019/08/05).
Copy Up using 'p4 copy -r -b perforce_software-sdp-dev'.
//guest/perforce_software/sdp/dev/Server/Unix/p4/common/site/bin/monitor_metrics.sh
#1 25745 Robert Cowham Monitor metrics - tidied up