solana/scripts/oom-monitor.sh

37 lines
880 B
Bash
Raw Normal View History

#!/usr/bin/env bash
#
# Reports Linux OOM Killer activity
#
set -e

# Work from the directory containing this script so that the relative paths
# used below (sourced helpers, helper scripts) resolve regardless of the
# caller's working directory.
cd "$(dirname "$0")"
# shellcheck source=scripts/oom-score-adj.sh
source oom-score-adj.sh
# shellcheck source=scripts/configure-metrics.sh
source configure-metrics.sh

# Only Linux is supported; on any other platform exit successfully without
# doing anything.
[[ $(uname) = Linux ]] || exit 0

# Log file that is scanned for kill reports.
syslog=/var/log/syslog
[[ -r $syslog ]] || {
  # Diagnostics go to stderr; the expansion is quoted to avoid word splitting.
  echo "Unable to read $syslog" >&2
  exit 1
}

# Adjust OOM score to reduce the chance that this script will be killed
# during an Out of Memory event since the purpose of this script is to
# report such events
oom_score_adj "self" -500

# Follow the log starting from its current end (-n0), tracking the file by
# name and retrying if it becomes inaccessible. sed runs unbuffered so each
# match is delivered immediately, and prints only the captured text between
# "Killing process " and " with signal" on earlyoom lines, minus one
# surrounding character on each side (presumably quote characters -- confirm
# against the earlyoom log format in use).
while read -r victim; do
  echo "Out of memory event detected, $victim killed"
  ./metrics-write-datapoint.sh "oom-killer,victim=$victim,hostname=$HOSTNAME killed=1"
done < <( \
  tail --follow=name --retry -n0 "$syslog" \
    | sed --unbuffered -n "s/^.* earlyoom\[[0-9]*\]: Killing process .\(.*\). with signal .*/\1/p" \
)

# Reaching this point means the read loop ended, i.e. the tail/sed pipeline
# stopped producing output; report that as a failure.
exit 1