Add simple OOM Killer monitor

This commit is contained in:
Michael Vines 2018-08-07 12:38:13 -07:00
parent 02f9cb415b
commit df808dedd1
2 changed files with 37 additions and 0 deletions

32
multinode-demo/oom_monitor.sh Executable file
View File

@ -0,0 +1,32 @@
#!/bin/bash
#
# Reports Linux OOM Killer activity
#
# Follows the system log and, for every "Out of memory" kill message the
# kernel writes, prints a notice and submits an "oom-killer" datapoint via
# metrics_write_datapoint.sh.  Exits 0 immediately on non-Linux hosts or when
# the log is unreadable; exits 1 if the log-following loop ever terminates.
#

# Enable errexit here rather than in the shebang so that it is still in
# effect when the script is launched as `bash oom_monitor.sh`.
set -e

here=$(dirname "$0")
# shellcheck source=multinode-demo/common.sh
source "$here"/common.sh

# The OOM Killer (and the log format parsed below) is Linux specific.
if [[ $(uname) != Linux ]]; then
  exit 0
fi

syslog=/var/log/syslog
if [[ ! -r $syslog ]]; then
  echo "Unable to read $syslog"
  exit 0
fi

# Adjust OOM score to reduce the chance that this script will be killed
# during an Out of Memory event since the purpose of this script is to
# report such events
oom_score_adj "self" -500

# Kernel wording differs between releases:
#   "Out of memory: Kill process <pid> (<name>) score ..."     (older kernels)
#   "Out of memory: Killed process <pid> (<name>) total-vm..." (newer kernels)
# Accept either form; group 2 captures the victim's process name.
oom_pattern='s/^.* Out of memory: Kill\(ed\)\{0,1\} process [1-9][0-9]* (\([^)]*\)) .*/\2/p'

while read -r victim; do
  echo "Out of memory event detected, $victim killed"
  "$here"/metrics_write_datapoint.sh "oom-killer,victim=$victim killed=1"
done < <(
  # --follow=name --retry keeps tracking the log across rotation; -n0 skips
  # events that were logged before the monitor started.
  tail --follow=name --retry -n0 "$syslog" \
    | sed --unbuffered -n "$oom_pattern"
)

# tail is expected to run forever; reaching this point means the log stream
# ended unexpectedly, so report failure.
exit 1

View File

@ -77,6 +77,11 @@ apps:
plugs:
- network
- network-bind
# OOM monitor service: runs oom_monitor.sh as a "simple" snap daemon.
# NOTE(review): the network plug is presumably required so the script can
# submit its metrics datapoints — confirm.
daemon-oom-monitor:
daemon: simple
command: oom_monitor.sh
plugs:
- network
parts:
solana: