Fix OOM reporting
This commit is contained in:
parent
5afcdcbbe6
commit
7029e4395c
|
@ -45,12 +45,6 @@ if [[ -d $SNAP ]]; then # Running inside a Linux Snap?
|
||||||
# 0700
|
# 0700
|
||||||
mkdir -p "$SNAP_DATA"/{drone,leader,validator}
|
mkdir -p "$SNAP_DATA"/{drone,leader,validator}
|
||||||
|
|
||||||
SOLANA_METRICS_CONFIG="$(snapctl get metrics-config)"
|
|
||||||
SOLANA_DEFAULT_METRICS_RATE="$(snapctl get default-metrics-rate)"
|
|
||||||
export SOLANA_DEFAULT_METRICS_RATE
|
|
||||||
SOLANA_CUDA="$(snapctl get enable-cuda)"
|
|
||||||
RUST_LOG="$(snapctl get rust-log)"
|
|
||||||
|
|
||||||
elif [[ -n $USE_SNAP ]]; then # Use the Linux Snap binaries
|
elif [[ -n $USE_SNAP ]]; then # Use the Linux Snap binaries
|
||||||
solana_program() {
|
solana_program() {
|
||||||
declare program="$1"
|
declare program="$1"
|
||||||
|
|
|
@ -3,19 +3,21 @@
|
||||||
# Reports Linux OOM Killer activity
|
# Reports Linux OOM Killer activity
|
||||||
#
|
#
|
||||||
|
|
||||||
here=$(dirname "$0")
|
cd "$(dirname "$0")"
|
||||||
# shellcheck source=scripts/oom-score-adj.sh
|
|
||||||
source "$here"/oom-score-adj.sh
|
|
||||||
|
|
||||||
if [[ $(uname) != Linux ]]; then
|
# shellcheck source=scripts/oom-score-adj.sh
|
||||||
exit 0
|
source oom-score-adj.sh
|
||||||
fi
|
|
||||||
|
# shellcheck source=scripts/configure-metrics.sh
|
||||||
|
source configure-metrics.sh
|
||||||
|
|
||||||
|
[[ $(uname) = Linux ]] || exit 0
|
||||||
|
|
||||||
syslog=/var/log/syslog
|
syslog=/var/log/syslog
|
||||||
if [[ ! -r $syslog ]]; then
|
[[ -r $syslog ]] || {
|
||||||
echo Unable to read $syslog
|
echo Unable to read $syslog
|
||||||
exit 0
|
exit 1
|
||||||
fi
|
}
|
||||||
|
|
||||||
# Adjust OOM score to reduce the chance that this script will be killed
|
# Adjust OOM score to reduce the chance that this script will be killed
|
||||||
# during an Out of Memory event since the purpose of this script is to
|
# during an Out of Memory event since the purpose of this script is to
|
||||||
|
@ -24,9 +26,10 @@ oom_score_adj "self" -500
|
||||||
|
|
||||||
while read -r victim; do
|
while read -r victim; do
|
||||||
echo "Out of memory event detected, $victim killed"
|
echo "Out of memory event detected, $victim killed"
|
||||||
"$here"/metrics-write-datapoint.sh "oom-killer,victim=$victim killed=1"
|
./metrics-write-datapoint.sh "oom-killer,victim=$victim,hostname=$HOSTNAME killed=1"
|
||||||
done < <( \
|
done < <( \
|
||||||
tail --follow=name --retry -n0 $syslog \
|
tail --follow=name --retry -n0 $syslog \
|
||||||
| sed --unbuffered -n 's/^.* Out of memory: Kill process [1-9][0-9]* (\([^)]*\)) .*/\1/p' \
|
| sed --unbuffered -n 's/^.* Out of memory: Kill process [1-9][0-9]* (\([^)]*\)) .*/\1/p' \
|
||||||
)
|
)
|
||||||
|
|
||||||
exit 1
|
exit 1
|
||||||
|
|
|
@ -0,0 +1,22 @@
|
||||||
|
#!/bin/bash
|
||||||
|
#
|
||||||
|
# Snap daemons have no access to the environment so |snap set solana ...| is
|
||||||
|
# used to set runtime configuration.
|
||||||
|
#
|
||||||
|
# This script exports the snap runtime configuration options back as
|
||||||
|
# environment variables before invoking the specified program
|
||||||
|
#
|
||||||
|
|
||||||
|
if [[ -d $SNAP ]]; then # Running inside a Linux Snap?
|
||||||
|
RUST_LOG="$(snapctl get rust-log)"
|
||||||
|
SOLANA_CUDA="$(snapctl get enable-cuda)"
|
||||||
|
SOLANA_DEFAULT_METRICS_RATE="$(snapctl get default-metrics-rate)"
|
||||||
|
SOLANA_METRICS_CONFIG="$(snapctl get metrics-config)"
|
||||||
|
|
||||||
|
export RUST_LOG
|
||||||
|
export SOLANA_CUDA
|
||||||
|
export SOLANA_DEFAULT_METRICS_RATE
|
||||||
|
export SOLANA_METRICS_CONFIG
|
||||||
|
fi
|
||||||
|
|
||||||
|
exec "$@"
|
|
@ -63,25 +63,25 @@ apps:
|
||||||
- home
|
- home
|
||||||
daemon-validator:
|
daemon-validator:
|
||||||
daemon: simple
|
daemon: simple
|
||||||
command: multinode-demo/validator.sh
|
command: scripts/snap-config-to-env.sh $SNAP/multinode-demo/validator.sh
|
||||||
plugs:
|
plugs:
|
||||||
- network
|
- network
|
||||||
- network-bind
|
- network-bind
|
||||||
daemon-leader:
|
daemon-leader:
|
||||||
daemon: simple
|
daemon: simple
|
||||||
command: multinode-demo/leader.sh
|
command: scripts/snap-config-to-env.sh $SNAP/multinode-demo/leader.sh
|
||||||
plugs:
|
plugs:
|
||||||
- network
|
- network
|
||||||
- network-bind
|
- network-bind
|
||||||
daemon-drone:
|
daemon-drone:
|
||||||
daemon: simple
|
daemon: simple
|
||||||
command: multinode-demo/drone.sh
|
command: scripts/snap-config-to-env.sh $SNAP/multinode-demo/drone.sh
|
||||||
plugs:
|
plugs:
|
||||||
- network
|
- network
|
||||||
- network-bind
|
- network-bind
|
||||||
daemon-oom-monitor:
|
daemon-oom-monitor:
|
||||||
daemon: simple
|
daemon: simple
|
||||||
command: scripts/oom-monitor.sh
|
command: scripts/snap-config-to-env.sh $SNAP/scripts/oom-monitor.sh
|
||||||
plugs:
|
plugs:
|
||||||
- network
|
- network
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue