Add open file descriptor monitoring (#5655)
This commit is contained in:
parent
6979a17674
commit
81bb208a62
|
@ -15,8 +15,8 @@
|
|||
"editable": true,
|
||||
"gnetId": null,
|
||||
"graphTooltip": 0,
|
||||
"id": 851,
|
||||
"iteration": 1565991401072,
|
||||
"id": 883,
|
||||
"iteration": 1566852798488,
|
||||
"links": [
|
||||
{
|
||||
"asDropdown": true,
|
||||
|
@ -2516,7 +2516,7 @@
|
|||
"x": 12,
|
||||
"y": 24
|
||||
},
|
||||
"id": 23,
|
||||
"id": 61,
|
||||
"interval": null,
|
||||
"links": [],
|
||||
"mappingType": 1,
|
||||
|
@ -2569,7 +2569,7 @@
|
|||
],
|
||||
"orderByTime": "ASC",
|
||||
"policy": "default",
|
||||
"query": "SELECT sum(\"one\") FROM \"$testnet\".\"autogen\".\"panic\" WHERE $timeFilter",
|
||||
"query": "SELECT SUM(\"points_lost\") FROM \"$testnet\".\"autogen\".\"metrics\" WHERE $timeFilter\n",
|
||||
"rawQuery": true,
|
||||
"refId": "A",
|
||||
"resultFormat": "table",
|
||||
|
@ -2591,7 +2591,7 @@
|
|||
}
|
||||
],
|
||||
"thresholds": "",
|
||||
"title": "Total Panics",
|
||||
"title": "Lost Datapoints",
|
||||
"type": "singlestat",
|
||||
"valueFontSize": "80%",
|
||||
"valueMaps": [
|
||||
|
@ -2840,7 +2840,7 @@
|
|||
"datasource": "$datasource",
|
||||
"fill": 1,
|
||||
"gridPos": {
|
||||
"h": 6,
|
||||
"h": 3,
|
||||
"w": 8,
|
||||
"x": 0,
|
||||
"y": 26
|
||||
|
@ -2852,7 +2852,7 @@
|
|||
"current": false,
|
||||
"max": false,
|
||||
"min": false,
|
||||
"rightSide": false,
|
||||
"rightSide": true,
|
||||
"show": true,
|
||||
"total": false,
|
||||
"values": false
|
||||
|
@ -2888,7 +2888,7 @@
|
|||
"hide": false,
|
||||
"orderByTime": "ASC",
|
||||
"policy": "default",
|
||||
"query": "SELECT MEAN(\"points_written\") as \"Mean points written\" FROM \"$testnet\".\"autogen\".\"metrics\" WHERE $timeFilter GROUP BY time(5s) fill(null)\n",
|
||||
"query": "SELECT MEAN(\"points_written\") as \"mean\" FROM \"$testnet\".\"autogen\".\"metrics\" WHERE $timeFilter GROUP BY time(5s) fill(null)\n",
|
||||
"rawQuery": true,
|
||||
"refId": "B",
|
||||
"resultFormat": "time_series",
|
||||
|
@ -2925,7 +2925,7 @@
|
|||
],
|
||||
"orderByTime": "ASC",
|
||||
"policy": "default",
|
||||
"query": "SELECT MAX(\"points_written\") as \"Max points written\" FROM \"$testnet\".\"autogen\".\"metrics\" WHERE $timeFilter GROUP BY time(5s) fill(null)\n",
|
||||
"query": "SELECT MAX(\"points_written\") as \"max\" FROM \"$testnet\".\"autogen\".\"metrics\" WHERE $timeFilter GROUP BY time(5s) fill(null)\n",
|
||||
"rawQuery": true,
|
||||
"refId": "A",
|
||||
"resultFormat": "time_series",
|
||||
|
@ -3263,6 +3263,162 @@
|
|||
"alignLevel": null
|
||||
}
|
||||
},
|
||||
{
|
||||
"aliasColors": {},
|
||||
"bars": false,
|
||||
"dashLength": 10,
|
||||
"dashes": false,
|
||||
"datasource": "$datasource",
|
||||
"fill": 1,
|
||||
"gridPos": {
|
||||
"h": 3,
|
||||
"w": 8,
|
||||
"x": 0,
|
||||
"y": 29
|
||||
},
|
||||
"id": 62,
|
||||
"legend": {
|
||||
"alignAsTable": false,
|
||||
"avg": false,
|
||||
"current": false,
|
||||
"max": false,
|
||||
"min": false,
|
||||
"rightSide": true,
|
||||
"show": true,
|
||||
"total": false,
|
||||
"values": false
|
||||
},
|
||||
"lines": true,
|
||||
"linewidth": 1,
|
||||
"links": [],
|
||||
"nullPointMode": "connected",
|
||||
"percentage": false,
|
||||
"pointradius": 5,
|
||||
"points": false,
|
||||
"renderer": "flot",
|
||||
"seriesOverrides": [],
|
||||
"spaceLength": 10,
|
||||
"stack": false,
|
||||
"steppedLine": false,
|
||||
"targets": [
|
||||
{
|
||||
"groupBy": [
|
||||
{
|
||||
"params": [
|
||||
"$__interval"
|
||||
],
|
||||
"type": "time"
|
||||
},
|
||||
{
|
||||
"params": [
|
||||
"null"
|
||||
],
|
||||
"type": "fill"
|
||||
}
|
||||
],
|
||||
"hide": false,
|
||||
"orderByTime": "ASC",
|
||||
"policy": "default",
|
||||
"query": "SELECT MEAN(\"count\") as \"mean\" FROM \"$testnet\".\"autogen\".\"open-files\" WHERE $timeFilter GROUP BY time(5s) fill(null)\n",
|
||||
"rawQuery": true,
|
||||
"refId": "B",
|
||||
"resultFormat": "time_series",
|
||||
"select": [
|
||||
[
|
||||
{
|
||||
"params": [
|
||||
"count"
|
||||
],
|
||||
"type": "field"
|
||||
},
|
||||
{
|
||||
"params": [],
|
||||
"type": "mean"
|
||||
}
|
||||
]
|
||||
],
|
||||
"tags": []
|
||||
},
|
||||
{
|
||||
"groupBy": [
|
||||
{
|
||||
"params": [
|
||||
"$__interval"
|
||||
],
|
||||
"type": "time"
|
||||
},
|
||||
{
|
||||
"params": [
|
||||
"null"
|
||||
],
|
||||
"type": "fill"
|
||||
}
|
||||
],
|
||||
"orderByTime": "ASC",
|
||||
"policy": "default",
|
||||
"query": "SELECT MAX(\"count\") as \"max\" FROM \"$testnet\".\"autogen\".\"open-files\" WHERE $timeFilter GROUP BY time(5s) fill(null)\n",
|
||||
"rawQuery": true,
|
||||
"refId": "A",
|
||||
"resultFormat": "time_series",
|
||||
"select": [
|
||||
[
|
||||
{
|
||||
"params": [
|
||||
"count"
|
||||
],
|
||||
"type": "field"
|
||||
},
|
||||
{
|
||||
"params": [],
|
||||
"type": "max"
|
||||
}
|
||||
]
|
||||
],
|
||||
"tags": []
|
||||
}
|
||||
],
|
||||
"thresholds": [],
|
||||
"timeFrom": null,
|
||||
"timeShift": null,
|
||||
"title": "Open Files per node",
|
||||
"tooltip": {
|
||||
"shared": true,
|
||||
"sort": 1,
|
||||
"value_type": "individual"
|
||||
},
|
||||
"type": "graph",
|
||||
"xaxis": {
|
||||
"buckets": null,
|
||||
"mode": "time",
|
||||
"name": null,
|
||||
"show": true,
|
||||
"values": []
|
||||
},
|
||||
"yaxes": [
|
||||
{
|
||||
"decimals": 0,
|
||||
"format": "short",
|
||||
"label": "",
|
||||
"logBase": 1,
|
||||
"max": null,
|
||||
"min": "0.2",
|
||||
"show": true
|
||||
},
|
||||
{
|
||||
"decimals": null,
|
||||
"format": "short",
|
||||
"label": "",
|
||||
"logBase": 1,
|
||||
"max": null,
|
||||
"min": null,
|
||||
"show": false
|
||||
}
|
||||
],
|
||||
"yaxis": {
|
||||
"align": false,
|
||||
"alignLevel": null
|
||||
}
|
||||
},
|
||||
{
|
||||
"columns": [],
|
||||
"datasource": "$datasource",
|
||||
|
@ -8173,10 +8329,6 @@
|
|||
},
|
||||
{
|
||||
"allValue": ".*",
|
||||
"current": {
|
||||
"text": "All",
|
||||
"value": "$__all"
|
||||
},
|
||||
"datasource": "$datasource",
|
||||
"hide": 0,
|
||||
"includeAll": true,
|
||||
|
@ -8228,5 +8380,5 @@
|
|||
"timezone": "",
|
||||
"title": "Testnet Monitor (edge)",
|
||||
"uid": "testnet-edge",
|
||||
"version": 3
|
||||
"version": 1
|
||||
}
|
|
@ -752,7 +752,7 @@ stopNode() {
|
|||
PS4=\"$PS4\"
|
||||
set -x
|
||||
! tmux list-sessions || tmux kill-session
|
||||
for pid in solana/{net-stats,oom-monitor}.pid; do
|
||||
for pid in solana/{net-stats,fd-monitor,oom-monitor}.pid; do
|
||||
pgid=\$(ps opgid= \$(cat \$pid) | tr -d '[:space:]')
|
||||
if [[ -n \$pgid ]]; then
|
||||
sudo kill -- -\$pgid
|
||||
|
|
|
@ -50,9 +50,13 @@ skip)
|
|||
esac
|
||||
|
||||
(
|
||||
sudo scripts/oom-monitor.sh
|
||||
sudo SOLANA_METRICS_CONFIG="$SOLANA_METRICS_CONFIG" scripts/oom-monitor.sh
|
||||
) > oom-monitor.log 2>&1 &
|
||||
echo $! > oom-monitor.pid
|
||||
scripts/fd-monitor.sh > fd-monitor.log 2>&1 &
|
||||
echo $! > fd-monitor.pid
|
||||
scripts/net-stats.sh > net-stats.log 2>&1 &
|
||||
echo $! > net-stats.pid
|
||||
|
||||
! tmux list-sessions || tmux kill-session
|
||||
|
||||
|
|
|
@ -93,6 +93,8 @@ local|tar|skip)
|
|||
sudo SOLANA_METRICS_CONFIG="$SOLANA_METRICS_CONFIG" scripts/oom-monitor.sh
|
||||
) > oom-monitor.log 2>&1 &
|
||||
echo $! > oom-monitor.pid
|
||||
scripts/fd-monitor.sh > fd-monitor.log 2>&1 &
|
||||
echo $! > fd-monitor.pid
|
||||
scripts/net-stats.sh > net-stats.log 2>&1 &
|
||||
echo $! > net-stats.pid
|
||||
|
||||
|
|
|
@ -0,0 +1,20 @@
|
|||
#!/usr/bin/env bash
#
# Reports open file descriptors for the current user
#
# Every 10 seconds, counts the entries that `lsof` lists for $UID and submits
# the total as an "open-files" datapoint tagged with this machine's $HOSTNAME.
#
set -e

# Nothing is reported on non-Linux hosts; exit successfully without doing work
[[ $(uname) == Linux ]] || exit 0

# Run from the script's own directory so the sibling scripts below resolve
cd "$(dirname "$0")"

# shellcheck source=scripts/configure-metrics.sh
source configure-metrics.sh

while true; do
  # lsof emits one column-header line ahead of the entries; drop it so the
  # header is not counted as an open file
  count=$(lsof -u "$UID" | tail -n +2 | wc -l)

  # Best effort: under `set -e` a single failed write would otherwise end the
  # monitor for good, so a failure here is tolerated and retried next cycle
  ./metrics-write-datapoint.sh "open-files,hostname=$HOSTNAME count=$count" || true

  sleep 10
done

# Only reachable if the loop above terminates unexpectedly
exit 1
|
|
@ -22,5 +22,5 @@ if [[ -n $INFLUX_HOST ]]; then
|
|||
fi
|
||||
|
||||
echo "${host}/write?db=${INFLUX_DATABASE}&u=${INFLUX_USERNAME}&p=${INFLUX_PASSWORD}" \
|
||||
| xargs curl --max-time 5 -XPOST --data-binary "$point"
|
||||
| xargs curl --max-time 5 --silent --show-error -XPOST --data-binary "$point"
|
||||
exit 0
|
||||
|
|
Loading…
Reference in New Issue