Add CPU and RAM usage to Metrics (#6968)

* Add CPU usage to Metrics

* Add RAM usage and rename to system-stats

* Shellcheck

* Remove SC exception

* Address review comments
This commit is contained in:
Sagar Dhawan 2019-11-14 20:36:34 -08:00 committed by GitHub
parent 006c39380a
commit 3ce6248f8c
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 190 additions and 0 deletions

View File

@ -9603,6 +9603,170 @@
"align": false,
"alignLevel": null
}
},
{
"collapsed": false,
"gridPos": {
"h": 1,
"w": 24,
"x": 0,
"y": 109
},
"id": 74,
"panels": [],
"title": "Resources",
"type": "row"
},
{
"aliasColors": {},
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": "$datasource",
"fill": 1,
"gridPos": {
"h": 4,
"w": 8,
"x": 0,
"y": 110
},
"id": 70,
"legend": {
"avg": false,
"current": false,
"max": false,
"min": false,
"show": true,
"total": false,
"values": false
},
"lines": true,
"linewidth": 1,
"links": [],
"nullPointMode": "connected",
"percentage": false,
"pointradius": 5,
"points": false,
"renderer": "flot",
"seriesOverrides": [],
"spaceLength": 10,
"stack": false,
"steppedLine": false,
"targets": [
{
"groupBy": [
{
"params": [
"$__interval"
],
"type": "time"
},
{
"params": [
"null"
],
"type": "fill"
}
],
"orderByTime": "ASC",
"policy": "default",
"query": "SELECT mean(\"cpu_usage\") as \"cpu_usage\" FROM \"$testnet\".\"autogen\".\"system-stats\" WHERE hostname =~ /$hostid/ AND $timeFilter GROUP BY time(5s) fill(null)\n",
"rawQuery": true,
"refId": "A",
"resultFormat": "time_series",
"select": [
[
{
"params": [
"value"
],
"type": "field"
},
{
"params": [],
"type": "mean"
}
]
],
"tags": []
},
{
"groupBy": [
{
"params": [
"$__interval"
],
"type": "time"
},
{
"params": [
"null"
],
"type": "fill"
}
],
"orderByTime": "ASC",
"policy": "default",
"query": "SELECT max(\"ram_usage\") as \"ram_usage\" FROM \"$testnet\".\"autogen\".\"system-stats\" WHERE hostname =~ /$hostid/ AND $timeFilter GROUP BY time(1s) fill(null)\n",
"rawQuery": true,
"refId": "B",
"resultFormat": "time_series",
"select": [
[
{
"params": [
"value"
],
"type": "field"
},
{
"params": [],
"type": "mean"
}
]
],
"tags": []
}
],
"thresholds": [],
"timeFrom": null,
"timeShift": null,
"title": "Resource Usage",
"tooltip": {
"shared": true,
"sort": 0,
"value_type": "individual"
},
"type": "graph",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": []
},
"yaxes": [
{
"format": "percent",
"label": null,
"logBase": 1,
"max": "100",
"min": "0",
"show": true
},
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": false
}
],
"yaxis": {
"align": false,
"alignLevel": null
}
}
],
"refresh": "60s",

View File

@ -139,6 +139,8 @@ cat >> ~/solana/on-reboot <<EOF
echo \$! > net-stats.pid
scripts/iftop.sh > iftop.log 2>&1 &
echo \$! > iftop.pid
scripts/system-stats.sh > system-stats.log 2>&1 &
echo \$! > system-stats.pid
if ${GPU_CUDA_OK} && [[ -e /dev/nvidia0 ]]; then
echo Selecting solana-validator-cuda

24
scripts/system-stats.sh Normal file
View File

@ -0,0 +1,24 @@
#!/usr/bin/env bash
#
# Reports cpu and ram usage statistics
#
# Once per second, takes a single batch-mode `top` snapshot, derives
#   cpu_usage = 100 - idle%
#   ram_usage = used / total * 100
# and submits both as fields of a `system-stats` datapoint (tagged with the
# hostname) through scripts/metrics-write-datapoint.sh.
# Exits immediately with status 0 on non-Linux hosts.
#
set -e

[[ $(uname) == Linux ]] || exit 0

# need to cd like this to avoid #SC1091
cd "$(dirname "$0")/.."
source scripts/configure-metrics.sh

while true; do
  # One snapshot per iteration so cpu and ram come from the same sample.
  # LC_ALL=C keeps "." as the decimal separator regardless of host locale.
  # A failing top must not terminate the reporter under `set -e`; it simply
  # yields an empty snapshot, which is handled below.
  top_output=$(LC_ALL=C top -bn1) || top_output=

  # Parse the summary lines in a single awk pass.  The memory line may be
  # labelled with different units (KiB/MiB/GiB ...); the used/total ratio does
  # not depend on the unit, so only the "Mem :" label itself is matched.
  # Nothing is printed unless both values were parsed and total is non-zero.
  report=$(awk '
    # e.g. "%Cpu(s):  1.2 us,  0.5 sy,  0.0 ni, 98.0 id, ..."
    /Cpu\(s\)/ && cpu == "" {
      if (match($0, /[0-9.]+[ %]*id/)) {
        # numeric conversion keeps only the leading number of the match
        cpu = 100 - substr($0, RSTART, RLENGTH)
      }
    }
    # e.g. "MiB Mem :   7950.7 total,   1234.5 free,   3456.7 used, ..."
    /Mem *:/ && total == "" {
      if (match($0, /[0-9.]+[A-Za-z]* +total/)) {
        total = substr($0, RSTART, RLENGTH) + 0
      }
      if (match($0, /[0-9.]+[A-Za-z]* +used/)) {
        used = substr($0, RSTART, RLENGTH) + 0
      }
    }
    END {
      if (cpu != "" && used != "" && total > 0) {
        printf "cpu_usage=%s,ram_usage=%s\n", cpu, used / total * 100
      }
    }
  ' <<<"$top_output")

  if [[ -n $report ]]; then
    ./scripts/metrics-write-datapoint.sh "system-stats,hostname=$HOSTNAME $report"
  else
    # Skip this sample rather than submitting a malformed datapoint or exiting.
    echo "system-stats: unable to parse cpu/ram usage from top output" >&2
  fi

  sleep 1
done