remove inactive hosts (#31521)
This commit is contained in:
parent
77a56b02ea
commit
f42955c93e
|
@ -22,32 +22,24 @@ check_service() {
|
|||
|
||||
# Loop through the servers
|
||||
for server in "${servers[@]}"; do
|
||||
ssh_success=false
|
||||
ssh_attempts=0
|
||||
while ! $ssh_success && [ $ssh_attempts -lt 3 ]; do
|
||||
local service_not_running=true
|
||||
local retries=3
|
||||
for _ in $(seq 1 $retries); do
|
||||
# Check if the service is running
|
||||
if ssh -o StrictHostKeyChecking=no sol@"$server" sudo systemctl is-active "$service" >/dev/null 2>&1; then
|
||||
ssh_success=true
|
||||
if ssh -o StrictHostKeyChecking=no sol@"$server" sudo systemctl is-active "$service" >/dev/null; then
|
||||
# Service is running
|
||||
message="The $service service is running on $server."
|
||||
echo "$message"
|
||||
service_not_running=false
|
||||
break
|
||||
else
|
||||
ssh_attempts=$((ssh_attempts + 1))
|
||||
sleep 5
|
||||
# Service is not running, wait for 10 seconds and check again
|
||||
sleep 10
|
||||
fi
|
||||
done
|
||||
|
||||
if $ssh_success; then
|
||||
# Service is running
|
||||
message="The $service service is running on $server."
|
||||
echo "$message"
|
||||
else
|
||||
# SSH connection failed after retries
|
||||
message="ERROR: Unable to establish SSH connection to $server after 3 retries."
|
||||
echo "$message"
|
||||
curl -H "Content-Type: application/json" -d '{"content":"'"$message"', manual intervention is required."}' "$DISCORD_WEBHOOK"
|
||||
continue
|
||||
fi
|
||||
|
||||
# Service is not running, try to restart it
|
||||
if ! $ssh_success; then
|
||||
if $service_not_running; then
|
||||
# Service is not running, send alert and try to restart it
|
||||
message="The $service service is not running on $server. Restarting..."
|
||||
echo "$message"
|
||||
curl -H "Content-Type: application/json" -d '{"content":"'"$message"'"}' "$DISCORD_WEBHOOK"
|
||||
|
|
|
@ -52,7 +52,7 @@ scrape_configs:
|
|||
scrape_interval: 15s
|
||||
scrape_timeout: 14s
|
||||
static_configs:
|
||||
- targets: ['dev-server-us-da11:9100','dev-server-us-ny5:9100','dev-server-us-sv15:9100','dev-server-eu-am6:9100','dev-server-asia-sg1:9100','dev-server-us-da11-2:9100','dev-server-us-da11-3:9100','dev-server-us-da11-4:9100','dev-server-us-da11-5:9100','dev-server-asia-hk2:9100','dev-server-asia-sg1-2:9100','dev-server-asia-ty11:9100','dev-server-eu-am6-2:9100','dev-server-asia-sg1-2:9100','dev-server-us-sv15-2:9100','dev-server-us-da11-6:9100','dev-server-us-da11-7:9100','dev-server-eu-ld7-1:9100','dev-server-us-da11-8:9100','dev-server-eu-ld7-2:9100']
|
||||
- targets: ['dev-server-us-da11:9100','dev-server-us-ny5:9100','dev-server-us-sv15:9100','dev-server-eu-am6:9100','dev-server-asia-sg1:9100','dev-server-us-da11-2:9100','dev-server-us-da11-3:9100','dev-server-us-da11-4:9100','dev-server-us-da11-5:9100','dev-server-asia-hk2:9100','dev-server-asia-sg1-2:9100','dev-server-eu-am6-2:9100','dev-server-asia-sg1-2:9100','dev-server-us-sv15-2:9100','dev-server-us-da11-6:9100','dev-server-us-da11-7:9100','dev-server-eu-ld7-1:9100','dev-server-us-da11-8:9100','dev-server-eu-ld7-2:9100']
|
||||
|
||||
- job_name: 'Development'
|
||||
scrape_interval: 15s
|
||||
|
|
Loading…
Reference in New Issue