fix(ci): Delete Google Cloud test instances after 3 days (#5192)

* Delete test instances after 3 days

* Use correct delete command, improve shell quoting

* Use sed to provide the correct zone or region

* Fix quoting

* Fix IFS

* Fix IFS for multiple disks

* Document why we can't quote some shell variables

* Document that instances can get deleted

* Fix exact names in deletion docs
This commit is contained in:
teor 2022-09-22 09:16:38 +10:00 committed by GitHub
parent 59835947a5
commit 12d084f6cc
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 56 additions and 21 deletions

View File

@ -8,7 +8,10 @@ on:
workflow_dispatch:
env:
# Delete all resources created before $DELETE_AGE_DAYS days ago.
# Delete all instances created before $DELETE_INSTANCE_DAYS days ago.
# We keep this short to reduce CPU, RAM, and storage costs.
DELETE_INSTANCE_DAYS: 3
# Delete all other resources created before $DELETE_AGE_DAYS days ago.
# We keep this short to reduce storage costs.
DELETE_AGE_DAYS: 2
# But keep the latest $KEEP_LATEST_IMAGE_COUNT images of each type.
@ -37,38 +40,73 @@ jobs:
service_account: 'github-service-account@zealous-zebra.iam.gserviceaccount.com'
token_format: 'access_token'
# Deletes all instances older than $DELETE_INSTANCE_DAYS days.
#
# We only delete instances whose names end in "-" followed by 7 or more lowercase hex characters,
# to avoid deleting managed instance groups and manually created instances.
#
# ${INSTANCE_AND_ZONE} expands to:
# <instance-name> --zone=<zone-name>
# so it must stay unquoted: the shell has to split it into two separate arguments.
- name: Delete old instances
run: |
# Cutoff date in YYYYMMDD form, $DELETE_INSTANCE_DAYS days before now.
DELETE_BEFORE_DATE=$(date --date="$DELETE_INSTANCE_DAYS days ago" '+%Y%m%d')
# Split the list below on newlines only, so each "<name> --zone=<zone>" entry
# stays together as a single loop item.
IFS=$'\n'
# List instances whose name matches the filter and whose creationTimestamp is before the cutoff.
# The sed expression expects tab-separated NAME and ZONE on each line,
# and rewrites the line to "<name> --zone=<zone>".
INSTANCES=$(gcloud compute instances list --sort-by=creationTimestamp --filter="name~-[0-9a-f]{7,}$ AND creationTimestamp < $DELETE_BEFORE_DATE" --format='value(NAME,ZONE)' | \
sed 's/\(.*\)\t\(.*\)/\1 --zone=\2/')
for INSTANCE_AND_ZONE in $INSTANCES
do
# Split on spaces, so the unquoted variable expands to two arguments: the name and the --zone flag.
IFS=$' '
# If the delete fails, skip to the next instance.
# `continue` also skips the IFS reset below, but the next iteration sets IFS to a space again,
# and the loop's word list was already expanded before the first iteration.
gcloud compute instances delete --verbosity=info ${INSTANCE_AND_ZONE} --delete-disks=all || continue
# Restore newline-only splitting.
IFS=$'\n'
done
# Deletes all the instance templates older than $DELETE_AGE_DAYS days.
- name: Delete old instance templates
run: |
DELETE_BEFORE_DATE=$(date --date="$DELETE_AGE_DAYS days ago" '+%Y%m%d')
TEMPLATES=$(gcloud compute instance-templates list --sort-by=creationTimestamp --filter="name~-[0-9a-f]+$ AND creationTimestamp < $DELETE_BEFORE_DATE" --format='value(NAME)')
TEMPLATES=$(gcloud compute instance-templates list --sort-by=creationTimestamp --filter="name~-[0-9a-f]{7,}$ AND creationTimestamp < $DELETE_BEFORE_DATE" --format='value(NAME)')
for TEMPLATE in $TEMPLATES
do
gcloud compute instance-templates delete ${TEMPLATE} || continue
gcloud compute instance-templates delete "${TEMPLATE}" || continue
done
# Deletes all the disks older than $DELETE_AGE_DAYS days.
#
# Disks that are attached to an instance template can't be deleted, so it is safe to delete all disks here.
# Disks that are attached to an instance template can't be deleted, so it is safe to try to delete all disks here.
#
# ${DISK_AND_LOCATION} expands to:
# <disk-name> --[zone|region]=<location-name>
# so it must stay unquoted: the shell has to split it into two separate arguments.
- name: Delete old disks
run: |
DELETE_BEFORE_DATE=$(date --date="$DELETE_AGE_DAYS days ago" '+%Y%m%d')
IFS=$'\n'
# Disks created by PR jobs, and other jobs that use a commit hash
COMMIT_DISKS=$(gcloud compute disks list --sort-by=creationTimestamp --filter="name~-[0-9a-f]+$ AND creationTimestamp < $DELETE_BEFORE_DATE" --format='value(NAME)')
COMMIT_DISKS=$(gcloud compute disks list --sort-by=creationTimestamp --filter="name~-[0-9a-f]{7,}$ AND creationTimestamp < $DELETE_BEFORE_DATE" --format='value(NAME,LOCATION,LOCATION_SCOPE)' | \
sed 's/\(.*\)\t\(.*\)\t\(.*\)/\1 --\3=\2/')
for DISK in $COMMIT_DISKS
for DISK_AND_LOCATION in $COMMIT_DISKS
do
gcloud compute disks delete --verbosity=info ${DISK} || continue
IFS=$' '
gcloud compute disks delete --verbosity=info ${DISK_AND_LOCATION} || continue
IFS=$'\n'
done
IFS=$'\n'
# Disks created by managed instance groups, and other jobs that start with "zebrad-"
ZEBRAD_DISKS=$(gcloud compute disks list --sort-by=creationTimestamp --filter="name~^zebrad- AND creationTimestamp < $DELETE_BEFORE_DATE" --format='value(NAME)')
ZEBRAD_DISKS=$(gcloud compute disks list --sort-by=creationTimestamp --filter="name~^zebrad- AND creationTimestamp < $DELETE_BEFORE_DATE" --format='value(NAME,LOCATION,LOCATION_SCOPE)' | \
sed 's/\(.*\)\t\(.*\)\t\(.*\)/\1 --\3=\2/')
for DISK in $ZEBRAD_DISKS
for DISK_AND_LOCATION in $ZEBRAD_DISKS
do
gcloud compute disks delete --verbosity=info ${DISK} || continue
IFS=$' '
gcloud compute disks delete --verbosity=info ${DISK_AND_LOCATION} || continue
IFS=$'\n'
done
# Deletes cache images older than $DELETE_AGE_DAYS days.
@ -98,7 +136,7 @@ jobs:
continue
fi
gcloud compute images delete ${IMAGE} || continue
gcloud compute images delete "${IMAGE}" || continue
done
ZEBRAD_TIP_IMAGES=$(gcloud compute images list --sort-by=~creationTimestamp --filter="name~^zebrad-cache-.*net-tip AND creationTimestamp < $DELETE_BEFORE_DATE" --format='value(NAME)')
@ -112,7 +150,7 @@ jobs:
continue
fi
gcloud compute images delete ${IMAGE} || continue
gcloud compute images delete "${IMAGE}" || continue
done
LWD_TIP_IMAGES=$(gcloud compute images list --sort-by=~creationTimestamp --filter="name~^lwd-cache-.*net-tip AND creationTimestamp < $DELETE_BEFORE_DATE" --format='value(NAME)')
@ -126,5 +164,5 @@ jobs:
continue
fi
gcloud compute images delete ${IMAGE} || continue
gcloud compute images delete "${IMAGE}" || continue
done

View File

@ -47,16 +47,13 @@ Please shut down large instances when they are not being used.
### Automated Deletion
The [Delete GCP Resources](https://github.com/ZcashFoundation/zebra/blob/main/.github/workflows/delete-gcp-resources.yml)
workflow automatically deletes instance templates, disks, and images older than a few days.
workflow automatically deletes test instances, instance templates, disks, and images older than a few days.
Running instances and their disks are protected from deletion.
If you want to keep instance templates, disks, or images in Google Cloud, name them so they don't match the automated names:
- deleted instance templates and disks end in a commit hash, so use a name ending in `-` or `-[^0-9a-f]+`
- deleted images start with `zebrad-cache` or `lwd-cache`, so use a name starting with anything else
Our other Google Cloud projects don't have automated deletion, so you can also use them for experiments or production deployments.
If you want to keep instances, instance templates, disks, or images in Google Cloud, name them so they don't match the automated names:
- deleted instances, instance templates and disks end in a commit hash, so use a name that doesn't end in `-[0-9a-f]{7,}`
- deleted disks and images start with `zebrad-` or `lwd-`, so use a name starting with anything else
Our production Google Cloud project doesn't have automated deletion.
## Troubleshooting