fix(ci): garbage collect instances no matter previous steps status (#4255)

* fix(ci): garbage collect instances no matter the status

As we're not going to reuse test instances, the safest approach is to always delete these instances, whether the workflow run fails, gets skipped, or succeeds

* Apply suggestions from code review

Co-authored-by: Deirdre Connolly <durumcrustulum@gmail.com>

* docs(ci): improve comment

Co-authored-by: Deirdre Connolly <durumcrustulum@gmail.com>
This commit is contained in:
Gustavo Valverde 2022-05-01 12:52:57 -04:00 committed by GitHub
parent 2d783f3138
commit b3bccd6655
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 27 additions and 42 deletions

View File

@ -105,16 +105,6 @@ jobs:
service_account: 'github-service-account@zealous-zebra.iam.gserviceaccount.com' service_account: 'github-service-account@zealous-zebra.iam.gserviceaccount.com'
token_format: 'access_token' token_format: 'access_token'
# Check if our destination compute instance exists and delete it
- name: Delete existing instance with same SHA
run: |
INSTANCE=$(gcloud compute instances list --filter=full-sync-${{ env.GITHUB_REF_SLUG_URL }}-${{ env.GITHUB_SHA_SHORT }} --format='value(NAME)')
if [ -z "${INSTANCE}" ]; then
echo "No instance to delete"
else
gcloud compute instances delete "${INSTANCE}" --zone "${{ env.ZONE }}" --delete-disks all --quiet
fi
# Creates Compute Engine virtual machine instance w/ disks # Creates Compute Engine virtual machine instance w/ disks
- name: Create GCP compute instance - name: Create GCP compute instance
run: | run: |
@ -211,8 +201,14 @@ jobs:
--description="Created from commit ${{ env.GITHUB_SHA_SHORT }} with height ${{ env.SYNC_HEIGHT }}" --description="Created from commit ${{ env.GITHUB_SHA_SHORT }} with height ${{ env.SYNC_HEIGHT }}"
- name: Delete test instance - name: Delete test instance
# Do not delete the instance if the sync timeouts in GitHub # If the `full-sync` step times out (+6 hours) the previous step (creating the image) will be skipped.
if: ${{ steps.full-sync.outcome == 'success' || steps.full-sync.outcome == 'failure' }} # Even if the instance continues running, no image will be created, so it's better to delete it.
if: always()
continue-on-error: true continue-on-error: true
run: | run: |
gcloud compute instances delete "full-sync-${{ env.GITHUB_REF_SLUG_URL }}-${{ env.GITHUB_SHA_SHORT }}" --zone "${{ env.ZONE }}" --delete-disks all --quiet INSTANCE=$(gcloud compute instances list --filter=full-sync-${{ env.GITHUB_REF_SLUG_URL }}-${{ env.GITHUB_SHA_SHORT }} --format='value(NAME)')
if [ -z "${INSTANCE}" ]; then
echo "No instance to delete"
else
gcloud compute instances delete "${INSTANCE}" --zone "${{ env.ZONE }}" --delete-disks all --quiet
fi

View File

@ -199,18 +199,6 @@ jobs:
service_account: 'github-service-account@zealous-zebra.iam.gserviceaccount.com' service_account: 'github-service-account@zealous-zebra.iam.gserviceaccount.com'
token_format: 'access_token' token_format: 'access_token'
# Check if our destination compute instance exists and delete it
- name: Delete existing instance with same SHA
id: delete-old-instance
if: ${{ steps.changed-files-specific.outputs.any_changed == 'true' || github.event.inputs.regenerate-disks == 'true' || github.event_name == 'push'}}
run: |
INSTANCE=$(gcloud compute instances list --filter=regenerate-disk-${{ env.GITHUB_REF_SLUG_URL }}-${{ env.GITHUB_SHA_SHORT }} --format='value(NAME)')
if [ -z "${INSTANCE}" ]; then
echo "No instance to delete"
else
gcloud compute instances delete "${INSTANCE}" --zone "${{ env.ZONE }}" --delete-disks all --quiet
fi
- name: Create GCP compute instance - name: Create GCP compute instance
id: create-instance id: create-instance
if: ${{ steps.changed-files-specific.outputs.any_changed == 'true' || github.event.inputs.regenerate-disks == 'true' || github.event_name == 'push'}} if: ${{ steps.changed-files-specific.outputs.any_changed == 'true' || github.event.inputs.regenerate-disks == 'true' || github.event_name == 'push'}}
@ -325,11 +313,18 @@ jobs:
--description="Created from commit ${{ env.GITHUB_SHA_SHORT }} with height ${{ env.SYNC_HEIGHT }}" --description="Created from commit ${{ env.GITHUB_SHA_SHORT }} with height ${{ env.SYNC_HEIGHT }}"
- name: Delete test instance - name: Delete test instance
# Do not delete the instance if the sync timeouts in GitHub # If the `sync-to-checkpoint` step times out (+6 hours) the previous step (creating the image) will be skipped.
if: ${{ steps.sync-to-checkpoint.outcome == 'success' || steps.sync-to-checkpoint.outcome == 'failure' }} # Even if the instance continues running, no image will be created, so it's better to delete it.
if: always()
continue-on-error: true continue-on-error: true
run: | run: |
gcloud compute instances delete "regenerate-disk-${{ env.GITHUB_REF_SLUG_URL }}-${{ env.GITHUB_SHA_SHORT }}" --zone "${{ env.ZONE }}" --delete-disks all --quiet INSTANCE=$(gcloud compute instances list --filter=regenerate-disk-${{ env.GITHUB_REF_SLUG_URL }}-${{ env.GITHUB_SHA_SHORT }} --format='value(NAME)')
if [ -z "${INSTANCE}" ]; then
echo "No instance to delete"
else
gcloud compute instances delete "${INSTANCE}" --zone "${{ env.ZONE }}" --delete-disks all --quiet
fi
# Test that Zebra syncs and fully validates a few thousand blocks from a cached post-checkpoint state # Test that Zebra syncs and fully validates a few thousand blocks from a cached post-checkpoint state
test-stateful-sync: test-stateful-sync:
@ -359,17 +354,6 @@ jobs:
service_account: 'github-service-account@zealous-zebra.iam.gserviceaccount.com' service_account: 'github-service-account@zealous-zebra.iam.gserviceaccount.com'
token_format: 'access_token' token_format: 'access_token'
# Check if our destination compute instance exists and delete it
- name: Delete existing instance with same SHA
id: delete-old-instance
run: |
INSTANCE=$(gcloud compute instances list --filter=sync-checkpoint-${{ env.GITHUB_REF_SLUG_URL }}-${{ env.GITHUB_SHA_SHORT }} --format='value(NAME)')
if [ -z "${INSTANCE}" ]; then
echo "No instance to delete"
else
gcloud compute instances delete "${INSTANCE}" --zone "${{ env.ZONE }}" --delete-disks all --quiet
fi
- name: Get disk state name from gcloud - name: Get disk state name from gcloud
id: get-disk-name id: get-disk-name
run: | run: |
@ -456,8 +440,13 @@ jobs:
exit ${EXIT_CODE} exit ${EXIT_CODE}
- name: Delete test instance - name: Delete test instance
# Do not delete the instance if the sync timeouts in GitHub # We don't want to leave a failed instance in GCP using resources
if: ${{ steps.sync-past-checkpoint.outcome == 'success' || steps.sync-past-checkpoint.outcome == 'failure' }} if: always()
continue-on-error: true continue-on-error: true
run: | run: |
gcloud compute instances delete "sync-checkpoint-${{ env.GITHUB_REF_SLUG_URL }}-${{ env.GITHUB_SHA_SHORT }}" --zone "${{ env.ZONE }}" --delete-disks all --quiet INSTANCE=$(gcloud compute instances list --filter=sync-checkpoint-${{ env.GITHUB_REF_SLUG_URL }}-${{ env.GITHUB_SHA_SHORT }} --format='value(NAME)')
if [ -z "${INSTANCE}" ]; then
echo "No instance to delete"
else
gcloud compute instances delete "${INSTANCE}" --zone "${{ env.ZONE }}" --delete-disks all --quiet
fi