refactor(test): cleanup GCP instances on a single PR (#3766)

* refactor(test): reuse same GCP instance on a single PR

This also makes deployments faster: the instance is only deleted when the PR is merged or closed, instead of on every push to the PR (the resulting delete-then-create pattern is sketched after the change summary below).

* fix(deploy): add zone to updates

* fix: typo

* fix(ci): improve conditions for updates

* fix(deploy): delete old deployments instead of reusing them

* fix(deploy): keep delete command after run

* fix(deploy): always create an instance

* fix(deploy): delete disks on every delete command

* imp(ci): use better id name

* Update .github/workflows/test.yml

Co-authored-by: Deirdre Connolly <durumcrustulum@gmail.com>

* imp: handle errors correctly on deletion

* fix: do not hide valid errors

* fix: edge case where the container is not ready yet

Co-authored-by: Deirdre Connolly <durumcrustulum@gmail.com>
Gustavo Valverde, 2022-03-09 17:10:05 -04:00 (committed by GitHub)
parent e4e6275983
commit 03d123b428
3 changed files with 92 additions and 8 deletions
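
All three workflows converge on the same idempotent pattern: list any instance whose name embeds the branch slug (and, for per-push instances, the short commit SHA), delete it together with its disks if it exists, then create a fresh one. A minimal standalone sketch of that delete step, assuming an authenticated gcloud CLI and a hypothetical name prefix:

    # Hypothetical values; in the workflows below they come from github-slug-action.
    PREFIX="full-sync-my-branch-abc1234"
    ZONE="us-central1-a"

    # An empty result means there is nothing to delete.
    INSTANCE=$(gcloud compute instances list --filter="${PREFIX}" --format='value(NAME)')
    if [ -z "${INSTANCE}" ]; then
      echo "No instance to delete"
    else
      # --delete-disks all also removes the attached disks; --quiet skips the confirmation prompt.
      gcloud compute instances delete "${INSTANCE}" --zone "${ZONE}" --delete-disks all --quiet
    fi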

.github/workflows/clean.yml (new file)

@@ -0,0 +1,40 @@
name: Clean
on:
workflow_dispatch:
pull_request:
branches:
- main
types: [ closed ]
env:
NETWORK: Mainnet
PROJECT_ID: ${{ secrets.GCP_PROJECT_ID }}
REGION: us-central1
ZONE: us-central1-a
jobs:
delete:
name: Delete test deployments
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v2.4.0
with:
persist-credentials: false
- name: Inject slug/short variables
uses: rlespinasse/github-slug-action@v4
# Setup gcloud CLI
- name: Authenticate to Google Cloud
id: auth
uses: google-github-actions/auth@v0.5.0
with:
credentials_json: ${{ secrets.GOOGLE_CREDENTIALS }}
- name: Delete test instance
continue-on-error: true
run: |
TEST_INSTANCES=$(gcloud compute instances list --filter="${{ env.GITHUB_REF_SLUG_URL }}" --format='value(NAME)')
for instance in ${TEST_INSTANCES}; do gcloud compute instances delete "${instance}" --zone "${{ env.ZONE }}" --delete-disks all --quiet; done
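
The same filter can be exercised read-only before trusting the workflow with deletions; a quick check with a hypothetical branch slug:

    # Hypothetical slug; in CI it is set by rlespinasse/github-slug-action.
    BRANCH_SLUG="my-feature-branch"
    # Show what the Clean workflow would delete, without deleting anything.
    gcloud compute instances list --filter="${BRANCH_SLUG}" --format='table(NAME,ZONE,STATUS)'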

@@ -135,6 +135,17 @@ jobs:
with:
credentials_json: ${{ secrets.GOOGLE_CREDENTIALS }}
# Check if our destination compute instance exists and delete it
- name: Delete existing instance with same SHA
id: delete-old-instance
run: |
INSTANCE=$(gcloud compute instances list --filter=full-sync-${{ env.GITHUB_REF_SLUG_URL }}-${{ env.GITHUB_SHA_SHORT }} --format='value(NAME)')
if [ -z "${INSTANCE}" ]; then
echo "No instance to delete"
else
gcloud compute instances delete "${INSTANCE}" --zone "${{ env.ZONE }}" --delete-disks all --quiet
fi
# Creates Compute Engine virtual machine instance w/ disks
- name: Create GCP compute instance
id: create-instance
@@ -159,7 +170,7 @@ jobs:
# https://www.googlecloudcommunity.com/gc/Infrastructure-Compute-Storage/SSH-into-Compute-Container-not-easily-possible/td-p/170915
- name: Get container name from logs
id: get-container-name
- if: steps.create-instance.outcome == 'success'
+ if: ${{ steps.create-instance.outcome == 'success' }}
run: |
INSTANCE_ID=$(gcloud compute instances describe full-sync-${{ env.GITHUB_REF_SLUG_URL }}-${{ env.GITHUB_SHA_SHORT }} --zone ${{ env.ZONE }} --format='value(id)')
echo "Using instance: $INSTANCE_ID"
@@ -170,9 +181,11 @@
done
CONTAINER_NAME=$(gcloud logging read 'log_name=projects/${{ env.PROJECT_ID }}/logs/cos_system AND jsonPayload.MESSAGE:full-sync-${{ env.GITHUB_REF_SLUG_URL }}-${{ env.GITHUB_SHA_SHORT }}' --format='value(jsonPayload.MESSAGE)' --limit=1 | grep -o '...-full-sync-${{ env.GITHUB_REF_SLUG_URL }}-${{ env.GITHUB_SHA_SHORT }}-....' | tr -d "'.")
echo "::set-output name=zebra_container::$CONTAINER_NAME"
sleep 60
- name: Full sync mainnet
id: full-sync-mainnet
if: ${{ steps.create-instance.outcome == 'success' }}
run: |
gcloud compute ssh \
full-sync-${{ env.GITHUB_REF_SLUG_URL }}-${{ env.GITHUB_SHA_SHORT }} \
@@ -188,4 +201,4 @@ jobs:
if: ${{ steps.full-sync-mainnet.outcome == 'success' || steps.full-sync-mainnet.outcome == 'failure' }}
continue-on-error: true
run: |
- gcloud compute instances delete "full-sync-${{ env.GITHUB_REF_SLUG_URL }}-${{ env.GITHUB_SHA_SHORT }}" --delete-disks all --zone "${{ env.ZONE }}"
+ gcloud compute instances delete "full-sync-${{ env.GITHUB_REF_SLUG_URL }}-${{ env.GITHUB_SHA_SHORT }}" --zone "${{ env.ZONE }}" --delete-disks all --quiet
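
The "Get container name from logs" steps exist because Container-Optimized OS (via its konlet agent) generates the final container name itself, so the workflows have to fish it out of the instance's system logs before they can drive the container over SSH. A condensed sketch of the same polling idea, with hypothetical names and PROJECT_ID assumed to be set:

    # Hypothetical; the workflows build this name from the branch slug and short SHA.
    INSTANCE_NAME="full-sync-my-branch-abc1234"
    CONTAINER_NAME=""
    # Poll Cloud Logging until the COS agent has logged the generated container name.
    while [ -z "${CONTAINER_NAME}" ]; do
      sleep 10
      CONTAINER_NAME=$(gcloud logging read \
        "log_name=projects/${PROJECT_ID}/logs/cos_system AND jsonPayload.MESSAGE:${INSTANCE_NAME}" \
        --format='value(jsonPayload.MESSAGE)' --limit=1 \
        | grep -o "...-${INSTANCE_NAME}-...." | tr -d "'.")
    done
    echo "Zebra container: ${CONTAINER_NAME}"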

.github/workflows/test.yml

@@ -28,7 +28,7 @@ on:
env:
CARGO_INCREMENTAL: '1'
- ZEBRA_SKIP_IPV6_TESTS: "1"
+ ZEBRA_SKIP_IPV6_TESTS: '1'
RUST_BACKTRACE: full
RUST_LIB_BACKTRACE: full
COLORBT_SHOW_HIDDEN: '1'
@@ -135,6 +135,11 @@ jobs:
docker pull ${{ env.GAR_BASE }}/${{ env.IMAGE_NAME }}:sha-${{ env.GITHUB_SHA_SHORT }}
docker run -e ZEBRA_SKIP_IPV6_TESTS --name zebrad-tests -t ${{ env.GAR_BASE }}/${{ env.IMAGE_NAME }}:sha-${{ env.GITHUB_SHA_SHORT }} cargo test --locked --release --features enable-sentry --workspace -- --include-ignored
# This test changes zebra-chain's activation heights,
# which can recompile all the Zebra crates,
# so we want its build products to be cached separately.
#
# Also, we don't want to accidentally use the fake heights in other tests.
test-fake-activation-heights:
name: Test with fake activation heights
runs-on: ubuntu-latest
@@ -241,6 +246,18 @@ jobs:
with:
credentials_json: ${{ secrets.GOOGLE_CREDENTIALS }}
# Check if our destination compute instance exists and delete it
- name: Delete existing instance with same SHA
id: delete-old-instance
if: ${{ steps.changed-files-specific.outputs.any_changed == 'true' || github.event.inputs.regenerate-disks == 'true' }}
run: |
INSTANCE=$(gcloud compute instances list --filter=regenerate-disk-${{ env.GITHUB_REF_SLUG_URL }}-${{ env.GITHUB_SHA_SHORT }} --format='value(NAME)')
if [ -z "${INSTANCE}" ]; then
echo "No instance to delete"
else
gcloud compute instances delete "${INSTANCE}" --zone "${{ env.ZONE }}" --delete-disks all --quiet
fi
- name: Create GCP compute instance
id: create-instance
if: ${{ steps.changed-files-specific.outputs.any_changed == 'true' || github.event.inputs.regenerate-disks == 'true' }}
@@ -276,7 +293,7 @@ jobs:
# https://www.googlecloudcommunity.com/gc/Infrastructure-Compute-Storage/SSH-into-Compute-Container-not-easily-possible/td-p/170915
- name: Get container name from logs
id: get-container-name
- if: steps.create-instance.outcome == 'success'
+ if: ${{ steps.create-instance.outcome == 'success' }}
run: |
INSTANCE_ID=$(gcloud compute instances describe regenerate-disk-${{ env.GITHUB_REF_SLUG_URL }}-${{ env.GITHUB_SHA_SHORT }} --zone ${{ env.ZONE }} --format='value(id)')
echo "Using instance: $INSTANCE_ID"
@@ -287,10 +304,11 @@
done
CONTAINER_NAME=$(gcloud logging read 'log_name=projects/${{ env.PROJECT_ID }}/logs/cos_system AND jsonPayload.MESSAGE:regenerate-disk-${{ env.GITHUB_REF_SLUG_URL }}-${{ env.GITHUB_SHA_SHORT }}' --format='value(jsonPayload.MESSAGE)' --limit=1 | grep -o '...-regenerate-disk-${{ env.GITHUB_REF_SLUG_URL }}-${{ env.GITHUB_SHA_SHORT }}-....' | tr -d "'.")
echo "::set-output name=zebra_container::$CONTAINER_NAME"
sleep 60
- name: Regenerate stateful disks logs
id: sync-to-checkpoint
- if: steps.create-instance.outcome == 'success'
+ if: ${{ steps.create-instance.outcome == 'success' }}
run: |
gcloud compute ssh \
regenerate-disk-${{ env.GITHUB_REF_SLUG_URL }}-${{ env.GITHUB_SHA_SHORT }} \
@@ -319,7 +337,7 @@ jobs:
if: ${{ steps.sync-to-checkpoint.outcome == 'success' || steps.sync-to-checkpoint.outcome == 'failure' }}
continue-on-error: true
run: |
- gcloud compute instances delete "regenerate-disk-${{ env.GITHUB_REF_SLUG_URL }}-${{ env.GITHUB_SHA_SHORT }}" --delete-disks all --zone "${{ env.ZONE }}"
+ gcloud compute instances delete "regenerate-disk-${{ env.GITHUB_REF_SLUG_URL }}-${{ env.GITHUB_SHA_SHORT }}" --zone "${{ env.ZONE }}" --delete-disks all --quiet
# Test that Zebra syncs and fully validates a few thousand blocks from a cached post-checkpoint state
test-stateful-sync:
@@ -345,6 +363,18 @@ jobs:
with:
credentials_json: ${{ secrets.GOOGLE_CREDENTIALS }}
# Check if our destination compute instance exists and delete it
- name: Delete existing instance with same SHA
id: delete-old-instance
if: ${{ needs.regenerate-stateful-disks.outputs.any_changed != 'true' || github.event.inputs.regenerate-disks != 'true' }}
run: |
INSTANCE=$(gcloud compute instances list --filter=sync-checkpoint-${{ env.GITHUB_REF_SLUG_URL }}-${{ env.GITHUB_SHA_SHORT }} --format='value(NAME)')
if [ -z "${INSTANCE}" ]; then
echo "No instance to delete"
else
gcloud compute instances delete "${INSTANCE}" --zone "${{ env.ZONE }}" --delete-disks all --quiet
fi
- name: Get disk state name from gcloud
id: get-disk-name
if: ${{ needs.regenerate-stateful-disks.outputs.any_changed != 'true' || github.event.inputs.regenerate-disks != 'true' }}
@@ -391,7 +421,7 @@ jobs:
# https://www.googlecloudcommunity.com/gc/Infrastructure-Compute-Storage/SSH-into-Compute-Container-not-easily-possible/td-p/170915
- name: Get container name from logs
id: get-container-name
- if: steps.create-instance.outcome == 'success'
+ if: ${{ steps.create-instance.outcome == 'success' }}
run: |
INSTANCE_ID=$(gcloud compute instances describe sync-checkpoint-${{ env.GITHUB_REF_SLUG_URL }}-${{ env.GITHUB_SHA_SHORT }} --zone ${{ env.ZONE }} --format='value(id)')
echo "Using instance: $INSTANCE_ID"
@@ -402,6 +432,7 @@
done
CONTAINER_NAME=$(gcloud logging read 'log_name=projects/${{ env.PROJECT_ID }}/logs/cos_system AND jsonPayload.MESSAGE:sync-checkpoint-${{ env.GITHUB_REF_SLUG_URL }}-${{ env.GITHUB_SHA_SHORT }}' --format='value(jsonPayload.MESSAGE)' --limit=1 | grep -o '...-sync-checkpoint-${{ env.GITHUB_REF_SLUG_URL }}-${{ env.GITHUB_SHA_SHORT }}-....' | tr -d "'.")
echo "::set-output name=zebra_container::$CONTAINER_NAME"
sleep 60
- name: Sync past mandatory checkpoint logs
id: sync-past-checkpoint
@@ -420,4 +451,4 @@ jobs:
if: ${{ steps.sync-past-checkpoint.outcome == 'success' || steps.sync-past-checkpoint.outcome == 'failure' }}
continue-on-error: true
run: |
- gcloud compute instances delete "sync-checkpoint-${{ env.GITHUB_REF_SLUG_URL }}-${{ env.GITHUB_SHA_SHORT }}" --delete-disks all --zone "${{ env.ZONE }}"
+ gcloud compute instances delete "sync-checkpoint-${{ env.GITHUB_REF_SLUG_URL }}-${{ env.GITHUB_SHA_SHORT }}" --zone "${{ env.ZONE }}" --delete-disks all --quiet
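
A note on the grep pattern shared by the three log-parsing steps above: it assumes the logged name is the declared instance name wrapped in a three-character prefix (konlet typically uses klt-) and a four-character random suffix. A quick check of the extraction against a hypothetical log line:

    # Hypothetical log line; COS konlet typically names containers klt-<name>-<4 chars>.
    MESSAGE="Started container 'klt-sync-checkpoint-my-branch-abc1234-abcd'."
    echo "${MESSAGE}" | grep -o "...-sync-checkpoint-my-branch-abc1234-...." | tr -d "'."
    # Prints: klt-sync-checkpoint-my-branch-abc1234-abcd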