feat(ssh): enable OS Login for GCP test instances (#5602)
* feat(ssh): enable OS Login for GCP test instances * fix(ssh): force service account impersonation for OS Login * debug: show actual user trying to impersonate SA * fix(gcloud): configure gcloud before running commands * fix(ssh): add VM zone to ssh command * fix(auth): bringing changes from #5614 * fix(auth): impersonation is working as expected now * fix(gcloud): set up the GCP CLI after authenticating (#5606) Previous behavior: `gcloud` commands were running without appropriate authentication: although the `auth` action executed successfully, the actual gcloud CLI used in later jobs was not using the correct configuration or credentials. Expected behavior: All `gcloud` commands should be properly configured and authenticated. Solution: Add the `google-github-actions/setup-gcloud` action after each `google-github-actions/auth` invocation, and before running any `gcloud` command. Remove the need for an OAuth access token when it is not required by the following steps * fix(auth): revert to latest version * fix: wrong replace * fix(ci): use a specific debian image for VM containers * fix(ssh): delete SSH keys generated by CI after 30 seconds * debug: remove debug commands * fix(compute): use a lightweight container image * fix(ci): add missing sudo to docker command * Update .github/workflows/deploy-gcp-tests.yml Co-authored-by: Deirdre Connolly <durumcrustulum@gmail.com> * fix(ssh): delete ssh-keys for the specific GHA service account Co-authored-by: Deirdre Connolly <durumcrustulum@gmail.com>
This commit is contained in:
parent
04df10fc7d
commit
844ebf0dbd
|
@ -145,11 +145,11 @@ jobs:
|
|||
--boot-disk-size 300GB \
|
||||
--boot-disk-type pd-ssd \
|
||||
--create-disk name="${{ inputs.test_id }}-${{ env.GITHUB_SHA_SHORT }}",device-name="${{ inputs.test_id }}-${{ env.GITHUB_SHA_SHORT }}",size=300GB,type=pd-ssd \
|
||||
--container-image debian-11 \
|
||||
--container-image gcr.io/google-containers/busybox \
|
||||
--container-restart-policy=never \
|
||||
--machine-type ${{ env.MACHINE_TYPE }} \
|
||||
--scopes cloud-platform \
|
||||
--metadata=google-monitoring-enabled=true,google-logging-enabled=true \
|
||||
--metadata=google-monitoring-enabled=TRUE,google-logging-enabled=TRUE,enable-oslogin=TRUE \
|
||||
--metadata-from-file=startup-script=.github/workflows/scripts/gcp-vm-startup-script.sh \
|
||||
--tags ${{ inputs.app_name }} \
|
||||
--zone ${{ env.ZONE }}
|
||||
|
@ -160,10 +160,9 @@ jobs:
|
|||
# SSH into the just created VM, and create a docker volume with the newly created disk.
|
||||
- name: Create ${{ inputs.test_id }} Docker volume
|
||||
run: |
|
||||
gcloud compute ssh \
|
||||
github-service-account@${{ inputs.test_id }}-${{ env.GITHUB_REF_SLUG_URL }}-${{ env.GITHUB_SHA_SHORT }} \
|
||||
gcloud compute ssh ${{ inputs.test_id }}-${{ env.GITHUB_REF_SLUG_URL }}-${{ env.GITHUB_SHA_SHORT }} \
|
||||
--ssh-key-expire-after=30s \
|
||||
--zone ${{ env.ZONE }} \
|
||||
--quiet \
|
||||
--ssh-flag="-o ServerAliveInterval=5" \
|
||||
--ssh-flag="-o ConnectionAttempts=20" \
|
||||
--ssh-flag="-o ConnectTimeout=5" \
|
||||
|
@ -211,10 +210,9 @@ jobs:
|
|||
# Launch the test without any cached state
|
||||
- name: Launch ${{ inputs.test_id }} test
|
||||
run: |
|
||||
gcloud compute ssh \
|
||||
github-service-account@${{ inputs.test_id }}-${{ env.GITHUB_REF_SLUG_URL }}-${{ env.GITHUB_SHA_SHORT }} \
|
||||
gcloud compute ssh ${{ inputs.test_id }}-${{ env.GITHUB_REF_SLUG_URL }}-${{ env.GITHUB_SHA_SHORT }} \
|
||||
--ssh-key-expire-after=30s \
|
||||
--zone ${{ env.ZONE }} \
|
||||
--quiet \
|
||||
--ssh-flag="-o ServerAliveInterval=5" \
|
||||
--ssh-flag="-o ConnectionAttempts=20" \
|
||||
--ssh-flag="-o ConnectTimeout=5" \
|
||||
|
@ -366,11 +364,11 @@ jobs:
|
|||
--boot-disk-size 300GB \
|
||||
--boot-disk-type pd-ssd \
|
||||
--create-disk image=${{ env.CACHED_DISK_NAME }},name="${{ inputs.test_id }}-${{ env.GITHUB_SHA_SHORT }}",device-name="${{ inputs.test_id }}-${{ env.GITHUB_SHA_SHORT }}",size=300GB,type=pd-ssd \
|
||||
--container-image debian-11 \
|
||||
--container-image gcr.io/google-containers/busybox \
|
||||
--container-restart-policy=never \
|
||||
--machine-type ${{ env.MACHINE_TYPE }} \
|
||||
--scopes cloud-platform \
|
||||
--metadata=google-monitoring-enabled=true,google-logging-enabled=true \
|
||||
--metadata=google-monitoring-enabled=TRUE,google-logging-enabled=TRUE,enable-oslogin=TRUE \
|
||||
--metadata-from-file=startup-script=.github/workflows/scripts/gcp-vm-startup-script.sh \
|
||||
--tags ${{ inputs.app_name }} \
|
||||
--zone ${{ env.ZONE }}
|
||||
|
@ -383,10 +381,9 @@ jobs:
|
|||
# but the cached state can be smaller if we just increased the disk size.)
|
||||
- name: Create ${{ inputs.test_id }} Docker volume
|
||||
run: |
|
||||
gcloud compute ssh \
|
||||
github-service-account@${{ inputs.test_id }}-${{ env.GITHUB_REF_SLUG_URL }}-${{ env.GITHUB_SHA_SHORT }} \
|
||||
gcloud compute ssh ${{ inputs.test_id }}-${{ env.GITHUB_REF_SLUG_URL }}-${{ env.GITHUB_SHA_SHORT }} \
|
||||
--ssh-key-expire-after=30s \
|
||||
--zone ${{ env.ZONE }} \
|
||||
--quiet \
|
||||
--ssh-flag="-o ServerAliveInterval=5" \
|
||||
--ssh-flag="-o ConnectionAttempts=20" \
|
||||
--ssh-flag="-o ConnectTimeout=5" \
|
||||
|
@ -452,10 +449,9 @@ jobs:
|
|||
# TODO: we should find better logic for these use cases
|
||||
if: ${{ (inputs.needs_zebra_state && !inputs.needs_lwd_state) && inputs.test_id != 'lwd-full-sync' }}
|
||||
run: |
|
||||
gcloud compute ssh \
|
||||
github-service-account@${{ inputs.test_id }}-${{ env.GITHUB_REF_SLUG_URL }}-${{ env.GITHUB_SHA_SHORT }} \
|
||||
gcloud compute ssh ${{ inputs.test_id }}-${{ env.GITHUB_REF_SLUG_URL }}-${{ env.GITHUB_SHA_SHORT }} \
|
||||
--ssh-key-expire-after=30s \
|
||||
--zone ${{ env.ZONE }} \
|
||||
--quiet \
|
||||
--ssh-flag="-o ServerAliveInterval=5" \
|
||||
--ssh-flag="-o ConnectionAttempts=20" \
|
||||
--ssh-flag="-o ConnectTimeout=5" \
|
||||
|
@ -502,10 +498,9 @@ jobs:
|
|||
# TODO: we should find better logic for these use cases
|
||||
if: ${{ (inputs.needs_zebra_state && inputs.needs_lwd_state) || inputs.test_id == 'lwd-full-sync' }}
|
||||
run: |
|
||||
gcloud compute ssh \
|
||||
github-service-account@${{ inputs.test_id }}-${{ env.GITHUB_REF_SLUG_URL }}-${{ env.GITHUB_SHA_SHORT }} \
|
||||
gcloud compute ssh ${{ inputs.test_id }}-${{ env.GITHUB_REF_SLUG_URL }}-${{ env.GITHUB_SHA_SHORT }} \
|
||||
--ssh-key-expire-after=30s \
|
||||
--zone ${{ env.ZONE }} \
|
||||
--quiet \
|
||||
--ssh-flag="-o ServerAliveInterval=5" \
|
||||
--ssh-flag="-o ConnectionAttempts=20" \
|
||||
--ssh-flag="-o ConnectTimeout=5" \
|
||||
|
@ -568,10 +563,9 @@ jobs:
|
|||
# Errors in the tests are caught by the final test status job.
|
||||
- name: Show logs for ${{ inputs.test_id }} test (sprout)
|
||||
run: |
|
||||
gcloud compute ssh \
|
||||
github-service-account@${{ inputs.test_id }}-${{ env.GITHUB_REF_SLUG_URL }}-${{ env.GITHUB_SHA_SHORT }} \
|
||||
gcloud compute ssh ${{ inputs.test_id }}-${{ env.GITHUB_REF_SLUG_URL }}-${{ env.GITHUB_SHA_SHORT }} \
|
||||
--ssh-key-expire-after=30s \
|
||||
--zone ${{ env.ZONE }} \
|
||||
--quiet \
|
||||
--ssh-flag="-o ServerAliveInterval=5" \
|
||||
--ssh-flag="-o ConnectionAttempts=20" \
|
||||
--ssh-flag="-o ConnectTimeout=5" \
|
||||
|
@ -627,10 +621,9 @@ jobs:
|
|||
# Show recent logs, following until Canopy activation (or the test finishes)
|
||||
- name: Show logs for ${{ inputs.test_id }} test (heartwood)
|
||||
run: |
|
||||
gcloud compute ssh \
|
||||
github-service-account@${{ inputs.test_id }}-${{ env.GITHUB_REF_SLUG_URL }}-${{ env.GITHUB_SHA_SHORT }} \
|
||||
gcloud compute ssh ${{ inputs.test_id }}-${{ env.GITHUB_REF_SLUG_URL }}-${{ env.GITHUB_SHA_SHORT }} \
|
||||
--ssh-key-expire-after=30s \
|
||||
--zone ${{ env.ZONE }} \
|
||||
--quiet \
|
||||
--ssh-flag="-o ServerAliveInterval=5" \
|
||||
--ssh-flag="-o ConnectionAttempts=20" \
|
||||
--ssh-flag="-o ConnectTimeout=5" \
|
||||
|
@ -683,10 +676,9 @@ jobs:
|
|||
# Show recent logs, following until NU5 activation (or the test finishes)
|
||||
- name: Show logs for ${{ inputs.test_id }} test (canopy)
|
||||
run: |
|
||||
gcloud compute ssh \
|
||||
github-service-account@${{ inputs.test_id }}-${{ env.GITHUB_REF_SLUG_URL }}-${{ env.GITHUB_SHA_SHORT }} \
|
||||
gcloud compute ssh ${{ inputs.test_id }}-${{ env.GITHUB_REF_SLUG_URL }}-${{ env.GITHUB_SHA_SHORT }} \
|
||||
--ssh-key-expire-after=30s \
|
||||
--zone ${{ env.ZONE }} \
|
||||
--quiet \
|
||||
--ssh-flag="-o ServerAliveInterval=5" \
|
||||
--ssh-flag="-o ConnectionAttempts=20" \
|
||||
--ssh-flag="-o ConnectTimeout=5" \
|
||||
|
@ -741,10 +733,9 @@ jobs:
|
|||
# Show recent logs, following until block 1,740,000 (or the test finishes)
|
||||
- name: Show logs for ${{ inputs.test_id }} test (1740k)
|
||||
run: |
|
||||
gcloud compute ssh \
|
||||
github-service-account@${{ inputs.test_id }}-${{ env.GITHUB_REF_SLUG_URL }}-${{ env.GITHUB_SHA_SHORT }} \
|
||||
gcloud compute ssh ${{ inputs.test_id }}-${{ env.GITHUB_REF_SLUG_URL }}-${{ env.GITHUB_SHA_SHORT }} \
|
||||
--ssh-key-expire-after=30s \
|
||||
--zone ${{ env.ZONE }} \
|
||||
--quiet \
|
||||
--ssh-flag="-o ServerAliveInterval=5" \
|
||||
--ssh-flag="-o ConnectionAttempts=20" \
|
||||
--ssh-flag="-o ConnectTimeout=5" \
|
||||
|
@ -801,10 +792,9 @@ jobs:
|
|||
# Show recent logs, following until block 1,760,000 (or the test finishes)
|
||||
- name: Show logs for ${{ inputs.test_id }} test (1760k)
|
||||
run: |
|
||||
gcloud compute ssh \
|
||||
github-service-account@${{ inputs.test_id }}-${{ env.GITHUB_REF_SLUG_URL }}-${{ env.GITHUB_SHA_SHORT }} \
|
||||
gcloud compute ssh ${{ inputs.test_id }}-${{ env.GITHUB_REF_SLUG_URL }}-${{ env.GITHUB_SHA_SHORT }} \
|
||||
--ssh-key-expire-after=30s \
|
||||
--zone ${{ env.ZONE }} \
|
||||
--quiet \
|
||||
--ssh-flag="-o ServerAliveInterval=5" \
|
||||
--ssh-flag="-o ConnectionAttempts=20" \
|
||||
--ssh-flag="-o ConnectTimeout=5" \
|
||||
|
@ -861,10 +851,9 @@ jobs:
|
|||
# Show recent logs, following until block 1,780,000 (or the test finishes)
|
||||
- name: Show logs for ${{ inputs.test_id }} test (1780k)
|
||||
run: |
|
||||
gcloud compute ssh \
|
||||
github-service-account@${{ inputs.test_id }}-${{ env.GITHUB_REF_SLUG_URL }}-${{ env.GITHUB_SHA_SHORT }} \
|
||||
gcloud compute ssh ${{ inputs.test_id }}-${{ env.GITHUB_REF_SLUG_URL }}-${{ env.GITHUB_SHA_SHORT }} \
|
||||
--ssh-key-expire-after=30s \
|
||||
--zone ${{ env.ZONE }} \
|
||||
--quiet \
|
||||
--ssh-flag="-o ServerAliveInterval=5" \
|
||||
--ssh-flag="-o ConnectionAttempts=20" \
|
||||
--ssh-flag="-o ConnectTimeout=5" \
|
||||
|
@ -922,10 +911,9 @@ jobs:
|
|||
# Show recent logs, following until block 1,800,000 (or the test finishes)
|
||||
- name: Show logs for ${{ inputs.test_id }} test (1800k)
|
||||
run: |
|
||||
gcloud compute ssh \
|
||||
github-service-account@${{ inputs.test_id }}-${{ env.GITHUB_REF_SLUG_URL }}-${{ env.GITHUB_SHA_SHORT }} \
|
||||
gcloud compute ssh ${{ inputs.test_id }}-${{ env.GITHUB_REF_SLUG_URL }}-${{ env.GITHUB_SHA_SHORT }} \
|
||||
--ssh-key-expire-after=30s \
|
||||
--zone ${{ env.ZONE }} \
|
||||
--quiet \
|
||||
--ssh-flag="-o ServerAliveInterval=5" \
|
||||
--ssh-flag="-o ConnectionAttempts=20" \
|
||||
--ssh-flag="-o ConnectTimeout=5" \
|
||||
|
@ -982,10 +970,9 @@ jobs:
|
|||
# Show recent logs, following until block 1,820,000 (or the test finishes)
|
||||
- name: Show logs for ${{ inputs.test_id }} test (1820k)
|
||||
run: |
|
||||
gcloud compute ssh \
|
||||
github-service-account@${{ inputs.test_id }}-${{ env.GITHUB_REF_SLUG_URL }}-${{ env.GITHUB_SHA_SHORT }} \
|
||||
gcloud compute ssh ${{ inputs.test_id }}-${{ env.GITHUB_REF_SLUG_URL }}-${{ env.GITHUB_SHA_SHORT }} \
|
||||
--ssh-key-expire-after=30s \
|
||||
--zone ${{ env.ZONE }} \
|
||||
--quiet \
|
||||
--ssh-flag="-o ServerAliveInterval=5" \
|
||||
--ssh-flag="-o ConnectionAttempts=20" \
|
||||
--ssh-flag="-o ConnectTimeout=5" \
|
||||
|
@ -1041,10 +1028,9 @@ jobs:
|
|||
# TODO: when doing obtain/extend tips, log the verifier in use, and check for full verification here
|
||||
- name: Show logs for ${{ inputs.test_id }} test (checkpoint)
|
||||
run: |
|
||||
gcloud compute ssh \
|
||||
github-service-account@${{ inputs.test_id }}-${{ env.GITHUB_REF_SLUG_URL }}-${{ env.GITHUB_SHA_SHORT }} \
|
||||
gcloud compute ssh ${{ inputs.test_id }}-${{ env.GITHUB_REF_SLUG_URL }}-${{ env.GITHUB_SHA_SHORT }} \
|
||||
--ssh-key-expire-after=30s \
|
||||
--zone ${{ env.ZONE }} \
|
||||
--quiet \
|
||||
--ssh-flag="-o ServerAliveInterval=5" \
|
||||
--ssh-flag="-o ConnectionAttempts=20" \
|
||||
--ssh-flag="-o ConnectTimeout=5" \
|
||||
|
@ -1111,10 +1097,9 @@ jobs:
|
|||
# (`docker wait` can also wait for multiple containers, but we only ever wait for a single container.)
|
||||
- name: Result of ${{ inputs.test_id }} test
|
||||
run: |
|
||||
gcloud compute ssh \
|
||||
github-service-account@${{ inputs.test_id }}-${{ env.GITHUB_REF_SLUG_URL }}-${{ env.GITHUB_SHA_SHORT }} \
|
||||
gcloud compute ssh ${{ inputs.test_id }}-${{ env.GITHUB_REF_SLUG_URL }}-${{ env.GITHUB_SHA_SHORT }} \
|
||||
--ssh-key-expire-after=30s \
|
||||
--zone ${{ env.ZONE }} \
|
||||
--quiet \
|
||||
--ssh-flag="-o ServerAliveInterval=5" \
|
||||
--ssh-flag="-o ConnectionAttempts=20" \
|
||||
--ssh-flag="-o ConnectTimeout=5" \
|
||||
|
@ -1237,15 +1222,14 @@ jobs:
|
|||
SYNC_HEIGHT=""
|
||||
|
||||
DOCKER_LOGS=$( \
|
||||
gcloud compute ssh \
|
||||
github-service-account@${{ inputs.test_id }}-${{ env.GITHUB_REF_SLUG_URL }}-${{ env.GITHUB_SHA_SHORT }} \
|
||||
gcloud compute ssh ${{ inputs.test_id }}-${{ env.GITHUB_REF_SLUG_URL }}-${{ env.GITHUB_SHA_SHORT }} \
|
||||
--ssh-key-expire-after=30s \
|
||||
--zone ${{ env.ZONE }} \
|
||||
--quiet \
|
||||
--ssh-flag="-o ServerAliveInterval=5" \
|
||||
--ssh-flag="-o ConnectionAttempts=20" \
|
||||
--ssh-flag="-o ConnectTimeout=5" \
|
||||
--command=" \
|
||||
docker logs ${{ inputs.test_id }} --tail 200 \
|
||||
sudo docker logs ${{ inputs.test_id }} --tail 200 \
|
||||
")
|
||||
|
||||
SYNC_HEIGHT=$( \
|
||||
|
@ -1376,3 +1360,15 @@ jobs:
|
|||
else
|
||||
gcloud compute instances delete "${INSTANCE}" --zone "${{ env.ZONE }}" --delete-disks all --quiet
|
||||
fi
|
||||
|
||||
# Deletes SSH keys generated during this workflow run, as GCP has a limit of SSH keys
|
||||
# that can exist at the same time in the OS Login metadata. Not deleting these keys
|
||||
# could cause the following error:
|
||||
# `Login profile size exceeds 32 KiB. Delete profile values to make additional space`
|
||||
- name: Delete temporal SSH keys
  # Best-effort cleanup: a failure here must not fail the workflow run.
  continue-on-error: true
  run: |
    # Service account whose OS Login profile holds the SSH keys to remove.
    SERVICE_ACCOUNT="github-service-account@zealous-zebra.iam.gserviceaccount.com"

    # The impersonation flag must be INSIDE the command substitution, so that it is
    # passed to `gcloud ... ssh-keys list`. Placed after the closing parenthesis it
    # becomes an extra word in the `for` list: the listing then runs without
    # impersonation, and the loop tries to remove a "key" named after the flag.
    for i in $(gcloud compute os-login ssh-keys list --format="table[no-heading](value.fingerprint)" --impersonate-service-account="${SERVICE_ACCOUNT}"); do
      echo "$i";
      # `|| true` keeps the loop going if one key cannot be removed.
      gcloud compute os-login ssh-keys remove --key "$i" --impersonate-service-account="${SERVICE_ACCOUNT}" || true;
    done
|
||||
|
|
Loading…
Reference in New Issue