Refactor launch without cached state into a single job

This commit is contained in:
teor 2023-10-06 11:00:25 +10:00 committed by GitHub
parent fcc7bf4e33
commit 0fbc464200
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
1 changed file with 27 additions and 66 deletions

View File

@ -104,11 +104,15 @@ env:
CACHED_STATE_UPDATE_LIMIT: 576
jobs:
# set up and launch the test, if it doesn't use any cached state
# set up and run the test, if it doesn't use any cached state
# each test runs one of the *-with/without-cached-state job series, and skips the other
launch-without-cached-state:
name: Launch ${{ inputs.test_id }} test
if: ${{ !inputs.needs_zebra_state }}
#
# Launches the test and checks the results of the test.
#
run-without-cached-state:
name: Run ${{ inputs.test_id }} test
# If `inputs.is_long_test` is `true`, the timeout is 5 days, otherwise it's 3 hours.
timeout-minutes: ${{ inputs.is_long_test && 7200 || 180 }}
runs-on: zfnd-runners
permissions:
contents: 'read'
@ -136,19 +140,6 @@ jobs:
NETWORK_CAPS="${{ inputs.network }}"
echo "NETWORK=${NETWORK_CAPS,,}" >> "$GITHUB_ENV"
# Install our SSH secret
- name: Install private SSH key
uses: shimataro/ssh-key-action@v2.5.1
with:
key: ${{ secrets.GCP_SSH_PRIVATE_KEY }}
name: google_compute_engine
known_hosts: unnecessary
- name: Generate public SSH key
run: |
sudo apt-get update && sudo apt-get -qq install -y --no-install-recommends openssh-client
ssh-keygen -y -f ~/.ssh/google_compute_engine > ~/.ssh/google_compute_engine.pub
# Set up the gcloud CLI
- name: Authenticate to Google Cloud
id: auth
@ -161,17 +152,20 @@ jobs:
- name: Set up Cloud SDK
uses: google-github-actions/setup-gcloud@v1.1.1
# Create a Compute Engine virtual machine
- name: Create ${{ inputs.test_id }} GCP compute instance
id: create-instance
# Create a Compute Engine virtual machine and run the tests
# Show all the test logs, then follow the logs of the test we just launched, until it finishes.
#
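# NOTE(review): the first `grep` in the pipeline below only passes lines matching
# "Zcash network", so the "test result" `grep` after it will not see the test summary
# line unless both appear on the same line — confirm the intended pipeline.
#
# TODO: fail if the "Zcash network" regex isn't found in the first 700 lines of output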
- name: Run ${{ inputs.test_id }} test
id: test-result
run: |
gcloud compute instances create-with-container "${{ inputs.test_id }}-${{ env.GITHUB_REF_SLUG_URL }}-${{ env.GITHUB_SHA_SHORT }}" \
--boot-disk-size 300GB \
--boot-disk-size 350GB \
--boot-disk-type pd-ssd \
--image-project=cos-cloud \
--image-family=cos-stable \
--create-disk=name="${{ inputs.test_id }}-${{ env.GITHUB_SHA_SHORT }}",device-name="${{ inputs.test_id }}-${{ env.GITHUB_SHA_SHORT }}",size=300GB,type=pd-ssd \
--container-image=gcr.io/google-containers/busybox \
--container-image="${{ vars.GAR_BASE }}/${{ vars.CI_IMAGE_NAME }}:sha-${{ env.GITHUB_SHA_SHORT }}" \
--container-env="${{ inputs.test_variables }}" \
--container-mount-disk=name="${{ inputs.test_id }}-${{ env.GITHUB_SHA_SHORT }}",mount-path=${{ inputs.root_state_path }}/${{ inputs.zebra_state_dir }} \
--container-tty \
--machine-type ${{ vars.GCP_LARGE_MACHINE }} \
--network-interface=subnet=${{ vars.GCP_SUBNETWORK }} \
--scopes cloud-platform \
@ -179,45 +173,12 @@ jobs:
--metadata-from-file=startup-script=.github/workflows/scripts/gcp-vm-startup-script.sh \
--labels=app=${{ inputs.app_name }},environment=test,network=${NETWORK},github_ref=${{ env.GITHUB_REF_SLUG_URL }},test=${{ inputs.test_id }} \
--tags ${{ inputs.app_name }} \
--zone ${{ vars.GCP_ZONE }}
sleep 60
# Create a docker volume with the new disk we just created.
#
# SSH into the just created VM, and create a docker volume with the newly created disk.
- name: Create ${{ inputs.test_id }} Docker volume
run: |
gcloud compute ssh ${{ inputs.test_id }}-${{ env.GITHUB_REF_SLUG_URL }}-${{ env.GITHUB_SHA_SHORT }} \
--zone ${{ vars.GCP_ZONE }} \
--ssh-flag="-o ServerAliveInterval=5" \
--ssh-flag="-o ConnectionAttempts=20" \
--ssh-flag="-o ConnectTimeout=5" \
--command \
"\
sudo mkfs.ext4 -v /dev/sdb \
&& \
sudo docker volume create --driver local --opt type=ext4 --opt device=/dev/sdb \
${{ inputs.test_id }}-${{ env.GITHUB_SHA_SHORT }} \
"
# Launch the test without any cached state
- name: Launch ${{ inputs.test_id }} test
run: |
gcloud compute ssh ${{ inputs.test_id }}-${{ env.GITHUB_REF_SLUG_URL }}-${{ env.GITHUB_SHA_SHORT }} \
--zone ${{ vars.GCP_ZONE }} \
--ssh-flag="-o ServerAliveInterval=5" \
--ssh-flag="-o ConnectionAttempts=20" \
--ssh-flag="-o ConnectTimeout=5" \
--command \
"\
sudo docker run \
--name ${{ inputs.test_id }} \
--tty \
--detach \
${{ inputs.test_variables }} \
--mount type=volume,src=${{ inputs.test_id }}-${{ env.GITHUB_SHA_SHORT }},dst=${{ inputs.root_state_path }}/${{ inputs.zebra_state_dir }} \
${{ vars.GAR_BASE }}/${{ vars.CI_IMAGE_NAME }}:sha-${{ env.GITHUB_SHA_SHORT }} \
"
--zone ${{ vars.GCP_ZONE }} | \
tee --output-error=exit /dev/stderr | \
grep --extended-regexp \
-e 'Zcash network: ${{ inputs.network }}' | \
grep --max-count=1 --extended-regexp --color=always \
"test result: .*ok.* [1-9][0-9]* passed.*finished in"; \
# set up and launch the test, if it uses cached state
# each test runs one of the *-with/without-cached-state job series, and skips the other
@ -501,7 +462,7 @@ jobs:
test-result:
name: Run ${{ inputs.test_id }} test
# We run exactly one of without-cached-state or with-cached-state, and we always skip the other one.
needs: [ launch-with-cached-state, launch-without-cached-state ]
needs: [ launch-with-cached-state ]
# If the previous job fails, we also want to run and fail this job,
# so that the branch protection rule fails in Mergify and GitHub.
if: ${{ !cancelled() }}
@ -621,7 +582,7 @@ jobs:
create-state-image:
name: Create ${{ inputs.test_id }} cached state image
runs-on: ubuntu-latest
needs: [ test-result, launch-with-cached-state ]
needs: [ test-result, launch-with-cached-state, run-without-cached-state ]
# We run exactly one of without-cached-state or with-cached-state, and we always skip the other one.
# Normally, if a job is skipped, all the jobs that depend on it are also skipped.
# So we need to override the default success() check to make this job run.