fix(ci): Stop using multiple jobs for quick Google Cloud tests (#5560)

* Only run multiple test jobs if they are needed for a long test
* Remove unused job steps
* Remove trailing whitespace
* Follow logs in the Run step

Co-authored-by: mergify[bot] <37929162+mergify[bot]@users.noreply.github.com>
2022-11-08 08:29:37 +10:00 · 2022-11-08 08:29:37 +10:00 · f94231fe34
parent e2d8f32619
commit f94231fe34
2 changed files with 36 additions and 144 deletions
--- a/.github/workflows/continous-integration-docker.yml
+++ b/.github/workflows/continous-integration-docker.yml
@@ -332,7 +332,7 @@ jobs:
    # because we might never get a finished sync.
    #
    # See the concurrency comment on the zebrad test-full-sync job for details.
-    concurrency: 
+    concurrency:
      group: ${{ github.workflow }}−${{ github.ref }}-regenerate-stateful-disks
      cancel-in-progress: false

@@ -378,6 +378,8 @@ jobs:
      test_id: full-sync-to-tip
      test_description: Test a full sync up to the tip
      test_variables: '-e TEST_FULL_SYNC=1 -e ZEBRA_FORCE_USE_COLOR=1 -e FULL_SYNC_MAINNET_TIMEOUT_MINUTES=600'
+      # This test runs for longer than 6 hours, so it needs multiple jobs
+      is_long_test: true
      needs_zebra_state: false
      saves_to_disk: true
      disk_suffix: tip
@@ -385,7 +387,7 @@ jobs:
    # We want to prevent multiple full zebrad syncs running at the same time,
    # but we don't want to cancel running syncs on `main` if a new PR gets merged,
    # because we might never get a finished sync.
-    # 
+    #
    # Instead, we let the first sync complete, then queue the latest pending sync, cancelling any syncs in between.
    # (As the general workflow concurrency group just gets matched in Pull Requests,
    # it has no impact on this job.)
@@ -393,7 +395,7 @@ jobs:
    # TODO:
    # - allow multiple manual syncs on a branch, and isolate manual syncs from automatic syncs, by adding '-${{ github.run_id }}' when github.event.inputs.run-full-sync is true
    # - stop multiple automatic full syncs across different PRs by removing '−${{ github.ref }}' when needs.get-available-disks.outputs.zebra_tip_disk is true
-    concurrency: 
+    concurrency:
      group: ${{ github.workflow }}−${{ github.ref }}-test-full-sync
      cancel-in-progress: false

@@ -460,7 +462,7 @@ jobs:
    # because we might never get a finished sync.
    #
    # See the concurrency comment on the zebrad test-full-sync job for details.
-    concurrency: 
+    concurrency:
      group: ${{ github.workflow }}−${{ github.ref }}-lightwalletd-full-sync
      cancel-in-progress: false

@@ -546,7 +548,7 @@ jobs:
    # because we might never get a finished test.
    #
    # See the concurrency comment on the zebrad test-full-sync job for details.
-    concurrency: 
+    concurrency:
      group: ${{ github.workflow }}−${{ github.ref }}-lightwalletd-transactions-test
      cancel-in-progress: false

--- a/.github/workflows/deploy-gcp-tests.yml
+++ b/.github/workflows/deploy-gcp-tests.yml
@@ -12,6 +12,11 @@ on:
        required: true
        type: string
        description: 'Explains what the test does'
+      height_grep_text:
+        required: false
+        type: string
+        description: 'Regular expression to find the tip height in test logs, and add it to newly created cached state image metadata'
+
      # Test selection and parameters
      test_variables:
        required: true
@@ -22,6 +27,12 @@ on:
        type: string
        default: Mainnet
        description: 'Zcash network to test against'
+      is_long_test:
+        required: false
+        type: boolean
+        default: false
+        description: 'Does this test need multiple run jobs? (Does it run longer than 6 hours?)'
+
      # Cached state
      #
      # TODO: find a better name
@@ -69,11 +80,6 @@ on:
        required: true
        type: boolean
        description: 'Does the test create a new cached state disk?'
-      # Metadata
-      height_grep_text:
-        required: false
-        type: string
-        description: 'Regular expression to find the tip height in test logs, and add it to newly created cached state image metadata'
      app_name:
        required: false
        type: string
@@ -119,11 +125,6 @@ jobs:
        with:
          short-length: 7

-      - name: Downcase network name for disks
-        run: |
-          NETWORK_CAPS=${{ inputs.network }}
-          echo "NETWORK=${NETWORK_CAPS,,}" >> $GITHUB_ENV
-
      # Setup gcloud CLI
      - name: Authenticate to Google Cloud
        id: auth
@@ -193,11 +194,6 @@ jobs:
        with:
          short-length: 7

-      - name: Downcase network name for disks
-        run: |
-          NETWORK_CAPS=${{ inputs.network }}
-          echo "NETWORK=${NETWORK_CAPS,,}" >> $GITHUB_ENV
-
      # Setup gcloud CLI
      - name: Authenticate to Google Cloud
        id: auth
@@ -415,11 +411,6 @@ jobs:
        with:
          short-length: 7

-      - name: Downcase network name for disks
-        run: |
-          NETWORK_CAPS=${{ inputs.network }}
-          echo "NETWORK=${NETWORK_CAPS,,}" >> $GITHUB_ENV
-
      # Setup gcloud CLI
      - name: Authenticate to Google Cloud
        id: auth
@@ -524,12 +515,14 @@ jobs:


  # follow the logs of the test we just launched, up to Sapling activation (or the test finishing)
+  #
+  # If `inputs.is_long_test` is `false`, this job is skipped.
  logs-sprout:
    name: Log ${{ inputs.test_id }} test (sprout)
    # We run exactly one of without-cached-state or with-cached-state, and we always skip the other one.
    needs: [ launch-with-cached-state, launch-without-cached-state ]
    # If the previous job fails, we still want to show the logs.
-    if: ${{ !cancelled() }}
+    if: ${{ !cancelled() && inputs.is_long_test }}
    runs-on: ubuntu-latest
    permissions:
      contents: 'read'
@@ -545,11 +538,6 @@ jobs:
        with:
          short-length: 7

-      - name: Downcase network name for disks
-        run: |
-          NETWORK_CAPS=${{ inputs.network }}
-          echo "NETWORK=${NETWORK_CAPS,,}" >> $GITHUB_ENV
-
      # Setup gcloud CLI
      - name: Authenticate to Google Cloud
        id: auth
@@ -598,7 +586,7 @@ jobs:
    name: Log ${{ inputs.test_id }} test (heartwood)
    needs: [ logs-sprout ]
    # If the previous job fails, we still want to show the logs.
-    if: ${{ !cancelled() }}
+    if: ${{ !cancelled() && inputs.is_long_test }}
    runs-on: ubuntu-latest
    permissions:
      contents: 'read'
@@ -614,11 +602,6 @@ jobs:
        with:
          short-length: 7

-      - name: Downcase network name for disks
-        run: |
-          NETWORK_CAPS=${{ inputs.network }}
-          echo "NETWORK=${NETWORK_CAPS,,}" >> $GITHUB_ENV
-
      # Setup gcloud CLI
      - name: Authenticate to Google Cloud
        id: auth
@@ -657,7 +640,7 @@ jobs:
    name: Log ${{ inputs.test_id }} test (canopy)
    needs: [ logs-heartwood ]
    # If the previous job fails, we still want to show the logs.
-    if: ${{ !cancelled() }}
+    if: ${{ !cancelled() && inputs.is_long_test }}
    runs-on: ubuntu-latest
    permissions:
      contents: 'read'
@@ -673,11 +656,6 @@ jobs:
        with:
          short-length: 7

-      - name: Downcase network name for disks
-        run: |
-          NETWORK_CAPS=${{ inputs.network }}
-          echo "NETWORK=${NETWORK_CAPS,,}" >> $GITHUB_ENV
-
      # Setup gcloud CLI
      - name: Authenticate to Google Cloud
        id: auth
@@ -718,7 +696,7 @@ jobs:
    name: Log ${{ inputs.test_id }} test (1740k)
    needs: [ logs-canopy ]
    # If the previous job fails, we still want to show the logs.
-    if: ${{ !cancelled() }}
+    if: ${{ !cancelled() && inputs.is_long_test }}
    runs-on: ubuntu-latest
    permissions:
      contents: 'read'
@@ -734,11 +712,6 @@ jobs:
        with:
          short-length: 7

-      - name: Downcase network name for disks
-        run: |
-          NETWORK_CAPS=${{ inputs.network }}
-          echo "NETWORK=${NETWORK_CAPS,,}" >> $GITHUB_ENV
-
      # Setup gcloud CLI
      - name: Authenticate to Google Cloud
        id: auth
@@ -781,7 +754,7 @@ jobs:
    name: Log ${{ inputs.test_id }} test (1760k)
    needs: [ logs-1740k ]
    # If the previous job fails, we still want to show the logs.
-    if: ${{ !cancelled() }}
+    if: ${{ !cancelled() && inputs.is_long_test }}
    runs-on: ubuntu-latest
    permissions:
      contents: 'read'
@@ -797,11 +770,6 @@ jobs:
        with:
          short-length: 7

-      - name: Downcase network name for disks
-        run: |
-          NETWORK_CAPS=${{ inputs.network }}
-          echo "NETWORK=${NETWORK_CAPS,,}" >> $GITHUB_ENV
-
      # Setup gcloud CLI
      - name: Authenticate to Google Cloud
        id: auth
@@ -844,7 +812,7 @@ jobs:
    name: Log ${{ inputs.test_id }} test (1780k)
    needs: [ logs-1760k ]
    # If the previous job fails, we still want to show the logs.
-    if: ${{ !cancelled() }}
+    if: ${{ !cancelled() && inputs.is_long_test }}
    runs-on: ubuntu-latest
    permissions:
      contents: 'read'
@@ -860,11 +828,6 @@ jobs:
        with:
          short-length: 7

-      - name: Downcase network name for disks
-        run: |
-          NETWORK_CAPS=${{ inputs.network }}
-          echo "NETWORK=${NETWORK_CAPS,,}" >> $GITHUB_ENV
-
      # Setup gcloud CLI
      - name: Authenticate to Google Cloud
        id: auth
@@ -908,7 +871,7 @@ jobs:
    name: Log ${{ inputs.test_id }} test (1800k)
    needs: [ logs-1780k ]
    # If the previous job fails, we still want to show the logs.
-    if: ${{ !cancelled() }}
+    if: ${{ !cancelled() && inputs.is_long_test }}
    runs-on: ubuntu-latest
    permissions:
      contents: 'read'
@@ -924,11 +887,6 @@ jobs:
        with:
          short-length: 7

-      - name: Downcase network name for disks
-        run: |
-          NETWORK_CAPS=${{ inputs.network }}
-          echo "NETWORK=${NETWORK_CAPS,,}" >> $GITHUB_ENV
-
      # Setup gcloud CLI
      - name: Authenticate to Google Cloud
        id: auth
@@ -971,7 +929,7 @@ jobs:
    name: Log ${{ inputs.test_id }} test (1820k)
    needs: [ logs-1800k ]
    # If the previous job fails, we still want to show the logs.
-    if: ${{ !cancelled() }}
+    if: ${{ !cancelled() && inputs.is_long_test }}
    runs-on: ubuntu-latest
    permissions:
      contents: 'read'
@@ -987,11 +945,6 @@ jobs:
        with:
          short-length: 7

-      - name: Downcase network name for disks
-        run: |
-          NETWORK_CAPS=${{ inputs.network }}
-          echo "NETWORK=${NETWORK_CAPS,,}" >> $GITHUB_ENV
-
      # Setup gcloud CLI
      - name: Authenticate to Google Cloud
        id: auth
@@ -1031,7 +984,7 @@ jobs:
    name: Log ${{ inputs.test_id }} test (checkpoint)
    needs: [ logs-1820k ]
    # If the previous job fails, we still want to show the logs.
-    if: ${{ !cancelled() }}
+    if: ${{ !cancelled() && inputs.is_long_test }}
    runs-on: ubuntu-latest
    permissions:
      contents: 'read'
@@ -1047,11 +1000,6 @@ jobs:
        with:
          short-length: 7

-      - name: Downcase network name for disks
-        run: |
-          NETWORK_CAPS=${{ inputs.network }}
-          echo "NETWORK=${NETWORK_CAPS,,}" >> $GITHUB_ENV
-
      # Setup gcloud CLI
      - name: Authenticate to Google Cloud
        id: auth
@@ -1086,69 +1034,15 @@ jobs:
          -e 'test result:.*finished in' \
          "

-  # follow the logs of the test we just launched, until it finishes
-  logs-end:
-    name: Log ${{ inputs.test_id }} test (end)
-    needs: [ logs-checkpoint ]
-    # If the previous job fails, we still want to show the logs.
-    if: ${{ !cancelled() }}
-    runs-on: ubuntu-latest
-    permissions:
-      contents: 'read'
-      id-token: 'write'
-    steps:
-      - uses: actions/checkout@v3.1.0
-        with:
-          persist-credentials: false
-          fetch-depth: '2'

-      - name: Inject slug/short variables
-        uses: rlespinasse/github-slug-action@v4
-        with:
-          short-length: 7
-
-      - name: Downcase network name for disks
-        run: |
-          NETWORK_CAPS=${{ inputs.network }}
-          echo "NETWORK=${NETWORK_CAPS,,}" >> $GITHUB_ENV
-
-      # Setup gcloud CLI
-      - name: Authenticate to Google Cloud
-        id: auth
-        uses: google-github-actions/auth@v0.8.3
-        with:
-          retries: '3'
-          workload_identity_provider: 'projects/143793276228/locations/global/workloadIdentityPools/github-actions/providers/github-oidc'
-          service_account: 'github-service-account@zealous-zebra.iam.gserviceaccount.com'
-          token_format: 'access_token'
-
-      # Show recent logs, following until the test finishes
-      - name: Show logs for ${{ inputs.test_id }} test (end)
-        run: |
-          gcloud compute ssh \
-          github-service-account@${{ inputs.test_id }}-${{ env.GITHUB_REF_SLUG_URL }}-${{ env.GITHUB_SHA_SHORT }} \
-          --zone ${{ env.ZONE }} \
-          --quiet \
-          --ssh-flag="-o ServerAliveInterval=5" \
-          --ssh-flag="-o ConnectionAttempts=20" \
-          --ssh-flag="-o ConnectTimeout=5" \
-          --command \
-          "\
-          sudo docker logs \
-          --tail ${{ env.EXTRA_LOG_LINES }} \
-          --follow \
-          ${{ inputs.test_id }} | \
-          tee --output-error=exit /dev/stderr | \
-          grep --max-count=1 --extended-regexp --color=always \
-          'test result:.*finished in' \
-          "
-
-
-  # check the results of the test, and show all of the test logs
+  # Show all the test logs, then follow the logs of the test we just launched, until it finishes.
+  # Then check the result of the test.
+  #
+  # If `inputs.is_long_test` is `false`, the Rust test harness mostly runs in this job.
+  # Otherwise, it mostly runs in the "logs" jobs.
  test-result:
-    # TODO: update the job name here, and in the branch protection rules
    name: Run ${{ inputs.test_id }} test
-    needs: [ logs-end ]
+    needs: [ logs-checkpoint ]
    # If the previous job fails, we also want to run and fail this job,
    # so that the branch protection rule fails in Mergify and GitHub.
    if: ${{ !cancelled() }}
@@ -1167,11 +1061,6 @@ jobs:
        with:
          short-length: 7

-      - name: Downcase network name for disks
-        run: |
-          NETWORK_CAPS=${{ inputs.network }}
-          echo "NETWORK=${NETWORK_CAPS,,}" >> $GITHUB_ENV
-
      # Setup gcloud CLI
      - name: Authenticate to Google Cloud
        id: auth
@@ -1205,6 +1094,7 @@ jobs:
          set -e;
          sudo docker logs \
          --tail all \
+          --follow \
          ${{ inputs.test_id }} | \
          tee --output-error=exit /dev/stderr | \
          grep --max-count=1 --extended-regexp --color=always \