# zebra/.github/workflows/deploy-gcp-tests.yml

name: Deploy GCP tests

# Reusable workflow: it is only triggered through `workflow_call`, and the calling
# workflow configures a single test run using the inputs below.
#
# Every optional input declares its default explicitly, so the value used when the
# caller omits it is visible here.
on:
  workflow_call:
    inputs:
      # Status and logging
      #
      # `test_id` is also used to build the names of the instance, disk,
      # docker volume, and docker container created by the jobs in this workflow.
      test_id:
        required: true
        type: string
        description: 'Unique identifier for the test'
      test_description:
        required: true
        type: string
        description: 'Explains what the test does'
      # Test selection and parameters
      #
      # `test_variables` is inserted verbatim into the `docker run` command line.
      test_variables:
        required: true
        type: string
        description: 'Environmental variables used to select and configure the test'
      network:
        required: false
        type: string
        default: Mainnet
        description: 'Zcash network to test against'
      # Cached state
      #
      # TODO: find a better name
      root_state_path:
        required: false
        type: string
        default: '/zebrad-cache'
        description: 'Cached state base directory path'
      # TODO: find a better name
      zebra_state_dir:
        required: false
        type: string
        default: ''
        description: 'Zebra cached state directory and input image prefix to search in GCP'
      # TODO: find a better name
      lwd_state_dir:
        required: false
        type: string
        default: ''
        description: 'Lightwalletd cached state directory and input image prefix to search in GCP'
      disk_prefix:
        required: false
        type: string
        default: 'zebrad-cache'
        description: 'Image name prefix, and `zebra_state_dir` name for newly created cached states'
      # The suffix is appended to the image name pattern used to search for cached state images.
      disk_suffix:
        required: false
        type: string
        default: ''
        description: 'Image name suffix'
      needs_zebra_state:
        required: true
        type: boolean
        description: 'Does the test use Zebra cached state?'
      needs_lwd_state:
        required: false
        type: boolean
        default: false
        description: 'Does the test use Lightwalletd and Zebra cached state?'
      # main branch states can be outdated and slower, but they can also be more reliable
      prefer_main_cached_state:
        required: false
        type: boolean
        default: false
        description: 'Does the test prefer to use a main branch cached state?'
      saves_to_disk:
        required: true
        type: boolean
        description: 'Does the test create a new cached state disk?'
      # Metadata
      height_grep_text:
        required: false
        type: string
        default: ''
        description: 'Regular expression to find the tip height in test logs, and add it to newly created cached state image metadata'
      app_name:
        required: false
        type: string
        default: 'zebra'
        description: 'Application name for Google Cloud instance metadata'

env:
  # Docker image source: registry base path and image name
  IMAGE_NAME: 'zebrad-test'
  GAR_BASE: 'us-docker.pkg.dev/zealous-zebra/zebra'
  # Google Cloud instance placement and size
  ZONE: 'us-central1-a'
  MACHINE_TYPE: 'c2d-standard-16'
  # Number of earlier log lines shown when a new log job starts.
  # Raise it if log lines go missing between two consecutive log jobs.
  #
  # Ideally each job would show everything logged since the previous job ended,
  # but the gap between jobs is not known in advance.
  # 200 lines covers roughly 6-15 minutes of sync logs, or a single panic log.
  EXTRA_LOG_LINES: '200'

jobs:
  # set up the test, if it doesn't use any cached state
  # each test runs one of the *-with/without-cached-state job series, and skips the other
  setup-without-cached-state:
    name: Setup ${{ inputs.test_id }} test
    # Skipped when the caller asks for a Zebra cached state.
    if: ${{ !inputs.needs_zebra_state }}
    runs-on: ubuntu-latest
    # NOTE(review): `id-token: 'write'` is presumably what lets the Google Cloud
    # authentication step below use the workload identity provider - confirm against
    # the auth action's documentation.
    permissions:
      contents: 'read'
      id-token: 'write'
    steps:
      - uses: actions/checkout@v3.0.2
        with:
          persist-credentials: false
          fetch-depth: '2'

      # The instance and disk names below are built from GITHUB_REF_SLUG_URL and
      # GITHUB_SHA_SHORT (presumably exported by this action - confirm against its docs).
      - name: Inject slug/short variables
        uses: rlespinasse/github-slug-action@v4
        with:
          short-length: 7

      # Exports NETWORK (the lower-cased `network` input) to later steps through $GITHUB_ENV.
      # NOTE(review): none of the remaining steps in this job read NETWORK.
      - name: Downcase network name for disks
        run: |
          NETWORK_CAPS=${{ inputs.network }}
          echo "NETWORK=${NETWORK_CAPS,,}" >> $GITHUB_ENV

      # Setup gcloud CLI
      - name: Authenticate to Google Cloud
        id: auth
        uses: google-github-actions/auth@v0.8.0
        with:
          retries: '3'
          workload_identity_provider: 'projects/143793276228/locations/global/workloadIdentityPools/github-actions/providers/github-oidc'
          service_account: 'github-service-account@zealous-zebra.iam.gserviceaccount.com'
          token_format: 'access_token'

      # Create a Compute Engine virtual machine
      #
      # The instance is named <test_id>-<branch slug>-<short sha>.
      # Besides the 200GB boot disk, an extra empty 200GB SSD disk named
      # <test_id>-<short sha> is created and attached; no source image is used.
      #
      # The step then waits 60 seconds (presumably to let the instance finish booting
      # before the next step connects over SSH).
      - name: Create ${{ inputs.test_id }} GCP compute instance
        id: create-instance
        run: |
          gcloud compute instances create-with-container "${{ inputs.test_id }}-${{ env.GITHUB_REF_SLUG_URL }}-${{ env.GITHUB_SHA_SHORT }}" \
          --boot-disk-size 200GB \
          --boot-disk-type pd-ssd \
          --create-disk name="${{ inputs.test_id }}-${{ env.GITHUB_SHA_SHORT }}",device-name="${{ inputs.test_id }}-${{ env.GITHUB_SHA_SHORT }}",size=200GB,type=pd-ssd \
          --container-image debian:buster \
          --container-restart-policy=never \
          --machine-type ${{ env.MACHINE_TYPE }} \
          --scopes cloud-platform \
          --metadata=google-monitoring-enabled=true,google-logging-enabled=true \
          --tags ${{ inputs.app_name }} \
          --zone ${{ env.ZONE }}
          sleep 60

      # Create a docker volume with the new disk we just created.
      #
      # SSH into the just created VM, and create a docker volume with the newly created disk.
      #
      # The disk is formatted as ext4 first, then exposed as a local docker volume
      # named <test_id>-<short sha>.
      # NOTE(review): this assumes the extra disk shows up as /dev/sdb inside the VM.
      - name: Create ${{ inputs.test_id }} Docker volume
        run: |
          gcloud compute ssh \
          ${{ inputs.test_id }}-${{ env.GITHUB_REF_SLUG_URL }}-${{ env.GITHUB_SHA_SHORT }} \
          --zone ${{ env.ZONE }} \
          --quiet \
          --ssh-flag="-o ServerAliveInterval=5" \
          --command \
          "\
          sudo mkfs.ext4 -v /dev/sdb \
          && \
          docker volume create --driver local --opt type=ext4 --opt device=/dev/sdb \
          ${{ inputs.test_id }}-${{ env.GITHUB_SHA_SHORT }} \
          "

  # launch the test, if it doesn't use any cached state
  launch-without-cached-state:
    name: Launch ${{ inputs.test_id }} test
    needs: [ setup-without-cached-state ]
    # If creating the Google Cloud instance fails, we don't want to launch another docker instance.
    #
    # The `needs_zebra_state` check repeats the condition of the setup job,
    # so this job is also skipped for tests that use a cached state.
    if: ${{ !cancelled() && !failure() && !inputs.needs_zebra_state }}
    runs-on: ubuntu-latest
    permissions:
      contents: 'read'
      id-token: 'write'
    steps:
      - uses: actions/checkout@v3.0.2
        with:
          persist-credentials: false
          fetch-depth: '2'

      # The instance, volume, and image tag names below are built from
      # GITHUB_REF_SLUG_URL and GITHUB_SHA_SHORT (presumably exported by this action).
      - name: Inject slug/short variables
        uses: rlespinasse/github-slug-action@v4
        with:
          short-length: 7

      # Exports NETWORK (the lower-cased `network` input) to later steps through $GITHUB_ENV.
      # NOTE(review): none of the remaining steps in this job read NETWORK.
      - name: Downcase network name for disks
        run: |
          NETWORK_CAPS=${{ inputs.network }}
          echo "NETWORK=${NETWORK_CAPS,,}" >> $GITHUB_ENV

      # Setup gcloud CLI
      - name: Authenticate to Google Cloud
        id: auth
        uses: google-github-actions/auth@v0.8.0
        with:
          retries: '3'
          workload_identity_provider: 'projects/143793276228/locations/global/workloadIdentityPools/github-actions/providers/github-oidc'
          service_account: 'github-service-account@zealous-zebra.iam.gserviceaccount.com'
          token_format: 'access_token'

      # Launch the test without any cached state
      #
      # SSH into the VM created by the setup job, and start a detached docker container
      # named after the test. The container runs the test image tagged with this commit's
      # short hash, with `test_variables` inserted verbatim into the `docker run` arguments.
      #
      # The docker volume created by the setup job is mounted at
      # <root_state_path>/<zebra_state_dir>.
      - name: Launch ${{ inputs.test_id }} test
        run: |
          gcloud compute ssh \
          ${{ inputs.test_id }}-${{ env.GITHUB_REF_SLUG_URL }}-${{ env.GITHUB_SHA_SHORT }} \
          --zone ${{ env.ZONE }} \
          --quiet \
          --ssh-flag="-o ServerAliveInterval=5" \
          --command \
          "\
          docker run \
          --name ${{ inputs.test_id }} \
          --tty \
          --detach \
          ${{ inputs.test_variables }} \
          --mount type=volume,src=${{ inputs.test_id }}-${{ env.GITHUB_SHA_SHORT }},dst=${{ inputs.root_state_path }}/${{ inputs.zebra_state_dir }} \
          ${{ env.GAR_BASE }}/${{ env.IMAGE_NAME }}:sha-${{ env.GITHUB_SHA_SHORT }} \
          "


  # set up the test, if it uses cached state
  # each test runs one of the *-with/without-cached-state job series, and skips the other
  setup-with-cached-state:
    name: Setup ${{ inputs.test_id }} test
    # Only runs when the caller asks for a Zebra cached state.
    if: ${{ inputs.needs_zebra_state }}
    runs-on: ubuntu-latest
    permissions:
      contents: 'read'
      id-token: 'write'
    steps:
      # The checkout is required by the disk search step,
      # which reads the state version from the source code.
      - uses: actions/checkout@v3.0.2
        with:
          persist-credentials: false
          fetch-depth: '2'

      # The instance, disk, and image names below are built from
      # GITHUB_REF_SLUG_URL and GITHUB_SHA_SHORT (presumably exported by this action).
      - name: Inject slug/short variables
        uses: rlespinasse/github-slug-action@v4
        with:
          short-length: 7

      # Exports NETWORK (the lower-cased `network` input) to later steps through $GITHUB_ENV.
      # The disk search step uses it to match image names.
      - name: Downcase network name for disks
        run: |
          NETWORK_CAPS=${{ inputs.network }}
          echo "NETWORK=${NETWORK_CAPS,,}" >> $GITHUB_ENV

      # Setup gcloud CLI
      - name: Authenticate to Google Cloud
        id: auth
        uses: google-github-actions/auth@v0.8.0
        with:
          retries: '3'
          workload_identity_provider: 'projects/143793276228/locations/global/workloadIdentityPools/github-actions/providers/github-oidc'
          service_account: 'github-service-account@zealous-zebra.iam.gserviceaccount.com'
          token_format: 'access_token'

      # Find a cached state disk for this job, matching all of:
      # - disk cached state (lwd_state_dir/zebra_state_dir or disk_prefix) - zebrad-cache or lwd-cache
      # - state version (from the source code) - v{N}
      # - network (network) - mainnet or testnet
      # - disk target height kind (disk_suffix) - checkpoint or tip
      #
      # If the test needs a lightwalletd state (needs_lwd_state) set the variable DISK_PREFIX accordingly
      # - To ${{ inputs.lwd_state_dir }} if needed
      # - To ${{ inputs.zebra_state_dir || inputs.disk_prefix }} if not
      #
      # If there are multiple disks:
      # - prefer images generated from this branch and commit, then
      # - if prefer_main_cached_state is true, prefer images from the `main` branch, then
      # - use images from any other branch.
      # Within each of these categories:
      # - prefer newer images to older images
      #
      # The step fails early if the state version can't be read from the source code.
      # Otherwise the image search would silently use an empty version, match nothing,
      # and report a misleading "No cached state disk available" error.
      #
      # Passes the disk name to subsequent steps using $CACHED_DISK_NAME env variable
      # Passes the state version to subsequent steps using $STATE_VERSION env variable
      - name: Find ${{ inputs.test_id }} cached state disk
        id: get-disk-name
        run: |
          LOCAL_STATE_VERSION=$(grep -oE "DATABASE_FORMAT_VERSION: .* [0-9]+" "$GITHUB_WORKSPACE/zebra-state/src/constants.rs" | grep -oE "[0-9]+" | tail -n1)
          echo "STATE_VERSION: $LOCAL_STATE_VERSION"

          if [[ -z "$LOCAL_STATE_VERSION" ]]; then
              echo "Could not find DATABASE_FORMAT_VERSION in zebra-state/src/constants.rs"
              exit 1
          fi

          if [[ "${{ inputs.needs_lwd_state }}" == "true" ]]; then
              DISK_PREFIX=${{ inputs.lwd_state_dir }}
          else
              DISK_PREFIX=${{ inputs.zebra_state_dir || inputs.disk_prefix }}
          fi

          # Try to find an image generated from a previous step or run of this commit.
          # Fields are listed in the "Create image from state disk" step.
          #
          # We can't match the full branch name here,
          # because it might have been shortened for the image.
          #
          # The probability of two matching short commit hashes within the same month is very low.
          COMMIT_DISK_PREFIX="${DISK_PREFIX}-.+-${{ env.GITHUB_SHA_SHORT }}-v${LOCAL_STATE_VERSION}-${NETWORK}-${{ inputs.disk_suffix }}"
          COMMIT_CACHED_DISK_NAME=$(gcloud compute images list --filter="name~${COMMIT_DISK_PREFIX}" --format="value(NAME)" --sort-by=~creationTimestamp --limit=1)
          echo "${GITHUB_REF_SLUG_URL}-${{ env.GITHUB_SHA_SHORT }} Disk: $COMMIT_CACHED_DISK_NAME"
          if [[ -n "$COMMIT_CACHED_DISK_NAME" ]]; then
              echo "Description: $(gcloud compute images describe "$COMMIT_CACHED_DISK_NAME" --format='value(DESCRIPTION)')"
          fi

          # Try to find an image generated from the main branch
          MAIN_CACHED_DISK_NAME=$(gcloud compute images list --filter="name~${DISK_PREFIX}-main-[0-9a-f]+-v${LOCAL_STATE_VERSION}-${NETWORK}-${{ inputs.disk_suffix }}" --format="value(NAME)" --sort-by=~creationTimestamp --limit=1)
          echo "main Disk: $MAIN_CACHED_DISK_NAME"
          if [[ -n "$MAIN_CACHED_DISK_NAME" ]]; then
              echo "Description: $(gcloud compute images describe "$MAIN_CACHED_DISK_NAME" --format='value(DESCRIPTION)')"
          fi

          # Try to find an image generated from any other branch
          ANY_CACHED_DISK_NAME=$(gcloud compute images list --filter="name~${DISK_PREFIX}-.+-[0-9a-f]+-v${LOCAL_STATE_VERSION}-${NETWORK}-${{ inputs.disk_suffix }}" --format="value(NAME)" --sort-by=~creationTimestamp --limit=1)
          echo "any branch Disk: $ANY_CACHED_DISK_NAME"
          if [[ -n "$ANY_CACHED_DISK_NAME" ]]; then
              echo "Description: $(gcloud compute images describe "$ANY_CACHED_DISK_NAME" --format='value(DESCRIPTION)')"
          fi

          # Select a cached disk based on the job settings
          CACHED_DISK_NAME="$COMMIT_CACHED_DISK_NAME"
          if [[ -z "$CACHED_DISK_NAME" ]] && [[ "${{ inputs.prefer_main_cached_state }}" == "true" ]]; then
              echo "Preferring main branch cached state to other branches..."
              CACHED_DISK_NAME="$MAIN_CACHED_DISK_NAME"
          fi
          if [[ -z "$CACHED_DISK_NAME" ]]; then
              CACHED_DISK_NAME="$ANY_CACHED_DISK_NAME"
          fi

          if [[ -z "$CACHED_DISK_NAME" ]]; then
              echo "No cached state disk available"
              echo "Expected ${COMMIT_DISK_PREFIX}"
              echo "Also searched for cached disks from other branches"
              echo "Cached state test jobs must depend on the cached state rebuild job"
              exit 1
          fi

          echo "Selected Disk: $CACHED_DISK_NAME"

          echo "STATE_VERSION=$LOCAL_STATE_VERSION" >> $GITHUB_ENV
          echo "CACHED_DISK_NAME=$CACHED_DISK_NAME" >> $GITHUB_ENV

      # Create a Compute Engine virtual machine and attach a cached state disk using the
      # $CACHED_DISK_NAME variable as the source image to populate the disk cached state
      #
      # The step then waits 60 seconds (presumably to let the instance finish booting
      # before the next step connects over SSH).
      - name: Create ${{ inputs.test_id }} GCP compute instance
        id: create-instance
        run: |
          gcloud compute instances create-with-container "${{ inputs.test_id }}-${{ env.GITHUB_REF_SLUG_URL }}-${{ env.GITHUB_SHA_SHORT }}" \
          --boot-disk-size 200GB \
          --boot-disk-type pd-ssd \
          --create-disk image=${{ env.CACHED_DISK_NAME }},name="${{ inputs.test_id }}-${{ env.GITHUB_SHA_SHORT }}",device-name="${{ inputs.test_id }}-${{ env.GITHUB_SHA_SHORT }}",size=200GB,type=pd-ssd \
          --container-image debian:buster \
          --container-restart-policy=never \
          --machine-type ${{ env.MACHINE_TYPE }} \
          --scopes cloud-platform \
          --metadata=google-monitoring-enabled=true,google-logging-enabled=true \
          --tags ${{ inputs.app_name }} \
          --zone ${{ env.ZONE }}
          sleep 60

      # Create a docker volume with the selected cached state.
      #
      # SSH into the just created VM, expand the partition and filesystem to fill the entire disk,
      # then create a docker volume with the recently attached disk.
      # (The cached state and disk are usually the same size,
      # but the cached state can be smaller if we just increased the disk size.)
      #
      # The filesystem is checked with `e2fsck` before `resize2fs` runs,
      # and the volume is only created if both commands succeed.
      - name: Create ${{ inputs.test_id }} Docker volume
        run: |
          gcloud compute ssh \
          ${{ inputs.test_id }}-${{ env.GITHUB_REF_SLUG_URL }}-${{ env.GITHUB_SHA_SHORT }} \
          --zone ${{ env.ZONE }} \
          --quiet \
          --ssh-flag="-o ServerAliveInterval=5" \
          --command \
          "\
          sudo e2fsck -v -f -p /dev/sdb \
          && \
          sudo resize2fs -p /dev/sdb \
          && \
          docker volume create --driver local --opt type=ext4 --opt device=/dev/sdb \
          ${{ inputs.test_id }}-${{ env.GITHUB_SHA_SHORT }} \
          "

  # launch the test, if it uses cached state
  launch-with-cached-state:
    name: Launch ${{ inputs.test_id }} test
    needs: [ setup-with-cached-state ]
    # If creating the Google Cloud instance fails, we don't want to launch another docker instance.
    #
    # The `needs_zebra_state` check repeats the condition of the setup job,
    # so this job is also skipped for tests that don't use a cached state.
    if: ${{ !cancelled() && !failure() && inputs.needs_zebra_state }}
    runs-on: ubuntu-latest
    permissions:
      contents: 'read'
      id-token: 'write'
    steps:
      - uses: actions/checkout@v3.0.2
        with:
          persist-credentials: false
          fetch-depth: '2'

      # The instance, volume, and image tag names below are built from
      # GITHUB_REF_SLUG_URL and GITHUB_SHA_SHORT (presumably exported by this action).
      - name: Inject slug/short variables
        uses: rlespinasse/github-slug-action@v4
        with:
          short-length: 7

      # Exports NETWORK (the lower-cased `network` input) to later steps through $GITHUB_ENV.
      # NOTE(review): none of the remaining steps in this job read NETWORK.
      - name: Downcase network name for disks
        run: |
          NETWORK_CAPS=${{ inputs.network }}
          echo "NETWORK=${NETWORK_CAPS,,}" >> $GITHUB_ENV

      # Setup gcloud CLI
      - name: Authenticate to Google Cloud
        id: auth
        uses: google-github-actions/auth@v0.8.0
        with:
          retries: '3'
          workload_identity_provider: 'projects/143793276228/locations/global/workloadIdentityPools/github-actions/providers/github-oidc'
          service_account: 'github-service-account@zealous-zebra.iam.gserviceaccount.com'
          token_format: 'access_token'

      # Launch the test with the previously created Zebra-only cached state.
      # Each test runs one of the "Launch test" steps, and skips the other.
      #
      # SSH into the just created VM, and create a Docker container to run the incoming test
      # from ${{ inputs.test_id }}, then mount the docker volume created in the previous job.
      #
      # The disk mounted in the VM is located at /dev/sdb, we mount the root `/` of this disk to the docker
      # container in one path:
      # - /var/cache/zebrad-cache -> ${{ inputs.root_state_path }}/${{ inputs.zebra_state_dir }} -> $ZEBRA_CACHED_STATE_DIR
      #
      # This path must match the variable used by the tests in Rust, which is also set in
      # `continous-integration-docker.yml` to be able to run these tests.
      #
      # Although we're mounting the disk root, Zebra will only respect the values from
      # $ZEBRA_CACHED_STATE_DIR. The inputs like ${{ inputs.zebra_state_dir }} are only used
      # to match that variable's path.
      - name: Launch ${{ inputs.test_id }} test
        # This step only runs for tests that just read or write a Zebra state.
        #
        # lightwalletd-full-sync reads Zebra and writes lwd, so it is handled specially.
        # TODO: we should find a better logic for these use cases
        #
        # This condition is the exact opposite of the condition on the other "Launch test" step,
        # because the job only runs when `needs_zebra_state` is true.
        if: ${{ (inputs.needs_zebra_state && !inputs.needs_lwd_state) && inputs.test_id != 'lwd-full-sync' }}
        run: |
          gcloud compute ssh \
          ${{ inputs.test_id }}-${{ env.GITHUB_REF_SLUG_URL }}-${{ env.GITHUB_SHA_SHORT }} \
          --zone ${{ env.ZONE }} \
          --quiet \
          --ssh-flag="-o ServerAliveInterval=5" \
          --command \
          "\
          docker run \
          --name ${{ inputs.test_id }} \
          --tty \
          --detach \
          ${{ inputs.test_variables }} \
          --mount type=volume,src=${{ inputs.test_id }}-${{ env.GITHUB_SHA_SHORT }},dst=${{ inputs.root_state_path }}/${{ inputs.zebra_state_dir }} \
          ${{ env.GAR_BASE }}/${{ env.IMAGE_NAME }}:sha-${{ env.GITHUB_SHA_SHORT }} \
          "

      # Launch the test with the previously created Lightwalletd and Zebra cached state.
      # Each test runs one of the "Launch test" steps, and skips the other.
      #
      # SSH into the just created VM, and create a Docker container to run the incoming test
      # from ${{ inputs.test_id }}, then mount the docker volume created in the previous job.
      #
      # In this step we're using the same disk for simplicity, as mounting multiple disks to the
      # VM and to the container might require more steps in this workflow, and additional
      # considerations.
      #
      # The disk mounted in the VM is located at /dev/sdb, we mount the root `/` of this disk to the docker
      # container in two different paths:
      # - /var/cache/zebrad-cache -> ${{ inputs.root_state_path }}/${{ inputs.zebra_state_dir }} -> $ZEBRA_CACHED_STATE_DIR
      # - /var/cache/lwd-cache -> ${{ inputs.root_state_path }}/${{ inputs.lwd_state_dir }} -> $LIGHTWALLETD_DATA_DIR
      #
      # This doesn't cause any path conflicts, because Zebra and lightwalletd create different
      # subdirectories for their data. (But Zebra, lightwalletd, and the test harness must not
      # delete the whole cache directory.)
      #
      # These paths must match the variables used by the tests in Rust, which are also set in
      # `continous-integration-docker.yml` to be able to run these tests.
      #
      # Although we're mounting the disk root to both directories, Zebra and Lightwalletd
      # will only respect the values from $ZEBRA_CACHED_STATE_DIR and $LIGHTWALLETD_DATA_DIR,
      # the inputs like ${{ inputs.lwd_state_dir }} are only used to match those variables' paths.
      - name: Launch ${{ inputs.test_id }} test
        # This step only runs for tests that read or write Lightwalletd and Zebra states.
        #
        # lightwalletd-full-sync reads Zebra and writes lwd, so it is handled specially.
        # TODO: we should find a better logic for these use cases
        if: ${{ (inputs.needs_zebra_state && inputs.needs_lwd_state) || inputs.test_id == 'lwd-full-sync' }}
        run: |
          gcloud compute ssh \
          ${{ inputs.test_id }}-${{ env.GITHUB_REF_SLUG_URL }}-${{ env.GITHUB_SHA_SHORT }} \
          --zone ${{ env.ZONE }} \
          --quiet \
          --ssh-flag="-o ServerAliveInterval=5" \
          --command \
          "\
          docker run \
          --name ${{ inputs.test_id }} \
          --tty \
          --detach \
          ${{ inputs.test_variables }} \
          --mount type=volume,src=${{ inputs.test_id }}-${{ env.GITHUB_SHA_SHORT }},dst=${{ inputs.root_state_path }}/${{ inputs.zebra_state_dir }} \
          --mount type=volume,src=${{ inputs.test_id }}-${{ env.GITHUB_SHA_SHORT }},dst=${{ inputs.root_state_path }}/${{ inputs.lwd_state_dir }} \
          ${{ env.GAR_BASE }}/${{ env.IMAGE_NAME }}:sha-${{ env.GITHUB_SHA_SHORT }} \
          "


  # follow the logs of the test we just launched, up to Sapling activation (or the test finishing)
  logs-sprout:
    name: Log ${{ inputs.test_id }} test (sprout)
    # We run exactly one of without-cached-state or with-cached-state, and we always skip the other one.
    needs: [ launch-with-cached-state, launch-without-cached-state ]
    # If the previous job fails, we still want to show the logs.
    if: ${{ !cancelled() }}
    runs-on: ubuntu-latest
    permissions:
      contents: 'read'
      id-token: 'write'
    steps:
      - uses: actions/checkout@v3.0.2
        with:
          persist-credentials: false
          fetch-depth: '2'

      # The instance name below is built from GITHUB_REF_SLUG_URL and GITHUB_SHA_SHORT
      # (presumably exported by this action).
      - name: Inject slug/short variables
        uses: rlespinasse/github-slug-action@v4
        with:
          short-length: 7

      # Exports NETWORK (the lower-cased `network` input) to later steps through $GITHUB_ENV.
      # NOTE(review): none of the remaining steps in this job read NETWORK.
      - name: Downcase network name for disks
        run: |
          NETWORK_CAPS=${{ inputs.network }}
          echo "NETWORK=${NETWORK_CAPS,,}" >> $GITHUB_ENV

      # Setup gcloud CLI
      - name: Authenticate to Google Cloud
        id: auth
        uses: google-github-actions/auth@v0.8.0
        with:
          retries: '3'
          workload_identity_provider: 'projects/143793276228/locations/global/workloadIdentityPools/github-actions/providers/github-oidc'
          service_account: 'github-service-account@zealous-zebra.iam.gserviceaccount.com'
          token_format: 'access_token'

      # Show all the logs since the container launched,
      # following until Sapling activation (or the test finishes).
      #
      # The log pipeline ignores the exit status of `docker logs`.
      # It also ignores the expected 'broken pipe' error from `tee`,
      # which happens when `grep` finds a matching output and moves on to the next job.
      #
      # `tee` copies every log line to stderr, so the whole log is shown in the job output,
      # while `grep --max-count=1` ends the step at the first matching line.
      # The patterns for later network upgrades are also matched, so the step still ends
      # if the test has already synced past Sapling.
      #
      # Errors in the tests are caught by the final test status job.
      - name: Show logs for ${{ inputs.test_id }} test (sprout)
        run: |
          gcloud compute ssh \
          ${{ inputs.test_id }}-${{ env.GITHUB_REF_SLUG_URL }}-${{ env.GITHUB_SHA_SHORT }} \
          --zone ${{ env.ZONE }} \
          --quiet \
          --ssh-flag="-o ServerAliveInterval=5" \
          --command \
          "\
          docker logs \
          --tail all \
          --follow \
          ${{ inputs.test_id }} | \
          tee --output-error=exit /dev/stderr | \
          grep --max-count=1 --extended-regexp --color=always \
          -e 'estimated progress.*network_upgrade.*=.*Sapling' \
          -e 'estimated progress.*network_upgrade.*=.*Blossom' \
          -e 'estimated progress.*network_upgrade.*=.*Heartwood' \
          -e 'estimated progress.*network_upgrade.*=.*Canopy' \
          -e 'estimated progress.*network_upgrade.*=.*Nu5' \
          -e 'test result:.*finished in' \
          "

  # follow the logs of the test we just launched, up to Canopy activation (or the test finishing)
  logs-heartwood:
    name: Log ${{ inputs.test_id }} test (heartwood)
    needs: [ logs-sprout ]
    # If the previous job fails, we still want to show the logs.
    if: ${{ !cancelled() }}
    runs-on: ubuntu-latest
    permissions:
      contents: 'read'
      id-token: 'write'
    steps:
      - uses: actions/checkout@v3.0.2
        with:
          persist-credentials: false
          fetch-depth: '2'

      # The instance name below is built from GITHUB_REF_SLUG_URL and GITHUB_SHA_SHORT
      # (presumably exported by this action).
      - name: Inject slug/short variables
        uses: rlespinasse/github-slug-action@v4
        with:
          short-length: 7

      - name: Downcase network name for disks
        run: |
          NETWORK_CAPS=${{ inputs.network }}
          echo "NETWORK=${NETWORK_CAPS,,}" >> $GITHUB_ENV

      # Setup gcloud CLI
      - name: Authenticate to Google Cloud
        id: auth
        uses: google-github-actions/auth@v0.8.0
        with:
          retries: '3'
          workload_identity_provider: 'projects/143793276228/locations/global/workloadIdentityPools/github-actions/providers/github-oidc'
          service_account: 'github-service-account@zealous-zebra.iam.gserviceaccount.com'
          token_format: 'access_token'

      # Show recent logs, following until Canopy activation (or the test finishes)
      #
      # Only the last EXTRA_LOG_LINES lines are replayed before following the log,
      # because the earlier lines were already shown by the previous log job.
      #
      # `tee` copies every log line to stderr, so it is shown in the job output,
      # while `grep --max-count=1` ends the step at the first matching line.
      # The pattern for the later network upgrade is also matched, so the step still ends
      # if the test has already synced past Canopy.
      - name: Show logs for ${{ inputs.test_id }} test (heartwood)
        run: |
          gcloud compute ssh \
          ${{ inputs.test_id }}-${{ env.GITHUB_REF_SLUG_URL }}-${{ env.GITHUB_SHA_SHORT }} \
          --zone ${{ env.ZONE }} \
          --quiet \
          --ssh-flag="-o ServerAliveInterval=5" \
          --command \
          "\
          docker logs \
          --tail ${{ env.EXTRA_LOG_LINES }} \
          --follow \
          ${{ inputs.test_id }} | \
          tee --output-error=exit /dev/stderr | \
          grep --max-count=1 --extended-regexp --color=always \
          -e 'estimated progress.*network_upgrade.*=.*Canopy' \
          -e 'estimated progress.*network_upgrade.*=.*Nu5' \
          -e 'test result:.*finished in' \
          "

  # follow the logs of the test we just launched, up to NU5 activation (or the test finishing)
  logs-canopy:
    name: Log ${{ inputs.test_id }} test (canopy)
    needs: [ logs-heartwood ]
    # If the previous job fails, we still want to show the logs.
    if: ${{ !cancelled() }}
    runs-on: ubuntu-latest
    permissions:
      contents: 'read'
      id-token: 'write'
    steps:
      - uses: actions/checkout@v3.0.2
        with:
          persist-credentials: false
          fetch-depth: '2'

      # The instance name below is built from GITHUB_REF_SLUG_URL and GITHUB_SHA_SHORT
      # (presumably exported by this action).
      - name: Inject slug/short variables
        uses: rlespinasse/github-slug-action@v4
        with:
          short-length: 7

      - name: Downcase network name for disks
        run: |
          NETWORK_CAPS=${{ inputs.network }}
          echo "NETWORK=${NETWORK_CAPS,,}" >> $GITHUB_ENV

      # Setup gcloud CLI
      - name: Authenticate to Google Cloud
        id: auth
        uses: google-github-actions/auth@v0.8.0
        with:
          retries: '3'
          workload_identity_provider: 'projects/143793276228/locations/global/workloadIdentityPools/github-actions/providers/github-oidc'
          service_account: 'github-service-account@zealous-zebra.iam.gserviceaccount.com'
          token_format: 'access_token'

      # Show recent logs, following until NU5 activation (or the test finishes)
      #
      # Only the last EXTRA_LOG_LINES lines are replayed before following the log,
      # because the earlier lines were already shown by the previous log job.
      #
      # `tee` copies every log line to stderr, so it is shown in the job output,
      # while `grep --max-count=1` ends the step at the first matching line.
      - name: Show logs for ${{ inputs.test_id }} test (canopy)
        run: |
          gcloud compute ssh \
          ${{ inputs.test_id }}-${{ env.GITHUB_REF_SLUG_URL }}-${{ env.GITHUB_SHA_SHORT }} \
          --zone ${{ env.ZONE }} \
          --quiet \
          --ssh-flag="-o ServerAliveInterval=5" \
          --command \
          "\
          docker logs \
          --tail ${{ env.EXTRA_LOG_LINES }} \
          --follow \
          ${{ inputs.test_id }} | \
          tee --output-error=exit /dev/stderr | \
          grep --max-count=1 --extended-regexp --color=always \
          -e 'estimated progress.*network_upgrade.*=.*Nu5' \
          -e 'test result:.*finished in' \
          "

  # follow the logs of the test we just launched, up to block 1,740,000 or later
  # (or the test finishing)
  #
  # We chose this height because it was about 5 hours into the NU5 sync, at the end of July 2022.
  logs-1740k:
    name: Log ${{ inputs.test_id }} test (1740k)
    needs: [ logs-canopy ]
    # If the previous job fails, we still want to show the logs.
    if: ${{ !cancelled() }}
    runs-on: ubuntu-latest
    permissions:
      contents: 'read'
      id-token: 'write'
    steps:
      - uses: actions/checkout@v3.0.2
        with:
          persist-credentials: false
          fetch-depth: '2'

      # The instance name below is built from GITHUB_REF_SLUG_URL and GITHUB_SHA_SHORT
      # (presumably exported by this action).
      - name: Inject slug/short variables
        uses: rlespinasse/github-slug-action@v4
        with:
          short-length: 7

      - name: Downcase network name for disks
        run: |
          NETWORK_CAPS=${{ inputs.network }}
          echo "NETWORK=${NETWORK_CAPS,,}" >> $GITHUB_ENV

      # Setup gcloud CLI
      - name: Authenticate to Google Cloud
        id: auth
        uses: google-github-actions/auth@v0.8.0
        with:
          retries: '3'
          workload_identity_provider: 'projects/143793276228/locations/global/workloadIdentityPools/github-actions/providers/github-oidc'
          service_account: 'github-service-account@zealous-zebra.iam.gserviceaccount.com'
          token_format: 'access_token'

      # Show recent logs, following until block 1,740,000 (or the test finishes)
      #
      # Only the last EXTRA_LOG_LINES lines are replayed before following the log,
      # because the earlier lines were already shown by the previous log job.
      #
      # The three height patterns match 7-digit heights in the ranges
      # 1,740,000-1,799,999, 1,800,000-1,999,999, and 2,000,000-2,999,999,
      # so the step also ends if the test has already synced past block 1,740,000.
      - name: Show logs for ${{ inputs.test_id }} test (1740k)
        run: |
          gcloud compute ssh \
          ${{ inputs.test_id }}-${{ env.GITHUB_REF_SLUG_URL }}-${{ env.GITHUB_SHA_SHORT }} \
          --zone ${{ env.ZONE }} \
          --quiet \
          --ssh-flag="-o ServerAliveInterval=5" \
          --command \
          "\
          docker logs \
          --tail ${{ env.EXTRA_LOG_LINES }} \
          --follow \
          ${{ inputs.test_id }} | \
          tee --output-error=exit /dev/stderr | \
          grep --max-count=1 --extended-regexp --color=always \
          -e 'estimated progress.*current_height.*=.*17[4-9][0-9][0-9][0-9][0-9].*remaining_sync_blocks' \
          -e 'estimated progress.*current_height.*=.*1[8-9][0-9][0-9][0-9][0-9][0-9].*remaining_sync_blocks' \
          -e 'estimated progress.*current_height.*=.*2[0-9][0-9][0-9][0-9][0-9][0-9].*remaining_sync_blocks' \
          -e 'test result:.*finished in' \
          "

  # follow the logs of the test we just launched, up to block 1,760,000 or later
  # (or the test finishing)
  #
  # We chose this height because it was about 9 hours into the NU5 sync, at the end of August 2022.
  logs-1760k:
    name: Log ${{ inputs.test_id }} test (1760k)
    needs: [ logs-1740k ]
    # If the previous job fails, we still want to show the logs.
    if: ${{ !cancelled() }}
    runs-on: ubuntu-latest
    permissions:
      contents: 'read'
      id-token: 'write'
    steps:
      - uses: actions/checkout@v3.0.2
        with:
          persist-credentials: false
          fetch-depth: '2'

      # The instance name below is built from GITHUB_REF_SLUG_URL and GITHUB_SHA_SHORT
      # (presumably exported by this action).
      - name: Inject slug/short variables
        uses: rlespinasse/github-slug-action@v4
        with:
          short-length: 7

      - name: Downcase network name for disks
        run: |
          NETWORK_CAPS=${{ inputs.network }}
          echo "NETWORK=${NETWORK_CAPS,,}" >> $GITHUB_ENV

      # Setup gcloud CLI
      - name: Authenticate to Google Cloud
        id: auth
        uses: google-github-actions/auth@v0.8.0
        with:
          retries: '3'
          workload_identity_provider: 'projects/143793276228/locations/global/workloadIdentityPools/github-actions/providers/github-oidc'
          service_account: 'github-service-account@zealous-zebra.iam.gserviceaccount.com'
          token_format: 'access_token'

      # Show recent logs, following until block 1,760,000 (or the test finishes)
      #
      # Only the last EXTRA_LOG_LINES lines are replayed before following the log,
      # because the earlier lines were already shown by the previous log job.
      #
      # The three height patterns match 7-digit heights in the ranges
      # 1,760,000-1,799,999, 1,800,000-1,999,999, and 2,000,000-2,999,999,
      # so the step also ends if the test has already synced past block 1,760,000.
      - name: Show logs for ${{ inputs.test_id }} test (1760k)
        run: |
          gcloud compute ssh \
          ${{ inputs.test_id }}-${{ env.GITHUB_REF_SLUG_URL }}-${{ env.GITHUB_SHA_SHORT }} \
          --zone ${{ env.ZONE }} \
          --quiet \
          --ssh-flag="-o ServerAliveInterval=5" \
          --command \
          "\
          docker logs \
          --tail ${{ env.EXTRA_LOG_LINES }} \
          --follow \
          ${{ inputs.test_id }} | \
          tee --output-error=exit /dev/stderr | \
          grep --max-count=1 --extended-regexp --color=always \
          -e 'estimated progress.*current_height.*=.*17[6-9][0-9][0-9][0-9][0-9].*remaining_sync_blocks' \
          -e 'estimated progress.*current_height.*=.*1[8-9][0-9][0-9][0-9][0-9][0-9].*remaining_sync_blocks' \
          -e 'estimated progress.*current_height.*=.*2[0-9][0-9][0-9][0-9][0-9][0-9].*remaining_sync_blocks' \
          -e 'test result:.*finished in' \
          "

  # follow the logs of the test we just launched, up to the last checkpoint (or the test finishing)
  logs-checkpoint:
    name: Log ${{ inputs.test_id }} test (checkpoint)
    runs-on: ubuntu-latest
    needs:
      - logs-1760k
    # Keep showing logs even when the previous log job failed; only cancellation stops this job.
    if: ${{ !cancelled() }}
    permissions:
      id-token: write
      contents: read
    steps:
      - uses: actions/checkout@v3.0.2
        with:
          fetch-depth: 2
          persist-credentials: false

      - name: Inject slug/short variables
        uses: rlespinasse/github-slug-action@v4
        with:
          short-length: 7

      # Export the lowercase form of the network input as $NETWORK for later steps in this job
      - name: Downcase network name for disks
        run: |
          network_input=${{ inputs.network }}
          echo "NETWORK=${network_input,,}" >> "$GITHUB_ENV"

      # Authenticate first, so the `gcloud` command in the next step can run
      - name: Authenticate to Google Cloud
        uses: google-github-actions/auth@v0.8.0
        id: auth
        with:
          token_format: access_token
          service_account: 'github-service-account@zealous-zebra.iam.gserviceaccount.com'
          workload_identity_provider: 'projects/143793276228/locations/global/workloadIdentityPools/github-actions/providers/github-oidc'
          retries: 3

      # Stream the last $EXTRA_LOG_LINES lines of the container log and then follow it,
      # stopping at the first "verified final checkpoint" line, or the final test result line.
      #
      # TODO: when doing obtain/extend tips, log the verifier in use, and check for full verification here
      - name: Show logs for ${{ inputs.test_id }} test (checkpoint)
        run: |
          gcloud compute ssh \
          ${{ inputs.test_id }}-${{ env.GITHUB_REF_SLUG_URL }}-${{ env.GITHUB_SHA_SHORT }} \
          --zone ${{ env.ZONE }} --quiet --ssh-flag="-o ServerAliveInterval=5" \
          --command "\
          docker logs --tail ${{ env.EXTRA_LOG_LINES }} --follow ${{ inputs.test_id }} | \
          tee --output-error=exit /dev/stderr | \
          grep --max-count=1 --extended-regexp --color=always \
          -e 'verified final checkpoint' \
          -e 'test result:.*finished in' \
          "

  # follow the logs of the test we just launched, until it finishes
  logs-end:
    name: Log ${{ inputs.test_id }} test (end)
    runs-on: ubuntu-latest
    needs:
      - logs-checkpoint
    # Keep showing logs even when the previous log job failed; only cancellation stops this job.
    if: ${{ !cancelled() }}
    permissions:
      id-token: write
      contents: read
    steps:
      - uses: actions/checkout@v3.0.2
        with:
          fetch-depth: 2
          persist-credentials: false

      - name: Inject slug/short variables
        uses: rlespinasse/github-slug-action@v4
        with:
          short-length: 7

      # Export the lowercase form of the network input as $NETWORK for later steps in this job
      - name: Downcase network name for disks
        run: |
          network_input=${{ inputs.network }}
          echo "NETWORK=${network_input,,}" >> "$GITHUB_ENV"

      # Authenticate first, so the `gcloud` command in the next step can run
      - name: Authenticate to Google Cloud
        uses: google-github-actions/auth@v0.8.0
        id: auth
        with:
          token_format: access_token
          service_account: 'github-service-account@zealous-zebra.iam.gserviceaccount.com'
          workload_identity_provider: 'projects/143793276228/locations/global/workloadIdentityPools/github-actions/providers/github-oidc'
          retries: 3

      # Stream the last $EXTRA_LOG_LINES lines of the container log and then follow it,
      # stopping at the final test result line.
      - name: Show logs for ${{ inputs.test_id }} test (end)
        run: |
          gcloud compute ssh \
          ${{ inputs.test_id }}-${{ env.GITHUB_REF_SLUG_URL }}-${{ env.GITHUB_SHA_SHORT }} \
          --zone ${{ env.ZONE }} --quiet --ssh-flag="-o ServerAliveInterval=5" \
          --command "\
          docker logs --tail ${{ env.EXTRA_LOG_LINES }} --follow ${{ inputs.test_id }} | \
          tee --output-error=exit /dev/stderr | \
          grep --max-count=1 --extended-regexp --color=always 'test result:.*finished in' \
          "


  # check the results of the test, and show all of the test logs
  test-result:
    # TODO: update the job name here, and in the branch protection rules
    name: Run ${{ inputs.test_id }} test
    needs: [ logs-end ]
    # If the previous job fails, we also want to run and fail this job,
    # so that the branch protection rule fails in Mergify and GitHub.
    # (`!cancelled()` replaces the default `success()` check, so only a cancelled run skips this job.)
    if: ${{ !cancelled() }}
    runs-on: ubuntu-latest
    permissions:
      contents: 'read'
      id-token: 'write'
    steps:
      # Shallow checkout (2 commits), with credential persistence turned off.
      - uses: actions/checkout@v3.0.2
        with:
          persist-credentials: false
          fetch-depth: '2'

      # The instance name below is built from GITHUB_REF_SLUG_URL and GITHUB_SHA_SHORT,
      # which are presumably exported by this action (7-character short SHA) - TODO confirm.
      - name: Inject slug/short variables
        uses: rlespinasse/github-slug-action@v4
        with:
          short-length: 7

      # Exports the lowercase network name as $NETWORK.
      # NOTE(review): no later step in this job reads $NETWORK.
      - name: Downcase network name for disks
        run: |
          NETWORK_CAPS=${{ inputs.network }}
          echo "NETWORK=${NETWORK_CAPS,,}" >> $GITHUB_ENV

      # Setup gcloud CLI
      - name: Authenticate to Google Cloud
        id: auth
        uses: google-github-actions/auth@v0.8.0
        with:
          retries: '3'
          workload_identity_provider: 'projects/143793276228/locations/global/workloadIdentityPools/github-actions/providers/github-oidc'
          service_account: 'github-service-account@zealous-zebra.iam.gserviceaccount.com'
          token_format: 'access_token'

      # Check that the container executed at least 1 Rust test harness test, and that all tests passed.
      # Then wait for the container to finish, and exit with the test's exit status.
      # Also shows all the test logs.
      #
      # The remote script runs under `set -e`, so if `grep` finds no passing "test result:" line,
      # the script stops there with a failure status, before the exit status lookup.
      #
      # If the container has already finished, `docker wait` should return its status.
      # But sometimes this doesn't work, so we use `docker inspect` as a fallback.
      # If both fail, $EXIT_STATUS holds an error message instead of a number,
      # so the final `exit` is given a non-numeric argument.
      #
      # `docker wait` prints the container exit status as a string, but we need to exit the `ssh` command
      # with that status.
      # (`docker wait` can also wait for multiple containers, but we only ever wait for a single container.)
      #
      # The remote command is single-quoted, so `$EXIT_STATUS` and `$( ... )` are passed unexpanded
      # to the shell on the instance.
      - name: Result of ${{ inputs.test_id }} test
        run: |
          gcloud compute ssh \
          ${{ inputs.test_id }}-${{ env.GITHUB_REF_SLUG_URL }}-${{ env.GITHUB_SHA_SHORT }} \
          --zone ${{ env.ZONE }} \
          --quiet \
          --ssh-flag="-o ServerAliveInterval=5" \
          --command=' \
          set -e;
          docker logs \
          --tail all \
          ${{ inputs.test_id }} | \
          tee --output-error=exit /dev/stderr | \
          grep --max-count=1 --extended-regexp --color=always \
          "test result: .*ok.* [1-9][0-9]* passed.*finished in"; \
          EXIT_STATUS=$( \
          docker wait ${{ inputs.test_id }} || \
          docker inspect --format "{{.State.ExitCode}}" ${{ inputs.test_id }} || \
          echo "missing container, or missing exit status for container" \
          ); \
          echo "docker exit status: $EXIT_STATUS"; \
          exit "$EXIT_STATUS" \
          '


  # create a state image from the instance's state disk, if requested by the caller
  create-state-image:
    name: Create ${{ inputs.test_id }} cached state image
    runs-on: ubuntu-latest
    needs: [ test-result ]
    # We run exactly one of without-cached-state or with-cached-state, and we always skip the other one.
    # Normally, if a job is skipped, all the jobs that depend on it are also skipped.
    # So we need to override the default success() check to make this job run.
    #
    # The image is only created when the caller sets `saves_to_disk`, and no earlier job failed.
    if: ${{ !cancelled() && !failure() && inputs.saves_to_disk }}
    permissions:
      contents: 'read'
      id-token: 'write'
    steps:
      # The checkout is needed here: a later step reads zebra-state/src/constants.rs from the workspace.
      - uses: actions/checkout@v3.0.2
        with:
          persist-credentials: false
          fetch-depth: '2'

      - name: Inject slug/short variables
        uses: rlespinasse/github-slug-action@v4
        with:
          short-length: 7

      # Performs formatting on disk name components.
      #
      # Disk images in GCP are required to be in lowercase, but the blockchain network
      # uses sentence case, so we need to downcase ${{ inputs.network }}.
      #
      # Disk image names in GCP are limited to 63 characters, so we need to limit
      # branch names to 13 characters.
      #
      # Passes ${{ inputs.network }} to subsequent steps using $NETWORK env variable.
      # Passes ${{ env.GITHUB_REF_SLUG_URL }} to subsequent steps using $SHORT_GITHUB_REF env variable.
      - name: Format network name and branch name for disks
        run: |
          NETWORK_CAPS=${{ inputs.network }}
          echo "NETWORK=${NETWORK_CAPS,,}" >> $GITHUB_ENV
          LONG_GITHUB_REF=${{ env.GITHUB_REF_SLUG_URL }}
          echo "SHORT_GITHUB_REF=${LONG_GITHUB_REF:0:13}" >> $GITHUB_ENV

      # Setup gcloud CLI
      #
      # Uses the same action version and retry count as the other jobs in this workflow,
      # so a transient authentication failure doesn't skip the image creation.
      - name: Authenticate to Google Cloud
        id: auth
        uses: google-github-actions/auth@v0.8.0
        with:
          retries: '3'
          workload_identity_provider: 'projects/143793276228/locations/global/workloadIdentityPools/github-actions/providers/github-oidc'
          service_account: 'github-service-account@zealous-zebra.iam.gserviceaccount.com'
          token_format: 'access_token'

      # Get the state version from the local constants.rs file to be used in the image creation,
      # as the state version is part of the disk image name.
      #
      # If more than one number matches, the last one is used (`tail -n1`).
      #
      # Passes the state version to subsequent steps using $STATE_VERSION env variable
      - name: Get state version from constants.rs
        run: |
          LOCAL_STATE_VERSION=$(grep -oE "DATABASE_FORMAT_VERSION: .* [0-9]+" $GITHUB_WORKSPACE/zebra-state/src/constants.rs | grep -oE "[0-9]+" | tail -n1)
          echo "STATE_VERSION: $LOCAL_STATE_VERSION"

          echo "STATE_VERSION=$LOCAL_STATE_VERSION" >> $GITHUB_ENV

      # Get the sync height from the test logs, which is later used as part of the
      # disk description.
      #
      # The regex used to grep the sync height is provided by ${{ inputs.height_grep_text }},
      # which allows the height pattern to be changed as needed for different situations,
      # or for the log output of different tests.
      #
      # Only the last 20 log lines are searched, and the last matching height is used.
      # $DOCKER_LOGS is quoted, so the log text reaches `grep` line by line,
      # without word splitting or glob expansion by the shell.
      # The trailing `|| [[ $? == 1 ]]` is there so that a "no match" exit status of 1
      # doesn't fail the step: $SYNC_HEIGHT is just left empty.
      #
      # Passes the sync height to subsequent steps using $SYNC_HEIGHT env variable
      - name: Get sync height from logs
        run: |
          SYNC_HEIGHT=""

          DOCKER_LOGS=$(\
          gcloud compute ssh \
          ${{ inputs.test_id }}-${{ env.GITHUB_REF_SLUG_URL }}-${{ env.GITHUB_SHA_SHORT }} \
          --zone ${{ env.ZONE }} \
          --quiet \
          --ssh-flag="-o ServerAliveInterval=5" \
          --command="docker logs ${{ inputs.test_id }} --tail 20")

          SYNC_HEIGHT=$(echo "$DOCKER_LOGS" | grep -oE '${{ inputs.height_grep_text }}\([0-9]+\)' | grep -oE '[0-9]+' | tail -1 || [[ $? == 1 ]])
          echo "SYNC_HEIGHT=$SYNC_HEIGHT" >> $GITHUB_ENV

      # Sets the $UPDATE_SUFFIX env var to "-u" if using cached state,
      # and the empty string otherwise.
      #
      # Also sets a unique date and time suffix $TIME_SUFFIX.
      - name: Set update and time suffixes
        run: |
          UPDATE_SUFFIX=""

          if [[ "${{ inputs.needs_zebra_state }}" == "true" ]]; then
              UPDATE_SUFFIX="-u"
          fi

          # We're going to delete old images after a month, so we don't need the year here
          TIME_SUFFIX=$(date '+%m%d%H%M%S' --utc)

          echo "UPDATE_SUFFIX=$UPDATE_SUFFIX" >> $GITHUB_ENV
          echo "TIME_SUFFIX=$TIME_SUFFIX" >> $GITHUB_ENV

      # Create an image from disk that will be used for following/other tests.
      #
      # This image can contain:
      # - Zebra cached state
      # - Zebra + lightwalletd cached state
      # Which cached state is being saved to the disk is defined by ${{ inputs.disk_prefix }}.
      #
      # The image name must be unique, and be 63 characters or less.
      # The timestamp makes images from the same commit unique,
      # as long as they don't finish in the same second.
      #
      # Force the image creation (--force), because the disk is still attached to the instance,
      # even though it is no longer being used by the container.
      - name: Create image from state disk
        run: |
          gcloud compute images create \
          "${{ inputs.disk_prefix }}-${SHORT_GITHUB_REF}-${{ env.GITHUB_SHA_SHORT }}-v${{ env.STATE_VERSION }}-${{ env.NETWORK }}-${{ inputs.disk_suffix }}${UPDATE_SUFFIX}-${TIME_SUFFIX}" \
          --force \
          --source-disk=${{ inputs.test_id }}-${{ env.GITHUB_SHA_SHORT }} \
          --source-disk-zone=${{ env.ZONE }} \
          --storage-location=us \
          --description="Created from commit ${{ env.GITHUB_SHA_SHORT }} with height ${{ env.SYNC_HEIGHT }}"

  # delete the Google Cloud instance for this test
  delete-instance:
    name: Delete ${{ inputs.test_id }} instance
    runs-on: ubuntu-latest
    needs: [ create-state-image ]
    # If a disk generation step times out (+6 hours), the previous job (creating the image) will be skipped.
    # Even if the instance continues running, no image will be created, so it's better to delete it.
    #
    # `always()` runs this job whatever the outcome of the earlier jobs,
    # and `continue-on-error` stops a failed deletion from failing the workflow run.
    if: always()
    continue-on-error: true
    permissions:
      contents: 'read'
      id-token: 'write'
    steps:
      - uses: actions/checkout@v3.0.2
        with:
          persist-credentials: false
          fetch-depth: '2'

      - name: Inject slug/short variables
        uses: rlespinasse/github-slug-action@v4
        with:
          short-length: 7

      # Setup gcloud CLI
      #
      # Uses the same action version and retry count as the other jobs in this workflow,
      # so a transient authentication failure doesn't leave the instance running.
      - name: Authenticate to Google Cloud
        id: auth
        uses: google-github-actions/auth@v0.8.0
        with:
          retries: '3'
          workload_identity_provider: 'projects/143793276228/locations/global/workloadIdentityPools/github-actions/providers/github-oidc'
          service_account: 'github-service-account@zealous-zebra.iam.gserviceaccount.com'
          token_format: 'access_token'

      # Deletes the instance that was deployed for this test and commit, after all
      # previous jobs have run, no matter what their outcome was.
      #
      # The instance is looked up by name first, so a missing instance is reported
      # rather than treated as an error. The instance's disks are deleted with it.
      - name: Delete test instance
        continue-on-error: true
        run: |
          INSTANCE=$(gcloud compute instances list --filter="${{ inputs.test_id }}-${{ env.GITHUB_REF_SLUG_URL }}-${{ env.GITHUB_SHA_SHORT }}" --format='value(NAME)')
          if [ -z "${INSTANCE}" ]; then
            echo "No instance to delete"
          else
            gcloud compute instances delete "${INSTANCE}" --zone "${{ env.ZONE }}" --delete-disks all --quiet
          fi