feat(cd): deploy instances with attached cached states (#8868)

* ref(ci): consolidate cached states workflows and scripts

We've been using multiple approaches to locate and retrieve cached states in GCP, which has made it difficult to reuse the same methods across new workflows or different scenarios.

To address this, we've streamlined the process so it can be reused in other contexts. This change supports deploying instances from both the `main` and `release` branches, simplifying future implementations and speeding up the process.

Changes:
- Use a single bash script (`gcp-get-cached-disks.sh`) to get cached state names and availability (a simplified sketch of this lookup is shown after this list)
- Move the script logic from `sub-find-cached-disks.yml` into `gcp-get-cached-disks.sh`, and adapt `sub-find-cached-disks.yml` to output disk availability and disk names
- Simplify parameter usage in `sub-deploy-integration-tests-gcp.yml`, and convert the `Find ${{ inputs.test_id }} cached state disk` step into an independent job so it can use the `sub-find-cached-disks.yml` reusable workflow
- Remove repetition in `sub-ci-integration-tests-gcp.yml`
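
A minimal sketch of the lookup consolidated into `gcp-get-cached-disks.sh`, assuming cached states are stored as GCP disk images. The variable names, filter pattern, and the `cached_disk_available` output are illustrative assumptions; only `cached_disk_name` comes from the workflow changes below:

    #!/usr/bin/env bash
    set -euo pipefail

    # Hypothetical inputs, normally passed in by the reusable workflow.
    DISK_PREFIX="${DISK_PREFIX:-zebrad-cache}"
    DISK_SUFFIX="${DISK_SUFFIX:-tip}"
    NETWORK="${NETWORK:-mainnet}"

    # Pick the most recently created image whose name matches the requested
    # prefix, network, and suffix.
    CACHED_DISK_NAME=$(gcloud compute images list \
      --filter="name~${DISK_PREFIX}-.+-${NETWORK}-.+-${DISK_SUFFIX}" \
      --format="value(name)" \
      --sort-by=~creationTimestamp \
      --limit=1)

    # Expose the disk name and its availability to the calling workflow.
    {
      echo "cached_disk_name=${CACHED_DISK_NAME}"
      echo "cached_disk_available=$([[ -n ${CACHED_DISK_NAME} ]] && echo true || echo false)"
    } >> "${GITHUB_OUTPUT}"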

* ref(tests): Use the `ZEBRA_CACHED_STATE_DIR` env var across tests

Some tests were still using a hardcoded cache directory (`/zebrad-cache`), a piece of technical debt that caused inconsistencies between disks and cached state directories.

Changes:
- Allow sync tests to use `ZEBRA_CACHED_STATE_DIR` as the cache directory when it is set
- Update `entrypoint.sh` to reflect this change (a simplified sketch is shown after this list)
- Add the `ZEBRA_CACHED_STATE_DIR` variable to the tests that were missing it in `sub-ci-integration-tests-gcp.yml`, and remove extra parameters from the calls to reusable workflows
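
A minimal sketch of the fallback behaviour in `entrypoint.sh`, assuming the previously hardcoded `/zebrad-cache` path stays as the default when the variable is unset; the `[state] cache_dir` key and the use of `ZEBRA_CONF_PATH` are illustrative, not a copy of the real entrypoint:

    # Prefer ZEBRA_CACHED_STATE_DIR, falling back to the old hardcoded path.
    : "${ZEBRA_CACHED_STATE_DIR:=/zebrad-cache}"
    mkdir -p "${ZEBRA_CACHED_STATE_DIR}"

    # Point the generated zebrad config at the chosen cache directory.
    printf '[state]\ncache_dir = "%s"\n' "${ZEBRA_CACHED_STATE_DIR}" >> "${ZEBRA_CONF_PATH}"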

* feat(cd): deploy instances with cached states

* fix(cd): allow deploying from branch

* fix(cd): add missing `CACHED_DISK_NAME` env
Gustavo Valverde 2024-09-19 14:12:09 +01:00 committed by GitHub
parent aec07f24ff
commit 8870b2c60e
1 changed file with 69 additions and 38 deletions


@@ -42,27 +42,26 @@ on:
         type: boolean
         default: false
 
-  # TODO: Temporarily disabled to reduce network load, see #6894.
-  #push:
-  #  # Skip main branch updates where Rust code and dependencies aren't modified.
-  #  branches:
-  #    - main
-  #  paths:
-  #    # code and tests
-  #    - '**/*.rs'
-  #    # hard-coded checkpoints and proptest regressions
-  #    - '**/*.txt'
-  #    # dependencies
-  #    - '**/Cargo.toml'
-  #    - '**/Cargo.lock'
-  #    # configuration files
-  #    - '.cargo/config.toml'
-  #    - '**/clippy.toml'
-  #    # workflow definitions
-  #    - 'docker/**'
-  #    - '.dockerignore'
-  #    - '.github/workflows/cd-deploy-nodes-gcp.yml'
-  #    - '.github/workflows/sub-build-docker-image.yml'
+  push:
+    # Skip main branch updates where Rust code and dependencies aren't modified.
+    branches:
+      - main
+    paths:
+      # code and tests
+      - '**/*.rs'
+      # hard-coded checkpoints and proptest regressions
+      - '**/*.txt'
+      # dependencies
+      - '**/Cargo.toml'
+      - '**/Cargo.lock'
+      # configuration files
+      - '.cargo/config.toml'
+      - '**/clippy.toml'
+      # workflow definitions
+      - 'docker/**'
+      - '.dockerignore'
+      - '.github/workflows/cd-deploy-nodes-gcp.yml'
+      - '.github/workflows/sub-build-docker-image.yml'
 
   # Only runs the Docker image tests, doesn't deploy any instances
   pull_request:
@@ -176,6 +175,19 @@ jobs:
       test_variables: '-e NETWORK -e ZEBRA_CONF_PATH="zebrad/tests/common/configs/v1.0.0-rc.2.toml"'
       network: ${{ inputs.network || vars.ZCASH_NETWORK }}
 
+  # Finds a `tip` cached state disk for zebra from the main branch
+  #
+  # Passes the disk name to subsequent jobs using `cached_disk_name` output
+  #
+  get-disk-name:
+    name: Get disk name
+    uses: ./.github/workflows/sub-find-cached-disks.yml
+    with:
+      network: ${{ inputs.network || vars.ZCASH_NETWORK }}
+      disk_prefix: zebrad-cache
+      disk_suffix: tip
+      prefer_main_cached_state: true
+
   # Deploy Managed Instance Groups (MiGs) for Mainnet and Testnet,
   # with one node in the configured GCP region.
   #
@@ -196,9 +208,11 @@ jobs:
       matrix:
         network: [Mainnet, Testnet]
     name: Deploy ${{ matrix.network }} nodes
-    needs: [ build, versioning, test-configuration-file, test-zebra-conf-path ]
+    needs: [ build, versioning, test-configuration-file, test-zebra-conf-path, get-disk-name ]
     runs-on: ubuntu-latest
    timeout-minutes: 60
+    env:
+      CACHED_DISK_NAME: ${{ needs.get-disk-name.outputs.cached_disk_name }}
     permissions:
       contents: 'read'
       id-token: 'write'
@ -240,24 +254,31 @@ jobs:
# but the implementation is failing as it's requiring the disk names, contrary to what is stated in the official documentation # but the implementation is failing as it's requiring the disk names, contrary to what is stated in the official documentation
- name: Create instance template for ${{ matrix.network }} - name: Create instance template for ${{ matrix.network }}
run: | run: |
NAME="zebrad-cache-${{ env.GITHUB_HEAD_REF_SLUG_URL || env.GITHUB_REF_SLUG_URL }}-${{ env.GITHUB_SHA_SHORT }}-${NETWORK}"
DISK_PARAMS="name=${NAME},device-name=${NAME},size=400GB,type=pd-ssd"
if [ -n "${{ env.CACHED_DISK_NAME }}" ]; then
DISK_PARAMS+=",image=${{ env.CACHED_DISK_NAME }}"
else
echo "No cached disk found for ${{ matrix.network }} in main branch"
exit 1
fi
gcloud compute instance-templates create-with-container zebrad-${{ needs.versioning.outputs.major_version || env.GITHUB_REF_SLUG_URL }}-${{ env.GITHUB_SHA_SHORT }}-${NETWORK} \ gcloud compute instance-templates create-with-container zebrad-${{ needs.versioning.outputs.major_version || env.GITHUB_REF_SLUG_URL }}-${{ env.GITHUB_SHA_SHORT }}-${NETWORK} \
--boot-disk-size 300GB \ --machine-type ${{ vars.GCP_SMALL_MACHINE }} \
--boot-disk-size 50GB \
--boot-disk-type=pd-ssd \ --boot-disk-type=pd-ssd \
--image-project=cos-cloud \ --image-project=cos-cloud \
--image-family=cos-stable \ --image-family=cos-stable \
--user-output-enabled \ --network-interface=subnet=${{ vars.GCP_SUBNETWORK }} \
--metadata google-logging-enabled=true,google-logging-use-fluentbit=true,google-monitoring-enabled=true \ --create-disk="${DISK_PARAMS}" \
--container-mount-disk=mount-path='/var/cache/zebrad-cache',name=${NAME},mode=rw \
--container-stdin \ --container-stdin \
--container-tty \ --container-tty \
--container-image ${{ vars.GAR_BASE }}/zebrad@${{ needs.build.outputs.image_digest }} \ --container-image ${{ vars.GAR_BASE }}/zebrad@${{ needs.build.outputs.image_digest }} \
--container-env "NETWORK=${{ matrix.network }},LOG_FILE=${{ vars.CD_LOG_FILE }},LOG_COLOR=false,SENTRY_DSN=${{ vars.SENTRY_DSN }}" \ --container-env "NETWORK=${{ matrix.network }},LOG_FILE=${{ vars.CD_LOG_FILE }},LOG_COLOR=false,SENTRY_DSN=${{ vars.SENTRY_DSN }}" \
--create-disk=name=zebrad-cache-${{ env.GITHUB_SHA_SHORT }}-${NETWORK},device-name=zebrad-cache-${{ env.GITHUB_SHA_SHORT }}-${NETWORK},auto-delete=yes,size=300GB,type=pd-ssd,mode=rw \
--container-mount-disk=mount-path='/var/cache/zebrad-cache',name=zebrad-cache-${{ env.GITHUB_SHA_SHORT }}-${NETWORK},mode=rw \
--machine-type ${{ vars.GCP_SMALL_MACHINE }} \
--network-interface=subnet=${{ vars.GCP_SUBNETWORK }} \
--service-account ${{ vars.GCP_DEPLOYMENTS_SA }} \ --service-account ${{ vars.GCP_DEPLOYMENTS_SA }} \
--scopes cloud-platform \ --scopes cloud-platform \
--labels=app=zebrad,environment=prod,network=${NETWORK},github_ref=${{ env.GITHUB_REF_SLUG_URL }} \ --metadata google-logging-enabled=true,google-logging-use-fluentbit=true,google-monitoring-enabled=true \
--labels=app=zebrad,environment=staging,network=${NETWORK},github_ref=${{ env.GITHUB_REF_SLUG_URL }} \
--tags zebrad --tags zebrad
# Check if our destination instance group exists already # Check if our destination instance group exists already
@@ -297,9 +318,11 @@ jobs:
   # Note: this instances are not automatically replaced or deleted
   deploy-instance:
     name: Deploy single ${{ inputs.network }} instance
-    needs: [ build, test-configuration-file, test-zebra-conf-path ]
+    needs: [ build, test-configuration-file, test-zebra-conf-path, get-disk-name ]
     runs-on: ubuntu-latest
     timeout-minutes: 30
+    env:
+      CACHED_DISK_NAME: ${{ needs.get-disk-name.outputs.cached_disk_name }}
     permissions:
       contents: 'read'
       id-token: 'write'
@@ -340,22 +363,30 @@ jobs:
       # Create instance template from container image
       - name: Manual deploy of a single ${{ inputs.network }} instance running zebrad
         run: |
+          NAME="zebrad-cache-${{ env.GITHUB_HEAD_REF_SLUG_URL || env.GITHUB_REF_SLUG_URL }}-${{ env.GITHUB_SHA_SHORT }}-${NETWORK}"
+          DISK_PARAMS="name=${NAME},device-name=${NAME},size=400GB,type=pd-ssd"
+          if [ -n "${{ env.CACHED_DISK_NAME }}" ]; then
+            DISK_PARAMS+=",image=${{ env.CACHED_DISK_NAME }}"
+          else
+            echo "No cached disk found for ${{ matrix.network }} in main branch"
+            exit 1
+          fi
           gcloud compute instances create-with-container "zebrad-${{ env.GITHUB_REF_SLUG_URL }}-${{ env.GITHUB_SHA_SHORT }}-${NETWORK}" \
-          --boot-disk-size 300GB \
+          --machine-type ${{ vars.GCP_SMALL_MACHINE }} \
+          --boot-disk-size 50GB \
           --boot-disk-type=pd-ssd \
           --image-project=cos-cloud \
           --image-family=cos-stable \
-          --user-output-enabled \
-          --metadata google-logging-enabled=true,google-logging-use-fluentbit=true,google-monitoring-enabled=true \
+          --network-interface=subnet=${{ vars.GCP_SUBNETWORK }} \
+          --create-disk="${DISK_PARAMS}" \
+          --container-mount-disk=mount-path='/var/cache/zebrad-cache',name=${NAME},mode=rw \
           --container-stdin \
           --container-tty \
           --container-image ${{ vars.GAR_BASE }}/zebrad@${{ needs.build.outputs.image_digest }} \
           --container-env "NETWORK=${{ inputs.network }},LOG_FILE=${{ inputs.log_file }},LOG_COLOR=false,SENTRY_DSN=${{ vars.SENTRY_DSN }}" \
-          --create-disk=name=zebrad-cache-${{ env.GITHUB_SHA_SHORT }}-${NETWORK},device-name=zebrad-cache-${{ env.GITHUB_SHA_SHORT }}-${NETWORK},auto-delete=yes,size=300GB,type=pd-ssd,mode=rw \
-          --container-mount-disk=mount-path='/var/cache/zebrad-cache',name=zebrad-cache-${{ env.GITHUB_SHA_SHORT }}-${NETWORK},mode=rw \
-          --machine-type ${{ vars.GCP_SMALL_MACHINE }} \
-          --network-interface=subnet=${{ vars.GCP_SUBNETWORK }} \
           --service-account ${{ vars.GCP_DEPLOYMENTS_SA }} \
+          --scopes cloud-platform \
+          --metadata google-logging-enabled=true,google-monitoring-enabled=true \
           --labels=app=zebrad,environment=qa,network=${NETWORK},github_ref=${{ env.GITHUB_REF_SLUG_URL }} \
           --tags zebrad \
           --zone ${{ vars.GCP_ZONE }}