From 8227dabe56f3964f4624e45005ff2eb924c5a02a Mon Sep 17 00:00:00 2001 From: Gustavo Valverde Date: Sun, 28 Aug 2022 08:46:21 -0400 Subject: [PATCH] ci(build): deploy long running node instances on release (#4939) * feat(build): deploy long running instances on release Previous behavior: Each time we merged to main new nodes would be deployed, this is an expected behavior as we need to ensure nodes get deployed and run without issues, but this could also replace nodes very hastily. Expected behavior: We want instances which would run for a longer time, to allow us to troubleshoot issues or inspect the behavior of this instances for longer periods of time (2+ weeks) Applied solution: Deploy a versioned manage instance group (MiG) using the major version of the release semver. We just use the first part of the version to replace old instances, and change it when a major version is released to keep a segregation between new and old versions. * ci(build): allow v0 as a major version tag * fix(build): use rust conventions for versioning * fix(deploy): improve documentation and trigger on release * Update .github/workflows/continous-delivery.yml Co-authored-by: teor * fix(versioning): typo * fix(deploy): use `zebrad-v1` as the instance name, with no SHA * fix(deploy): create and update MiG must use the same name * docs(deployments): add Continuous Delivery process Co-authored-by: teor Co-authored-by: mergify[bot] <37929162+mergify[bot]@users.noreply.github.com> --- .github/workflows/build-docker-image.yml | 9 ++- .github/workflows/continous-delivery.yml | 71 ++++++++++++++++++++---- book/src/dev/continous-delivery.md | 28 ++++++++++ 3 files changed, 97 insertions(+), 11 deletions(-) create mode 100644 book/src/dev/continous-delivery.md diff --git a/.github/workflows/build-docker-image.yml b/.github/workflows/build-docker-image.yml index 9b337a1a2..665a3fee6 100644 --- a/.github/workflows/build-docker-image.yml +++ b/.github/workflows/build-docker-image.yml @@ -37,12 
+37,19 @@ on: required: false type: string default: info + outputs: + image_digest: + description: 'The image digest to be used on a caller workflow' + value: ${{ jobs.build.outputs.image_digest }} jobs: build: name: Build images timeout-minutes: 210 runs-on: ubuntu-latest + outputs: + image_digest: ${{ steps.docker_build.outputs.digest }} + image_name: ${{ fromJSON(steps.docker_build.outputs.metadata)['image.name'] }} permissions: contents: 'read' id-token: 'write' @@ -67,12 +74,12 @@ jobs: # generate Docker tags based on the following events/attributes tags: | type=schedule + type=sha type=ref,event=branch type=ref,event=pr type=semver,pattern={{version}} type=semver,pattern={{major}}.{{minor}} type=semver,pattern={{major}} - type=sha # Setup Docker Buildx to allow use of docker cache layers from GH - name: Set up Docker Buildx diff --git a/.github/workflows/continous-delivery.yml b/.github/workflows/continous-delivery.yml index 51401205f..b896d1ce2 100644 --- a/.github/workflows/continous-delivery.yml +++ b/.github/workflows/continous-delivery.yml @@ -14,6 +14,9 @@ on: push: branches: - main + release: + types: + - published env: NETWORK: Mainnet @@ -23,6 +26,36 @@ env: MACHINE_TYPE: c2-standard-4 jobs: + # If a release was made we want to extract the first part of the semver from the + # tag_name + # + # Generate the following output to pass to subsequent jobs + # - If our semver is `v1.3.0` the resulting output from this job would be `v1` + # + # Note: We just use the first part of the version to replace old instances, and change + # it when a major version is released, to keep a segregation between new and old + # versions. 
+ versioning: + name: Versioning + runs-on: ubuntu-latest + outputs: + major_version: ${{ steps.set.outputs.major_version }} + steps: + - name: Getting Zebrad Version + id: get + uses: actions/github-script@v6.1.0 + with: + result-encoding: string + script: | + return context.payload.release.tag_name.substring(0,2) + - name: Setting API Version + id: set + run: echo "::set-output name=major_version::${{ steps.get.outputs.result }}" + + # Each time this workflow is executed, a build will be triggered to create a new image + # with the corresponding tags using information from Git + # + # The image will be commonly named `zebrad:` build: uses: ./.github/workflows/build-docker-image.yml with: @@ -35,15 +68,26 @@ jobs: zebra_skip_ipv6_tests: '1' rust_log: info + # This jobs handles the deployment of a Managed Instance Group (MiG) with 2 nodes in + # the us-central1 region. Two different groups of MiGs are deployed one for pushes to + # the main branch and another for version releases of Zebra + # + # Once this workflow is triggered the previous MiG is replaced, on pushes to main its + # always replaced, and with releases its only replaced if the same major version is + # being deployed, otherwise a new major version is deployed + # + # Runs: + # - on every push/merge to the `main` branch + # - on every release, when it's published deploy-nodes: name: Deploy Mainnet nodes - needs: build + needs: [ build, versioning ] runs-on: ubuntu-latest timeout-minutes: 30 permissions: contents: 'read' id-token: 'write' - if: ${{ github.event_name == 'push' && github.ref_name == 'main' }} + if: ${{ (github.event_name == 'push' && github.ref_name == 'main') || github.event_name == 'release' }} steps: - name: Inject slug/short variables @@ -63,9 +107,9 @@ jobs: - name: Create instance template run: | - gcloud compute instance-templates create-with-container zebrad-${{ env.GITHUB_REF_SLUG_URL }}-${{ env.GITHUB_SHA_SHORT }} \ + gcloud compute instance-templates create-with-container 
zebrad-${{ needs.versioning.outputs.major_version || env.GITHUB_REF_SLUG_URL }}-${{ env.GITHUB_SHA_SHORT }} \ --boot-disk-type=pd-ssd \ - --container-image ${{ env.GAR_BASE }}/${{ env.GITHUB_REF_SLUG_URL }}:${{ env.GITHUB_SHA_SHORT }} \ + --container-image ${{ env.GAR_BASE }}/zebrad@${{ needs.build.outputs.image_digest }} \ --create-disk name=zebrad-cache-${{ env.GITHUB_SHA_SHORT }},auto-delete=yes,size=100GB,type=pd-ssd \ --container-mount-disk mount-path="/zebrad-cache",name=zebrad-cache-${{ env.GITHUB_SHA_SHORT }} \ --machine-type ${{ env.MACHINE_TYPE }} \ @@ -77,15 +121,15 @@ jobs: id: does-group-exist continue-on-error: true run: | - gcloud compute instance-groups list | grep "zebrad-${{ env.GITHUB_REF_SLUG_URL }}" | grep "${{ env.REGION }}" + gcloud compute instance-groups list | grep "zebrad-${{ needs.versioning.outputs.major_version || env.GITHUB_REF_SLUG_URL }}" | grep "${{ env.REGION }}" # Deploy new managed instance group using the new instance template - name: Create managed instance group if: steps.does-group-exist.outcome == 'failure' run: | gcloud compute instance-groups managed create \ - "zebrad-${{ env.GITHUB_REF_SLUG_URL }}" \ - --template "zebrad-${{ env.GITHUB_REF_SLUG_URL }}-${{ env.GITHUB_SHA_SHORT }}" \ + "zebrad-${{ needs.versioning.outputs.major_version || env.GITHUB_REF_SLUG_URL }}" \ + --template "zebrad-${{ needs.versioning.outputs.major_version || env.GITHUB_REF_SLUG_URL }}-${{ env.GITHUB_SHA_SHORT }}" \ --health-check zebrad-tracing-filter \ --initial-delay 30 \ --region "${{ env.REGION }}" \ @@ -96,10 +140,17 @@ jobs: if: steps.does-group-exist.outcome == 'success' run: | gcloud compute instance-groups managed rolling-action start-update \ - "zebrad-${{ env.GITHUB_REF_SLUG_URL }}" \ - --version template="zebrad-${{ env.GITHUB_REF_SLUG_URL }}-${{ env.GITHUB_SHA_SHORT }}" \ + "zebrad-${{ needs.versioning.outputs.major_version || env.GITHUB_REF_SLUG_URL }}" \ + --version template="zebrad-${{ needs.versioning.outputs.major_version || 
env.GITHUB_REF_SLUG_URL }}-${{ env.GITHUB_SHA_SHORT }}" \ --region "${{ env.REGION }}" + # This job handles the deployment of a single node (1) in the us-central1-a zone + # when an instance is required to test a specific commit + # + # Runs: + # - on request, using workflow_dispatch with regenerate-disks + # + # Note: these instances are not automatically replaced or deleted deploy-instance: name: Deploy single instance needs: build @@ -134,7 +185,7 @@ jobs: --boot-disk-type=pd-ssd \ --container-stdin \ --container-tty \ - --container-image ${{ env.GAR_BASE }}/${{ env.GITHUB_REF_SLUG_URL }}:${{ env.GITHUB_SHA_SHORT }} \ + --container-image ${{ env.GAR_BASE }}/zebrad@${{ needs.build.outputs.image_digest }} \ --create-disk name=zebrad-cache-${{ env.GITHUB_SHA_SHORT }},auto-delete=yes,size=100GB,type=pd-ssd \ --container-mount-disk mount-path='/zebrad-cache',name=zebrad-cache-${{ env.GITHUB_SHA_SHORT }} \ --machine-type ${{ env.MACHINE_TYPE }} \ diff --git a/book/src/dev/continous-delivery.md b/book/src/dev/continous-delivery.md new file mode 100644 index 000000000..e3592a145 --- /dev/null +++ b/book/src/dev/continous-delivery.md @@ -0,0 +1,28 @@ +# Zebra Continuous Delivery + +Zebra extends its continuous integration with continuous delivery: it automatically deploys all +code changes to a testing and/or pre-production environment after each PR gets merged +into the `main` branch, and on each Zebra `release`. + +## Triggers + +The continuous delivery pipeline is triggered when: + +* A PR is merged to `main` (technically, a `push` event) +* A new release is published in GitHub + +## Deployments + +On each trigger Zebra is deployed using the branch or version references as part of +the deployment naming convention. Deployments are made using [Managed Instance Groups (MIGs)](https://cloud.google.com/compute/docs/instance-groups#managed_instance_groups) +from Google Cloud Platform with 2 nodes in the us-central1 region. 
+ +**Note**: The `main` branch *MIG* is replaced each time a PR is merged to `main`, and a release *MIG* is replaced +when a release with the same major version is published. If a new major version is released, a new *MIG* is +created instead, keeping the previous major version running until it's no longer needed. + +A single instance can also be deployed on demand, when a long-lived instance +with specific changes needs to be tested on Mainnet with +the same infrastructure used for CI & CD. + +The exact process can be checked in our continuous delivery [workflow file](https://github.com/ZcashFoundation/zebra/blob/main/.github/workflows/continous-delivery.yml).