zebra/.github/workflows/test-full-sync.yml

name: Full sync test

on:
  workflow_dispatch:
    inputs:
      network:
        default: 'Mainnet'
        description: 'Network to deploy: Mainnet or Testnet'
        required: true
      checkpoint_sync:
        default: 'true'
        description: 'Configures `zebrad` to use as many checkpoints as possible'
        required: true
  pull_request:
    branches:
      - main
    paths:
      # code and tests (including full sync acceptance test changes)
      # TODO: ignore changes in test code that isn't used in the full sync test
      - '**/*.rs'
      # hard-coded checkpoints
      # TODO: ignore changes to proptest seed .txt files
      - '**/*.txt'
      # dependencies
      - '**/Cargo.toml'
      - '**/Cargo.lock'
      # workflow definitions
      - 'docker/**'
      - '.github/workflows/test-full-sync.yml'
  push:
    branches:
      - main
    paths:
      # code and tests (including full sync acceptance test changes)
      # TODO: ignore changes in test code that isn't used in the full sync test
      - '**/*.rs'
      # hard-coded checkpoints
      # TODO: ignore changes to proptest seed .txt files
      - '**/*.txt'
      # dependencies
      - '**/Cargo.toml'
      - '**/Cargo.lock'
      # workflow definitions
      - 'docker/**'
      - '.github/workflows/test-full-sync.yml'

env:
  CARGO_INCREMENTAL: '1'
  ZEBRA_SKIP_IPV6_TESTS: '1'
  RUST_BACKTRACE: full
  RUST_LIB_BACKTRACE: full
  COLORBT_SHOW_HIDDEN: '1'
  NETWORK: Mainnet
  PROJECT_ID: zealous-zebra
  GAR_BASE: us-docker.pkg.dev/zealous-zebra/zebra
  GCR_BASE: gcr.io/zealous-zebra
  REGION: us-central1
  ZONE: us-central1-a
  MACHINE_TYPE: c2d-standard-16
  IMAGE_NAME: zebrad-test

jobs:
  build:
    # TODO add `startsWith(github.head_ref, 'mergify/merge-queue/')` to the condition to
    # only run on Mergify head branches, and on manual dispatch:
    # https://docs.github.com/en/actions/using-workflows/events-that-trigger-workflows#running-your-workflow-based-on-the-head-or-base-branch-of-a-pull-request-1
    if: ${{ github.event_name == 'push' || github.event_name == 'workflow_dispatch' }}
    name: Build images
    timeout-minutes: 210
    runs-on: ubuntu-latest
    permissions:
      contents: 'read'
      id-token: 'write'
    steps:
      - uses: actions/checkout@v3.0.1
        with:
          persist-credentials: false

      - name: Inject slug/short variables
        uses: rlespinasse/github-slug-action@v4
        with:
          short-length: 7

      # Automatic tag management and OCI Image Format Specification for labels
      - name: Docker meta
        id: meta
        uses: docker/metadata-action@v3.7.0
        with:
          # list of Docker images to use as base name for tags
          images: |
            ${{ env.GAR_BASE }}/${{ env.IMAGE_NAME }}
            ${{ env.GCR_BASE }}/${{ env.GITHUB_REPOSITORY_SLUG_URL }}/${{ env.IMAGE_NAME }}
          # generate Docker tags based on the following events/attributes
          tags: |
            type=schedule
            type=ref,event=branch
            type=ref,event=pr
            type=semver,pattern={{version}}
            type=semver,pattern={{major}}.{{minor}}
            type=semver,pattern={{major}}
            type=sha

      # Setup Docker Buildx to allow use of docker cache layers from GH
      - name: Set up Docker Buildx
        id: buildx
        uses: docker/setup-buildx-action@v1

      - name: Authenticate to Google Cloud
        id: auth
        uses: google-github-actions/auth@v0.7.0
        with:
          workload_identity_provider: 'projects/143793276228/locations/global/workloadIdentityPools/github-actions/providers/github-oidc'
          service_account: 'github-service-account@zealous-zebra.iam.gserviceaccount.com'
          token_format: 'access_token'

      - name: Login to Google Artifact Registry
        uses: docker/login-action@v1.14.1
        with:
          registry: us-docker.pkg.dev
          username: oauth2accesstoken
          password: ${{ steps.auth.outputs.access_token }}

      - name: Login to Google Container Registry
        uses: docker/login-action@v1.14.1
        with:
          registry: gcr.io
          username: oauth2accesstoken
          password: ${{ steps.auth.outputs.access_token }}

      # Build and push image to Google Artifact Registry
      - name: Build & push
        id: docker_build
        uses: docker/build-push-action@v2.10.0
        with:
          target: tester
          context: .
          file: ./docker/Dockerfile
          tags: ${{ steps.meta.outputs.tags }}
          labels: ${{ steps.meta.outputs.labels }}
          build-args: |
            NETWORK=${{ github.event.inputs.network || env.NETWORK }}
            SHORT_SHA=${{ env.GITHUB_SHA_SHORT }}
            RUST_BACKTRACE=${{ env.RUST_BACKTRACE }}
            RUST_LIB_BACKTRACE=${{ env.RUST_LIB_BACKTRACE }}
            COLORBT_SHOW_HIDDEN=${{ env.COLORBT_SHOW_HIDDEN }}
            ZEBRA_SKIP_NETWORK_TESTS="1"
            CHECKPOINT_SYNC=${{ github.event.inputs.checkpoint_sync || true }}
            RUST_LOG=debug
            SENTRY_DSN=${{ secrets.SENTRY_ENDPOINT }}
          push: true
          cache-from: type=registry,ref=${{ env.GAR_BASE }}/${{ env.IMAGE_NAME }}:${{ env.GITHUB_REF_SLUG_URL }}-buildcache
          cache-to: type=registry,ref=${{ env.GAR_BASE }}/${{ env.IMAGE_NAME }}:${{ env.GITHUB_REF_SLUG_URL }}-buildcache,mode=max

  # Test that Zebra can run a full mainnet sync after a PR is approved
  test-full-sync:
    name: Test full Mainnet sync
    runs-on: ubuntu-latest
    needs: [build]
    permissions:
      contents: 'read'
      id-token: 'write'
    steps:
      - uses: actions/checkout@v3.0.1
        with:
          persist-credentials: false

      - name: Inject slug/short variables
        uses: rlespinasse/github-slug-action@v4
        with:
          short-length: 7

      - name: Downcase network name for disks
        run: |
          NETWORK_CAPS=${{ github.event.inputs.network || env.NETWORK }}
          echo "NETWORK=${NETWORK_CAPS,,}" >> $GITHUB_ENV

      # Setup gcloud CLI
      - name: Authenticate to Google Cloud
        id: auth
        uses: google-github-actions/auth@v0.7.0
        with:
          workload_identity_provider: 'projects/143793276228/locations/global/workloadIdentityPools/github-actions/providers/github-oidc'
          service_account: 'github-service-account@zealous-zebra.iam.gserviceaccount.com'
          token_format: 'access_token'

      # Check if our destination compute instance exists and delete it
      - name: Delete existing instance with same SHA
        run: |
          INSTANCE=$(gcloud compute instances list --filter=full-sync-${{ env.GITHUB_REF_SLUG_URL }}-${{ env.GITHUB_SHA_SHORT }} --format='value(NAME)')
          if [ -z "${INSTANCE}" ]; then
            echo "No instance to delete"
          else
            gcloud compute instances delete "${INSTANCE}" --zone "${{ env.ZONE }}" --delete-disks all --quiet
          fi

      # Creates Compute Engine virtual machine instance w/ disks
      - name: Create GCP compute instance
        run: |
          gcloud compute instances create-with-container "full-sync-${{ env.GITHUB_REF_SLUG_URL }}-${{ env.GITHUB_SHA_SHORT }}" \
          --boot-disk-size 100GB \
          --boot-disk-type pd-ssd \
          --container-image ${{ env.GAR_BASE }}/${{ env.IMAGE_NAME }}:sha-${{ env.GITHUB_SHA_SHORT }} \
          --container-restart-policy=never \
          --container-stdin \
          --container-tty \
          --container-env=ZEBRA_SKIP_IPV6_TESTS=1,TEST_FULL_SYNC=1,ZEBRA_FORCE_USE_COLOR=1,FULL_SYNC_MAINNET_TIMEOUT_MINUTES=600 \
          --machine-type ${{ env.MACHINE_TYPE }} \
          --scopes cloud-platform \
          --metadata=google-monitoring-enabled=true,google-logging-enabled=true \
          --tags zebrad \
          --zone "${{ env.ZONE }}"

      # TODO: this approach is very messy, but getting the just created container name is very error prone and GCP doesn't have a workaround for this without requiring a TTY
      # This TODO relates to the following issues:
      # https://github.com/actions/runner/issues/241
      # https://www.googlecloudcommunity.com/gc/Infrastructure-Compute-Storage/SSH-into-Compute-Container-not-easily-possible/td-p/170915
      #
      # Deploying a zebra container might take more than 30 seconds to completely start, so we're adding a timer at the end
      # of this step before starting the following ones
      - name: Get container name from logs
        run: |
          INSTANCE_ID=$(gcloud compute instances describe full-sync-${{ env.GITHUB_REF_SLUG_URL }}-${{ env.GITHUB_SHA_SHORT }} --zone ${{ env.ZONE }} --format='value(id)')
          echo "INSTANCE_ID=$INSTANCE_ID" >> $GITHUB_ENV

          CONTAINER_NAME=""
          while [[ ${CONTAINER_NAME} != *"full-sync-${{ env.GITHUB_REF_SLUG_URL }}-${{ env.GITHUB_SHA_SHORT }}"* ]]; do
              CONTAINER_NAME=$(gcloud logging read 'log_name=projects/${{ env.PROJECT_ID }}/logs/cos_system AND jsonPayload.MESSAGE:full-sync-${{ env.GITHUB_REF_SLUG_URL }}-${{ env.GITHUB_SHA_SHORT }}' --format='value(jsonPayload.MESSAGE)' --limit=1 | grep -o '...-full-sync-${{ env.GITHUB_REF_SLUG_URL }}-${{ env.GITHUB_SHA_SHORT }}-....' | tr -d "'.")
              echo "Using container: ${CONTAINER_NAME} from instance: ${INSTANCE_ID}"
              sleep 10
          done

          echo "INSTANCE_ID=$INSTANCE_ID" >> $GITHUB_ENV
          echo "CONTAINER_NAME=$CONTAINER_NAME" >> $GITHUB_ENV
          sleep 90

      - name: Full sync
        id: full-sync
        run: |
          gcloud compute ssh \
          full-sync-${{ env.GITHUB_REF_SLUG_URL }}-${{ env.GITHUB_SHA_SHORT }} \
          --zone ${{ env.ZONE }} \
          --quiet \
          --ssh-flag="-o ServerAliveInterval=5" \
          --command="docker logs --follow ${{ env.CONTAINER_NAME }}"

          EXIT_CODE=$(\
          gcloud compute ssh \
          full-sync-${{ env.GITHUB_REF_SLUG_URL }}-${{ env.GITHUB_SHA_SHORT }} \
          --zone ${{ env.ZONE }} \
          --quiet \
          --ssh-flag="-o ServerAliveInterval=5" \
          --command="docker wait ${{ env.CONTAINER_NAME }}")

          exit ${EXIT_CODE}

      - name: Get state version from constants.rs
        run: |
          STATE_VERSION=""

          LOCAL_STATE_VERSION=$(grep -oE "DATABASE_FORMAT_VERSION: .* [0-9]+" $GITHUB_WORKSPACE/zebra-state/src/constants.rs | grep -oE "[0-9]+" | tail -n1)
          echo "STATE_VERSION: $LOCAL_STATE_VERSION"

          echo "STATE_VERSION=$LOCAL_STATE_VERSION" >> $GITHUB_ENV

      - name: Get sync height from logs
        run: |
          SYNC_HEIGHT=""

          while [[ ${SYNC_HEIGHT} == "" ]]; do
              SYNC_HEIGHT=$(gcloud logging read --format='value(jsonPayload.MESSAGE)' --order="desc" --limit=1 '(resource.labels.instance_id="${{ env.INSTANCE_ID }}" AND jsonPayload.message=~".+finished initial sync to chain tip.+Height\([0-9]+\).+")' | grep -oE 'Height\([0-9]+\)' | grep -oE '[0-9]+' || [[ $? == 1 ]] )
              echo "SYNC_HEIGHT: $SYNC_HEIGHT"
              sleep 10
          done

          echo "SYNC_HEIGHT=$SYNC_HEIGHT" >> $GITHUB_ENV

      # Create image from disk
      # Force the image creation as the disk is still attached, even though is not being used by the container
      - name: Create image from state disk
        run: |
          gcloud compute images create zebrad-cache-${{ env.GITHUB_REF_SLUG_URL }}-${{ env.GITHUB_SHA_SHORT }}-v${{ env.STATE_VERSION }}-${{ env.NETWORK }}-tip \
          --force \
          --source-disk=full-sync-${{ env.GITHUB_REF_SLUG_URL }}-${{ env.GITHUB_SHA_SHORT }} \
          --source-disk-zone=${{ env.ZONE }} \
          --storage-location=us \
          --description="Created from commit ${{ env.GITHUB_SHA_SHORT }} with height ${{ env.SYNC_HEIGHT }}"

      - name: Delete test instance
        # Do not delete the instance if the sync timeouts in GitHub
        if: ${{ steps.full-sync.outcome == 'success' || steps.full-sync.outcome == 'failure' }}
        continue-on-error: true
        run: |
          gcloud compute instances delete "full-sync-${{ env.GITHUB_REF_SLUG_URL }}-${{ env.GITHUB_SHA_SHORT }}" --zone "${{ env.ZONE }}" --delete-disks all --quiet