zebra/.github/workflows/test-full-sync.yml

294 lines
12 KiB
YAML

name: Full sync test
on:
workflow_dispatch:
inputs:
network:
default: 'Mainnet'
description: 'Network to deploy: Mainnet or Testnet'
required: true
checkpoint_sync:
default: 'true'
description: 'Configures `zebrad` to use as many checkpoints as possible'
required: true
pull_request:
branches:
- main
paths:
# code and tests (including full sync acceptance test changes)
# TODO: ignore changes in test code that isn't used in the full sync test
- '**/*.rs'
# hard-coded checkpoints
# TODO: ignore changes to proptest seed .txt files
- '**/*.txt'
# dependencies
- '**/Cargo.toml'
- '**/Cargo.lock'
# workflow definitions
- 'docker/**'
- '.github/workflows/test-full-sync.yml'
push:
branches:
- main
paths:
# code and tests (including full sync acceptance test changes)
# TODO: ignore changes in test code that isn't used in the full sync test
- '**/*.rs'
# hard-coded checkpoints
# TODO: ignore changes to proptest seed .txt files
- '**/*.txt'
# dependencies
- '**/Cargo.toml'
- '**/Cargo.lock'
# workflow definitions
- 'docker/**'
- '.github/workflows/test-full-sync.yml'
env:
CARGO_INCREMENTAL: '1'
ZEBRA_SKIP_IPV6_TESTS: '1'
RUST_BACKTRACE: full
RUST_LIB_BACKTRACE: full
COLORBT_SHOW_HIDDEN: '1'
NETWORK: Mainnet
PROJECT_ID: zealous-zebra
GAR_BASE: us-docker.pkg.dev/zealous-zebra/zebra
GCR_BASE: gcr.io/zealous-zebra
REGION: us-central1
ZONE: us-central1-a
MACHINE_TYPE: c2d-standard-16
IMAGE_NAME: zebrad-test
jobs:
build:
# TODO add `startsWith(github.head_ref, 'mergify/merge-queue/')` to the condition to
# only run on Mergify head branches, and on manual dispatch:
# https://docs.github.com/en/actions/using-workflows/events-that-trigger-workflows#running-your-workflow-based-on-the-head-or-base-branch-of-a-pull-request-1
if: ${{ github.event_name == 'push' || github.event_name == 'workflow_dispatch' }}
name: Build images
timeout-minutes: 210
runs-on: ubuntu-latest
permissions:
contents: 'read'
id-token: 'write'
steps:
- uses: actions/checkout@v3.0.2
with:
persist-credentials: false
- name: Inject slug/short variables
uses: rlespinasse/github-slug-action@v4
with:
short-length: 7
# Automatic tag management and OCI Image Format Specification for labels
- name: Docker meta
id: meta
uses: docker/metadata-action@v3.7.0
with:
# list of Docker images to use as base name for tags
images: |
${{ env.GAR_BASE }}/${{ env.IMAGE_NAME }}
${{ env.GCR_BASE }}/${{ env.GITHUB_REPOSITORY_SLUG_URL }}/${{ env.IMAGE_NAME }}
# generate Docker tags based on the following events/attributes
tags: |
type=schedule
type=ref,event=branch
type=ref,event=pr
type=semver,pattern={{version}}
type=semver,pattern={{major}}.{{minor}}
type=semver,pattern={{major}}
type=sha
# Setup Docker Buildx to allow use of docker cache layers from GH
- name: Set up Docker Buildx
id: buildx
uses: docker/setup-buildx-action@v1
- name: Authenticate to Google Cloud
id: auth
uses: google-github-actions/auth@v0.7.0
with:
workload_identity_provider: 'projects/143793276228/locations/global/workloadIdentityPools/github-actions/providers/github-oidc'
service_account: 'github-service-account@zealous-zebra.iam.gserviceaccount.com'
token_format: 'access_token'
- name: Login to Google Artifact Registry
uses: docker/login-action@v1.14.1
with:
registry: us-docker.pkg.dev
username: oauth2accesstoken
password: ${{ steps.auth.outputs.access_token }}
- name: Login to Google Container Registry
uses: docker/login-action@v1.14.1
with:
registry: gcr.io
username: oauth2accesstoken
password: ${{ steps.auth.outputs.access_token }}
# Build and push image to Google Artifact Registry
- name: Build & push
id: docker_build
uses: docker/build-push-action@v2.10.0
with:
target: tester
context: .
file: ./docker/Dockerfile
tags: ${{ steps.meta.outputs.tags }}
labels: ${{ steps.meta.outputs.labels }}
build-args: |
NETWORK=${{ github.event.inputs.network || env.NETWORK }}
SHORT_SHA=${{ env.GITHUB_SHA_SHORT }}
RUST_BACKTRACE=${{ env.RUST_BACKTRACE }}
RUST_LIB_BACKTRACE=${{ env.RUST_LIB_BACKTRACE }}
COLORBT_SHOW_HIDDEN=${{ env.COLORBT_SHOW_HIDDEN }}
ZEBRA_SKIP_NETWORK_TESTS="1"
CHECKPOINT_SYNC=${{ github.event.inputs.checkpoint_sync || true }}
RUST_LOG=debug
SENTRY_DSN=${{ secrets.SENTRY_ENDPOINT }}
push: true
cache-from: type=registry,ref=${{ env.GAR_BASE }}/${{ env.IMAGE_NAME }}:${{ env.GITHUB_REF_SLUG_URL }}-buildcache
cache-to: type=registry,ref=${{ env.GAR_BASE }}/${{ env.IMAGE_NAME }}:${{ env.GITHUB_REF_SLUG_URL }}-buildcache,mode=max
# Test that Zebra can run a full mainnet sync after a PR is approved
test-full-sync:
name: Test full Mainnet sync
runs-on: ubuntu-latest
needs: [build]
permissions:
contents: 'read'
id-token: 'write'
steps:
- uses: actions/checkout@v3.0.2
with:
persist-credentials: false
- name: Inject slug/short variables
uses: rlespinasse/github-slug-action@v4
with:
short-length: 7
- name: Downcase network name for disks
run: |
NETWORK_CAPS=${{ github.event.inputs.network || env.NETWORK }}
echo "NETWORK=${NETWORK_CAPS,,}" >> $GITHUB_ENV
# Setup gcloud CLI
- name: Authenticate to Google Cloud
id: auth
uses: google-github-actions/auth@v0.7.0
with:
workload_identity_provider: 'projects/143793276228/locations/global/workloadIdentityPools/github-actions/providers/github-oidc'
service_account: 'github-service-account@zealous-zebra.iam.gserviceaccount.com'
token_format: 'access_token'
# Check if our destination compute instance exists and delete it
- name: Delete existing instance with same SHA
run: |
INSTANCE=$(gcloud compute instances list --filter=full-sync-${{ env.GITHUB_REF_SLUG_URL }}-${{ env.GITHUB_SHA_SHORT }} --format='value(NAME)')
if [ -z "${INSTANCE}" ]; then
echo "No instance to delete"
else
gcloud compute instances delete "${INSTANCE}" --zone "${{ env.ZONE }}" --delete-disks all --quiet
fi
# Creates Compute Engine virtual machine instance w/ disks
- name: Create GCP compute instance
run: |
gcloud compute instances create-with-container "full-sync-${{ env.GITHUB_REF_SLUG_URL }}-${{ env.GITHUB_SHA_SHORT }}" \
--boot-disk-size 100GB \
--boot-disk-type pd-ssd \
--container-image ${{ env.GAR_BASE }}/${{ env.IMAGE_NAME }}:sha-${{ env.GITHUB_SHA_SHORT }} \
--container-restart-policy=never \
--container-stdin \
--container-tty \
--container-env=ZEBRA_SKIP_IPV6_TESTS=1,TEST_FULL_SYNC=1,ZEBRA_FORCE_USE_COLOR=1,FULL_SYNC_MAINNET_TIMEOUT_MINUTES=600 \
--machine-type ${{ env.MACHINE_TYPE }} \
--scopes cloud-platform \
--metadata=google-monitoring-enabled=true,google-logging-enabled=true \
--tags zebrad \
--zone "${{ env.ZONE }}"
# TODO: this approach is very messy, but getting the just created container name is very error prone and GCP doesn't have a workaround for this without requiring a TTY
# This TODO relates to the following issues:
# https://github.com/actions/runner/issues/241
# https://www.googlecloudcommunity.com/gc/Infrastructure-Compute-Storage/SSH-into-Compute-Container-not-easily-possible/td-p/170915
#
# Deploying a zebra container might take more than 30 seconds to completely start, so we're adding a timer at the end
# of this step before starting the following ones
- name: Get container name from logs
run: |
INSTANCE_ID=$(gcloud compute instances describe full-sync-${{ env.GITHUB_REF_SLUG_URL }}-${{ env.GITHUB_SHA_SHORT }} --zone ${{ env.ZONE }} --format='value(id)')
echo "INSTANCE_ID=$INSTANCE_ID" >> $GITHUB_ENV
CONTAINER_NAME=""
while [[ ${CONTAINER_NAME} != *"full-sync-${{ env.GITHUB_REF_SLUG_URL }}-${{ env.GITHUB_SHA_SHORT }}"* ]]; do
CONTAINER_NAME=$(gcloud logging read 'log_name=projects/${{ env.PROJECT_ID }}/logs/cos_system AND jsonPayload.MESSAGE:full-sync-${{ env.GITHUB_REF_SLUG_URL }}-${{ env.GITHUB_SHA_SHORT }}' --format='value(jsonPayload.MESSAGE)' --limit=1 | grep -o '...-full-sync-${{ env.GITHUB_REF_SLUG_URL }}-${{ env.GITHUB_SHA_SHORT }}-....' | tr -d "'.")
echo "Using container: ${CONTAINER_NAME} from instance: ${INSTANCE_ID}"
sleep 10
done
echo "INSTANCE_ID=$INSTANCE_ID" >> $GITHUB_ENV
echo "CONTAINER_NAME=$CONTAINER_NAME" >> $GITHUB_ENV
sleep 90
- name: Full sync
id: full-sync
run: |
gcloud compute ssh \
full-sync-${{ env.GITHUB_REF_SLUG_URL }}-${{ env.GITHUB_SHA_SHORT }} \
--zone ${{ env.ZONE }} \
--quiet \
--ssh-flag="-o ServerAliveInterval=5" \
--command="docker logs --follow ${{ env.CONTAINER_NAME }}"
EXIT_CODE=$(\
gcloud compute ssh \
full-sync-${{ env.GITHUB_REF_SLUG_URL }}-${{ env.GITHUB_SHA_SHORT }} \
--zone ${{ env.ZONE }} \
--quiet \
--ssh-flag="-o ServerAliveInterval=5" \
--command="docker wait ${{ env.CONTAINER_NAME }}")
exit ${EXIT_CODE}
- name: Get state version from constants.rs
run: |
STATE_VERSION=""
LOCAL_STATE_VERSION=$(grep -oE "DATABASE_FORMAT_VERSION: .* [0-9]+" $GITHUB_WORKSPACE/zebra-state/src/constants.rs | grep -oE "[0-9]+" | tail -n1)
echo "STATE_VERSION: $LOCAL_STATE_VERSION"
echo "STATE_VERSION=$LOCAL_STATE_VERSION" >> $GITHUB_ENV
- name: Get sync height from logs
run: |
SYNC_HEIGHT=""
while [[ ${SYNC_HEIGHT} == "" ]]; do
SYNC_HEIGHT=$(gcloud logging read --format='value(jsonPayload.MESSAGE)' --order="desc" --limit=1 '(resource.labels.instance_id="${{ env.INSTANCE_ID }}" AND jsonPayload.message=~".+finished initial sync to chain tip.+Height\([0-9]+\).+")' | grep -oE 'Height\([0-9]+\)' | grep -oE '[0-9]+' || [[ $? == 1 ]] )
echo "SYNC_HEIGHT: $SYNC_HEIGHT"
sleep 10
done
echo "SYNC_HEIGHT=$SYNC_HEIGHT" >> $GITHUB_ENV
# Create image from disk
# Force the image creation as the disk is still attached, even though is not being used by the container
- name: Create image from state disk
run: |
gcloud compute images create zebrad-cache-${{ env.GITHUB_REF_SLUG_URL }}-${{ env.GITHUB_SHA_SHORT }}-v${{ env.STATE_VERSION }}-${{ env.NETWORK }}-tip \
--force \
--source-disk=full-sync-${{ env.GITHUB_REF_SLUG_URL }}-${{ env.GITHUB_SHA_SHORT }} \
--source-disk-zone=${{ env.ZONE }} \
--storage-location=us \
--description="Created from commit ${{ env.GITHUB_SHA_SHORT }} with height ${{ env.SYNC_HEIGHT }}"
- name: Delete test instance
# Do not delete the instance if the sync timeouts in GitHub
if: ${{ steps.full-sync.outcome == 'success' || steps.full-sync.outcome == 'failure' }}
continue-on-error: true
run: |
gcloud compute instances delete "full-sync-${{ env.GITHUB_REF_SLUG_URL }}-${{ env.GITHUB_SHA_SHORT }}" --zone "${{ env.ZONE }}" --delete-disks all --quiet