From db966f27fa674af1d6203fc42caecfe4e2578657 Mon Sep 17 00:00:00 2001
From: Gustavo Valverde
Date: Wed, 2 Mar 2022 10:15:24 -0400
Subject: [PATCH] feat(actions)!: add full sync test (#3582)

* add(tests): full sync test

* fix(test): add build

* fix(deploy): escape double dashes '--' correctly

* fix(test): remove unexpected --no-capture arg

error: Found argument '--nocapture' which wasn't expected, or isn't valid in this context

* refactor(docker): use default executable as entrypoint

* refactor(startup): add a custom entrypoint

* fix(test): add missing TEST_FULL_SYNC variable

* test(timeout): use the biggest machine

* fix

* fix(deploy): use latest successful image

* typo

* refactor(docker): generate config file at startup

* revert(build): changes were made to docker

* fix(docker): send variables correctly to the entrypoint

* test different conf file approach

* fix(env): add RUN_TEST env variable

* ref: use previous approach

* fix(color): use environment variable

* fix(resources): use our normal machine size

* fix(ci): double CPU and RAM for full sync test

* fix(test): check for zebrad test output in the correct order

The mempool is only activated once, so we must check for that log first.
After mempool activation, the stop regex is logged at least once.
(It might be logged before as well, but we can't rely on that.)

When checking that the mempool didn't activate,
wait for the `zebrad` command to exit,
then check the entire log.

* fix(ci): run full sync test with full compiler optimisations

* fix(tests): reintroduce tests and run full sync on approval

* fix(tests): reduce the changelog

Co-authored-by: teor
---
Review notes (this area between `---` and the diffstat is not part of the
commit message):

- The line structure of this patch was restored from a whitespace-collapsed
  copy; context-line indentation is a best-effort reconstruction, so apply
  with `--ignore-whitespace` if a hunk is rejected.
- test.yml, "Delete test instance": the condition was two `${{ }}` expressions
  joined by a literal ` || `, which GitHub evaluates as a non-empty (always
  truthy) string, and the implicit `success()` check then skipped the cleanup
  whenever the sync step failed. It is now one expression guarded by
  `always()`, so the instance is deleted on success or failure and kept only
  when the sync step was cancelled or timed out.
- entrypoint.sh: string values in the generated zebrad.toml are now quoted
  (TOML literal strings), and the `zebrad)` case no longer passes "zebrad"
  twice (`$1` is already the program name).
- Comment typos fixed in added lines only; removed and context lines are
  untouched, and hunk line counts are unchanged.
- The `index` blob ids are kept from the original commit and no longer
  correspond to the edited post-images.
- NOTE(review): GitHub documents the `branches`/`paths` filters for the push
  and pull_request events; confirm they are honoured (or at least accepted)
  under `pull_request_review` -- TODO confirm.

 .github/workflows/test.yml | 89 +++++++++++++++++++++++++++++++++++++-
 docker/Dockerfile          | 44 ++++++++++++++-----
 docker/entrypoint.sh       | 37 ++++++++++++++++
 3 files changed, 158 insertions(+), 12 deletions(-)
 create mode 100755 docker/entrypoint.sh

diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
index 53db50a2e..df0b01802 100644
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@@ -19,6 +19,17 @@ on:
       - '**/Cargo.lock'
       - 'docker/**'
       - '.github/workflows/test.yml'
+  pull_request_review:
+    branches:
+      - main
+    paths:
+      - '**/*.rs'
+      - '**/*.txt'
+      - '**/Cargo.toml'
+      - '**/Cargo.lock'
+      - 'docker/**'
+      - '.github/workflows/test.yml'
+    types: [submitted]
 
 env:
   CARGO_INCREMENTAL: '1'
@@ -29,7 +40,7 @@ env:
   GCR_BASE: gcr.io/${{ secrets.GCP_PROJECT_ID }}
   REGION: us-central1
   ZONE: us-central1-a
-  MACHINE_TYPE: c2-standard-8
+  MACHINE_TYPE: c2d-standard-16
   IMAGE_NAME: zebrad-test
 
 jobs:
@@ -407,3 +418,79 @@ jobs:
         continue-on-error: true
         run: |
           gcloud compute instances delete "zebrad-tests-${{ env.GITHUB_HEAD_REF_SLUG_URL || env.GITHUB_REF_SLUG_URL }}-${{ env.GITHUB_EVENT_PULL_REQUEST_HEAD_SHA_SHORT || env.GITHUB_SHA_SHORT }}" --delete-disks all --zone "${{ env.ZONE }}"
+
+  # Test that Zebra can run a full mainnet sync after a PR is approved
+  test-full-sync:
+    name: Test full Mainnet sync
+    runs-on: ubuntu-latest
+    needs: [build]
+    if: github.event.review.state == 'approved'
+    steps:
+      - uses: actions/checkout@v2.4.0
+        with:
+          persist-credentials: false
+
+      - name: Inject slug/short variables
+        uses: rlespinasse/github-slug-action@v4
+
+      # Setup gcloud CLI
+      - name: Authenticate to Google Cloud
+        id: auth
+        uses: google-github-actions/auth@v0.5.0
+        with:
+          credentials_json: ${{ secrets.GOOGLE_CREDENTIALS }}
+
+      # Creates Compute Engine virtual machine instance w/ disks
+      - name: Create GCP compute instance
+        id: create-instance
+        run: |
+          gcloud compute instances create-with-container "sync-tests-${{ env.GITHUB_HEAD_REF_SLUG_URL || env.GITHUB_REF_SLUG_URL }}-${{ env.GITHUB_EVENT_PULL_REQUEST_HEAD_SHA_SHORT || env.GITHUB_SHA_SHORT }}" \
+            --boot-disk-size 100GB \
+            --boot-disk-type pd-extreme \
+            --container-image ${{ env.GAR_BASE }}/${{ env.IMAGE_NAME }}:${{ env.GITHUB_EVENT_PULL_REQUEST_HEAD_SHA_SHORT || env.GITHUB_SHA_SHORT }} \
+            --container-restart-policy=never \
+            --container-stdin \
+            --container-tty \
+            --container-env=ZEBRA_SKIP_IPV6_TESTS=1,TEST_FULL_SYNC=1,ZEBRA_FORCE_USE_COLOR=1,FULL_SYNC_MAINNET_TIMEOUT_MINUTES=600 \
+            --machine-type ${{ env.MACHINE_TYPE }} \
+            --scopes cloud-platform \
+            --metadata=google-monitoring-enabled=true,google-logging-enabled=true \
+            --tags zebrad \
+            --zone "${{ env.ZONE }}"
+
+      # TODO: this approach is very messy, but getting the just created container name is very error prone and GCP doesn't have a workaround for this without requiring a TTY
+      # This TODO relates to the following issues:
+      # https://github.com/actions/runner/issues/241
+      # https://www.googlecloudcommunity.com/gc/Infrastructure-Compute-Storage/SSH-into-Compute-Container-not-easily-possible/td-p/170915
+      - name: Get container name from logs
+        id: get-container-name
+        if: steps.create-instance.outcome == 'success'
+        run: |
+          INSTANCE_ID=$(gcloud compute instances describe sync-tests-${{ env.GITHUB_HEAD_REF_SLUG_URL || env.GITHUB_REF_SLUG_URL }}-${{ env.GITHUB_EVENT_PULL_REQUEST_HEAD_SHA_SHORT || env.GITHUB_SHA_SHORT }} --zone ${{ env.ZONE }} --format='value(id)')
+          echo "Using instance: $INSTANCE_ID"
+          while [[ ${CONTAINER_NAME} != *"sync-tests-${{ env.GITHUB_HEAD_REF_SLUG_URL || env.GITHUB_REF_SLUG_URL }}-${{ env.GITHUB_EVENT_PULL_REQUEST_HEAD_SHA_SHORT || env.GITHUB_SHA_SHORT }}"* ]]; do
+              CONTAINER_NAME=$(gcloud logging read 'log_name=projects/${{ env.PROJECT_ID }}/logs/cos_system AND jsonPayload.MESSAGE:sync-tests-${{ env.GITHUB_HEAD_REF_SLUG_URL || env.GITHUB_REF_SLUG_URL }}-${{ env.GITHUB_EVENT_PULL_REQUEST_HEAD_SHA_SHORT || env.GITHUB_SHA_SHORT }}' --format='value(jsonPayload.MESSAGE)' --limit=1 | grep -o '...-sync-tests-${{ env.GITHUB_HEAD_REF_SLUG_URL || env.GITHUB_REF_SLUG_URL }}-${{ env.GITHUB_EVENT_PULL_REQUEST_HEAD_SHA_SHORT || env.GITHUB_SHA_SHORT }}-....' | tr -d "'.")
+              echo "Using container: ${CONTAINER_NAME} from instance: ${INSTANCE_ID}"
+              sleep 10
+          done
+          CONTAINER_NAME=$(gcloud logging read 'log_name=projects/${{ env.PROJECT_ID }}/logs/cos_system AND jsonPayload.MESSAGE:sync-tests-${{ env.GITHUB_HEAD_REF_SLUG_URL || env.GITHUB_REF_SLUG_URL }}-${{ env.GITHUB_EVENT_PULL_REQUEST_HEAD_SHA_SHORT || env.GITHUB_SHA_SHORT }}' --format='value(jsonPayload.MESSAGE)' --limit=1 | grep -o '...-sync-tests-${{ env.GITHUB_HEAD_REF_SLUG_URL || env.GITHUB_REF_SLUG_URL }}-${{ env.GITHUB_EVENT_PULL_REQUEST_HEAD_SHA_SHORT || env.GITHUB_SHA_SHORT }}-....' | tr -d "'.")
+          echo "::set-output name=zebra_container::$CONTAINER_NAME"
+
+      - name: Sync past mandatory checkpoint logs
+        id: sync-past-checkpoint
+        run: |
+          gcloud compute ssh \
+            sync-tests-${{ env.GITHUB_HEAD_REF_SLUG_URL || env.GITHUB_REF_SLUG_URL }}-${{ env.GITHUB_EVENT_PULL_REQUEST_HEAD_SHA_SHORT || env.GITHUB_SHA_SHORT }} \
+            --zone ${{ env.ZONE }} \
+            --quiet \
+            --ssh-flag="-o ServerAliveInterval=5" \
+            --command="docker logs --follow ${{ env.ZEBRA_CONTAINER }}"
+        env:
+          ZEBRA_CONTAINER: ${{ steps.get-container-name.outputs.zebra_container }}
+
+      - name: Delete test instance
+        # Delete on success or failure, but keep the instance if the sync times out in GitHub
+        if: ${{ always() && (steps.sync-past-checkpoint.outcome == 'success' || steps.sync-past-checkpoint.outcome == 'failure') }}
+        continue-on-error: true
+        run: |
+          gcloud compute instances delete "sync-tests-${{ env.GITHUB_HEAD_REF_SLUG_URL || env.GITHUB_REF_SLUG_URL }}-${{ env.GITHUB_EVENT_PULL_REQUEST_HEAD_SHA_SHORT || env.GITHUB_SHA_SHORT }}" --delete-disks all --zone "${{ env.ZONE }}"
diff --git a/docker/Dockerfile b/docker/Dockerfile
index 6eb413b64..359a924d7 100644
--- a/docker/Dockerfile
+++ b/docker/Dockerfile
@@ -40,13 +40,30 @@ RUN apt-get -qq update && \
 
 # Optimize builds. In particular, regenerate-stateful-test-disks.yml was reaching the
 # GitHub Actions time limit (6 hours), so we needed to make it faster.
-ENV RUSTFLAGS -O
+#
+# TODO: apply this optimisation level to all release builds in .cargo/config.toml
+ENV RUSTFLAGS "-C opt-level=3"
+
 ENV CARGO_HOME /app/.cargo/
 # Build dependencies - this is the caching Docker layer!
 RUN cargo chef cook --release --features enable-sentry --recipe-path recipe.json
 
-ARG RUST_BACKTRACE=1
-ENV RUST_BACKTRACE ${RUST_BACKTRACE}
+ARG RUST_BACKTRACE
+ENV RUST_BACKTRACE ${RUST_BACKTRACE:-1}
+
+# Skip IPv6 tests by default, as some CI environments don't have IPv6 available
+ARG ZEBRA_SKIP_IPV6_TESTS
+ENV ZEBRA_SKIP_IPV6_TESTS ${ZEBRA_SKIP_IPV6_TESTS:-1}
+
+# Use default checkpoint sync and network values if none is provided
+ARG CHECKPOINT_SYNC
+ENV CHECKPOINT_SYNC ${CHECKPOINT_SYNC:-true}
+
+ARG NETWORK
+ENV NETWORK ${NETWORK:-Mainnet}
+
+ARG TEST_FULL_SYNC
+ENV TEST_FULL_SYNC ${TEST_FULL_SYNC:-1}
 
 COPY . .
 # Build zebra
@@ -58,13 +75,21 @@ FROM builder AS tester
 COPY --from=us-docker.pkg.dev/zealous-zebra/zebra/zcash-params /root/.zcash-params /root/.zcash-params
 COPY --from=us-docker.pkg.dev/zealous-zebra/zebra/lightwalletd /lightwalletd /usr/local/bin
 
-# Skip IPv6 tests by default, as some CI environment don't have IPv6 available
-ARG ZEBRA_SKIP_IPV6_TESTS=1
-ENV ZEBRA_SKIP_IPV6_TESTS ${ZEBRA_SKIP_IPV6_TESTS}
-
 RUN cargo test --locked --release --features enable-sentry --workspace --no-run
 
-CMD ["cargo" "test" "--locked" "--release" "--features" "enable-sentry" "--workspace"]
+COPY ./docker/entrypoint.sh /
+RUN chmod u+x /entrypoint.sh
+
+ARG CHECKPOINT_SYNC=true
+ARG NETWORK=Mainnet
+ARG TEST_FULL_SYNC
+ENV TEST_FULL_SYNC ${TEST_FULL_SYNC:-1}
+
+ARG RUN_TESTS
+ENV RUN_TESTS ${RUN_TESTS:-1}
+
+ENTRYPOINT ["/entrypoint.sh"]
+CMD ["cargo"]
 
 # Runner image
 FROM debian:bullseye-slim AS runtime
@@ -75,9 +100,6 @@ RUN apt-get update && \
     apt-get install -y --no-install-recommends \
     ca-certificates
 
-ARG CHECKPOINT_SYNC=true
-ARG NETWORK=Mainnet
-
 RUN set -ex; \
     { \
     echo "[consensus]"; \
diff --git a/docker/entrypoint.sh b/docker/entrypoint.sh
new file mode 100755
index 000000000..c668a6b16
--- /dev/null
+++ b/docker/entrypoint.sh
@@ -0,0 +1,37 @@
+#!/bin/bash
+
+set -x
+
+if [ ! -f /app/zebrad.toml ]; then
+echo "
+[consensus]
+checkpoint_sync = ${CHECKPOINT_SYNC}
+[metrics]
+endpoint_addr = '0.0.0.0:9999'
+[network]
+network = '${NETWORK}'
+[state]
+cache_dir = '/zebrad-cache'
+[tracing]
+force_use_color = true
+endpoint_addr = '0.0.0.0:3000'" > /app/zebrad.toml
+fi
+
+case "$1" in
+    -- | cargo)
+        if [[ "$RUN_TESTS" -eq "1" ]]; then
+            if [[ "$TEST_FULL_SYNC" -eq "1" ]]; then
+                exec cargo "test" "--locked" "--release" "--features" "enable-sentry" "--test" "acceptance" "--" "--nocapture" "--ignored" "full_sync_mainnet"
+            else
+                exec cargo "test" "--locked" "--release" "--features" "enable-sentry" "--workspace" "--" "--include-ignored"
+            fi
+        fi
+        ;;
+    zebrad)
+        exec "$@"  # "$1" is already "zebrad"; do not prepend it a second time
+        ;;
+    *)
+        exec "$@"
+esac
+
+exit 1
\ No newline at end of file