Merge branch 'main' into docker-refactor

This commit is contained in:
Marek 2024-10-18 11:09:26 +02:00
commit afeb05f50b
7 changed files with 142 additions and 103 deletions

View File

@ -1,6 +1,6 @@
# Google Cloud node deployments and tests that run when Rust code or dependencies are modified,
# but only on PRs from the ZcashFoundation/zebra repository.
# (External PRs are tested/deployed by mergify.)
# (External PRs are tested/deployed by mergify.)
#
# 1. `versioning`: Extracts the major version from the release semver. Useful for segregating instances based on major versions.
# 2. `build`: Builds a Docker image named `zebrad` with the necessary tags derived from Git.
@ -30,58 +30,80 @@ on:
workflow_dispatch:
inputs:
network:
default: 'Mainnet'
description: 'Network to deploy: Mainnet or Testnet'
default: Mainnet
description: "Network to deploy: Mainnet or Testnet"
required: true
log_file:
default: ''
description: 'Log to a file path rather than standard output'
type: choice
options:
- Mainnet
- Testnet
cached_disk_type:
default: tip
description: "Type of cached disk to use"
required: true
type: choice
options:
- tip
- checkpoint
prefer_main_cached_state:
default: false
description: "Prefer cached state from the main branch"
required: false
type: boolean
no_cached_disk:
default: false
description: "Do not use a cached state disk"
required: false
type: boolean
no_cache:
description: 'Disable the Docker cache for this build'
description: "Disable the Docker cache for this build"
required: false
type: boolean
default: false
log_file:
default: ""
description: "Log to a file path rather than standard output"
push:
# Skip main branch updates where Rust code and dependencies aren't modified.
branches:
- main
paths:
# code and tests
- '**/*.rs'
# hard-coded checkpoints and proptest regressions
- '**/*.txt'
# dependencies
- '**/Cargo.toml'
- '**/Cargo.lock'
# configuration files
- '.cargo/config.toml'
- '**/clippy.toml'
# workflow definitions
- 'docker/**'
- '.dockerignore'
- '.github/workflows/cd-deploy-nodes-gcp.yml'
- '.github/workflows/sub-build-docker-image.yml'
# Skip main branch updates where Rust code and dependencies aren't modified.
branches:
- main
paths:
# code and tests
- "**/*.rs"
# hard-coded checkpoints and proptest regressions
- "**/*.txt"
# dependencies
- "**/Cargo.toml"
- "**/Cargo.lock"
# configuration files
- ".cargo/config.toml"
- "**/clippy.toml"
# workflow definitions
- "docker/**"
- ".dockerignore"
- ".github/workflows/cd-deploy-nodes-gcp.yml"
- ".github/workflows/sub-build-docker-image.yml"
# Only runs the Docker image tests, doesn't deploy any instances
pull_request:
# Skip PRs where Rust code and dependencies aren't modified.
paths:
# code and tests
- '**/*.rs'
- "**/*.rs"
# hard-coded checkpoints and proptest regressions
- '**/*.txt'
- "**/*.txt"
# dependencies
- '**/Cargo.toml'
- '**/Cargo.lock'
- "**/Cargo.toml"
- "**/Cargo.lock"
# configuration files
- '.cargo/config.toml'
- '**/clippy.toml'
- ".cargo/config.toml"
- "**/clippy.toml"
# workflow definitions
- 'docker/**'
- '.dockerignore'
- '.github/workflows/cd-deploy-nodes-gcp.yml'
- '.github/workflows/sub-build-docker-image.yml'
- "docker/**"
- ".dockerignore"
- ".github/workflows/cd-deploy-nodes-gcp.yml"
- ".github/workflows/sub-build-docker-image.yml"
release:
types:
@ -144,11 +166,11 @@ jobs:
needs: build
uses: ./.github/workflows/sub-test-zebra-config.yml
with:
test_id: 'default-conf'
test_id: "default-conf"
docker_image: ${{ vars.GAR_BASE }}/zebrad@${{ needs.build.outputs.image_digest }}
grep_patterns: '-e "net.*=.*Main.*estimated progress to chain tip.*BeforeOverwinter"'
test_variables: '-e NETWORK'
network: 'Mainnet'
test_variables: "-e NETWORK"
network: "Mainnet"
# Test reconfiguring the docker image for testnet.
test-configuration-file-testnet:
@ -157,11 +179,11 @@ jobs:
# Make sure Zebra can sync the genesis block on testnet
uses: ./.github/workflows/sub-test-zebra-config.yml
with:
test_id: 'testnet-conf'
test_id: "testnet-conf"
docker_image: ${{ vars.GAR_BASE }}/zebrad@${{ needs.build.outputs.image_digest }}
grep_patterns: '-e "net.*=.*Test.*estimated progress to chain tip.*Genesis" -e "net.*=.*Test.*estimated progress to chain tip.*BeforeOverwinter"'
test_variables: '-e NETWORK'
network: 'Testnet'
test_variables: "-e NETWORK"
network: "Testnet"
# Finds a `tip` cached state disk for zebra from the main branch
#
@ -170,11 +192,12 @@ jobs:
get-disk-name:
name: Get disk name
uses: ./.github/workflows/sub-find-cached-disks.yml
if: ${{ !inputs.no_cached_disk }}
with:
network: ${{ inputs.network || vars.ZCASH_NETWORK }}
disk_prefix: zebrad-cache
disk_suffix: tip
prefer_main_cached_state: true
disk_suffix: ${{ inputs.cached_disk_type || 'tip' }}
prefer_main_cached_state: ${{ inputs.prefer_main_cached_state || (github.event_name == 'push' && github.ref_name == 'main' && true) || false }}
# Deploy Managed Instance Groups (MiGs) for Mainnet and Testnet,
# with one node in the configured GCP region.
@ -196,14 +219,21 @@ jobs:
matrix:
network: [Mainnet, Testnet]
name: Deploy ${{ matrix.network }} nodes
needs: [ build, versioning, test-configuration-file, test-zebra-conf-path, get-disk-name ]
needs:
[
build,
versioning,
test-configuration-file,
test-zebra-conf-path,
get-disk-name,
]
runs-on: ubuntu-latest
timeout-minutes: 60
env:
CACHED_DISK_NAME: ${{ needs.get-disk-name.outputs.cached_disk_name }}
permissions:
contents: 'read'
id-token: 'write'
contents: "read"
id-token: "write"
if: ${{ !cancelled() && !failure() && ((github.event_name == 'push' && github.ref_name == 'main') || github.event_name == 'release') }}
steps:
@ -232,20 +262,20 @@ jobs:
id: auth
uses: google-github-actions/auth@v2.1.6
with:
workload_identity_provider: '${{ vars.GCP_WIF }}'
service_account: '${{ vars.GCP_DEPLOYMENTS_SA }}'
workload_identity_provider: "${{ vars.GCP_WIF }}"
service_account: "${{ vars.GCP_DEPLOYMENTS_SA }}"
- name: Set up Cloud SDK
uses: google-github-actions/setup-gcloud@v2.1.1
# TODO we should implement the fixes from https://github.com/ZcashFoundation/zebra/pull/5670 here
# but the implementation is failing as it's requiring the disk names, contrary to what is stated in the official documentation
- name: Create instance template for ${{ matrix.network }}
run: |
NAME="zebrad-cache-${{ env.GITHUB_HEAD_REF_SLUG_URL || env.GITHUB_REF_SLUG_URL }}-${{ env.GITHUB_SHA_SHORT }}-${NETWORK}"
DISK_PARAMS="name=${NAME},device-name=${NAME},size=400GB,type=pd-ssd"
DISK_NAME="zebrad-cache-${{ env.GITHUB_HEAD_REF_SLUG_URL || env.GITHUB_REF_SLUG_URL }}-${{ env.GITHUB_SHA_SHORT }}-${NETWORK}"
DISK_PARAMS="name=${DISK_NAME},device-name=${DISK_NAME},size=400GB,type=pd-ssd"
# Attach the cached state image to the new disk when one was found.
if [ -n "${{ env.CACHED_DISK_NAME }}" ]; then
DISK_PARAMS+=",image=${{ env.CACHED_DISK_NAME }}"
# Compare the rendered expression against the literal "true": a bare `[ false ]` is a
# non-empty string test that always succeeds, which would make the failure branch unreachable.
elif [ "${{ inputs.no_cached_disk && github.event_name == 'workflow_dispatch' }}" = "true" ]; then
echo "No cached disk required"
else
# No disk was found and skipping the cached disk was not requested: fail the step.
echo "No cached disk found for ${{ matrix.network }} in main branch"
exit 1
@ -258,7 +288,7 @@ jobs:
--image-family=cos-stable \
--network-interface=subnet=${{ vars.GCP_SUBNETWORK }} \
--create-disk="${DISK_PARAMS}" \
--container-mount-disk=mount-path='/var/cache/zebrad-cache',name=${NAME},mode=rw \
--container-mount-disk=mount-path='/var/cache/zebrad-cache',name=${DISK_NAME},mode=rw \
--container-stdin \
--container-tty \
--container-image ${{ vars.GAR_BASE }}/zebrad@${{ needs.build.outputs.image_digest }} \
@ -306,15 +336,16 @@ jobs:
# Note: these instances are not automatically replaced or deleted
deploy-instance:
name: Deploy single ${{ inputs.network }} instance
needs: [ build, test-configuration-file, test-zebra-conf-path, get-disk-name ]
needs: [build, test-configuration-file, test-zebra-conf-path, get-disk-name]
runs-on: ubuntu-latest
timeout-minutes: 30
env:
CACHED_DISK_NAME: ${{ needs.get-disk-name.outputs.cached_disk_name }}
permissions:
contents: 'read'
id-token: 'write'
if: github.event_name == 'workflow_dispatch'
contents: "read"
id-token: "write"
# Run even if we don't need a cached disk, but only when triggered by a workflow_dispatch
if: ${{ !failure() && github.event_name == 'workflow_dispatch' }}
steps:
- uses: actions/checkout@v4.2.1
@ -342,8 +373,8 @@ jobs:
id: auth
uses: google-github-actions/auth@v2.1.6
with:
workload_identity_provider: '${{ vars.GCP_WIF }}'
service_account: '${{ vars.GCP_DEPLOYMENTS_SA }}'
workload_identity_provider: "${{ vars.GCP_WIF }}"
service_account: "${{ vars.GCP_DEPLOYMENTS_SA }}"
- name: Set up Cloud SDK
uses: google-github-actions/setup-gcloud@v2.1.1
@ -351,10 +382,12 @@ jobs:
# Create instance template from container image
- name: Manual deploy of a single ${{ inputs.network }} instance running zebrad
run: |
NAME="zebrad-cache-${{ env.GITHUB_HEAD_REF_SLUG_URL || env.GITHUB_REF_SLUG_URL }}-${{ env.GITHUB_SHA_SHORT }}-${NETWORK}"
DISK_PARAMS="name=${NAME},device-name=${NAME},size=400GB,type=pd-ssd"
DISK_NAME="zebrad-cache-${{ env.GITHUB_HEAD_REF_SLUG_URL || env.GITHUB_REF_SLUG_URL }}-${{ env.GITHUB_SHA_SHORT }}-${NETWORK}"
DISK_PARAMS="name=${DISK_NAME},device-name=${DISK_NAME},size=400GB,type=pd-ssd"
# Attach the cached state image to the new disk when one was found.
if [ -n "${{ env.CACHED_DISK_NAME }}" ]; then
DISK_PARAMS+=",image=${{ env.CACHED_DISK_NAME }}"
# Compare the rendered expression against the literal "true": a bare `[ false ]` is a
# non-empty string test that always succeeds, so a manual run with `no_cached_disk: false`
# and no disk found would otherwise deploy without cached state instead of failing.
elif [ "${{ inputs.no_cached_disk && github.event_name == 'workflow_dispatch' }}" = "true" ]; then
echo "No cached disk required"
else
# This job has no matrix; the network comes from the workflow_dispatch input.
echo "No cached disk found for ${{ inputs.network }} in main branch"
exit 1
@ -367,7 +400,7 @@ jobs:
--image-family=cos-stable \
--network-interface=subnet=${{ vars.GCP_SUBNETWORK }} \
--create-disk="${DISK_PARAMS}" \
--container-mount-disk=mount-path='/var/cache/zebrad-cache',name=${NAME},mode=rw \
--container-mount-disk=mount-path='/var/cache/zebrad-cache',name=${DISK_NAME},mode=rw \
--container-stdin \
--container-tty \
--container-image ${{ vars.GAR_BASE }}/zebrad@${{ needs.build.outputs.image_digest }} \
@ -382,7 +415,7 @@ jobs:
failure-issue:
name: Open or update issues for release failures
# When a new job is added to this workflow, add it to this list.
needs: [ versioning, build, deploy-nodes, deploy-instance ]
needs: [versioning, build, deploy-nodes, deploy-instance]
# Only open tickets for failed or cancelled jobs that are not coming from PRs.
# (PR statuses are already reported in the PR jobs list, and checked by Mergify.)
if: (failure() && github.event.pull_request == null) || (cancelled() && github.event.pull_request == null)

View File

@ -93,7 +93,7 @@ jobs:
run: |
curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y --default-toolchain=stable --profile=default
- uses: Swatinem/rust-cache@v2.7.3
- uses: Swatinem/rust-cache@v2.7.5
with:
shared-key: "clippy-cargo-lock"
@ -138,7 +138,7 @@ jobs:
# We don't cache `fmt` outputs because the job is quick,
# and we want to use the limited GitHub actions cache space for slower jobs.
#- uses: Swatinem/rust-cache@v2.7.3
#- uses: Swatinem/rust-cache@v2.7.5
- run: |
cargo fmt --all -- --check

View File

@ -112,7 +112,7 @@ jobs:
curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y --default-toolchain=${{ matrix.rust }} --profile=minimal
- uses: Swatinem/rust-cache@v2.7.3
- uses: Swatinem/rust-cache@v2.7.5
# TODO: change Rust cache target directory on Windows,
# or remove this workaround once the build is more efficient (#3005).
#with:
@ -221,7 +221,7 @@ jobs:
run: |
curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y --default-toolchain=stable --profile=minimal
- uses: Swatinem/rust-cache@v2.7.3
- uses: Swatinem/rust-cache@v2.7.5
with:
shared-key: "clippy-cargo-lock"

View File

@ -155,7 +155,7 @@ jobs:
run: |
curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y --default-toolchain=beta --profile=default
- uses: Swatinem/rust-cache@v2.7.3
- uses: Swatinem/rust-cache@v2.7.5
- name: Build internal docs
run: |

View File

@ -3,9 +3,9 @@
# This script finds a cached Google Cloud Compute image based on specific criteria.
#
# If there are multiple disks:
# - prefer images generated from the same commit, then
# - if prefer_main_cached_state is true, prefer images from the `main` branch, then
# - use any images from any other branch or commit.
# - if `PREFER_MAIN_CACHED_STATE` is "true", then select an image from the `main` branch, else
# - try to find a cached disk image from the current branch (or PR), else
# - try to find an image from any branch.
#
# Within each of these categories:
# - prefer newer images to older images
@ -20,7 +20,7 @@ echo "Extracting local state version..."
LOCAL_STATE_VERSION=$(grep -oE "DATABASE_FORMAT_VERSION: .* [0-9]+" "${GITHUB_WORKSPACE}/zebra-state/src/constants.rs" | grep -oE "[0-9]+" | tail -n1)
echo "STATE_VERSION: ${LOCAL_STATE_VERSION}"
# Function to find a cached disk image based on the git pattern (commit, main, or any branch)
# Function to find a cached disk image based on the git pattern (branch, main, or any branch)
find_cached_disk_image() {
local git_pattern="${1}"
local git_source="${2}"
@ -34,40 +34,36 @@ find_cached_disk_image() {
echo "Found ${git_source} Disk: ${disk_name}" >&2
disk_description=$(gcloud compute images describe "${disk_name}" --format="value(DESCRIPTION)")
echo "Description: ${disk_description}" >&2
echo "${disk_name}" # This is the actual return value when a disk is found
echo "${disk_name}" # This is the actual return value when a disk is found
else
echo "No ${git_source} disk found." >&2
echo "No ${git_source} disk found with '${disk_search_pattern}' pattern." >&2
fi
}
# Check if both $DISK_PREFIX and $DISK_SUFFIX are set, as they are required to find a cached disk image
# Check if both $DISK_PREFIX and $DISK_SUFFIX are set, as they are required to
# find a cached disk image.
if [[ -n "${DISK_PREFIX}" && -n "${DISK_SUFFIX}" ]]; then
# Find the most suitable cached disk image
echo "Finding the most suitable cached disk image..."
echo "Finding a ${DISK_PREFIX}-${DISK_SUFFIX} disk image for ${NETWORK}..."
CACHED_DISK_NAME=""
# First, try to find a cached disk image from the current commit
CACHED_DISK_NAME=$(find_cached_disk_image ".+-${GITHUB_SHA_SHORT}" "commit")
# If no cached disk image is found
if [[ -z "${CACHED_DISK_NAME}" ]]; then
# Check if main branch images are preferred
if [[ "${PREFER_MAIN_CACHED_STATE}" == "true" ]]; then
CACHED_DISK_NAME=$(find_cached_disk_image "main-[0-9a-f]+" "main branch")
# Else, try to find one from any branch
else
CACHED_DISK_NAME=$(find_cached_disk_image ".+-[0-9a-f]+" "any branch")
fi
# Try to find an image based on the `main` branch if that branch is preferred.
if [[ "${PREFER_MAIN_CACHED_STATE}" == "true" ]]; then
CACHED_DISK_NAME=$(find_cached_disk_image "main-[0-9a-f]+" "main branch")
fi
# If no image was found, try to find one from the current branch (or PR).
CACHED_DISK_NAME=${CACHED_DISK_NAME:-$(find_cached_disk_image ".+-${GITHUB_REF}" "branch")}
# If we still have no image, try to find one from any branch.
CACHED_DISK_NAME=${CACHED_DISK_NAME:-$(find_cached_disk_image ".+-[0-9a-f]+" "any branch")}
# Handle case where no suitable disk image is found
# Handle the case where no suitable disk image is found
if [[ -z "${CACHED_DISK_NAME}" ]]; then
echo "No suitable cached state disk available."
echo "Cached state test jobs must depend on the cached state rebuild job."
echo "No suitable cached state disk available. Try running the cached state rebuild job."
exit 1
else
echo "Selected Disk: ${CACHED_DISK_NAME}"
fi
echo "Selected Disk: ${CACHED_DISK_NAME}"
else
echo "DISK_PREFIX or DISK_SUFFIX is not set. Skipping disk image search."
fi
@ -77,7 +73,6 @@ find_available_disk_type() {
local base_name="${1}"
local disk_type="${2}"
local disk_pattern="${base_name}-cache"
local output_var="${base_name}_${disk_type}_disk"
local disk_name
disk_name=$(gcloud compute images list --filter="status=READY AND name~${disk_pattern}-.+-[0-9a-f]+-v${LOCAL_STATE_VERSION}-${NETWORK}-${disk_type}" --format="value(NAME)" --sort-by=~creationTimestamp --limit=1)
@ -87,10 +82,10 @@ find_available_disk_type() {
echo "Found ${disk_type^^} disk: ${disk_name} for ${base_name^^} on network: ${NETWORK}" >&2
disk_description=$(gcloud compute images describe "${disk_name}" --format="value(DESCRIPTION)")
echo "Description: ${disk_description}" >&2
echo "true" # This is the actual return value when a disk is found
echo "true" # This is the actual return value when a disk is found
else
echo "No ${disk_type^^} disk found for ${base_name^^} on network: ${NETWORK}" >&2
echo "false" # This is the actual return value when no disk is found
echo "false" # This is the actual return value when no disk is found
fi
}
if [[ -n "${NETWORK}" ]]; then

View File

@ -654,6 +654,7 @@ jobs:
# (This is unlikely, because each image created by a workflow has a different name.)
#
# The image name must also be 63 characters or less.
# More info: https://cloud.google.com/compute/docs/naming-resources#resource-name-format
#
# Force the image creation (--force) as the disk is still attached even though it is not being
# used by the container.

View File

@ -74,20 +74,30 @@ jobs:
- name: Set up Cloud SDK
uses: google-github-actions/setup-gcloud@v2.1.1
# Disk images in GCP are required to be in lowercase, but the blockchain network
# uses sentence case, so we need to downcase ${{ inputs.network }}
# Performs formatting on disk name components.
#
# Passes a lowercase Network name to subsequent steps using $NETWORK env variable
- name: Downcase network name for disks
# Disk images in GCP are required to be in lowercase, but the blockchain network
# uses sentence case, so we need to downcase ${{ inputs.network }}.
#
# Disk image names in GCP are limited to 63 characters, so we need to limit
# branch names to 12 characters.
# Check the `create-state-image` in `sub-deploy-integration-tests-gcp.yml` for more details on image names.
# More info: https://cloud.google.com/compute/docs/naming-resources#resource-name-format
#
# Passes the lowercased ${{ inputs.network }} to subsequent steps using the $NETWORK env variable.
# Passes the first 12 characters of ${{ env.GITHUB_REF_SLUG_URL }} to subsequent steps using the $SHORT_GITHUB_REF env variable.
- name: Format network name and branch name for disks
run: |
NETWORK_CAPS=${{ inputs.network }}
echo "NETWORK=${NETWORK_CAPS,,}" >> $GITHUB_ENV
NETWORK_CAPS="${{ inputs.network }}"
echo "NETWORK=${NETWORK_CAPS,,}" >> "$GITHUB_ENV"
LONG_GITHUB_REF="${{ env.GITHUB_REF_SLUG_URL }}"
echo "SHORT_GITHUB_REF=${LONG_GITHUB_REF:0:12}" >> "$GITHUB_ENV"
# Check if there are cached state disks available for subsequent jobs to use.
- name: Check if cached state disks exists
id: get-available-disks
env:
GITHUB_SHA_SHORT: ${{ env.GITHUB_SHA_SHORT }}
GITHUB_REF: ${{ env.SHORT_GITHUB_REF }}
NETWORK: ${{ env.NETWORK }} # use lowercase version from env, not input
DISK_PREFIX: ${{ inputs.disk_prefix }}
DISK_SUFFIX: ${{ inputs.disk_suffix }}