name: Deploy GCP tests

# Reusable workflow: deploys a GCP VM, runs a dockerized test on it,
# follows its logs, and optionally saves a cached state disk image.
on:
  workflow_call:
    inputs:
      # Status and logging
      test_id:
        required: true
        type: string
        description: 'Unique identifier for the test'
      test_description:
        required: true
        type: string
        description: 'Explains what the test does'
      height_grep_text:
        required: false
        type: string
        description: 'Regular expression to find the tip height in test logs, and add it to newly created cached state image metadata'

      # Test selection and parameters
      test_variables:
        required: true
        type: string
        description: 'Environmental variables used to select and configure the test'
      network:
        required: false
        type: string
        default: 'Mainnet'
        description: 'Zcash network to test against'
      is_long_test:
        required: false
        type: boolean
        default: false
        description: 'Does this test need multiple run jobs? (Does it run longer than 6 hours?)'

      # Cached state
      #
      # TODO: find a better name
      root_state_path:
        required: false
        type: string
        default: '/zebrad-cache'
        description: 'Cached state base directory path'
      # TODO: find a better name
      zebra_state_dir:
        required: false
        type: string
        default: ''
        description: 'Zebra cached state directory and input image prefix to search in GCP'
      # TODO: find a better name
      lwd_state_dir:
        required: false
        type: string
        default: ''
        description: 'Lightwalletd cached state directory and input image prefix to search in GCP'
      disk_prefix:
        required: false
        type: string
        default: 'zebrad-cache'
        description: 'Image name prefix, and `zebra_state_dir` name for newly created cached states'
      disk_suffix:
        required: false
        type: string
        description: 'Image name suffix'
      needs_zebra_state:
        required: true
        type: boolean
        description: 'Does the test use Zebra cached state?'
      needs_lwd_state:
        required: false
        type: boolean
        description: 'Does the test use Lightwalletd and Zebra cached state?'
      # main branch states can be outdated and slower, but they can also be more reliable
      prefer_main_cached_state:
        required: false
        type: boolean
        default: false
        description: 'Does the test prefer to use a main branch cached state?'
      saves_to_disk:
        required: true
        type: boolean
        description: 'Can this test create new or updated cached state disks?'
      force_save_to_disk:
        required: false
        type: boolean
        default: false
        description: 'Force this test to create a new or updated cached state disk'
      app_name:
        required: false
        type: string
        default: 'zebra'
        description: 'Application name, used to work out when a job is an update job'
|
env:
  # How many previous log lines we show at the start of each new log job.
  # Increase this number if some log lines are skipped between jobs
  #
  # We want to show all the logs since the last job finished,
  # but we don't know how long it will be between jobs.
  # 200 lines is about 6-15 minutes of sync logs, or one panic log.
  EXTRA_LOG_LINES: 200
  # How many blocks to wait before creating an updated cached state image.
  # 1 day is approximately 1152 blocks.
  CACHED_STATE_UPDATE_LIMIT: 576
|
jobs:
  # set up the test, if it doesn't use any cached state
  # each test runs one of the *-with/without-cached-state job series, and skips the other
  setup-without-cached-state:
    name: Setup ${{ inputs.test_id }} test
    if: ${{ !inputs.needs_zebra_state }}
    runs-on: zfnd-runners
    permissions:
      contents: 'read'
      id-token: 'write'
    steps:
      - uses: actions/checkout@v4.0.0
        with:
          persist-credentials: false
          fetch-depth: '2'

      - uses: r7kamura/rust-problem-matchers@v1.4.0

      - name: Inject slug/short variables
        uses: rlespinasse/github-slug-action@v4
        with:
          short-length: 7

      # Makes the Zcash network name lowercase.
      #
      # Labels in GCP are required to be in lowercase, but the blockchain network
      # uses sentence case, so we need to downcase ${{ inputs.network }}.
      #
      # Passes ${{ inputs.network }} to subsequent steps using $NETWORK env variable.
      - name: Downcase network name for labels
        run: |
          NETWORK_CAPS="${{ inputs.network }}"
          echo "NETWORK=${NETWORK_CAPS,,}" >> "$GITHUB_ENV"

      # Setup gcloud CLI
      - name: Authenticate to Google Cloud
        id: auth
        uses: google-github-actions/auth@v1.1.1
        with:
          retries: '3'
          workload_identity_provider: '${{ vars.GCP_WIF }}'
          service_account: '${{ vars.GCP_DEPLOYMENTS_SA }}'

      - name: Set up Cloud SDK
        uses: google-github-actions/setup-gcloud@v1.1.1

      # Create a Compute Engine virtual machine
      - name: Create ${{ inputs.test_id }} GCP compute instance
        id: create-instance
        run: |
          gcloud compute instances create-with-container "${{ inputs.test_id }}-${{ env.GITHUB_REF_SLUG_URL }}-${{ env.GITHUB_SHA_SHORT }}" \
          --boot-disk-size 300GB \
          --boot-disk-type pd-ssd \
          --image-project=cos-cloud \
          --image-family=cos-stable \
          --create-disk=name="${{ inputs.test_id }}-${{ env.GITHUB_SHA_SHORT }}",device-name="${{ inputs.test_id }}-${{ env.GITHUB_SHA_SHORT }}",size=300GB,type=pd-ssd \
          --container-image=gcr.io/google-containers/busybox \
          --machine-type ${{ vars.GCP_LARGE_MACHINE }} \
          --network-interface=subnet=${{ vars.GCP_SUBNETWORK }} \
          --scopes cloud-platform \
          --metadata=google-monitoring-enabled=TRUE,google-logging-enabled=TRUE \
          --metadata-from-file=startup-script=.github/workflows/scripts/gcp-vm-startup-script.sh \
          --labels=app=${{ inputs.app_name }},environment=test,network=${NETWORK},github_ref=${{ env.GITHUB_REF_SLUG_URL }},test=${{ inputs.test_id }} \
          --tags ${{ inputs.app_name }} \
          --zone ${{ vars.GCP_ZONE }}
          sleep 60

      # Create a docker volume with the new disk we just created.
      #
      # SSH into the just created VM, and create a docker volume with the newly created disk.
      - name: Create ${{ inputs.test_id }} Docker volume
        run: |
          gcloud compute ssh ${{ inputs.test_id }}-${{ env.GITHUB_REF_SLUG_URL }}-${{ env.GITHUB_SHA_SHORT }} \
          --zone ${{ vars.GCP_ZONE }} \
          --ssh-flag="-o ServerAliveInterval=5" \
          --ssh-flag="-o ConnectionAttempts=20" \
          --ssh-flag="-o ConnectTimeout=5" \
          --command \
          "\
          sudo mkfs.ext4 -v /dev/sdb \
          && \
          sudo docker volume create --driver local --opt type=ext4 --opt device=/dev/sdb \
          ${{ inputs.test_id }}-${{ env.GITHUB_SHA_SHORT }} \
          "
|
# launch the test, if it doesn't use any cached state
|
|
launch-without-cached-state:
|
|
name: Launch ${{ inputs.test_id }} test
|
|
needs: [ setup-without-cached-state ]
|
|
# If creating the Google Cloud instance fails, we don't want to launch another docker instance.
|
|
if: ${{ !cancelled() && !failure() && !inputs.needs_zebra_state }}
|
|
runs-on: zfnd-runners
|
|
permissions:
|
|
contents: 'read'
|
|
id-token: 'write'
|
|
steps:
|
|
- uses: actions/checkout@v4.0.0
|
|
with:
|
|
persist-credentials: false
|
|
fetch-depth: '2'
|
|
- uses: r7kamura/rust-problem-matchers@v1.4.0
|
|
|
|
- name: Inject slug/short variables
|
|
uses: rlespinasse/github-slug-action@v4
|
|
with:
|
|
short-length: 7
|
|
|
|
# Setup gcloud CLI
|
|
- name: Authenticate to Google Cloud
|
|
id: auth
|
|
uses: google-github-actions/auth@v1.1.1
|
|
with:
|
|
retries: '3'
|
|
workload_identity_provider: '${{ vars.GCP_WIF }}'
|
|
service_account: '${{ vars.GCP_DEPLOYMENTS_SA }}'
|
|
|
|
- name: Set up Cloud SDK
|
|
uses: google-github-actions/setup-gcloud@v1.1.1
|
|
|
|
# Launch the test without any cached state
|
|
- name: Launch ${{ inputs.test_id }} test
|
|
run: |
|
|
gcloud compute ssh ${{ inputs.test_id }}-${{ env.GITHUB_REF_SLUG_URL }}-${{ env.GITHUB_SHA_SHORT }} \
|
|
--zone ${{ vars.GCP_ZONE }} \
|
|
--ssh-flag="-o ServerAliveInterval=5" \
|
|
--ssh-flag="-o ConnectionAttempts=20" \
|
|
--ssh-flag="-o ConnectTimeout=5" \
|
|
--command \
|
|
"\
|
|
sudo docker run \
|
|
--name ${{ inputs.test_id }} \
|
|
--tty \
|
|
--detach \
|
|
${{ inputs.test_variables }} \
|
|
--mount type=volume,src=${{ inputs.test_id }}-${{ env.GITHUB_SHA_SHORT }},dst=${{ inputs.root_state_path }}/${{ inputs.zebra_state_dir }} \
|
|
${{ vars.GAR_BASE }}/${{ vars.CI_IMAGE_NAME }}:sha-${{ env.GITHUB_SHA_SHORT }} \
|
|
"
|
|
|
|
|
|
# set up the test, if it uses cached state
|
|
# each test runs one of the *-with/without-cached-state job series, and skips the other
|
|
setup-with-cached-state:
|
|
name: Setup ${{ inputs.test_id }} test
|
|
if: ${{ inputs.needs_zebra_state }}
|
|
runs-on: zfnd-runners
|
|
outputs:
|
|
cached_disk_name: ${{ steps.get-disk-name.outputs.cached_disk_name }}
|
|
permissions:
|
|
contents: 'read'
|
|
id-token: 'write'
|
|
steps:
|
|
- uses: actions/checkout@v4.0.0
|
|
with:
|
|
persist-credentials: false
|
|
fetch-depth: '2'
|
|
- uses: r7kamura/rust-problem-matchers@v1.4.0
|
|
|
|
- name: Inject slug/short variables
|
|
uses: rlespinasse/github-slug-action@v4
|
|
with:
|
|
short-length: 7
|
|
|
|
- name: Downcase network name for disks and labels
|
|
run: |
|
|
NETWORK_CAPS="${{ inputs.network }}"
|
|
echo "NETWORK=${NETWORK_CAPS,,}" >> "$GITHUB_ENV"
|
|
|
|
# Setup gcloud CLI
|
|
- name: Authenticate to Google Cloud
|
|
id: auth
|
|
uses: google-github-actions/auth@v1.1.1
|
|
with:
|
|
retries: '3'
|
|
workload_identity_provider: '${{ vars.GCP_WIF }}'
|
|
service_account: '${{ vars.GCP_DEPLOYMENTS_SA }}'
|
|
|
|
- name: Set up Cloud SDK
|
|
uses: google-github-actions/setup-gcloud@v1.1.1
|
|
|
|
# Find a cached state disk for this job, matching all of:
|
|
# - disk cached state (lwd_state_dir/zebra_state_dir or disk_prefix) - zebrad-cache or lwd-cache
|
|
# - state version (from the source code) - v{N}
|
|
# - network (network) - mainnet or testnet
|
|
# - disk target height kind (disk_suffix) - checkpoint or tip
|
|
#
|
|
# If the test needs a lightwalletd state (needs_lwd_state) set the variable DISK_PREFIX accordingly
|
|
# - To ${{ inputs.lwd_state_dir }}" if needed
|
|
# - To ${{ inputs.zebra_state_dir || inputs.disk_prefix }} if not
|
|
#
|
|
# If there are multiple disks:
|
|
# - prefer images generated from the same commit, then
|
|
# - if prefer_main_cached_state is true, prefer images from the `main` branch, then
|
|
# - use any images from any other branch or commit.
|
|
# Within each of these categories:
|
|
# - prefer newer images to older images
|
|
#
|
|
# Passes the disk name to subsequent steps using $CACHED_DISK_NAME env variable
|
|
# Passes the state version to subsequent steps using $STATE_VERSION env variable
|
|
- name: Find ${{ inputs.test_id }} cached state disk
|
|
id: get-disk-name
|
|
run: |
|
|
LOCAL_STATE_VERSION=$(grep -oE "DATABASE_FORMAT_VERSION: .* [0-9]+" "$GITHUB_WORKSPACE/zebra-state/src/constants.rs" | grep -oE "[0-9]+" | tail -n1)
|
|
echo "STATE_VERSION: $LOCAL_STATE_VERSION"
|
|
|
|
if [[ "${{ inputs.needs_lwd_state }}" == "true" ]]; then
|
|
DISK_PREFIX=${{ inputs.lwd_state_dir }}
|
|
else
|
|
DISK_PREFIX=${{ inputs.zebra_state_dir || inputs.disk_prefix }}
|
|
fi
|
|
|
|
# Try to find an image generated from a previous step or run of this commit.
|
|
# Fields are listed in the "Create image from state disk" step.
|
|
#
|
|
# We don't want to match the full branch name here, because:
|
|
# - we want to ignore the different GITHUB_REFs across manually triggered jobs,
|
|
# pushed branches, and PRs,
|
|
# - previous commits might have been buggy,
|
|
# or they might have worked and hide bugs in this commit
|
|
# (we can't avoid this issue entirely, but we don't want to make it more likely), and
|
|
# - the branch name might have been shortened for the image.
|
|
#
|
|
# The probability of two matching short commit hashes within the same month is very low.
|
|
COMMIT_DISK_PREFIX="${DISK_PREFIX}-.+-${{ env.GITHUB_SHA_SHORT }}-v${LOCAL_STATE_VERSION}-${NETWORK}-${{ inputs.disk_suffix }}"
|
|
COMMIT_CACHED_DISK_NAME=$(gcloud compute images list --filter="status=READY AND name~${COMMIT_DISK_PREFIX}" --format="value(NAME)" --sort-by=~creationTimestamp --limit=1)
|
|
echo "${GITHUB_REF_SLUG_URL}-${{ env.GITHUB_SHA_SHORT }} Disk: $COMMIT_CACHED_DISK_NAME"
|
|
if [[ -n "$COMMIT_CACHED_DISK_NAME" ]]; then
|
|
echo "Description: $(gcloud compute images describe $COMMIT_CACHED_DISK_NAME --format='value(DESCRIPTION)')"
|
|
fi
|
|
|
|
# Try to find an image generated from the main branch
|
|
MAIN_CACHED_DISK_NAME=$(gcloud compute images list --filter="status=READY AND name~${DISK_PREFIX}-main-[0-9a-f]+-v${LOCAL_STATE_VERSION}-${NETWORK}-${{ inputs.disk_suffix }}" --format="value(NAME)" --sort-by=~creationTimestamp --limit=1)
|
|
echo "main Disk: $MAIN_CACHED_DISK_NAME"
|
|
if [[ -n "$MAIN_CACHED_DISK_NAME" ]]; then
|
|
echo "Description: $(gcloud compute images describe $MAIN_CACHED_DISK_NAME --format='value(DESCRIPTION)')"
|
|
fi
|
|
|
|
# Try to find an image generated from any other branch
|
|
ANY_CACHED_DISK_NAME=$(gcloud compute images list --filter="status=READY AND name~${DISK_PREFIX}-.+-[0-9a-f]+-v${LOCAL_STATE_VERSION}-${NETWORK}-${{ inputs.disk_suffix }}" --format="value(NAME)" --sort-by=~creationTimestamp --limit=1)
|
|
echo "any branch Disk: $ANY_CACHED_DISK_NAME"
|
|
if [[ -n "$ANY_CACHED_DISK_NAME" ]]; then
|
|
echo "Description: $(gcloud compute images describe $ANY_CACHED_DISK_NAME --format='value(DESCRIPTION)')"
|
|
fi
|
|
|
|
# Select a cached disk based on the job settings
|
|
CACHED_DISK_NAME="$COMMIT_CACHED_DISK_NAME"
|
|
if [[ -z "$CACHED_DISK_NAME" ]] && [[ "${{ inputs.prefer_main_cached_state }}" == "true" ]]; then
|
|
echo "Preferring main branch cached state to other branches..."
|
|
CACHED_DISK_NAME="$MAIN_CACHED_DISK_NAME"
|
|
fi
|
|
if [[ -z "$CACHED_DISK_NAME" ]]; then
|
|
CACHED_DISK_NAME="$ANY_CACHED_DISK_NAME"
|
|
fi
|
|
|
|
if [[ -z "$CACHED_DISK_NAME" ]]; then
|
|
echo "No cached state disk available"
|
|
echo "Expected ${COMMIT_DISK_PREFIX}"
|
|
echo "Also searched for cached disks from other branches"
|
|
echo "Cached state test jobs must depend on the cached state rebuild job"
|
|
exit 1
|
|
fi
|
|
|
|
echo "Selected Disk: $CACHED_DISK_NAME"
|
|
echo "cached_disk_name=$CACHED_DISK_NAME" >> "$GITHUB_OUTPUT"
|
|
|
|
echo "STATE_VERSION=$LOCAL_STATE_VERSION" >> "$GITHUB_ENV"
|
|
echo "CACHED_DISK_NAME=$CACHED_DISK_NAME" >> "$GITHUB_ENV"
|
|
|
|
# Create a Compute Engine virtual machine and attach a cached state disk using the
|
|
# $CACHED_DISK_NAME variable as the source image to populate the disk cached state
|
|
- name: Create ${{ inputs.test_id }} GCP compute instance
|
|
id: create-instance
|
|
run: |
|
|
gcloud compute instances create-with-container "${{ inputs.test_id }}-${{ env.GITHUB_REF_SLUG_URL }}-${{ env.GITHUB_SHA_SHORT }}" \
|
|
--boot-disk-size 300GB \
|
|
--boot-disk-type pd-ssd \
|
|
--image-project=cos-cloud \
|
|
--image-family=cos-stable \
|
|
--create-disk=image=${{ env.CACHED_DISK_NAME }},name="${{ inputs.test_id }}-${{ env.GITHUB_SHA_SHORT }}",device-name="${{ inputs.test_id }}-${{ env.GITHUB_SHA_SHORT }}",size=300GB,type=pd-ssd \
|
|
--container-image=gcr.io/google-containers/busybox \
|
|
--machine-type ${{ vars.GCP_LARGE_MACHINE }} \
|
|
--network-interface=subnet=${{ vars.GCP_SUBNETWORK }} \
|
|
--scopes cloud-platform \
|
|
--metadata=google-monitoring-enabled=TRUE,google-logging-enabled=TRUE \
|
|
--metadata-from-file=startup-script=.github/workflows/scripts/gcp-vm-startup-script.sh \
|
|
--labels=app=${{ inputs.app_name }},environment=test,network=${NETWORK},github_ref=${{ env.GITHUB_REF_SLUG_URL }},test=${{ inputs.test_id }} \
|
|
--tags ${{ inputs.app_name }} \
|
|
--zone ${{ vars.GCP_ZONE }}
|
|
sleep 60
|
|
|
|
# Create a docker volume with the selected cached state.
|
|
#
|
|
# SSH into the just created VM and create a docker volume with the recently attached disk.
|
|
# (The cached state and disk are usually the same size,
|
|
# but the cached state can be smaller if we just increased the disk size.)
|
|
- name: Create ${{ inputs.test_id }} Docker volume
|
|
run: |
|
|
gcloud compute ssh ${{ inputs.test_id }}-${{ env.GITHUB_REF_SLUG_URL }}-${{ env.GITHUB_SHA_SHORT }} \
|
|
--zone ${{ vars.GCP_ZONE }} \
|
|
--ssh-flag="-o ServerAliveInterval=5" \
|
|
--ssh-flag="-o ConnectionAttempts=20" \
|
|
--ssh-flag="-o ConnectTimeout=5" \
|
|
--command \
|
|
"\
|
|
sudo docker volume create --driver local --opt type=ext4 --opt device=/dev/sdb \
|
|
${{ inputs.test_id }}-${{ env.GITHUB_SHA_SHORT }} \
|
|
"
|
|
|
|
# launch the test, if it uses cached state
|
|
launch-with-cached-state:
|
|
name: Launch ${{ inputs.test_id }} test
|
|
needs: [ setup-with-cached-state ]
|
|
# If creating the Google Cloud instance fails, we don't want to launch another docker instance.
|
|
if: ${{ !cancelled() && !failure() && inputs.needs_zebra_state }}
|
|
runs-on: zfnd-runners
|
|
permissions:
|
|
contents: 'read'
|
|
id-token: 'write'
|
|
steps:
|
|
- uses: actions/checkout@v4.0.0
|
|
with:
|
|
persist-credentials: false
|
|
fetch-depth: '2'
|
|
- uses: r7kamura/rust-problem-matchers@v1.4.0
|
|
|
|
- name: Inject slug/short variables
|
|
uses: rlespinasse/github-slug-action@v4
|
|
with:
|
|
short-length: 7
|
|
|
|
# Setup gcloud CLI
|
|
- name: Authenticate to Google Cloud
|
|
id: auth
|
|
uses: google-github-actions/auth@v1.1.1
|
|
with:
|
|
retries: '3'
|
|
workload_identity_provider: '${{ vars.GCP_WIF }}'
|
|
service_account: '${{ vars.GCP_DEPLOYMENTS_SA }}'
|
|
|
|
- name: Set up Cloud SDK
|
|
uses: google-github-actions/setup-gcloud@v1.1.1
|
|
|
|
# Launch the test with the previously created Zebra-only cached state.
|
|
# Each test runs one of the "Launch test" steps, and skips the other.
|
|
#
|
|
# SSH into the just created VM, and create a Docker container to run the incoming test
|
|
# from ${{ inputs.test_id }}, then mount the sudo docker volume created in the previous job.
|
|
#
|
|
# The disk mounted in the VM is located at /dev/sdb, we mount the root `/` of this disk to the docker
|
|
# container in one path:
|
|
# - /var/cache/zebrad-cache -> ${{ inputs.root_state_path }}/${{ inputs.zebra_state_dir }} -> $ZEBRA_CACHED_STATE_DIR
|
|
#
|
|
# This path must match the variable used by the tests in Rust, which are also set in
|
|
# `continous-integration-docker.yml` to be able to run this tests.
|
|
#
|
|
# Although we're mounting the disk root, Zebra will only respect the values from
|
|
# $ZEBRA_CACHED_STATE_DIR. The inputs like ${{ inputs.zebra_state_dir }} are only used
|
|
# to match that variable paths.
|
|
- name: Launch ${{ inputs.test_id }} test
|
|
# This step only runs for tests that just read or write a Zebra state.
|
|
#
|
|
# lightwalletd-full-sync reads Zebra and writes lwd, so it is handled specially.
|
|
# TODO: we should find a better logic for this use cases
|
|
if: ${{ (inputs.needs_zebra_state && !inputs.needs_lwd_state) && inputs.test_id != 'lwd-full-sync' }}
|
|
run: |
|
|
gcloud compute ssh ${{ inputs.test_id }}-${{ env.GITHUB_REF_SLUG_URL }}-${{ env.GITHUB_SHA_SHORT }} \
|
|
--zone ${{ vars.GCP_ZONE }} \
|
|
--ssh-flag="-o ServerAliveInterval=5" \
|
|
--ssh-flag="-o ConnectionAttempts=20" \
|
|
--ssh-flag="-o ConnectTimeout=5" \
|
|
--command \
|
|
"\
|
|
sudo docker run \
|
|
--name ${{ inputs.test_id }} \
|
|
--tty \
|
|
--detach \
|
|
${{ inputs.test_variables }} \
|
|
--mount type=volume,src=${{ inputs.test_id }}-${{ env.GITHUB_SHA_SHORT }},dst=${{ inputs.root_state_path }}/${{ inputs.zebra_state_dir }} \
|
|
${{ vars.GAR_BASE }}/${{ vars.CI_IMAGE_NAME }}:sha-${{ env.GITHUB_SHA_SHORT }} \
|
|
"
|
|
|
|
# Launch the test with the previously created Lightwalletd and Zebra cached state.
|
|
# Each test runs one of the "Launch test" steps, and skips the other.
|
|
#
|
|
# SSH into the just created VM, and create a Docker container to run the incoming test
|
|
# from ${{ inputs.test_id }}, then mount the sudo docker volume created in the previous job.
|
|
#
|
|
# In this step we're using the same disk for simplicity, as mounting multiple disks to the
|
|
# VM and to the container might require more steps in this workflow, and additional
|
|
# considerations.
|
|
#
|
|
# The disk mounted in the VM is located at /dev/sdb, we mount the root `/` of this disk to the docker
|
|
# container in two different paths:
|
|
# - /var/cache/zebrad-cache -> ${{ inputs.root_state_path }}/${{ inputs.zebra_state_dir }} -> $ZEBRA_CACHED_STATE_DIR
|
|
# - /var/cache/lwd-cache -> ${{ inputs.root_state_path }}/${{ inputs.lwd_state_dir }} -> $LIGHTWALLETD_DATA_DIR
|
|
#
|
|
# This doesn't cause any path conflicts, because Zebra and lightwalletd create different
|
|
# subdirectories for their data. (But Zebra, lightwalletd, and the test harness must not
|
|
# delete the whole cache directory.)
|
|
#
|
|
# This paths must match the variables used by the tests in Rust, which are also set in
|
|
# `continous-integration-docker.yml` to be able to run this tests.
|
|
#
|
|
# Although we're mounting the disk root to both directories, Zebra and Lightwalletd
|
|
# will only respect the values from $ZEBRA_CACHED_STATE_DIR and $LIGHTWALLETD_DATA_DIR,
|
|
# the inputs like ${{ inputs.lwd_state_dir }} are only used to match those variables paths.
|
|
- name: Launch ${{ inputs.test_id }} test
|
|
# This step only runs for tests that read or write Lightwalletd and Zebra states.
|
|
#
|
|
# lightwalletd-full-sync reads Zebra and writes lwd, so it is handled specially.
|
|
# TODO: we should find a better logic for this use cases
|
|
if: ${{ (inputs.needs_zebra_state && inputs.needs_lwd_state) || inputs.test_id == 'lwd-full-sync' }}
|
|
run: |
|
|
gcloud compute ssh ${{ inputs.test_id }}-${{ env.GITHUB_REF_SLUG_URL }}-${{ env.GITHUB_SHA_SHORT }} \
|
|
--zone ${{ vars.GCP_ZONE }} \
|
|
--ssh-flag="-o ServerAliveInterval=5" \
|
|
--ssh-flag="-o ConnectionAttempts=20" \
|
|
--ssh-flag="-o ConnectTimeout=5" \
|
|
--command \
|
|
"\
|
|
sudo docker run \
|
|
--name ${{ inputs.test_id }} \
|
|
--tty \
|
|
--detach \
|
|
${{ inputs.test_variables }} \
|
|
--mount type=volume,src=${{ inputs.test_id }}-${{ env.GITHUB_SHA_SHORT }},dst=${{ inputs.root_state_path }}/${{ inputs.zebra_state_dir }} \
|
|
--mount type=volume,src=${{ inputs.test_id }}-${{ env.GITHUB_SHA_SHORT }},dst=${{ inputs.root_state_path }}/${{ inputs.lwd_state_dir }} \
|
|
${{ vars.GAR_BASE }}/${{ vars.CI_IMAGE_NAME }}:sha-${{ env.GITHUB_SHA_SHORT }} \
|
|
"
|
|
|
|
|
|
# check the logs of the test we just launched for zebrad startup messages
|
|
#
|
|
# this step makes sure `zebrad` is running, and configured for `inputs.network`.
|
|
logs-startup:
|
|
name: Check startup for ${{ inputs.test_id }}
|
|
# We run exactly one of without-cached-state or with-cached-state, and we always skip the other one.
|
|
needs: [ launch-with-cached-state, launch-without-cached-state ]
|
|
# If the previous job fails, we still want to show the logs.
|
|
if: ${{ !cancelled() }}
|
|
runs-on: ubuntu-latest
|
|
permissions:
|
|
contents: 'read'
|
|
id-token: 'write'
|
|
steps:
|
|
- uses: actions/checkout@v4.0.0
|
|
with:
|
|
persist-credentials: false
|
|
fetch-depth: '2'
|
|
|
|
- name: Inject slug/short variables
|
|
uses: rlespinasse/github-slug-action@v4
|
|
with:
|
|
short-length: 7
|
|
|
|
# Setup gcloud CLI
|
|
- name: Authenticate to Google Cloud
|
|
id: auth
|
|
uses: google-github-actions/auth@v1.1.1
|
|
with:
|
|
retries: '3'
|
|
workload_identity_provider: '${{ vars.GCP_WIF }}'
|
|
service_account: '${{ vars.GCP_DEPLOYMENTS_SA }}'
|
|
|
|
- name: Set up Cloud SDK
|
|
uses: google-github-actions/setup-gcloud@v1.1.1
|
|
|
|
# Show all the logs since the container launched,
|
|
# following until we see zebrad startup messages.
|
|
#
|
|
# This check limits the number of log lines, so tests running on the wrong network don't
|
|
# run until the job timeout. If Zebra does a complete recompile, there are a few hundred log
|
|
# lines before the startup logs. So that's what we use here.
|
|
#
|
|
# The log pipeline ignores the exit status of `docker logs`.
|
|
# It also ignores the expected 'broken pipe' error from `tee`,
|
|
# which happens when `grep` finds a matching output and moves on to the next job.
|
|
#
|
|
# Errors in the tests are caught by the final test status job.
|
|
- name: Check startup logs for ${{ inputs.test_id }}
|
|
run: |
|
|
gcloud compute ssh ${{ inputs.test_id }}-${{ env.GITHUB_REF_SLUG_URL }}-${{ env.GITHUB_SHA_SHORT }} \
|
|
--zone ${{ vars.GCP_ZONE }} \
|
|
--ssh-flag="-o ServerAliveInterval=5" \
|
|
--ssh-flag="-o ConnectionAttempts=20" \
|
|
--ssh-flag="-o ConnectTimeout=5" \
|
|
--command \
|
|
"\
|
|
sudo docker logs \
|
|
--tail all \
|
|
--follow \
|
|
${{ inputs.test_id }} | \
|
|
head -700 | \
|
|
tee --output-error=exit /dev/stderr | \
|
|
grep --max-count=1 --extended-regexp --color=always \
|
|
-e 'Zcash network: ${{ inputs.network }}' \
|
|
"
|
|
|
|
# follow the logs of the test we just launched, up to Canopy activation (or the test finishing)
|
|
#
|
|
# If `inputs.is_long_test` is `false`, this job is skipped.
|
|
logs-heartwood:
|
|
name: Log ${{ inputs.test_id }} test (heartwood)
|
|
needs: [ logs-startup ]
|
|
# If the previous job fails, we still want to show the logs.
|
|
if: ${{ !cancelled() && inputs.is_long_test }}
|
|
runs-on: ubuntu-latest
|
|
permissions:
|
|
contents: 'read'
|
|
id-token: 'write'
|
|
steps:
|
|
- uses: actions/checkout@v4.0.0
|
|
with:
|
|
persist-credentials: false
|
|
fetch-depth: '2'
|
|
|
|
- name: Inject slug/short variables
|
|
uses: rlespinasse/github-slug-action@v4
|
|
with:
|
|
short-length: 7
|
|
|
|
# Setup gcloud CLI
|
|
- name: Authenticate to Google Cloud
|
|
id: auth
|
|
uses: google-github-actions/auth@v1.1.1
|
|
with:
|
|
retries: '3'
|
|
workload_identity_provider: '${{ vars.GCP_WIF }}'
|
|
service_account: '${{ vars.GCP_DEPLOYMENTS_SA }}'
|
|
|
|
- name: Set up Cloud SDK
|
|
uses: google-github-actions/setup-gcloud@v1.1.1
|
|
|
|
# Show all the logs since the container launched,
|
|
# following until Canopy activation (or the test finishes)
|
|
- name: Show logs for ${{ inputs.test_id }} test (heartwood)
|
|
run: |
|
|
gcloud compute ssh ${{ inputs.test_id }}-${{ env.GITHUB_REF_SLUG_URL }}-${{ env.GITHUB_SHA_SHORT }} \
|
|
--zone ${{ vars.GCP_ZONE }} \
|
|
--ssh-flag="-o ServerAliveInterval=5" \
|
|
--ssh-flag="-o ConnectionAttempts=20" \
|
|
--ssh-flag="-o ConnectTimeout=5" \
|
|
--command \
|
|
"\
|
|
sudo docker logs \
|
|
--tail all \
|
|
--follow \
|
|
${{ inputs.test_id }} | \
|
|
tee --output-error=exit /dev/stderr | \
|
|
grep --max-count=1 --extended-regexp --color=always \
|
|
-e 'estimated progress.*network_upgrade.*=.*Canopy' \
|
|
-e 'estimated progress.*network_upgrade.*=.*Nu5' \
|
|
-e 'test result:.*finished in' \
|
|
"
|
|
|
|
# follow the logs of the test we just launched, up to NU5 activation (or the test finishing)
|
|
logs-canopy:
|
|
name: Log ${{ inputs.test_id }} test (canopy)
|
|
needs: [ logs-heartwood ]
|
|
# If the previous job fails, we still want to show the logs.
|
|
if: ${{ !cancelled() && inputs.is_long_test }}
|
|
runs-on: ubuntu-latest
|
|
permissions:
|
|
contents: 'read'
|
|
id-token: 'write'
|
|
steps:
|
|
- uses: actions/checkout@v4.0.0
|
|
with:
|
|
persist-credentials: false
|
|
fetch-depth: '2'
|
|
|
|
- name: Inject slug/short variables
|
|
uses: rlespinasse/github-slug-action@v4
|
|
with:
|
|
short-length: 7
|
|
|
|
# Setup gcloud CLI
|
|
- name: Authenticate to Google Cloud
|
|
id: auth
|
|
uses: google-github-actions/auth@v1.1.1
|
|
with:
|
|
retries: '3'
|
|
workload_identity_provider: '${{ vars.GCP_WIF }}'
|
|
service_account: '${{ vars.GCP_DEPLOYMENTS_SA }}'
|
|
|
|
- name: Set up Cloud SDK
|
|
uses: google-github-actions/setup-gcloud@v1.1.1
|
|
|
|
# Show recent logs, following until NU5 activation (or the test finishes)
|
|
- name: Show logs for ${{ inputs.test_id }} test (canopy)
|
|
run: |
|
|
gcloud compute ssh ${{ inputs.test_id }}-${{ env.GITHUB_REF_SLUG_URL }}-${{ env.GITHUB_SHA_SHORT }} \
|
|
--zone ${{ vars.GCP_ZONE }} \
|
|
--ssh-flag="-o ServerAliveInterval=5" \
|
|
--ssh-flag="-o ConnectionAttempts=20" \
|
|
--ssh-flag="-o ConnectTimeout=5" \
|
|
--command \
|
|
"\
|
|
sudo docker logs \
|
|
--tail all \
|
|
--follow \
|
|
${{ inputs.test_id }} | \
|
|
tee --output-error=exit /dev/stderr | \
|
|
grep --max-count=1 --extended-regexp --color=always \
|
|
-e 'estimated progress.*network_upgrade.*=.*Nu5' \
|
|
-e 'test result:.*finished in' \
|
|
"
|
|
|
|
# follow the logs of the test we just launched, up to the last checkpoint, or the test finishing,
|
|
# or for lightwalletd tests, about 5 hours into the full lightwalletd sync (block 1880k)
|
|
logs-checkpoint:
|
|
name: Log ${{ inputs.test_id }} test (checkpoint)
|
|
needs: [ logs-canopy ]
|
|
# If the previous job fails, we still want to show the logs.
|
|
if: ${{ !cancelled() && inputs.is_long_test }}
|
|
runs-on: zfnd-runners
|
|
permissions:
|
|
contents: 'read'
|
|
id-token: 'write'
|
|
steps:
|
|
- uses: actions/checkout@v4.0.0
|
|
with:
|
|
persist-credentials: false
|
|
fetch-depth: '2'
|
|
|
|
- name: Inject slug/short variables
|
|
uses: rlespinasse/github-slug-action@v4
|
|
with:
|
|
short-length: 7
|
|
|
|
# Setup gcloud CLI
|
|
- name: Authenticate to Google Cloud
|
|
id: auth
|
|
uses: google-github-actions/auth@v1.1.1
|
|
with:
|
|
retries: '3'
|
|
workload_identity_provider: '${{ vars.GCP_WIF }}'
|
|
service_account: '${{ vars.GCP_DEPLOYMENTS_SA }}'
|
|
|
|
- name: Set up Cloud SDK
|
|
uses: google-github-actions/setup-gcloud@v1.1.1
|
|
|
|
# Show recent logs, following until the last checkpoint, or the test finishes, or 5 hours of lightwalletd sync (1880k)
|
|
#
|
|
# TODO: when doing obtain/extend tips, log the verifier in use, and check for full verification here
|
|
- name: Show logs for ${{ inputs.test_id }} test (checkpoint)
|
|
run: |
|
|
gcloud compute ssh ${{ inputs.test_id }}-${{ env.GITHUB_REF_SLUG_URL }}-${{ env.GITHUB_SHA_SHORT }} \
|
|
--zone ${{ vars.GCP_ZONE }} \
|
|
--ssh-flag="-o ServerAliveInterval=5" \
|
|
--ssh-flag="-o ConnectionAttempts=20" \
|
|
--ssh-flag="-o ConnectTimeout=5" \
|
|
--command \
|
|
"\
|
|
sudo docker logs \
|
|
--tail ${{ env.EXTRA_LOG_LINES }} \
|
|
--follow \
|
|
${{ inputs.test_id }} | \
|
|
tee --output-error=exit /dev/stderr | \
|
|
grep --max-count=1 --extended-regexp --color=always \
|
|
-e 'verified final checkpoint' \
|
|
-e 'lightwalletd.*Adding block to cache 18[8-9][0-9][0-9][0-9][0-9]' \
|
|
-e 'lightwalletd.*Adding block to cache 19[0-9][0-9][0-9][0-9][0-9]' \
|
|
-e 'lightwalletd.*Adding block to cache [2-9][0-9][0-9][0-9][0-9][0-9][0-9]' \
|
|
-e 'test result:.*finished in' \
|
|
"
|
|
|
|
|
|
# Show all the test logs, then follow the logs of the test we just launched, until it finishes.
|
|
# Then check the result of the test.
|
|
#
|
|
# If `inputs.is_long_test` is `false`, the Rust test harness mostly runs in this job.
|
|
# Otherwise, it mostly runs in the "logs" jobs.
|
|
test-result:
|
|
name: Run ${{ inputs.test_id }} test
|
|
needs: [ logs-checkpoint ]
|
|
# If the previous job fails, we also want to run and fail this job,
|
|
# so that the branch protection rule fails in Mergify and GitHub.
|
|
if: ${{ !cancelled() }}
|
|
runs-on: ubuntu-latest
|
|
permissions:
|
|
contents: 'read'
|
|
id-token: 'write'
|
|
steps:
|
|
- uses: actions/checkout@v4.0.0
|
|
with:
|
|
persist-credentials: false
|
|
fetch-depth: '2'
|
|
|
|
- name: Inject slug/short variables
|
|
uses: rlespinasse/github-slug-action@v4
|
|
with:
|
|
short-length: 7
|
|
|
|
# Setup gcloud CLI
|
|
- name: Authenticate to Google Cloud
|
|
id: auth
|
|
uses: google-github-actions/auth@v1.1.1
|
|
with:
|
|
retries: '3'
|
|
workload_identity_provider: '${{ vars.GCP_WIF }}'
|
|
service_account: '${{ vars.GCP_DEPLOYMENTS_SA }}'
|
|
|
|
- name: Set up Cloud SDK
|
|
uses: google-github-actions/setup-gcloud@v1.1.1
|
|
|
|
# Check that the container executed at least 1 Rust test harness test, and that all tests passed.
|
|
# Then wait for the container to finish, and exit with the test's exit status.
|
|
# Also shows all the test logs.
|
|
#
|
|
# If the container has already finished, `docker wait` should return its status.
|
|
# But sometimes this doesn't work, so we use `docker inspect` as a fallback.
|
|
#
|
|
# `docker wait` prints the container exit status as a string, but we need to exit the `ssh` command
|
|
# with that status.
|
|
# (`docker wait` can also wait for multiple containers, but we only ever wait for a single container.)
|
|
- name: Result of ${{ inputs.test_id }} test
|
|
run: |
|
|
gcloud compute ssh ${{ inputs.test_id }}-${{ env.GITHUB_REF_SLUG_URL }}-${{ env.GITHUB_SHA_SHORT }} \
|
|
--zone ${{ vars.GCP_ZONE }} \
|
|
--ssh-flag="-o ServerAliveInterval=5" \
|
|
--ssh-flag="-o ConnectionAttempts=20" \
|
|
--ssh-flag="-o ConnectTimeout=5" \
|
|
--command=' \
|
|
set -e;
|
|
sudo docker logs \
|
|
--tail all \
|
|
--follow \
|
|
${{ inputs.test_id }} | \
|
|
tee --output-error=exit /dev/stderr | \
|
|
grep --max-count=1 --extended-regexp --color=always \
|
|
"test result: .*ok.* [1-9][0-9]* passed.*finished in"; \
|
|
EXIT_STATUS=$( \
|
|
sudo docker wait ${{ inputs.test_id }} || \
|
|
sudo docker inspect --format "{{.State.ExitCode}}" ${{ inputs.test_id }} || \
|
|
echo "missing container, or missing exit status for container" \
|
|
); \
|
|
echo "sudo docker exit status: $EXIT_STATUS"; \
|
|
exit "$EXIT_STATUS" \
|
|
'
|
|
|
|
|
|
# create a state image from the instance's state disk, if requested by the caller
|
|
create-state-image:
|
|
name: Create ${{ inputs.test_id }} cached state image
|
|
runs-on: ubuntu-latest
|
|
needs: [ test-result, setup-with-cached-state ]
|
|
# We run exactly one of without-cached-state or with-cached-state, and we always skip the other one.
|
|
# Normally, if a job is skipped, all the jobs that depend on it are also skipped.
|
|
# So we need to override the default success() check to make this job run.
|
|
if: ${{ !cancelled() && !failure() && (inputs.saves_to_disk || inputs.force_save_to_disk) }}
|
|
permissions:
|
|
contents: 'read'
|
|
id-token: 'write'
|
|
steps:
|
|
- uses: actions/checkout@v4.0.0
|
|
with:
|
|
persist-credentials: false
|
|
fetch-depth: '2'
|
|
- uses: r7kamura/rust-problem-matchers@v1.4.0
|
|
|
|
- name: Inject slug/short variables
|
|
uses: rlespinasse/github-slug-action@v4
|
|
with:
|
|
short-length: 7
|
|
|
|
# Performs formatting on disk name components.
|
|
#
|
|
# Disk images in GCP are required to be in lowercase, but the blockchain network
|
|
# uses sentence case, so we need to downcase ${{ inputs.network }}.
|
|
#
|
|
# Disk image names in GCP are limited to 63 characters, so we need to limit
|
|
# branch names to 12 characters.
|
|
#
|
|
# Passes ${{ inputs.network }} to subsequent steps using $NETWORK env variable.
|
|
# Passes ${{ env.GITHUB_REF_SLUG_URL }} to subsequent steps using $SHORT_GITHUB_REF env variable.
|
|
- name: Format network name and branch name for disks
|
|
run: |
|
|
NETWORK_CAPS="${{ inputs.network }}"
|
|
echo "NETWORK=${NETWORK_CAPS,,}" >> "$GITHUB_ENV"
|
|
LONG_GITHUB_REF="${{ env.GITHUB_REF_SLUG_URL }}"
|
|
echo "SHORT_GITHUB_REF=${LONG_GITHUB_REF:0:12}" >> "$GITHUB_ENV"
|
|
|
|
# Setup gcloud CLI
|
|
- name: Authenticate to Google Cloud
|
|
id: auth
|
|
uses: google-github-actions/auth@v1.1.1
|
|
with:
|
|
workload_identity_provider: '${{ vars.GCP_WIF }}'
|
|
service_account: '${{ vars.GCP_DEPLOYMENTS_SA }}'
|
|
|
|
- name: Set up Cloud SDK
|
|
uses: google-github-actions/setup-gcloud@v1.1.1
|
|
|
|
# Get the state version from the local constants.rs file to be used in the image creation,
|
|
# as the state version is part of the disk image name.
|
|
#
|
|
# Passes the state version to subsequent steps using $STATE_VERSION env variable
|
|
- name: Get state version from constants.rs
|
|
run: |
|
|
LOCAL_STATE_VERSION=$(grep -oE "DATABASE_FORMAT_VERSION: .* [0-9]+" $GITHUB_WORKSPACE/zebra-state/src/constants.rs | grep -oE "[0-9]+" | tail -n1)
|
|
echo "STATE_VERSION: $LOCAL_STATE_VERSION"
|
|
|
|
echo "STATE_VERSION=$LOCAL_STATE_VERSION" >> "$GITHUB_ENV"
|
|
|
|
# Sets the $UPDATE_SUFFIX env var to "-u" if updating a previous cached state,
|
|
# and the empty string otherwise.
|
|
#
|
|
# Also sets a unique date and time suffix $TIME_SUFFIX.
|
|
- name: Set update and time suffixes
|
|
run: |
|
|
UPDATE_SUFFIX=""
|
|
|
|
if [[ "${{ inputs.needs_zebra_state }}" == "true" ]] && [[ "${{ inputs.app_name }}" == "zebrad" ]]; then
|
|
UPDATE_SUFFIX="-u"
|
|
fi
|
|
|
|
# TODO: find a better logic for the lwd-full-sync case
|
|
if [[ "${{ inputs.needs_lwd_state }}" == "true" ]] && [[ "${{ inputs.app_name }}" == "lightwalletd" ]] && [[ "${{ inputs.test_id }}" != 'lwd-full-sync' ]]; then
|
|
UPDATE_SUFFIX="-u"
|
|
fi
|
|
|
|
# We're going to delete old images after a few days, so we only need the time here
|
|
TIME_SUFFIX=$(date '+%H%M%S' --utc)
|
|
|
|
echo "UPDATE_SUFFIX=$UPDATE_SUFFIX" >> "$GITHUB_ENV"
|
|
echo "TIME_SUFFIX=$TIME_SUFFIX" >> "$GITHUB_ENV"
|
|
|
|
# Get the full initial and running database versions from the test logs.
|
|
# These versions are used as part of the disk description and labels.
|
|
#
|
|
# If these versions are missing from the logs, the job fails.
|
|
#
|
|
# Typically, the database versions are around line 20 in the logs..
|
|
# But we check the first 1000 log lines, just in case the test harness recompiles all the
|
|
# dependencies before running the test. (This can happen if the cache is invalid.)
|
|
#
|
|
# Passes the versions to subsequent steps using the $INITIAL_DISK_DB_VERSION,
|
|
# $RUNNING_DB_VERSION, and $DB_VERSION_SUMMARY env variables.
|
|
- name: Get database versions from logs
|
|
run: |
|
|
INITIAL_DISK_DB_VERSION=""
|
|
RUNNING_DB_VERSION=""
|
|
DB_VERSION_SUMMARY=""
|
|
|
|
DOCKER_LOGS=$( \
|
|
gcloud compute ssh ${{ inputs.test_id }}-${{ env.GITHUB_REF_SLUG_URL }}-${{ env.GITHUB_SHA_SHORT }} \
|
|
--zone ${{ vars.GCP_ZONE }} \
|
|
--ssh-flag="-o ServerAliveInterval=5" \
|
|
--ssh-flag="-o ConnectionAttempts=20" \
|
|
--ssh-flag="-o ConnectTimeout=5" \
|
|
--command=" \
|
|
sudo docker logs ${{ inputs.test_id }} | head -1000 \
|
|
")
|
|
|
|
# either a semantic version or "creating new database"
|
|
INITIAL_DISK_DB_VERSION=$( \
|
|
echo "$DOCKER_LOGS" | \
|
|
grep --extended-regexp --only-matching 'initial disk state version: [0-9a-z\.]+' | \
|
|
grep --extended-regexp --only-matching '[0-9a-z\.]+' | \
|
|
tail -1 || \
|
|
[[ $? == 1 ]] \
|
|
)
|
|
|
|
if [[ -z "$INITIAL_DISK_DB_VERSION" ]]; then
|
|
echo "Checked logs:"
|
|
echo ""
|
|
echo "$DOCKER_LOGS"
|
|
echo ""
|
|
echo "Missing initial disk database version in logs: $INITIAL_DISK_DB_VERSION"
|
|
# Fail the tests, because Zebra didn't log the initial disk database version,
|
|
# or the regex in this step is wrong.
|
|
false
|
|
fi
|
|
|
|
if [[ "$INITIAL_DISK_DB_VERSION" = "creating.new.database" ]]; then
|
|
INITIAL_DISK_DB_VERSION="new"
|
|
else
|
|
INITIAL_DISK_DB_VERSION="v${INITIAL_DISK_DB_VERSION//./-}"
|
|
fi
|
|
|
|
echo "Found initial disk database version in logs: $INITIAL_DISK_DB_VERSION"
|
|
echo "INITIAL_DISK_DB_VERSION=$INITIAL_DISK_DB_VERSION" >> "$GITHUB_ENV"
|
|
|
|
RUNNING_DB_VERSION=$( \
|
|
echo "$DOCKER_LOGS" | \
|
|
grep --extended-regexp --only-matching 'running state version: [0-9\.]+' | \
|
|
grep --extended-regexp --only-matching '[0-9\.]+' | \
|
|
tail -1 || \
|
|
[[ $? == 1 ]] \
|
|
)
|
|
|
|
if [[ -z "$RUNNING_DB_VERSION" ]]; then
|
|
echo "Checked logs:"
|
|
echo ""
|
|
echo "$DOCKER_LOGS"
|
|
echo ""
|
|
echo "Missing running database version in logs: $RUNNING_DB_VERSION"
|
|
# Fail the tests, because Zebra didn't log the running database version,
|
|
# or the regex in this step is wrong.
|
|
false
|
|
fi
|
|
|
|
RUNNING_DB_VERSION="v${RUNNING_DB_VERSION//./-}"
|
|
echo "Found running database version in logs: $RUNNING_DB_VERSION"
|
|
echo "RUNNING_DB_VERSION=$RUNNING_DB_VERSION" >> "$GITHUB_ENV"
|
|
|
|
if [[ "$INITIAL_DISK_DB_VERSION" = "$RUNNING_DB_VERSION" ]]; then
|
|
DB_VERSION_SUMMARY="$RUNNING_DB_VERSION"
|
|
elif [[ "$INITIAL_DISK_DB_VERSION" = "new" ]]; then
|
|
DB_VERSION_SUMMARY="$RUNNING_DB_VERSION in new database"
|
|
else
|
|
DB_VERSION_SUMMARY="$INITIAL_DISK_DB_VERSION changing to $RUNNING_DB_VERSION"
|
|
fi
|
|
|
|
echo "Summarised database versions from logs: $DB_VERSION_SUMMARY"
|
|
echo "DB_VERSION_SUMMARY=$DB_VERSION_SUMMARY" >> "$GITHUB_ENV"
|
|
|
|
# Get the sync height from the test logs, which is later used as part of the
|
|
# disk description and labels.
|
|
#
|
|
# The regex used to grep the sync height is provided by ${{ inputs.height_grep_text }},
|
|
# this allows to dynamically change the height as needed by different situations or
|
|
# based on the logs output from different tests.
|
|
#
|
|
# If the sync height is missing from the logs, the job fails.
|
|
#
|
|
# Passes the sync height to subsequent steps using the $SYNC_HEIGHT env variable.
|
|
- name: Get sync height from logs
|
|
run: |
|
|
SYNC_HEIGHT=""
|
|
|
|
DOCKER_LOGS=$( \
|
|
gcloud compute ssh ${{ inputs.test_id }}-${{ env.GITHUB_REF_SLUG_URL }}-${{ env.GITHUB_SHA_SHORT }} \
|
|
--zone ${{ vars.GCP_ZONE }} \
|
|
--ssh-flag="-o ServerAliveInterval=5" \
|
|
--ssh-flag="-o ConnectionAttempts=20" \
|
|
--ssh-flag="-o ConnectTimeout=5" \
|
|
--command=" \
|
|
sudo docker logs ${{ inputs.test_id }} --tail 200 \
|
|
")
|
|
|
|
SYNC_HEIGHT=$( \
|
|
echo "$DOCKER_LOGS" | \
|
|
grep --extended-regexp --only-matching '${{ inputs.height_grep_text }}[0-9]+' | \
|
|
grep --extended-regexp --only-matching '[0-9]+' | \
|
|
tail -1 || \
|
|
[[ $? == 1 ]] \
|
|
)
|
|
|
|
if [[ -z "$SYNC_HEIGHT" ]]; then
|
|
echo "Checked logs:"
|
|
echo ""
|
|
echo "$DOCKER_LOGS"
|
|
echo ""
|
|
echo "Missing sync height in logs: $SYNC_HEIGHT"
|
|
# Fail the tests, because Zebra and lightwalletd didn't log their sync heights,
|
|
# or the CI workflow sync height regex is wrong.
|
|
false
|
|
fi
|
|
|
|
echo "Found sync height in logs: $SYNC_HEIGHT"
|
|
echo "SYNC_HEIGHT=$SYNC_HEIGHT" >> "$GITHUB_ENV"
|
|
|
|
# Get the original cached state height from google cloud.
|
|
#
|
|
# If the height is missing from the image labels, uses zero instead.
|
|
#
|
|
# TODO: fail the job if needs_zebra_state but the height is missing
|
|
# we can make this change after all the old images have been deleted, this should happen around 15 September 2022
|
|
# we'll also need to do a manual checkpoint rebuild before opening the PR for this change
|
|
#
|
|
# Passes the original height to subsequent steps using $ORIGINAL_HEIGHT env variable.
|
|
- name: Get original cached state height from google cloud
|
|
run: |
|
|
ORIGINAL_HEIGHT="0"
|
|
|
|
if [[ -n "${{ format('{0}', needs.setup-with-cached-state.outputs.cached_disk_name) }}" ]]; then
|
|
ORIGINAL_HEIGHT=$(gcloud compute images list --filter="status=READY AND name=${{ needs.setup-with-cached-state.outputs.cached_disk_name }}" --format="value(labels.height)")
|
|
ORIGINAL_HEIGHT=${ORIGINAL_HEIGHT:-0}
|
|
echo "$CACHED_DISK_NAME height: $ORIGINAL_HEIGHT"
|
|
fi
|
|
|
|
echo "ORIGINAL_HEIGHT=$ORIGINAL_HEIGHT" >> "$GITHUB_ENV"
|
|
|
|
# Create an image from the state disk, which will be used for any tests that start
|
|
# after it is created. These tests can be in the same workflow, or in a different PR.
|
|
#
|
|
# Using the newest image makes future jobs faster, because it is closer to the chain tip.
|
|
#
|
|
# Skips creating updated images if the original image is less than $CACHED_STATE_UPDATE_LIMIT behind the current tip.
|
|
# Full sync images are always created.
|
|
#
|
|
# The image can contain:
|
|
# - Zebra cached state, or
|
|
# - Zebra + lightwalletd cached state.
|
|
# Which cached state is being saved to the disk is defined by ${{ inputs.disk_prefix }}.
|
|
#
|
|
# Google Cloud doesn't have an atomic image replacement operation.
|
|
# We don't want to delete and re-create the image, because that causes a ~5 minute
|
|
# window where might be no recent image. So we add an extra image with a unique name,
|
|
# which gets selected because it has a later creation time.
|
|
# This also simplifies the process of deleting old images,
|
|
# because we don't have to worry about accidentally deleting all the images.
|
|
#
|
|
# The timestamp makes images from the same commit unique,
|
|
# as long as they don't finish in the same second.
|
|
# (This is unlikely, because each image created by a workflow has a different name.)
|
|
#
|
|
# The image name must also be 63 characters or less.
|
|
#
|
|
# Force the image creation (--force) as the disk is still attached even though is not being
|
|
# used by the container.
|
|
- name: Create image from state disk
|
|
run: |
|
|
MINIMUM_UPDATE_HEIGHT=$((ORIGINAL_HEIGHT+CACHED_STATE_UPDATE_LIMIT))
|
|
if [[ -z "$UPDATE_SUFFIX" ]] || [[ "$SYNC_HEIGHT" -gt "$MINIMUM_UPDATE_HEIGHT" ]] || [[ "${{ inputs.force_save_to_disk }}" == "true" ]]; then
|
|
gcloud compute images create \
|
|
"${{ inputs.disk_prefix }}-${SHORT_GITHUB_REF}-${{ env.GITHUB_SHA_SHORT }}-v${{ env.STATE_VERSION }}-${NETWORK}-${{ inputs.disk_suffix }}${UPDATE_SUFFIX}-${TIME_SUFFIX}" \
|
|
--force \
|
|
--source-disk=${{ inputs.test_id }}-${{ env.GITHUB_SHA_SHORT }} \
|
|
--source-disk-zone=${{ vars.GCP_ZONE }} \
|
|
--storage-location=us \
|
|
--description="Created from commit ${{ env.GITHUB_SHA_SHORT }} with height ${{ env.SYNC_HEIGHT }} and database format ${{ env.DB_VERSION_SUMMARY }}" \
|
|
--labels="height=${{ env.SYNC_HEIGHT }},purpose=${{ inputs.disk_prefix }},commit=${{ env.GITHUB_SHA_SHORT }},state-version=${{ env.STATE_VERSION }},state-running-version=${RUNNING_DB_VERSION},initial-state-disk-version=${INITIAL_DISK_DB_VERSION},network=${NETWORK},target-height-kind=${{ inputs.disk_suffix }},update-flag=${UPDATE_SUFFIX},force-save=${{ inputs.force_save_to_disk }},updated-from-height=${ORIGINAL_HEIGHT},test-id=${{ inputs.test_id }},app-name=${{ inputs.app_name }}"
|
|
else
|
|
echo "Skipped cached state update because the new sync height $SYNC_HEIGHT was less than $CACHED_STATE_UPDATE_LIMIT blocks above the original height $ORIGINAL_HEIGHT"
|
|
fi
|
|
|
|
# delete the Google Cloud instance for this test
|
|
delete-instance:
|
|
name: Delete ${{ inputs.test_id }} instance
|
|
runs-on: ubuntu-latest
|
|
needs: [ create-state-image ]
|
|
# If a disk generation step timeouts (+6 hours) the previous job (creating the image) will be skipped.
|
|
# Even if the instance continues running, no image will be created, so it's better to delete it.
|
|
if: always()
|
|
continue-on-error: true
|
|
permissions:
|
|
contents: 'read'
|
|
id-token: 'write'
|
|
steps:
|
|
- uses: actions/checkout@v4.0.0
|
|
with:
|
|
persist-credentials: false
|
|
fetch-depth: '2'
|
|
- uses: r7kamura/rust-problem-matchers@v1.4.0
|
|
|
|
- name: Inject slug/short variables
|
|
uses: rlespinasse/github-slug-action@v4
|
|
with:
|
|
short-length: 7
|
|
|
|
# Setup gcloud CLI
|
|
- name: Authenticate to Google Cloud
|
|
id: auth
|
|
uses: google-github-actions/auth@v1.1.1
|
|
with:
|
|
workload_identity_provider: '${{ vars.GCP_WIF }}'
|
|
service_account: '${{ vars.GCP_DEPLOYMENTS_SA }}'
|
|
|
|
- name: Set up Cloud SDK
|
|
uses: google-github-actions/setup-gcloud@v1.1.1
|
|
|
|
# Deletes the instances that has been recently deployed in the actual commit after all
|
|
# previous jobs have run, no matter the outcome of the job.
|
|
- name: Delete test instance
|
|
continue-on-error: true
|
|
run: |
|
|
INSTANCE=$(gcloud compute instances list --filter=${{ inputs.test_id }}-${{ env.GITHUB_REF_SLUG_URL }}-${{ env.GITHUB_SHA_SHORT }} --format='value(NAME)')
|
|
if [ -z "${INSTANCE}" ]; then
|
|
echo "No instance to delete"
|
|
else
|
|
gcloud compute instances delete "${INSTANCE}" --zone "${{ vars.GCP_ZONE }}" --delete-disks all --quiet
|
|
fi
|