fix(ci): Wait 1 day before creating cached state image updates (#5088)
* Increase search range for sync height
* Update sync height regexes for zebrad and lwd cached states
* Add labels to cached state images
* Update deploy-gcp-tests.yml
* Don't create new cached states for lwd updates
* Add a missing line continuation
* Fix a comment
* Revert a mistaken comment change
* Clarify a TODO comment
* Partially revert to old docker height log handling
* Use an output for the cached disk name
This commit is contained in:
parent
c6fd7aa96d
commit
a58b72c92b
|
@ -273,6 +273,7 @@ jobs:
|
|||
uses: ./.github/workflows/deploy-gcp-tests.yml
|
||||
if: ${{ !fromJSON(needs.get-available-disks.outputs.zebra_checkpoint_disk) || github.event.inputs.regenerate-disks == 'true' }}
|
||||
with:
|
||||
app_name: zebrad
|
||||
test_id: sync-to-checkpoint
|
||||
test_description: Test sync up to mandatory checkpoint
|
||||
test_variables: '-e TEST_DISK_REBUILD=1 -e ZEBRA_FORCE_USE_COLOR=1'
|
||||
|
@ -291,6 +292,7 @@ jobs:
|
|||
uses: ./.github/workflows/deploy-gcp-tests.yml
|
||||
if: ${{ !cancelled() && !failure() && github.event.inputs.regenerate-disks != 'true' && github.event.inputs.run-full-sync != 'true' }}
|
||||
with:
|
||||
app_name: zebrad
|
||||
test_id: sync-past-checkpoint
|
||||
test_description: Test full validation sync from a cached state
|
||||
test_variables: '-e TEST_CHECKPOINT_SYNC=1 -e ZEBRA_FORCE_USE_COLOR=1'
|
||||
|
@ -318,6 +320,7 @@ jobs:
|
|||
# https://docs.github.com/en/actions/using-workflows/events-that-trigger-workflows#running-your-workflow-based-on-the-head-or-base-branch-of-a-pull-request-1
|
||||
if: ${{ (github.event_name == 'push' && github.ref_name == 'main') || !fromJSON(needs.get-available-disks.outputs.zebra_tip_disk) || github.event.inputs.run-full-sync == 'true' }}
|
||||
with:
|
||||
app_name: zebrad
|
||||
test_id: full-sync-to-tip
|
||||
test_description: Test a full sync up to the tip
|
||||
test_variables: '-e TEST_FULL_SYNC=1 -e ZEBRA_FORCE_USE_COLOR=1 -e FULL_SYNC_MAINNET_TIMEOUT_MINUTES=600'
|
||||
|
@ -417,8 +420,8 @@ jobs:
|
|||
test_variables: '-e TEST_LWD_UPDATE_SYNC=1 -e ZEBRA_TEST_LIGHTWALLETD=1 -e ZEBRA_FORCE_USE_COLOR=1 -e ZEBRA_CACHED_STATE_DIR=/var/cache/zebrad-cache -e LIGHTWALLETD_DATA_DIR=/var/cache/lwd-cache'
|
||||
needs_zebra_state: true
|
||||
needs_lwd_state: true
|
||||
# update the disk on every PR, to increase CI speed
|
||||
saves_to_disk: true
|
||||
# since we do a full sync in every PR, the new cached state will only be a few minutes newer than the original one
|
||||
saves_to_disk: false
|
||||
disk_prefix: lwd-cache
|
||||
disk_suffix: tip
|
||||
root_state_path: '/var/cache'
|
||||
|
|
|
@ -78,7 +78,7 @@ on:
|
|||
required: false
|
||||
type: string
|
||||
default: 'zebra'
|
||||
description: 'Application name for Google Cloud instance metadata'
|
||||
description: 'Application name, used to work out when a job is an update job'
|
||||
|
||||
env:
|
||||
# where we get the Docker image from
|
||||
|
@ -94,6 +94,9 @@ env:
|
|||
# but we don't know how long it will be between jobs.
|
||||
# 200 lines is about 6-15 minutes of sync logs, or one panic log.
|
||||
EXTRA_LOG_LINES: 200
|
||||
# How many blocks to wait before creating an updated cached state image.
|
||||
# 1 day is approximately 1152 blocks.
|
||||
CACHED_STATE_UPDATE_LIMIT: 1152
|
||||
|
||||
jobs:
|
||||
# set up the test, if it doesn't use any cached state
|
||||
|
@ -228,6 +231,8 @@ jobs:
|
|||
name: Setup ${{ inputs.test_id }} test
|
||||
if: ${{ inputs.needs_zebra_state }}
|
||||
runs-on: ubuntu-latest
|
||||
outputs:
|
||||
cached_disk_name: ${{ steps.get-disk-name.outputs.cached_disk_name }}
|
||||
permissions:
|
||||
contents: 'read'
|
||||
id-token: 'write'
|
||||
|
@ -340,6 +345,7 @@ jobs:
|
|||
fi
|
||||
|
||||
echo "Selected Disk: $CACHED_DISK_NAME"
|
||||
echo "::set-output name=cached_disk_name::$CACHED_DISK_NAME"
|
||||
|
||||
echo "STATE_VERSION=$LOCAL_STATE_VERSION" >> $GITHUB_ENV
|
||||
echo "CACHED_DISK_NAME=$CACHED_DISK_NAME" >> $GITHUB_ENV
|
||||
|
@ -1065,7 +1071,7 @@ jobs:
|
|||
create-state-image:
|
||||
name: Create ${{ inputs.test_id }} cached state image
|
||||
runs-on: ubuntu-latest
|
||||
needs: [ test-result ]
|
||||
needs: [ test-result, setup-with-cached-state ]
|
||||
# We run exactly one of without-cached-state or with-cached-state, and we always skip the other one.
|
||||
# Normally, if a job is skipped, all the jobs that depend on it are also skipped.
|
||||
# So we need to override the default success() check to make this job run.
|
||||
|
@ -1121,30 +1127,7 @@ jobs:
|
|||
|
||||
echo "STATE_VERSION=$LOCAL_STATE_VERSION" >> $GITHUB_ENV
|
||||
|
||||
# Get the sync height from the test logs, which is later used as part of the
|
||||
# disk description.
|
||||
#
|
||||
# The regex used to grep the sync height is provided by ${{ inputs.height_grep_text }},
|
||||
# this allows to dynamically change the height as needed by different situations or
|
||||
# based on the logs output from different tests
|
||||
#
|
||||
# Passes the sync height to subsequent steps using $SYNC_HEIGHT env variable
|
||||
- name: Get sync height from logs
|
||||
run: |
|
||||
SYNC_HEIGHT=""
|
||||
|
||||
DOCKER_LOGS=$(\
|
||||
gcloud compute ssh \
|
||||
${{ inputs.test_id }}-${{ env.GITHUB_REF_SLUG_URL }}-${{ env.GITHUB_SHA_SHORT }} \
|
||||
--zone ${{ env.ZONE }} \
|
||||
--quiet \
|
||||
--ssh-flag="-o ServerAliveInterval=5" \
|
||||
--command="docker logs ${{ inputs.test_id }} --tail 200")
|
||||
|
||||
SYNC_HEIGHT=$(echo $DOCKER_LOGS | grep -oE '${{ inputs.height_grep_text }}[0-9]+' | grep -oE '[0-9]+' | tail -1 || [[ $? == 1 ]])
|
||||
echo "SYNC_HEIGHT=$SYNC_HEIGHT" >> $GITHUB_ENV
|
||||
|
||||
# Sets the $UPDATE_SUFFIX env var to "-u" if using cached state,
|
||||
# Sets the $UPDATE_SUFFIX env var to "-u" if updating a previous cached state,
|
||||
# and the empty string otherwise.
|
||||
#
|
||||
# Also sets a unique date and time suffix $TIME_SUFFIX.
|
||||
|
@ -1152,7 +1135,12 @@ jobs:
|
|||
run: |
|
||||
UPDATE_SUFFIX=""
|
||||
|
||||
if [[ "${{ inputs.needs_zebra_state }}" == "true" ]]; then
|
||||
if [[ "${{ inputs.needs_zebra_state }}" == "true" ]] && [[ "${{ inputs.app_name }}" == "zebrad" ]]; then
|
||||
UPDATE_SUFFIX="-u"
|
||||
fi
|
||||
|
||||
# TODO: find a better logic for the lwd-full-sync case
|
||||
if [[ "${{ inputs.needs_lwd_state }}" == "true" ]] && [[ "${{ inputs.app_name }}" == "lightwalletd" ]] && [[ "${{ inputs.test_id }}" != 'lwd-full-sync' ]]; then
|
||||
UPDATE_SUFFIX="-u"
|
||||
fi
|
||||
|
||||
|
@ -1162,11 +1150,77 @@ jobs:
|
|||
echo "UPDATE_SUFFIX=$UPDATE_SUFFIX" >> $GITHUB_ENV
|
||||
echo "TIME_SUFFIX=$TIME_SUFFIX" >> $GITHUB_ENV
|
||||
|
||||
# Get the sync height from the test logs, which is later used as part of the
|
||||
# disk description and labels.
|
||||
#
|
||||
# The regex used to grep the sync height is provided by ${{ inputs.height_grep_text }},
|
||||
# this allows the height pattern to be changed dynamically, as needed by different situations or
|
||||
# based on the logs output from different tests.
|
||||
#
|
||||
# If the sync height is missing from the logs, the job fails.
|
||||
#
|
||||
# Passes the sync height to subsequent steps using $SYNC_HEIGHT env variable.
|
||||
- name: Get sync height from logs
  run: |
    SYNC_HEIGHT=""

    # Fetch the last 200 lines of the test container's logs from the test instance.
    DOCKER_LOGS=$( \
    gcloud compute ssh \
    ${{ inputs.test_id }}-${{ env.GITHUB_REF_SLUG_URL }}-${{ env.GITHUB_SHA_SHORT }} \
    --zone ${{ env.ZONE }} \
    --quiet \
    --ssh-flag="-o ServerAliveInterval=5" \
    --command=" \
    docker logs ${{ inputs.test_id }} --tail 200 \
    ")

    # Keep only the digits of the last height match in the logs.
    # A pipeline status of 1 (grep found no match) is tolerated here, so that an
    # empty result reaches the explicit check below; any other status is an error.
    SYNC_HEIGHT=$( \
    echo "$DOCKER_LOGS" | \
    grep --extended-regexp --only-matching '${{ inputs.height_grep_text }}[0-9]+' | \
    grep --extended-regexp --only-matching '[0-9]+' | \
    tail -1 || \
    [[ $? == 1 ]] \
    )

    if [[ -z "$SYNC_HEIGHT" ]]; then
      echo "Missing sync height in logs: $SYNC_HEIGHT"
      # Fail the tests, because Zebra and lightwalletd didn't log their sync heights,
      # or the CI workflow sync height regex is wrong.
      #
      # Exit explicitly, so the step fails even if the shell is not running with errexit.
      exit 1
    fi

    echo "Found sync height in logs: $SYNC_HEIGHT"
    # Pass the height to the following steps of this job.
    echo "SYNC_HEIGHT=$SYNC_HEIGHT" >> $GITHUB_ENV
|
||||
|
||||
# Get the original cached state height from google cloud.
|
||||
#
|
||||
# If the height is missing from the image labels, uses zero instead.
|
||||
#
|
||||
# TODO: fail the job if needs_zebra_state but the height is missing
|
||||
# we can make this change after all the old images have been deleted, this should happen around 15 September 2022
|
||||
# we'll also need to do a manual checkpoint rebuild before opening the PR for this change
|
||||
#
|
||||
# Passes the original height to subsequent steps using $ORIGINAL_HEIGHT env variable.
|
||||
- name: Get original cached state height from google cloud
  run: |
    # Default to zero when no cached disk was used, or its image has no height label.
    ORIGINAL_HEIGHT="0"

    # Read the disk name from the setup job's output: environment variables written
    # to $GITHUB_ENV in another job are not visible in this one.
    # The output is empty when the setup job did not select a cached disk.
    CACHED_DISK_NAME="${{ needs.setup-with-cached-state.outputs.cached_disk_name }}"

    if [[ -n "$CACHED_DISK_NAME" ]]; then
      # Look up the height label stored on the image that the test started from.
      ORIGINAL_HEIGHT=$(gcloud compute images list --filter="name=$CACHED_DISK_NAME" --format="value(labels.height)")
      ORIGINAL_HEIGHT=${ORIGINAL_HEIGHT:-0}
      echo "$CACHED_DISK_NAME height: $ORIGINAL_HEIGHT"
    fi

    # Pass the original height to the following steps of this job.
    echo "ORIGINAL_HEIGHT=$ORIGINAL_HEIGHT" >> $GITHUB_ENV
|
||||
|
||||
# Create an image from the state disk, which will be used for any tests that start
|
||||
# after it is created. These tests can be in the same workflow, or in a different PR.
|
||||
#
|
||||
# Using the newest image makes future jobs faster, because it is closer to the chain tip.
|
||||
#
|
||||
# Skips creating updated images if the original image is less than $CACHED_STATE_UPDATE_LIMIT blocks behind the current tip.
|
||||
# Full sync images are always created.
|
||||
#
|
||||
# The image can contain:
|
||||
# - Zebra cached state, or
|
||||
# - Zebra + lightwalletd cached state.
|
||||
|
@ -1189,14 +1243,19 @@ jobs:
|
|||
# used by the container.
|
||||
- name: Create image from state disk
|
||||
run: |
|
||||
gcloud compute images create \
|
||||
"${{ inputs.disk_prefix }}-${SHORT_GITHUB_REF}-${{ env.GITHUB_SHA_SHORT }}-v${{ env.STATE_VERSION }}-${{ env.NETWORK }}-${{ inputs.disk_suffix }}${UPDATE_SUFFIX}-${TIME_SUFFIX}" \
|
||||
--force \
|
||||
--source-disk=${{ inputs.test_id }}-${{ env.GITHUB_SHA_SHORT }} \
|
||||
--source-disk-zone=${{ env.ZONE }} \
|
||||
--storage-location=us \
|
||||
--description="Created from commit ${{ env.GITHUB_SHA_SHORT }} with height ${{ env.SYNC_HEIGHT }}" \
|
||||
--labels="height=${{ env.SYNC_HEIGHT }},purpose=${{ inputs.disk_prefix }},commit=${{ env.GITHUB_SHA_SHORT }},state-version=${{ env.STATE_VERSION }},network=${{ env.NETWORK }},target-height=${{ inputs.disk_suffix }},update-flag=${UPDATE_SUFFIX},test-id=${{ inputs.test_id }},app-name=${{ inputs.app_name }}"
|
||||
MINIMUM_UPDATE_HEIGHT=$((ORIGINAL_HEIGHT+CACHED_STATE_UPDATE_LIMIT))
|
||||
if [[ -z "$UPDATE_SUFFIX" ]] || [[ "$SYNC_HEIGHT" -gt "$MINIMUM_UPDATE_HEIGHT" ]]; then
|
||||
gcloud compute images create \
|
||||
"${{ inputs.disk_prefix }}-${SHORT_GITHUB_REF}-${{ env.GITHUB_SHA_SHORT }}-v${{ env.STATE_VERSION }}-${{ env.NETWORK }}-${{ inputs.disk_suffix }}${UPDATE_SUFFIX}-${TIME_SUFFIX}" \
|
||||
--force \
|
||||
--source-disk=${{ inputs.test_id }}-${{ env.GITHUB_SHA_SHORT }} \
|
||||
--source-disk-zone=${{ env.ZONE }} \
|
||||
--storage-location=us \
|
||||
--description="Created from commit ${{ env.GITHUB_SHA_SHORT }} with height ${{ env.SYNC_HEIGHT }}" \
|
||||
--labels="height=${{ env.SYNC_HEIGHT }},purpose=${{ inputs.disk_prefix }},commit=${{ env.GITHUB_SHA_SHORT }},state-version=${{ env.STATE_VERSION }},network=${{ env.NETWORK }},target-height-kind=${{ inputs.disk_suffix }},update-flag=${UPDATE_SUFFIX},updated-from-height=${ORIGINAL_HEIGHT},test-id=${{ inputs.test_id }},app-name=${{ inputs.app_name }}"
|
||||
else
|
||||
echo "Skipped cached state update because the new sync height $SYNC_HEIGHT was less than $CACHED_STATE_UPDATE_LIMIT blocks above the original height $ORIGINAL_HEIGHT"
|
||||
fi
|
||||
|
||||
# delete the Google Cloud instance for this test
|
||||
delete-instance:
|
||||
|
|
Loading…
Reference in New Issue