fix(ci): Wait 1 day before creating cached state image updates (#5088)
* Increase search range for sync height
* Update sync height regexes for zebrad and lwd cached states
* Add labels to cached state images
* Update deploy-gcp-tests.yml
* Don't create new cached states for lwd updates
* Add a missing line continuation
* Fix a comment
* Revert a mistaken comment change
* Clarify a TODO comment
* Partially revert to old docker height log handling
* Use an output for the cached disk name
This commit is contained in:
parent
c6fd7aa96d
commit
a58b72c92b
|
@ -273,6 +273,7 @@ jobs:
|
||||||
uses: ./.github/workflows/deploy-gcp-tests.yml
|
uses: ./.github/workflows/deploy-gcp-tests.yml
|
||||||
if: ${{ !fromJSON(needs.get-available-disks.outputs.zebra_checkpoint_disk) || github.event.inputs.regenerate-disks == 'true' }}
|
if: ${{ !fromJSON(needs.get-available-disks.outputs.zebra_checkpoint_disk) || github.event.inputs.regenerate-disks == 'true' }}
|
||||||
with:
|
with:
|
||||||
|
app_name: zebrad
|
||||||
test_id: sync-to-checkpoint
|
test_id: sync-to-checkpoint
|
||||||
test_description: Test sync up to mandatory checkpoint
|
test_description: Test sync up to mandatory checkpoint
|
||||||
test_variables: '-e TEST_DISK_REBUILD=1 -e ZEBRA_FORCE_USE_COLOR=1'
|
test_variables: '-e TEST_DISK_REBUILD=1 -e ZEBRA_FORCE_USE_COLOR=1'
|
||||||
|
@ -291,6 +292,7 @@ jobs:
|
||||||
uses: ./.github/workflows/deploy-gcp-tests.yml
|
uses: ./.github/workflows/deploy-gcp-tests.yml
|
||||||
if: ${{ !cancelled() && !failure() && github.event.inputs.regenerate-disks != 'true' && github.event.inputs.run-full-sync != 'true' }}
|
if: ${{ !cancelled() && !failure() && github.event.inputs.regenerate-disks != 'true' && github.event.inputs.run-full-sync != 'true' }}
|
||||||
with:
|
with:
|
||||||
|
app_name: zebrad
|
||||||
test_id: sync-past-checkpoint
|
test_id: sync-past-checkpoint
|
||||||
test_description: Test full validation sync from a cached state
|
test_description: Test full validation sync from a cached state
|
||||||
test_variables: '-e TEST_CHECKPOINT_SYNC=1 -e ZEBRA_FORCE_USE_COLOR=1'
|
test_variables: '-e TEST_CHECKPOINT_SYNC=1 -e ZEBRA_FORCE_USE_COLOR=1'
|
||||||
|
@ -318,6 +320,7 @@ jobs:
|
||||||
# https://docs.github.com/en/actions/using-workflows/events-that-trigger-workflows#running-your-workflow-based-on-the-head-or-base-branch-of-a-pull-request-1
|
# https://docs.github.com/en/actions/using-workflows/events-that-trigger-workflows#running-your-workflow-based-on-the-head-or-base-branch-of-a-pull-request-1
|
||||||
if: ${{ (github.event_name == 'push' && github.ref_name == 'main') || !fromJSON(needs.get-available-disks.outputs.zebra_tip_disk) || github.event.inputs.run-full-sync == 'true' }}
|
if: ${{ (github.event_name == 'push' && github.ref_name == 'main') || !fromJSON(needs.get-available-disks.outputs.zebra_tip_disk) || github.event.inputs.run-full-sync == 'true' }}
|
||||||
with:
|
with:
|
||||||
|
app_name: zebrad
|
||||||
test_id: full-sync-to-tip
|
test_id: full-sync-to-tip
|
||||||
test_description: Test a full sync up to the tip
|
test_description: Test a full sync up to the tip
|
||||||
test_variables: '-e TEST_FULL_SYNC=1 -e ZEBRA_FORCE_USE_COLOR=1 -e FULL_SYNC_MAINNET_TIMEOUT_MINUTES=600'
|
test_variables: '-e TEST_FULL_SYNC=1 -e ZEBRA_FORCE_USE_COLOR=1 -e FULL_SYNC_MAINNET_TIMEOUT_MINUTES=600'
|
||||||
|
@ -417,8 +420,8 @@ jobs:
|
||||||
test_variables: '-e TEST_LWD_UPDATE_SYNC=1 -e ZEBRA_TEST_LIGHTWALLETD=1 -e ZEBRA_FORCE_USE_COLOR=1 -e ZEBRA_CACHED_STATE_DIR=/var/cache/zebrad-cache -e LIGHTWALLETD_DATA_DIR=/var/cache/lwd-cache'
|
test_variables: '-e TEST_LWD_UPDATE_SYNC=1 -e ZEBRA_TEST_LIGHTWALLETD=1 -e ZEBRA_FORCE_USE_COLOR=1 -e ZEBRA_CACHED_STATE_DIR=/var/cache/zebrad-cache -e LIGHTWALLETD_DATA_DIR=/var/cache/lwd-cache'
|
||||||
needs_zebra_state: true
|
needs_zebra_state: true
|
||||||
needs_lwd_state: true
|
needs_lwd_state: true
|
||||||
# update the disk on every PR, to increase CI speed
|
# since we do a full sync in every PR, the new cached state will only be a few minutes newer than the original one
|
||||||
saves_to_disk: true
|
saves_to_disk: false
|
||||||
disk_prefix: lwd-cache
|
disk_prefix: lwd-cache
|
||||||
disk_suffix: tip
|
disk_suffix: tip
|
||||||
root_state_path: '/var/cache'
|
root_state_path: '/var/cache'
|
||||||
|
|
|
@ -78,7 +78,7 @@ on:
|
||||||
required: false
|
required: false
|
||||||
type: string
|
type: string
|
||||||
default: 'zebra'
|
default: 'zebra'
|
||||||
description: 'Application name for Google Cloud instance metadata'
|
description: 'Application name, used to work out when a job is an update job'
|
||||||
|
|
||||||
env:
|
env:
|
||||||
# where we get the Docker image from
|
# where we get the Docker image from
|
||||||
|
@ -94,6 +94,9 @@ env:
|
||||||
# but we don't know how long it will be between jobs.
|
# but we don't know how long it will be between jobs.
|
||||||
# 200 lines is about 6-15 minutes of sync logs, or one panic log.
|
# 200 lines is about 6-15 minutes of sync logs, or one panic log.
|
||||||
EXTRA_LOG_LINES: 200
|
EXTRA_LOG_LINES: 200
|
||||||
|
# How many blocks to wait before creating an updated cached state image.
|
||||||
|
# 1 day is approximately 1152 blocks.
|
||||||
|
CACHED_STATE_UPDATE_LIMIT: 1152
|
||||||
|
|
||||||
jobs:
|
jobs:
|
||||||
# set up the test, if it doesn't use any cached state
|
# set up the test, if it doesn't use any cached state
|
||||||
|
@ -228,6 +231,8 @@ jobs:
|
||||||
name: Setup ${{ inputs.test_id }} test
|
name: Setup ${{ inputs.test_id }} test
|
||||||
if: ${{ inputs.needs_zebra_state }}
|
if: ${{ inputs.needs_zebra_state }}
|
||||||
runs-on: ubuntu-latest
|
runs-on: ubuntu-latest
|
||||||
|
outputs:
|
||||||
|
cached_disk_name: ${{ steps.get-disk-name.outputs.cached_disk_name }}
|
||||||
permissions:
|
permissions:
|
||||||
contents: 'read'
|
contents: 'read'
|
||||||
id-token: 'write'
|
id-token: 'write'
|
||||||
|
@ -340,6 +345,7 @@ jobs:
|
||||||
fi
|
fi
|
||||||
|
|
||||||
echo "Selected Disk: $CACHED_DISK_NAME"
|
echo "Selected Disk: $CACHED_DISK_NAME"
|
||||||
|
# Expose the selected disk name as a step output, so dependent jobs can read it.
# Written through the $GITHUB_OUTPUT file because the set-output workflow command is deprecated.
echo "cached_disk_name=$CACHED_DISK_NAME" >> "$GITHUB_OUTPUT"
|
||||||
|
|
||||||
echo "STATE_VERSION=$LOCAL_STATE_VERSION" >> $GITHUB_ENV
|
echo "STATE_VERSION=$LOCAL_STATE_VERSION" >> $GITHUB_ENV
|
||||||
echo "CACHED_DISK_NAME=$CACHED_DISK_NAME" >> $GITHUB_ENV
|
echo "CACHED_DISK_NAME=$CACHED_DISK_NAME" >> $GITHUB_ENV
|
||||||
|
@ -1065,7 +1071,7 @@ jobs:
|
||||||
create-state-image:
|
create-state-image:
|
||||||
name: Create ${{ inputs.test_id }} cached state image
|
name: Create ${{ inputs.test_id }} cached state image
|
||||||
runs-on: ubuntu-latest
|
runs-on: ubuntu-latest
|
||||||
needs: [ test-result ]
|
needs: [ test-result, setup-with-cached-state ]
|
||||||
# We run exactly one of without-cached-state or with-cached-state, and we always skip the other one.
|
# We run exactly one of without-cached-state or with-cached-state, and we always skip the other one.
|
||||||
# Normally, if a job is skipped, all the jobs that depend on it are also skipped.
|
# Normally, if a job is skipped, all the jobs that depend on it are also skipped.
|
||||||
# So we need to override the default success() check to make this job run.
|
# So we need to override the default success() check to make this job run.
|
||||||
|
@ -1120,31 +1126,8 @@ jobs:
|
||||||
echo "STATE_VERSION: $LOCAL_STATE_VERSION"
|
echo "STATE_VERSION: $LOCAL_STATE_VERSION"
|
||||||
|
|
||||||
echo "STATE_VERSION=$LOCAL_STATE_VERSION" >> $GITHUB_ENV
|
echo "STATE_VERSION=$LOCAL_STATE_VERSION" >> $GITHUB_ENV
|
||||||
|
|
||||||
# Get the sync height from the test logs, which is later used as part of the
|
# Sets the $UPDATE_SUFFIX env var to "-u" if updating a previous cached state,
|
||||||
# disk description.
|
|
||||||
#
|
|
||||||
# The regex used to grep the sync height is provided by ${{ inputs.height_grep_text }},
|
|
||||||
# this allows to dynamically change the height as needed by different situations or
|
|
||||||
# based on the logs output from different tests
|
|
||||||
#
|
|
||||||
# Passes the sync height to subsequent steps using $SYNC_HEIGHT env variable
|
|
||||||
- name: Get sync height from logs
|
|
||||||
run: |
|
|
||||||
SYNC_HEIGHT=""
|
|
||||||
|
|
||||||
DOCKER_LOGS=$(\
|
|
||||||
gcloud compute ssh \
|
|
||||||
${{ inputs.test_id }}-${{ env.GITHUB_REF_SLUG_URL }}-${{ env.GITHUB_SHA_SHORT }} \
|
|
||||||
--zone ${{ env.ZONE }} \
|
|
||||||
--quiet \
|
|
||||||
--ssh-flag="-o ServerAliveInterval=5" \
|
|
||||||
--command="docker logs ${{ inputs.test_id }} --tail 200")
|
|
||||||
|
|
||||||
SYNC_HEIGHT=$(echo $DOCKER_LOGS | grep -oE '${{ inputs.height_grep_text }}[0-9]+' | grep -oE '[0-9]+' | tail -1 || [[ $? == 1 ]])
|
|
||||||
echo "SYNC_HEIGHT=$SYNC_HEIGHT" >> $GITHUB_ENV
|
|
||||||
|
|
||||||
# Sets the $UPDATE_SUFFIX env var to "-u" if using cached state,
|
|
||||||
# and the empty string otherwise.
|
# and the empty string otherwise.
|
||||||
#
|
#
|
||||||
# Also sets a unique date and time suffix $TIME_SUFFIX.
|
# Also sets a unique date and time suffix $TIME_SUFFIX.
|
||||||
|
@ -1152,21 +1135,92 @@ jobs:
|
||||||
run: |
|
run: |
|
||||||
UPDATE_SUFFIX=""
|
UPDATE_SUFFIX=""
|
||||||
|
|
||||||
if [[ "${{ inputs.needs_zebra_state }}" == "true" ]]; then
|
if [[ "${{ inputs.needs_zebra_state }}" == "true" ]] && [[ "${{ inputs.app_name }}" == "zebrad" ]]; then
|
||||||
UPDATE_SUFFIX="-u"
|
UPDATE_SUFFIX="-u"
|
||||||
fi
|
fi
|
||||||
|
|
||||||
|
# TODO: find a better logic for the lwd-full-sync case
|
||||||
|
if [[ "${{ inputs.needs_lwd_state }}" == "true" ]] && [[ "${{ inputs.app_name }}" == "lightwalletd" ]] && [[ "${{ inputs.test_id }}" != 'lwd-full-sync' ]]; then
|
||||||
|
UPDATE_SUFFIX="-u"
|
||||||
|
fi
|
||||||
|
|
||||||
# We're going to delete old images after a month, so we don't need the year here
|
# We're going to delete old images after a month, so we don't need the year here
|
||||||
TIME_SUFFIX=$(date '+%m%d%H%M%S' --utc)
|
TIME_SUFFIX=$(date '+%m%d%H%M%S' --utc)
|
||||||
|
|
||||||
echo "UPDATE_SUFFIX=$UPDATE_SUFFIX" >> $GITHUB_ENV
|
echo "UPDATE_SUFFIX=$UPDATE_SUFFIX" >> $GITHUB_ENV
|
||||||
echo "TIME_SUFFIX=$TIME_SUFFIX" >> $GITHUB_ENV
|
echo "TIME_SUFFIX=$TIME_SUFFIX" >> $GITHUB_ENV
|
||||||
|
|
||||||
|
# Get the sync height from the test logs, which is later used as part of the
|
||||||
|
# disk description and labels.
|
||||||
|
#
|
||||||
|
# The regex used to grep the sync height is provided by ${{ inputs.height_grep_text }},
|
||||||
|
# which allows the pattern to be changed as needed for different situations, or
|
||||||
|
# based on the logs output from different tests.
|
||||||
|
#
|
||||||
|
# If the sync height is missing from the logs, the job fails.
|
||||||
|
#
|
||||||
|
# Passes the sync height to subsequent steps using $SYNC_HEIGHT env variable.
|
||||||
|
- name: Get sync height from logs
|
||||||
|
run: |
|
||||||
|
SYNC_HEIGHT=""
|
||||||
|
|
||||||
|
DOCKER_LOGS=$( \
|
||||||
|
gcloud compute ssh \
|
||||||
|
${{ inputs.test_id }}-${{ env.GITHUB_REF_SLUG_URL }}-${{ env.GITHUB_SHA_SHORT }} \
|
||||||
|
--zone ${{ env.ZONE }} \
|
||||||
|
--quiet \
|
||||||
|
--ssh-flag="-o ServerAliveInterval=5" \
|
||||||
|
--command=" \
|
||||||
|
docker logs ${{ inputs.test_id }} --tail 200 \
|
||||||
|
")
|
||||||
|
|
||||||
|
SYNC_HEIGHT=$( \
|
||||||
|
echo "$DOCKER_LOGS" | \
|
||||||
|
grep --extended-regexp --only-matching '${{ inputs.height_grep_text }}[0-9]+' | \
|
||||||
|
grep --extended-regexp --only-matching '[0-9]+' | \
|
||||||
|
tail -1 || \
|
||||||
|
[[ $? == 1 ]] \
|
||||||
|
)
|
||||||
|
|
||||||
|
if [[ -z "$SYNC_HEIGHT" ]]; then
|
||||||
|
echo "Missing sync height in logs: $SYNC_HEIGHT"
|
||||||
|
# Fail the tests, because Zebra and lightwalletd didn't log their sync heights,
|
||||||
|
# or the CI workflow sync height regex is wrong.
|
||||||
|
false
|
||||||
|
fi
|
||||||
|
|
||||||
|
echo "Found sync height in logs: $SYNC_HEIGHT"
|
||||||
|
echo "SYNC_HEIGHT=$SYNC_HEIGHT" >> $GITHUB_ENV
|
||||||
|
|
||||||
|
# Get the original cached state height from google cloud.
|
||||||
|
#
|
||||||
|
# If the height is missing from the image labels, uses zero instead.
|
||||||
|
#
|
||||||
|
# TODO: fail the job if needs_zebra_state but the height is missing
|
||||||
|
# we can make this change after all the old images have been deleted, which should happen around 15 September 2022
|
||||||
|
# we'll also need to do a manual checkpoint rebuild before opening the PR for this change
|
||||||
|
#
|
||||||
|
# Passes the original height to subsequent steps using $ORIGINAL_HEIGHT env variable.
|
||||||
|
- name: Get original cached state height from google cloud
|
||||||
|
run: |
|
||||||
|
ORIGINAL_HEIGHT="0"
|
||||||
|
|
||||||
|
if [[ -n "${{ format('{0}', needs.setup-with-cached-state.outputs.cached_disk_name) }}" ]]; then
|
||||||
|
ORIGINAL_HEIGHT=$(gcloud compute images list --filter="name=${{ needs.setup-with-cached-state.outputs.cached_disk_name }}" --format="value(labels.height)")
|
||||||
|
ORIGINAL_HEIGHT=${ORIGINAL_HEIGHT:-0}
|
||||||
|
# Variables written to $GITHUB_ENV by the setup job are not visible in this job,
# so log the disk name from the setup job's output rather than from $CACHED_DISK_NAME.
echo "${{ needs.setup-with-cached-state.outputs.cached_disk_name }} height: $ORIGINAL_HEIGHT"
|
||||||
|
fi
|
||||||
|
|
||||||
|
echo "ORIGINAL_HEIGHT=$ORIGINAL_HEIGHT" >> $GITHUB_ENV
|
||||||
|
|
||||||
# Create an image from the state disk, which will be used for any tests that start
|
# Create an image from the state disk, which will be used for any tests that start
|
||||||
# after it is created. These tests can be in the same workflow, or in a different PR.
|
# after it is created. These tests can be in the same workflow, or in a different PR.
|
||||||
#
|
#
|
||||||
# Using the newest image makes future jobs faster, because it is closer to the chain tip.
|
# Using the newest image makes future jobs faster, because it is closer to the chain tip.
|
||||||
#
|
#
|
||||||
|
# Skips creating updated images if the original image is no more than $CACHED_STATE_UPDATE_LIMIT blocks behind the newly synced height.
|
||||||
|
# Full sync images are always created.
|
||||||
|
#
|
||||||
# The image can contain:
|
# The image can contain:
|
||||||
# - Zebra cached state, or
|
# - Zebra cached state, or
|
||||||
# - Zebra + lightwalletd cached state.
|
# - Zebra + lightwalletd cached state.
|
||||||
|
@ -1189,14 +1243,19 @@ jobs:
|
||||||
# used by the container.
|
# used by the container.
|
||||||
- name: Create image from state disk
|
- name: Create image from state disk
|
||||||
run: |
|
run: |
|
||||||
gcloud compute images create \
|
MINIMUM_UPDATE_HEIGHT=$((ORIGINAL_HEIGHT+CACHED_STATE_UPDATE_LIMIT))
|
||||||
"${{ inputs.disk_prefix }}-${SHORT_GITHUB_REF}-${{ env.GITHUB_SHA_SHORT }}-v${{ env.STATE_VERSION }}-${{ env.NETWORK }}-${{ inputs.disk_suffix }}${UPDATE_SUFFIX}-${TIME_SUFFIX}" \
|
if [[ -z "$UPDATE_SUFFIX" ]] || [[ "$SYNC_HEIGHT" -gt "$MINIMUM_UPDATE_HEIGHT" ]]; then
|
||||||
--force \
|
gcloud compute images create \
|
||||||
--source-disk=${{ inputs.test_id }}-${{ env.GITHUB_SHA_SHORT }} \
|
"${{ inputs.disk_prefix }}-${SHORT_GITHUB_REF}-${{ env.GITHUB_SHA_SHORT }}-v${{ env.STATE_VERSION }}-${{ env.NETWORK }}-${{ inputs.disk_suffix }}${UPDATE_SUFFIX}-${TIME_SUFFIX}" \
|
||||||
--source-disk-zone=${{ env.ZONE }} \
|
--force \
|
||||||
--storage-location=us \
|
--source-disk=${{ inputs.test_id }}-${{ env.GITHUB_SHA_SHORT }} \
|
||||||
--description="Created from commit ${{ env.GITHUB_SHA_SHORT }} with height ${{ env.SYNC_HEIGHT }}" \
|
--source-disk-zone=${{ env.ZONE }} \
|
||||||
--labels="height=${{ env.SYNC_HEIGHT }},purpose=${{ inputs.disk_prefix }},commit=${{ env.GITHUB_SHA_SHORT }},state-version=${{ env.STATE_VERSION }},network=${{ env.NETWORK }},target-height=${{ inputs.disk_suffix }},update-flag=${UPDATE_SUFFIX},test-id=${{ inputs.test_id }},app-name=${{ inputs.app_name }}"
|
--storage-location=us \
|
||||||
|
--description="Created from commit ${{ env.GITHUB_SHA_SHORT }} with height ${{ env.SYNC_HEIGHT }}" \
|
||||||
|
--labels="height=${{ env.SYNC_HEIGHT }},purpose=${{ inputs.disk_prefix }},commit=${{ env.GITHUB_SHA_SHORT }},state-version=${{ env.STATE_VERSION }},network=${{ env.NETWORK }},target-height-kind=${{ inputs.disk_suffix }},update-flag=${UPDATE_SUFFIX},updated-from-height=${ORIGINAL_HEIGHT},test-id=${{ inputs.test_id }},app-name=${{ inputs.app_name }}"
|
||||||
|
else
|
||||||
|
echo "Skipped cached state update because the new sync height $SYNC_HEIGHT was less than $CACHED_STATE_UPDATE_LIMIT blocks above the original height $ORIGINAL_HEIGHT"
|
||||||
|
fi
|
||||||
|
|
||||||
# delete the Google Cloud instance for this test
|
# delete the Google Cloud instance for this test
|
||||||
delete-instance:
|
delete-instance:
|
||||||
|
|
Loading…
Reference in New Issue