zebra/.github/workflows/deploy-gcp-tests.yml

439 lines
20 KiB
YAML

name: Deploy GCP tests
on:
workflow_call:
inputs:
network:
required: false
type: string
default: Mainnet
app_name:
required: false
type: string
default: 'zebra'
test_id:
required: true
type: string
test_description:
required: true
type: string
test_variables:
required: true
type: string
# TODO: find a better name
root_state_path:
required: false
type: string
default: '/zebrad-cache'
# TODO: find a better name
zebra_state_dir:
required: false
type: string
default: ''
description: 'Name of the Zebra cached state directory and input image prefix to search in GCP'
# TODO: find a better name
lwd_state_dir:
required: false
type: string
default: ''
description: 'Name of the Lightwalletd cached state directory and input image prefix to search in GCP'
disk_prefix:
required: false
type: string
default: 'zebrad-cache'
description: 'Used to name the image, and for tests that do not use a `zebra_state_dir` to work, but builds a cached state'
disk_suffix:
required: false
type: string
needs_zebra_state:
required: true
type: boolean
description: 'Indicates if a test needs a disk with a Zebra cached state to run'
needs_lwd_state:
required: false
type: boolean
description: 'Indicates if a test needs a disk with Lightwalletd cached state to run (which also includes a Zebra cached state)'
saves_to_disk:
required: true
type: boolean
height_grep_text:
required: false
type: string
env:
IMAGE_NAME: zebrad-test
GAR_BASE: us-docker.pkg.dev/zealous-zebra/zebra
ZONE: us-central1-a
MACHINE_TYPE: c2d-standard-16
jobs:
test-without-cached-state:
name: Run ${{ inputs.test_id }} test
if: ${{ !inputs.needs_zebra_state }}
runs-on: ubuntu-latest
permissions:
contents: 'read'
id-token: 'write'
steps:
- uses: actions/checkout@v3.0.2
with:
persist-credentials: false
- name: Inject slug/short variables
uses: rlespinasse/github-slug-action@v4
with:
short-length: 7
- name: Downcase network name for disks
run: |
NETWORK_CAPS=${{ inputs.network }}
echo "NETWORK=${NETWORK_CAPS,,}" >> $GITHUB_ENV
# Setup gcloud CLI
- name: Authenticate to Google Cloud
id: auth
uses: google-github-actions/auth@v0.8.0
with:
workload_identity_provider: 'projects/143793276228/locations/global/workloadIdentityPools/github-actions/providers/github-oidc'
service_account: 'github-service-account@zealous-zebra.iam.gserviceaccount.com'
token_format: 'access_token'
- name: Create GCP compute instance
id: create-instance
run: |
gcloud compute instances create-with-container "${{ inputs.test_id }}-${{ env.GITHUB_REF_SLUG_URL }}-${{ env.GITHUB_SHA_SHORT }}" \
--boot-disk-size 100GB \
--boot-disk-type pd-ssd \
--create-disk name="${{ inputs.test_id }}-${{ env.GITHUB_SHA_SHORT }}",device-name="${{ inputs.test_id }}-${{ env.GITHUB_SHA_SHORT }}",size=100GB,type=pd-ssd \
--container-image debian:buster \
--container-restart-policy=never \
--machine-type ${{ env.MACHINE_TYPE }} \
--scopes cloud-platform \
--metadata=google-monitoring-enabled=true,google-logging-enabled=true \
--tags ${{ inputs.app_name }} \
--zone ${{ env.ZONE }}
sleep 60
- name: Run ${{ inputs.test_id }} test
run: |
gcloud compute ssh \
${{ inputs.test_id }}-${{ env.GITHUB_REF_SLUG_URL }}-${{ env.GITHUB_SHA_SHORT }} \
--zone ${{ env.ZONE }} \
--quiet \
--ssh-flag="-o ServerAliveInterval=5" \
--command \
"\
sudo mkfs.ext4 /dev/sdb \
&& \
docker volume create --driver local --opt type=ext4 --opt device=/dev/sdb \
${{ inputs.test_id }}-${{ env.GITHUB_SHA_SHORT }} \
&& \
docker run ${{ inputs.test_variables }} -t --name ${{ inputs.test_id }} \
--mount type=volume,src=${{ inputs.test_id }}-${{ env.GITHUB_SHA_SHORT }},dst=${{ inputs.root_state_path }}/${{ inputs.zebra_state_dir }} \
${{ env.GAR_BASE }}/${{ env.IMAGE_NAME }}:sha-${{ env.GITHUB_SHA_SHORT }}"
test-with-cached-state:
name: Run ${{ inputs.test_id }} test
if: ${{ inputs.needs_zebra_state }}
runs-on: ubuntu-latest
permissions:
contents: 'read'
id-token: 'write'
steps:
- uses: actions/checkout@v3.0.2
with:
persist-credentials: false
fetch-depth: '2'
- name: Inject slug/short variables
uses: rlespinasse/github-slug-action@v4
with:
short-length: 7
- name: Downcase network name for disks
run: |
NETWORK_CAPS=${{ inputs.network }}
echo "NETWORK=${NETWORK_CAPS,,}" >> $GITHUB_ENV
# Setup gcloud CLI
- name: Authenticate to Google Cloud
id: auth
uses: google-github-actions/auth@v0.8.0
with:
workload_identity_provider: 'projects/143793276228/locations/global/workloadIdentityPools/github-actions/providers/github-oidc'
service_account: 'github-service-account@zealous-zebra.iam.gserviceaccount.com'
token_format: 'access_token'
# Find a cached state disk for this job, matching all of:
# - disk cached state (lwd_state_dir/zebra_state_dir or disk_prefix) - zebrad-cache or lwd-cache
# - state version (from the source code) - v{N}
# - network (network) - mainnet or testnet
# - disk target height kind (disk_suffix) - checkpoint or tip
#
# If the test needs a lightwalletd state (needs_lwd_state) set the variable DISK_PREFIX accordingly
# - To ${{ inputs.lwd_state_dir }}" if needed
# - To ${{ inputs.zebra_state_dir || inputs.disk_prefix }} if not
#
# If there are multiple disks:
# - prefer images generated from the `main` branch, then any other branch
# - prefer newer images to older images
#
# Passes the disk name to subsequent steps using $CACHED_DISK_NAME env variable
# Passes the state version to subsequent steps using $STATE_VERSION env variable
- name: Find cached state disk
id: get-disk-name
run: |
LOCAL_STATE_VERSION=$(grep -oE "DATABASE_FORMAT_VERSION: .* [0-9]+" "$GITHUB_WORKSPACE/zebra-state/src/constants.rs" | grep -oE "[0-9]+" | tail -n1)
echo "STATE_VERSION: $LOCAL_STATE_VERSION"
if [[ "${{ inputs.needs_lwd_state }}" == "true" ]]; then
DISK_PREFIX=${{ inputs.lwd_state_dir }}
else
DISK_PREFIX=${{ inputs.zebra_state_dir || inputs.disk_prefix }}
fi
# Try to find an image generated from the main branch
# Fields are listed in the "Create image from state disk" step
CACHED_DISK_NAME=$(gcloud compute images list --filter="name~${DISK_PREFIX}-main-[0-9a-f]+-v${LOCAL_STATE_VERSION}-${NETWORK}-${{ inputs.disk_suffix }}" --format="value(NAME)" --sort-by=~creationTimestamp --limit=1)
echo "main Disk: $CACHED_DISK_NAME"
if [[ -z "$CACHED_DISK_NAME" ]]; then
# Try to find an image generated from any other branch
CACHED_DISK_NAME=$(gcloud compute images list --filter="name~${DISK_PREFIX}-.+-[0-9a-f]+-v${LOCAL_STATE_VERSION}-${NETWORK}-${{ inputs.disk_suffix }}" --format="value(NAME)" --sort-by=~creationTimestamp --limit=1)
echo "Disk: $CACHED_DISK_NAME"
fi
if [[ -z "$CACHED_DISK_NAME" ]]; then
echo "No cached state disk available"
echo "Expected ${DISK_PREFIX}-(branch)-[0-9a-f]+-v${LOCAL_STATE_VERSION}-${NETWORK}-${{ inputs.disk_suffix }}"
echo "Cached state test jobs must depend on the cached state rebuild job"
exit 1
fi
echo "Description: $(gcloud compute images describe $CACHED_DISK_NAME --format='value(DESCRIPTION)')"
echo "STATE_VERSION=$LOCAL_STATE_VERSION" >> $GITHUB_ENV
echo "CACHED_DISK_NAME=$CACHED_DISK_NAME" >> $GITHUB_ENV
# Creates Compute Engine virtual machine and attach a cached state disk using the
# $CACHED_DISK_NAME variable as the source image to populate the disk cached state
- name: Create GCP compute instance
id: create-instance
run: |
gcloud compute instances create-with-container "${{ inputs.test_id }}-${{ env.GITHUB_REF_SLUG_URL }}-${{ env.GITHUB_SHA_SHORT }}" \
--boot-disk-size 100GB \
--boot-disk-type pd-ssd \
--create-disk image=${{ env.CACHED_DISK_NAME }},name="${{ inputs.disk_prefix }}-${{ inputs.test_id }}-${{ env.GITHUB_SHA_SHORT }}",device-name="${{ inputs.disk_prefix }}-${{ inputs.test_id }}-${{ env.GITHUB_SHA_SHORT }}",size=100GB,type=pd-ssd \
--container-image debian:buster \
--container-restart-policy=never \
--machine-type ${{ env.MACHINE_TYPE }} \
--scopes cloud-platform \
--metadata=google-monitoring-enabled=true,google-logging-enabled=true \
--tags ${{ inputs.app_name }} \
--zone ${{ env.ZONE }}
sleep 60
# SSH into the just created VM, and create a Docker container to run the incoming test
# from ${{ inputs.test_id }}, then create a docker volume with the recently attached disk.
# The disk will be mounted in ${{ inputs.root_state_path }}/${{ inputs.zebra_state_dir }}.
#
# The disk mounted in the VM is located at /dev/sdb, we mount the root `/` of this disk to the docker
# container in one path:
# - /var/cache/zebrad-cache -> ${{ inputs.root_state_path }}/${{ inputs.zebra_state_dir }} -> $ZEBRA_CACHED_STATE_DIR
#
# This path must match the variable used by the tests in Rust, which are also set in
# `continous-integration-docker.yml` to be able to run this tests.
#
# Although we're mounting the disk root, Zebra will only respect the values from
# $ZEBRA_CACHED_STATE_DIR. The inputs like ${{ inputs.zebra_state_dir }} are only used
# to match that variable paths.
- name: Run ${{ inputs.test_id }} test
# This step mounts the volume only when a single cached state is needed, in this case
# the cached state from Zebra.
# lightwalletd-full-sync test is an exception to this rule, as it does not need a lwd cached state,
# but it does saves a lwd cached state
# TODO: we should find a better logic for this use cases
if: ${{ (inputs.needs_zebra_state && !inputs.needs_lwd_state) && inputs.test_id != 'lwd-full-sync' }}
run: |
gcloud compute ssh \
${{ inputs.test_id }}-${{ env.GITHUB_REF_SLUG_URL }}-${{ env.GITHUB_SHA_SHORT }} \
--zone ${{ env.ZONE }} \
--quiet \
--ssh-flag="-o ServerAliveInterval=5" \
--command \
"\
docker volume create --driver local --opt type=ext4 --opt device=/dev/sdb \
${{ inputs.disk_prefix }}-${{ inputs.test_id }}-${{ env.GITHUB_SHA_SHORT }} \
&& \
docker run ${{ inputs.test_variables }} -t --name ${{ inputs.test_id }} \
--mount type=volume,src=${{ inputs.disk_prefix }}-${{ inputs.test_id }}-${{ env.GITHUB_SHA_SHORT }},dst=${{ inputs.root_state_path }}/${{ inputs.zebra_state_dir }} \
${{ env.GAR_BASE }}/${{ env.IMAGE_NAME }}:sha-${{ env.GITHUB_SHA_SHORT }}"
# SSH into the just created VM, and create a Docker container to run the incoming test
# from ${{ inputs.test_id }}, then create a docker volume with the recently attached disk.
# The disk will be mounted in ${{ inputs.root_state_path }}/${{ inputs.zebra_state_dir }},
# and ${{ inputs.root_state_path }}/${{ inputs.lwd_state_dir }}
#
# In this step we're using the same disk for simplicity, as mounting multiple disks to the
# VM and to the container might require more steps in this workflow, and additional
# considerations.
#
# The disk mounted in the VM is located at /dev/sdb, we mount the root `/` of this disk to the docker
# container in two different paths:
# - /var/cache/zebrad-cache -> ${{ inputs.root_state_path }}/${{ inputs.zebra_state_dir }} -> $ZEBRA_CACHED_STATE_DIR
# - /var/cache/lwd-cache -> ${{ inputs.root_state_path }}/${{ inputs.lwd_state_dir }} -> $LIGHTWALLETD_DATA_DIR
#
# This doesn't cause any path conflicts, because Zebra and lightwalletd create different
# subdirectories for their data. (But Zebra, lightwalletd, and the test harness must not
# delete the whole cache directory.)
#
# This paths must match the variables used by the tests in Rust, which are also set in
# `continous-integration-docker.yml` to be able to run this tests.
#
# Although we're mounting the disk root to both directories, Zebra and Lightwalletd
# will only respect the values from $ZEBRA_CACHED_STATE_DIR and $LIGHTWALLETD_DATA_DIR,
# the inputs like ${{ inputs.lwd_state_dir }} are only used to match those variables paths.
- name: Run ${{ inputs.test_id }} test
# This step mounts the volume only when both cached states are needed, in this case
# the cached state from Zebra and Lightwalletd
# lightwalletd-full-sync test is an exception to this rule, as it does not need a lwd cached state,
# but it does saves a lwd cached state
# TODO: we should find a better logic for this use cases
if: ${{ (inputs.needs_zebra_state && inputs.needs_lwd_state) || inputs.test_id == 'lwd-full-sync' }}
run: |
gcloud compute ssh \
${{ inputs.test_id }}-${{ env.GITHUB_REF_SLUG_URL }}-${{ env.GITHUB_SHA_SHORT }} \
--zone ${{ env.ZONE }} \
--quiet \
--ssh-flag="-o ServerAliveInterval=5" \
--command \
"\
docker volume create --driver local --opt type=ext4 --opt device=/dev/sdb \
${{ inputs.disk_prefix }}-${{ inputs.test_id }}-${{ env.GITHUB_SHA_SHORT }} \
&& \
docker run ${{ inputs.test_variables }} -t --name ${{ inputs.test_id }} \
--mount type=volume,src=${{ inputs.disk_prefix }}-${{ inputs.test_id }}-${{ env.GITHUB_SHA_SHORT }},dst=${{ inputs.root_state_path }}/${{ inputs.zebra_state_dir }} \
--mount type=volume,src=${{ inputs.disk_prefix }}-${{ inputs.test_id }}-${{ env.GITHUB_SHA_SHORT }},dst=${{ inputs.root_state_path }}/${{ inputs.lwd_state_dir }} \
${{ env.GAR_BASE }}/${{ env.IMAGE_NAME }}:sha-${{ env.GITHUB_SHA_SHORT }}"
create-state-image:
name: Create ${{ inputs.test_id }} cached state image
runs-on: ubuntu-latest
needs: [ test-without-cached-state, test-with-cached-state ]
if: ${{ inputs.saves_to_disk }}
permissions:
contents: 'read'
id-token: 'write'
steps:
- name: Inject slug/short variables
uses: rlespinasse/github-slug-action@v4
with:
short-length: 7
# Disk images in GCP are required to be in lowercase, but the blockchain network
# uses sentence case, so we need to downcase ${{ inputs.network }}
#
# Passes ${{ inputs.network }} to subsequent steps using $NETWORK env variable
- name: Downcase network name for disks
run: |
NETWORK_CAPS=${{ inputs.network }}
echo "NETWORK=${NETWORK_CAPS,,}" >> $GITHUB_ENV
# Setup gcloud CLI
- name: Authenticate to Google Cloud
id: auth
uses: google-github-actions/auth@v0.7.3
with:
workload_identity_provider: 'projects/143793276228/locations/global/workloadIdentityPools/github-actions/providers/github-oidc'
service_account: 'github-service-account@zealous-zebra.iam.gserviceaccount.com'
token_format: 'access_token'
# Get the state version from the local constants.rs file to be used in the image creation,
# as the state version is part of the disk image name.
#
# Passes the state version to subsequent steps using $STATE_VERSION env variable
- name: Get state version from constants.rs
run: |
LOCAL_STATE_VERSION=$(grep -oE "DATABASE_FORMAT_VERSION: .* [0-9]+" $GITHUB_WORKSPACE/zebra-state/src/constants.rs | grep -oE "[0-9]+" | tail -n1)
echo "STATE_VERSION: $LOCAL_STATE_VERSION"
echo "STATE_VERSION=$LOCAL_STATE_VERSION" >> $GITHUB_ENV
# Get the sync height from the test logs, which is later used as part of the
# disk description.
#
# The regex used to grep the sync height is provided by ${{ inputs.height_grep_text }},
# this allows to dynamically change the height as needed by different situations or
# based on the logs output from different tests
#
# Passes the sync height to subsequent steps using $SYNC_HEIGHT env variable
- name: Get sync height from logs
run: |
SYNC_HEIGHT=""
DOCKER_LOGS=$(\
gcloud compute ssh \
${{ inputs.test_id }}-${{ env.GITHUB_REF_SLUG_URL }}-${{ env.GITHUB_SHA_SHORT }} \
--zone ${{ env.ZONE }} \
--quiet \
--ssh-flag="-o ServerAliveInterval=5" \
--command="docker logs ${{ inputs.test_id }} --tail 20")
SYNC_HEIGHT=$(echo $DOCKER_LOGS | grep -oE '${{ inputs.height_grep_text }}\([0-9]+\)' | grep -oE '[0-9]+' | tail -1 || [[ $? == 1 ]])
echo "SYNC_HEIGHT=$SYNC_HEIGHT" >> $GITHUB_ENV
# Create an image from disk that will be used for following/other tests
# This image can contain:
# - Zebra cached state
# - Zebra + lightwalletd cached state
# Which cached state is being saved to the disk is defined by ${{ inputs.disk_prefix }}
#
# Force the image creation (--force) as the disk is still attached even though is not being
# used by the container
- name: Create image from state disk
run: |
gcloud compute images create ${{ inputs.disk_prefix }}-${{ env.GITHUB_REF_SLUG_URL }}-${{ env.GITHUB_SHA_SHORT }}-v${{ env.STATE_VERSION }}-${{ env.NETWORK }}-${{ inputs.disk_suffix }} \
--force \
--source-disk=${{ inputs.disk_prefix }}-${{ inputs.test_id }}-${{ env.GITHUB_SHA_SHORT }} \
--source-disk-zone=${{ env.ZONE }} \
--storage-location=us \
--description="Created from commit ${{ env.GITHUB_SHA_SHORT }} with height ${{ env.SYNC_HEIGHT }}"
delete-instance:
name: Delete ${{ inputs.test_id }} instance
runs-on: ubuntu-latest
needs: [ create-state-image ]
# If a disk generation step timeouts (+6 hours) the previous job (creating the image) will be skipped.
# Even if the instance continues running, no image will be created, so it's better to delete it.
if: always()
continue-on-error: true
permissions:
contents: 'read'
id-token: 'write'
steps:
- name: Inject slug/short variables
uses: rlespinasse/github-slug-action@v4
with:
short-length: 7
# Setup gcloud CLI
- name: Authenticate to Google Cloud
id: auth
uses: google-github-actions/auth@v0.7.3
with:
workload_identity_provider: 'projects/143793276228/locations/global/workloadIdentityPools/github-actions/providers/github-oidc'
service_account: 'github-service-account@zealous-zebra.iam.gserviceaccount.com'
token_format: 'access_token'
# Deletes the instances that has been recently deployed in the actual commit after all
# previous jobs have run, no matter the outcome of the job.
- name: Delete test instance
continue-on-error: true
run: |
INSTANCE=$(gcloud compute instances list --filter=${{ inputs.test_id }}-${{ env.GITHUB_REF_SLUG_URL }}-${{ env.GITHUB_SHA_SHORT }} --format='value(NAME)')
if [ -z "${INSTANCE}" ]; then
echo "No instance to delete"
else
gcloud compute instances delete "${INSTANCE}" --zone "${{ env.ZONE }}" --delete-disks all --quiet
fi