fix(ci): disk validation for docker volume mount (#7665)

* fix(ci): disk validation for docker volume mount

* Use a symlink for lightwalletd cached state rather than mounting the same volume twice

* Avoid "sdb seems to be busy" errors from docker by adding extra sleeps

* Add a missing backslash

* Remove symlink from workflow

* Symlink lightwalletd path in entrypoint.sh

* Retry on failure and check Docker logs

* End ssh shell lines with explicit terminators

* Delete Docker containers if Docker mount fails

* Revert symlink changes in entrypoint.sh

* Debug using lsof

* Use correct lsof commands

* Use correct syntax for lsof +D

* fix(ci): make multiple validations before mounting

Loop and check for three conditions:
The device `/dev/sdb` exists.
No process is using the device `/dev/sdb`.
No process is using the Docker volume directory.

* fix: do not pre-mount docker volume

The Docker version available with the newer `cos-stable` OS (https://cloud.google.com/release-notes#cos-109-17800-0-45) allows the disk to be mounted when the container is run.

Mounting it beforehand makes the disk unavailable.

* fix: remove extra `;`

* fix: just confirm with `lsof` and show its output

* chore: reduce diff

---------

Co-authored-by: teor <teor@riseup.net>
This commit is contained in:
Gustavo Valverde 2023-10-06 14:00:57 +01:00 committed by GitHub
parent f3238fca1b
commit 5c3a02a1d0
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
1 changed file with 17 additions and 38 deletions

View File

@ -171,7 +171,7 @@ jobs:
--image-project=cos-cloud \ --image-project=cos-cloud \
--image-family=cos-stable \ --image-family=cos-stable \
--create-disk=name="${{ inputs.test_id }}-${{ env.GITHUB_SHA_SHORT }}",device-name="${{ inputs.test_id }}-${{ env.GITHUB_SHA_SHORT }}",size=300GB,type=pd-ssd \ --create-disk=name="${{ inputs.test_id }}-${{ env.GITHUB_SHA_SHORT }}",device-name="${{ inputs.test_id }}-${{ env.GITHUB_SHA_SHORT }}",size=300GB,type=pd-ssd \
--container-image=gcr.io/google-containers/ubuntu \ --container-image=gcr.io/google-containers/busybox \
--machine-type ${{ vars.GCP_LARGE_MACHINE }} \ --machine-type ${{ vars.GCP_LARGE_MACHINE }} \
--network-interface=subnet=${{ vars.GCP_SUBNETWORK }} \ --network-interface=subnet=${{ vars.GCP_SUBNETWORK }} \
--scopes cloud-platform \ --scopes cloud-platform \
@ -180,12 +180,9 @@ jobs:
--labels=app=${{ inputs.app_name }},environment=test,network=${NETWORK},github_ref=${{ env.GITHUB_REF_SLUG_URL }},test=${{ inputs.test_id }} \ --labels=app=${{ inputs.app_name }},environment=test,network=${NETWORK},github_ref=${{ env.GITHUB_REF_SLUG_URL }},test=${{ inputs.test_id }} \
--tags ${{ inputs.app_name }} \ --tags ${{ inputs.app_name }} \
--zone ${{ vars.GCP_ZONE }} --zone ${{ vars.GCP_ZONE }}
sleep 60
# Create a docker volume with the new disk we just created. # Format the mounted disk if the test doesn't use a cached state.
# - name: Format ${{ inputs.test_id }} volume
# SSH into the just created VM, and create a docker volume with the newly created disk.
- name: Create ${{ inputs.test_id }} Docker volume
run: | run: |
gcloud compute ssh ${{ inputs.test_id }}-${{ env.GITHUB_REF_SLUG_URL }}-${{ env.GITHUB_SHA_SHORT }} \ gcloud compute ssh ${{ inputs.test_id }}-${{ env.GITHUB_REF_SLUG_URL }}-${{ env.GITHUB_SHA_SHORT }} \
--zone ${{ vars.GCP_ZONE }} \ --zone ${{ vars.GCP_ZONE }} \
@ -194,10 +191,11 @@ jobs:
--ssh-flag="-o ConnectTimeout=5" \ --ssh-flag="-o ConnectTimeout=5" \
--command \ --command \
"\ "\
while sudo lsof /dev/sdb; do \
echo 'Waiting for /dev/sdb to be free...'; \
sleep 10; \
done; \
sudo mkfs.ext4 -v /dev/sdb \ sudo mkfs.ext4 -v /dev/sdb \
&& \
sudo docker volume create --driver local --opt type=ext4 --opt device=/dev/sdb \
${{ inputs.test_id }}-${{ env.GITHUB_SHA_SHORT }} \
" "
# Launch the test without any cached state # Launch the test without any cached state
@ -215,7 +213,7 @@ jobs:
--tty \ --tty \
--detach \ --detach \
${{ inputs.test_variables }} \ ${{ inputs.test_variables }} \
--mount type=volume,src=${{ inputs.test_id }}-${{ env.GITHUB_SHA_SHORT }},dst=${{ inputs.root_state_path }}/${{ inputs.zebra_state_dir }} \ --mount type=volume,volume-driver=local,volume-opt=device=/dev/sdb,volume-opt=type=ext4,dst=${{ inputs.root_state_path }}/${{ inputs.zebra_state_dir }} \
${{ vars.GAR_BASE }}/${{ vars.CI_IMAGE_NAME }}:sha-${{ env.GITHUB_SHA_SHORT }} \ ${{ vars.GAR_BASE }}/${{ vars.CI_IMAGE_NAME }}:sha-${{ env.GITHUB_SHA_SHORT }} \
" "
@ -296,6 +294,7 @@ jobs:
- name: Find ${{ inputs.test_id }} cached state disk - name: Find ${{ inputs.test_id }} cached state disk
id: get-disk-name id: get-disk-name
run: | run: |
set -x
LOCAL_STATE_VERSION=$(grep -oE "DATABASE_FORMAT_VERSION: .* [0-9]+" "$GITHUB_WORKSPACE/zebra-state/src/constants.rs" | grep -oE "[0-9]+" | tail -n1) LOCAL_STATE_VERSION=$(grep -oE "DATABASE_FORMAT_VERSION: .* [0-9]+" "$GITHUB_WORKSPACE/zebra-state/src/constants.rs" | grep -oE "[0-9]+" | tail -n1)
echo "STATE_VERSION: $LOCAL_STATE_VERSION" echo "STATE_VERSION: $LOCAL_STATE_VERSION"
@ -373,7 +372,7 @@ jobs:
--image-project=cos-cloud \ --image-project=cos-cloud \
--image-family=cos-stable \ --image-family=cos-stable \
--create-disk=image=${{ env.CACHED_DISK_NAME }},name="${{ inputs.test_id }}-${{ env.GITHUB_SHA_SHORT }}",device-name="${{ inputs.test_id }}-${{ env.GITHUB_SHA_SHORT }}",size=300GB,type=pd-ssd \ --create-disk=image=${{ env.CACHED_DISK_NAME }},name="${{ inputs.test_id }}-${{ env.GITHUB_SHA_SHORT }}",device-name="${{ inputs.test_id }}-${{ env.GITHUB_SHA_SHORT }}",size=300GB,type=pd-ssd \
--container-image=gcr.io/google-containers/ubuntu \ --container-image=gcr.io/google-containers/busybox \
--machine-type ${{ vars.GCP_LARGE_MACHINE }} \ --machine-type ${{ vars.GCP_LARGE_MACHINE }} \
--network-interface=subnet=${{ vars.GCP_SUBNETWORK }} \ --network-interface=subnet=${{ vars.GCP_SUBNETWORK }} \
--scopes cloud-platform \ --scopes cloud-platform \
@ -384,24 +383,6 @@ jobs:
--zone ${{ vars.GCP_ZONE }} --zone ${{ vars.GCP_ZONE }}
sleep 60 sleep 60
# Create a docker volume with the selected cached state.
#
# SSH into the just created VM and create a docker volume with the recently attached disk.
# (The cached state and disk are usually the same size,
# but the cached state can be smaller if we just increased the disk size.)
- name: Create ${{ inputs.test_id }} Docker volume
run: |
gcloud compute ssh ${{ inputs.test_id }}-${{ env.GITHUB_REF_SLUG_URL }}-${{ env.GITHUB_SHA_SHORT }} \
--zone ${{ vars.GCP_ZONE }} \
--ssh-flag="-o ServerAliveInterval=5" \
--ssh-flag="-o ConnectionAttempts=20" \
--ssh-flag="-o ConnectTimeout=5" \
--command \
"\
sudo docker volume create --driver local --opt type=ext4 --opt device=/dev/sdb \
${{ inputs.test_id }}-${{ env.GITHUB_SHA_SHORT }} \
"
# Launch the test with the previously created Zebra-only cached state. # Launch the test with the previously created Zebra-only cached state.
# Each test runs one of the "Launch test" steps, and skips the other. # Each test runs one of the "Launch test" steps, and skips the other.
# #
@ -432,14 +413,12 @@ jobs:
--ssh-flag="-o ConnectTimeout=5" \ --ssh-flag="-o ConnectTimeout=5" \
--command \ --command \
"\ "\
# Wait for the disk to be attached
while [[ ! -e /dev/sdb ]]; do sleep 1; done && \
sudo docker run \ sudo docker run \
--name ${{ inputs.test_id }} \ --name ${{ inputs.test_id }} \
--tty \ --tty \
--detach \ --detach \
${{ inputs.test_variables }} \ ${{ inputs.test_variables }} \
--mount type=volume,src=${{ inputs.test_id }}-${{ env.GITHUB_SHA_SHORT }},dst=${{ inputs.root_state_path }}/${{ inputs.zebra_state_dir }} \ --mount type=volume,volume-driver=local,volume-opt=device=/dev/sdb,volume-opt=type=ext4,dst=${{ inputs.root_state_path }}/${{ inputs.zebra_state_dir }} \
${{ vars.GAR_BASE }}/${{ vars.CI_IMAGE_NAME }}:sha-${{ env.GITHUB_SHA_SHORT }} \ ${{ vars.GAR_BASE }}/${{ vars.CI_IMAGE_NAME }}:sha-${{ env.GITHUB_SHA_SHORT }} \
" "
@ -453,11 +432,13 @@ jobs:
# VM and to the container might require more steps in this workflow, and additional # VM and to the container might require more steps in this workflow, and additional
# considerations. # considerations.
# #
# The disk mounted in the VM is located at /dev/sdb, we mount the root `/` of this disk to the docker # The disk mounted in the VM is located at /dev/sdb, we want the root `/` of this disk to be
# container in two different paths: # available in the docker container at two different paths:
# - /var/cache/zebrad-cache -> ${{ inputs.root_state_path }}/${{ inputs.zebra_state_dir }} -> $ZEBRA_CACHED_STATE_DIR # - /var/cache/zebrad-cache -> ${{ inputs.root_state_path }}/${{ inputs.zebra_state_dir }} -> $ZEBRA_CACHED_STATE_DIR
# - /var/cache/lwd-cache -> ${{ inputs.root_state_path }}/${{ inputs.lwd_state_dir }} -> $LIGHTWALLETD_DATA_DIR # - /var/cache/lwd-cache -> ${{ inputs.root_state_path }}/${{ inputs.lwd_state_dir }} -> $LIGHTWALLETD_DATA_DIR
# #
# Currently we do this by mounting the same disk at both paths.
#
# This doesn't cause any path conflicts, because Zebra and lightwalletd create different # This doesn't cause any path conflicts, because Zebra and lightwalletd create different
# subdirectories for their data. (But Zebra, lightwalletd, and the test harness must not # subdirectories for their data. (But Zebra, lightwalletd, and the test harness must not
# delete the whole cache directory.) # delete the whole cache directory.)
@ -482,15 +463,13 @@ jobs:
--ssh-flag="-o ConnectTimeout=5" \ --ssh-flag="-o ConnectTimeout=5" \
--command \ --command \
"\ "\
# Wait for the disk to be attached
while [[ ! -e /dev/sdb ]]; do sleep 1; done && \
sudo docker run \ sudo docker run \
--name ${{ inputs.test_id }} \ --name ${{ inputs.test_id }} \
--tty \ --tty \
--detach \ --detach \
${{ inputs.test_variables }} \ ${{ inputs.test_variables }} \
--mount type=volume,src=${{ inputs.test_id }}-${{ env.GITHUB_SHA_SHORT }},dst=${{ inputs.root_state_path }}/${{ inputs.zebra_state_dir }} \ --mount type=volume,volume-driver=local,volume-opt=device=/dev/sdb,volume-opt=type=ext4,dst=${{ inputs.root_state_path }}/${{ inputs.zebra_state_dir }} \
--mount type=volume,src=${{ inputs.test_id }}-${{ env.GITHUB_SHA_SHORT }},dst=${{ inputs.root_state_path }}/${{ inputs.lwd_state_dir }} \ --mount type=volume,volume-driver=local,volume-opt=device=/dev/sdb,volume-opt=type=ext4,dst=${{ inputs.root_state_path }}/${{ inputs.lwd_state_dir }} \
${{ vars.GAR_BASE }}/${{ vars.CI_IMAGE_NAME }}:sha-${{ env.GITHUB_SHA_SHORT }} \ ${{ vars.GAR_BASE }}/${{ vars.CI_IMAGE_NAME }}:sha-${{ env.GITHUB_SHA_SHORT }} \
" "