fix(ci): disk validation for docker volume mount (#7665)
* fix(ci): disk validation for docker volume mount * Use a symlink for lightwalletd cached state rather than mounting the same volume twice * Avoid "sdb seems to be busy" errors from docker by adding extra sleeps * Add a missing backslash * Remove symlink from workflow * Symlink lightwalletd path in entrypoint.sh * Retry on failure and check Docker logs * End ssh shell lines with explicit terminators * Delete Docker containers if Docker mount fails * Revert symlink changes in entrypoint.sh * Debug using lsof * Use correct lsof commands * Use correct syntax for lsof +D * fix(ci): make multiple validations before mounting Loop and check for three conditions: The device `/dev/sdb` exists. No process is using the device `/dev/sdb`. No process is using the Docker volume directory. * fix: do not pre-mount docker volume The Docker version available with the newer `cos-stable` OS (https://cloud.google.com/release-notes#cos-109-17800-0-45) allows mounting the image when running it. Mounting it before makes the disk unavailable. * fix: remove extra `;` * fix: just confirm with `lsof` and show its output * chore: reduce diff --------- Co-authored-by: teor <teor@riseup.net>
This commit is contained in:
parent
f3238fca1b
commit
5c3a02a1d0
|
@ -171,7 +171,7 @@ jobs:
|
||||||
--image-project=cos-cloud \
|
--image-project=cos-cloud \
|
||||||
--image-family=cos-stable \
|
--image-family=cos-stable \
|
||||||
--create-disk=name="${{ inputs.test_id }}-${{ env.GITHUB_SHA_SHORT }}",device-name="${{ inputs.test_id }}-${{ env.GITHUB_SHA_SHORT }}",size=300GB,type=pd-ssd \
|
--create-disk=name="${{ inputs.test_id }}-${{ env.GITHUB_SHA_SHORT }}",device-name="${{ inputs.test_id }}-${{ env.GITHUB_SHA_SHORT }}",size=300GB,type=pd-ssd \
|
||||||
--container-image=gcr.io/google-containers/ubuntu \
|
--container-image=gcr.io/google-containers/busybox \
|
||||||
--machine-type ${{ vars.GCP_LARGE_MACHINE }} \
|
--machine-type ${{ vars.GCP_LARGE_MACHINE }} \
|
||||||
--network-interface=subnet=${{ vars.GCP_SUBNETWORK }} \
|
--network-interface=subnet=${{ vars.GCP_SUBNETWORK }} \
|
||||||
--scopes cloud-platform \
|
--scopes cloud-platform \
|
||||||
|
@ -180,12 +180,9 @@ jobs:
|
||||||
--labels=app=${{ inputs.app_name }},environment=test,network=${NETWORK},github_ref=${{ env.GITHUB_REF_SLUG_URL }},test=${{ inputs.test_id }} \
|
--labels=app=${{ inputs.app_name }},environment=test,network=${NETWORK},github_ref=${{ env.GITHUB_REF_SLUG_URL }},test=${{ inputs.test_id }} \
|
||||||
--tags ${{ inputs.app_name }} \
|
--tags ${{ inputs.app_name }} \
|
||||||
--zone ${{ vars.GCP_ZONE }}
|
--zone ${{ vars.GCP_ZONE }}
|
||||||
sleep 60
|
|
||||||
|
|
||||||
# Create a docker volume with the new disk we just created.
|
# Format the mounted disk if the test doesn't use a cached state.
|
||||||
#
|
- name: Format ${{ inputs.test_id }} volume
|
||||||
# SSH into the just created VM, and create a docker volume with the newly created disk.
|
|
||||||
- name: Create ${{ inputs.test_id }} Docker volume
|
|
||||||
run: |
|
run: |
|
||||||
gcloud compute ssh ${{ inputs.test_id }}-${{ env.GITHUB_REF_SLUG_URL }}-${{ env.GITHUB_SHA_SHORT }} \
|
gcloud compute ssh ${{ inputs.test_id }}-${{ env.GITHUB_REF_SLUG_URL }}-${{ env.GITHUB_SHA_SHORT }} \
|
||||||
--zone ${{ vars.GCP_ZONE }} \
|
--zone ${{ vars.GCP_ZONE }} \
|
||||||
|
@ -194,10 +191,11 @@ jobs:
|
||||||
--ssh-flag="-o ConnectTimeout=5" \
|
--ssh-flag="-o ConnectTimeout=5" \
|
||||||
--command \
|
--command \
|
||||||
"\
|
"\
|
||||||
|
while sudo lsof /dev/sdb; do \
|
||||||
|
echo 'Waiting for /dev/sdb to be free...'; \
|
||||||
|
sleep 10; \
|
||||||
|
done; \
|
||||||
sudo mkfs.ext4 -v /dev/sdb \
|
sudo mkfs.ext4 -v /dev/sdb \
|
||||||
&& \
|
|
||||||
sudo docker volume create --driver local --opt type=ext4 --opt device=/dev/sdb \
|
|
||||||
${{ inputs.test_id }}-${{ env.GITHUB_SHA_SHORT }} \
|
|
||||||
"
|
"
|
||||||
|
|
||||||
# Launch the test without any cached state
|
# Launch the test without any cached state
|
||||||
|
@ -215,7 +213,7 @@ jobs:
|
||||||
--tty \
|
--tty \
|
||||||
--detach \
|
--detach \
|
||||||
${{ inputs.test_variables }} \
|
${{ inputs.test_variables }} \
|
||||||
--mount type=volume,src=${{ inputs.test_id }}-${{ env.GITHUB_SHA_SHORT }},dst=${{ inputs.root_state_path }}/${{ inputs.zebra_state_dir }} \
|
--mount type=volume,volume-driver=local,volume-opt=device=/dev/sdb,volume-opt=type=ext4,dst=${{ inputs.root_state_path }}/${{ inputs.zebra_state_dir }} \
|
||||||
${{ vars.GAR_BASE }}/${{ vars.CI_IMAGE_NAME }}:sha-${{ env.GITHUB_SHA_SHORT }} \
|
${{ vars.GAR_BASE }}/${{ vars.CI_IMAGE_NAME }}:sha-${{ env.GITHUB_SHA_SHORT }} \
|
||||||
"
|
"
|
||||||
|
|
||||||
|
@ -296,6 +294,7 @@ jobs:
|
||||||
- name: Find ${{ inputs.test_id }} cached state disk
|
- name: Find ${{ inputs.test_id }} cached state disk
|
||||||
id: get-disk-name
|
id: get-disk-name
|
||||||
run: |
|
run: |
|
||||||
|
set -x
|
||||||
LOCAL_STATE_VERSION=$(grep -oE "DATABASE_FORMAT_VERSION: .* [0-9]+" "$GITHUB_WORKSPACE/zebra-state/src/constants.rs" | grep -oE "[0-9]+" | tail -n1)
|
LOCAL_STATE_VERSION=$(grep -oE "DATABASE_FORMAT_VERSION: .* [0-9]+" "$GITHUB_WORKSPACE/zebra-state/src/constants.rs" | grep -oE "[0-9]+" | tail -n1)
|
||||||
echo "STATE_VERSION: $LOCAL_STATE_VERSION"
|
echo "STATE_VERSION: $LOCAL_STATE_VERSION"
|
||||||
|
|
||||||
|
@ -373,7 +372,7 @@ jobs:
|
||||||
--image-project=cos-cloud \
|
--image-project=cos-cloud \
|
||||||
--image-family=cos-stable \
|
--image-family=cos-stable \
|
||||||
--create-disk=image=${{ env.CACHED_DISK_NAME }},name="${{ inputs.test_id }}-${{ env.GITHUB_SHA_SHORT }}",device-name="${{ inputs.test_id }}-${{ env.GITHUB_SHA_SHORT }}",size=300GB,type=pd-ssd \
|
--create-disk=image=${{ env.CACHED_DISK_NAME }},name="${{ inputs.test_id }}-${{ env.GITHUB_SHA_SHORT }}",device-name="${{ inputs.test_id }}-${{ env.GITHUB_SHA_SHORT }}",size=300GB,type=pd-ssd \
|
||||||
--container-image=gcr.io/google-containers/ubuntu \
|
--container-image=gcr.io/google-containers/busybox \
|
||||||
--machine-type ${{ vars.GCP_LARGE_MACHINE }} \
|
--machine-type ${{ vars.GCP_LARGE_MACHINE }} \
|
||||||
--network-interface=subnet=${{ vars.GCP_SUBNETWORK }} \
|
--network-interface=subnet=${{ vars.GCP_SUBNETWORK }} \
|
||||||
--scopes cloud-platform \
|
--scopes cloud-platform \
|
||||||
|
@ -384,24 +383,6 @@ jobs:
|
||||||
--zone ${{ vars.GCP_ZONE }}
|
--zone ${{ vars.GCP_ZONE }}
|
||||||
sleep 60
|
sleep 60
|
||||||
|
|
||||||
# Create a docker volume with the selected cached state.
|
|
||||||
#
|
|
||||||
# SSH into the just created VM and create a docker volume with the recently attached disk.
|
|
||||||
# (The cached state and disk are usually the same size,
|
|
||||||
# but the cached state can be smaller if we just increased the disk size.)
|
|
||||||
- name: Create ${{ inputs.test_id }} Docker volume
|
|
||||||
run: |
|
|
||||||
gcloud compute ssh ${{ inputs.test_id }}-${{ env.GITHUB_REF_SLUG_URL }}-${{ env.GITHUB_SHA_SHORT }} \
|
|
||||||
--zone ${{ vars.GCP_ZONE }} \
|
|
||||||
--ssh-flag="-o ServerAliveInterval=5" \
|
|
||||||
--ssh-flag="-o ConnectionAttempts=20" \
|
|
||||||
--ssh-flag="-o ConnectTimeout=5" \
|
|
||||||
--command \
|
|
||||||
"\
|
|
||||||
sudo docker volume create --driver local --opt type=ext4 --opt device=/dev/sdb \
|
|
||||||
${{ inputs.test_id }}-${{ env.GITHUB_SHA_SHORT }} \
|
|
||||||
"
|
|
||||||
|
|
||||||
# Launch the test with the previously created Zebra-only cached state.
|
# Launch the test with the previously created Zebra-only cached state.
|
||||||
# Each test runs one of the "Launch test" steps, and skips the other.
|
# Each test runs one of the "Launch test" steps, and skips the other.
|
||||||
#
|
#
|
||||||
|
@ -432,14 +413,12 @@ jobs:
|
||||||
--ssh-flag="-o ConnectTimeout=5" \
|
--ssh-flag="-o ConnectTimeout=5" \
|
||||||
--command \
|
--command \
|
||||||
"\
|
"\
|
||||||
# Wait for the disk to be attached
|
|
||||||
while [[ ! -e /dev/sdb ]]; do sleep 1; done && \
|
|
||||||
sudo docker run \
|
sudo docker run \
|
||||||
--name ${{ inputs.test_id }} \
|
--name ${{ inputs.test_id }} \
|
||||||
--tty \
|
--tty \
|
||||||
--detach \
|
--detach \
|
||||||
${{ inputs.test_variables }} \
|
${{ inputs.test_variables }} \
|
||||||
--mount type=volume,src=${{ inputs.test_id }}-${{ env.GITHUB_SHA_SHORT }},dst=${{ inputs.root_state_path }}/${{ inputs.zebra_state_dir }} \
|
--mount type=volume,volume-driver=local,volume-opt=device=/dev/sdb,volume-opt=type=ext4,dst=${{ inputs.root_state_path }}/${{ inputs.zebra_state_dir }} \
|
||||||
${{ vars.GAR_BASE }}/${{ vars.CI_IMAGE_NAME }}:sha-${{ env.GITHUB_SHA_SHORT }} \
|
${{ vars.GAR_BASE }}/${{ vars.CI_IMAGE_NAME }}:sha-${{ env.GITHUB_SHA_SHORT }} \
|
||||||
"
|
"
|
||||||
|
|
||||||
|
@ -453,11 +432,13 @@ jobs:
|
||||||
# VM and to the container might require more steps in this workflow, and additional
|
# VM and to the container might require more steps in this workflow, and additional
|
||||||
# considerations.
|
# considerations.
|
||||||
#
|
#
|
||||||
# The disk mounted in the VM is located at /dev/sdb, we mount the root `/` of this disk to the docker
|
# The disk mounted in the VM is located at /dev/sdb, we want the root `/` of this disk to be
|
||||||
# container in two different paths:
|
# available in the docker container at two different paths:
|
||||||
# - /var/cache/zebrad-cache -> ${{ inputs.root_state_path }}/${{ inputs.zebra_state_dir }} -> $ZEBRA_CACHED_STATE_DIR
|
# - /var/cache/zebrad-cache -> ${{ inputs.root_state_path }}/${{ inputs.zebra_state_dir }} -> $ZEBRA_CACHED_STATE_DIR
|
||||||
# - /var/cache/lwd-cache -> ${{ inputs.root_state_path }}/${{ inputs.lwd_state_dir }} -> $LIGHTWALLETD_DATA_DIR
|
# - /var/cache/lwd-cache -> ${{ inputs.root_state_path }}/${{ inputs.lwd_state_dir }} -> $LIGHTWALLETD_DATA_DIR
|
||||||
#
|
#
|
||||||
|
# Currently we do this by mounting the same disk at both paths.
|
||||||
|
#
|
||||||
# This doesn't cause any path conflicts, because Zebra and lightwalletd create different
|
# This doesn't cause any path conflicts, because Zebra and lightwalletd create different
|
||||||
# subdirectories for their data. (But Zebra, lightwalletd, and the test harness must not
|
# subdirectories for their data. (But Zebra, lightwalletd, and the test harness must not
|
||||||
# delete the whole cache directory.)
|
# delete the whole cache directory.)
|
||||||
|
@ -482,15 +463,13 @@ jobs:
|
||||||
--ssh-flag="-o ConnectTimeout=5" \
|
--ssh-flag="-o ConnectTimeout=5" \
|
||||||
--command \
|
--command \
|
||||||
"\
|
"\
|
||||||
# Wait for the disk to be attached
|
|
||||||
while [[ ! -e /dev/sdb ]]; do sleep 1; done && \
|
|
||||||
sudo docker run \
|
sudo docker run \
|
||||||
--name ${{ inputs.test_id }} \
|
--name ${{ inputs.test_id }} \
|
||||||
--tty \
|
--tty \
|
||||||
--detach \
|
--detach \
|
||||||
${{ inputs.test_variables }} \
|
${{ inputs.test_variables }} \
|
||||||
--mount type=volume,src=${{ inputs.test_id }}-${{ env.GITHUB_SHA_SHORT }},dst=${{ inputs.root_state_path }}/${{ inputs.zebra_state_dir }} \
|
--mount type=volume,volume-driver=local,volume-opt=device=/dev/sdb,volume-opt=type=ext4,dst=${{ inputs.root_state_path }}/${{ inputs.zebra_state_dir }} \
|
||||||
--mount type=volume,src=${{ inputs.test_id }}-${{ env.GITHUB_SHA_SHORT }},dst=${{ inputs.root_state_path }}/${{ inputs.lwd_state_dir }} \
|
--mount type=volume,volume-driver=local,volume-opt=device=/dev/sdb,volume-opt=type=ext4,dst=${{ inputs.root_state_path }}/${{ inputs.lwd_state_dir }} \
|
||||||
${{ vars.GAR_BASE }}/${{ vars.CI_IMAGE_NAME }}:sha-${{ env.GITHUB_SHA_SHORT }} \
|
${{ vars.GAR_BASE }}/${{ vars.CI_IMAGE_NAME }}:sha-${{ env.GITHUB_SHA_SHORT }} \
|
||||||
"
|
"
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue