From 25b46ea0ecdf931d6a496838b0b6c210ccf65dfd Mon Sep 17 00:00:00 2001 From: Gustavo Valverde Date: Sun, 16 Oct 2022 08:01:59 -0400 Subject: [PATCH] ci(disk): use an official GCP image on CI VMs for disk auto-resizing, make CI & CD disks 300GB (#5371) * Revert "ci(ssh): connect using `ssh-compute` action by Google (#5330)" This reverts commit b366d6e7bb41125fbb861aa551610b3c3de7a544. * ci(ssh): use sudo for docker commands if user is not root * ci(ssh): specify the service account to connect with * ci(ssh): increase the Google Cloud instance sshd connection limit * chore: add a new line at the end of the script * chore: update our VM image to bullseye * chore: fix `tj-actions/changed-files` file comparison * ci(disk): use an official image on CI VMs for disk auto-resizing Previous behavior: We've had issues in the past with resizing as the device is busy, for example: ``` e2fsck: Cannot continue, aborting. /dev/sdb is in use. ``` Expected behavior: We've been manually resizing the disk as this task was not being done automatically, but having an official Public Image from GCP would make this easier (automatic) and it also integrates better with other GCP services Configuration differences: https://cloud.google.com/compute/docs/images/os-details#notable-difference-debian Solution: - Use `debian-11` from the official public images https://cloud.google.com/compute/docs/images/os-details#debian - Remove the manual disk resizing from the pipeline * ci: increase VM disk size to fit future cached states sizes Some GCP disk images are 160 GB, which means they could get to the current 200 GB size soon. 
--- .github/workflows/continous-delivery.yml | 6 +++--- .github/workflows/deploy-gcp-tests.yml | 19 +++++++------------ 2 files changed, 10 insertions(+), 15 deletions(-) diff --git a/.github/workflows/continous-delivery.yml b/.github/workflows/continous-delivery.yml index 796af11ca..43bce253b 100644 --- a/.github/workflows/continous-delivery.yml +++ b/.github/workflows/continous-delivery.yml @@ -118,7 +118,7 @@ jobs: gcloud compute instance-templates create-with-container zebrad-${{ needs.versioning.outputs.major_version || env.GITHUB_REF_SLUG_URL }}-${{ env.GITHUB_SHA_SHORT }} \ --boot-disk-type=pd-ssd \ --container-image ${{ env.GAR_BASE }}/zebrad@${{ needs.build.outputs.image_digest }} \ - --create-disk name=zebrad-cache-${{ env.GITHUB_SHA_SHORT }},auto-delete=yes,size=200GB,type=pd-ssd \ + --create-disk name=zebrad-cache-${{ env.GITHUB_SHA_SHORT }},auto-delete=yes,size=300GB,type=pd-ssd \ --container-mount-disk mount-path="/zebrad-cache",name=zebrad-cache-${{ env.GITHUB_SHA_SHORT }} \ --machine-type ${{ env.MACHINE_TYPE }} \ --scopes cloud-platform \ @@ -189,12 +189,12 @@ jobs: - name: Manual deploy of a single instance running zebrad run: | gcloud compute instances create-with-container "zebrad-${{ env.GITHUB_REF_SLUG_URL }}-${{ env.GITHUB_SHA_SHORT }}" \ - --boot-disk-size 200GB \ + --boot-disk-size 300GB \ --boot-disk-type=pd-ssd \ --container-stdin \ --container-tty \ --container-image ${{ env.GAR_BASE }}/zebrad@${{ needs.build.outputs.image_digest }} \ - --create-disk name=zebrad-cache-${{ env.GITHUB_SHA_SHORT }},auto-delete=yes,size=200GB,type=pd-ssd \ + --create-disk name=zebrad-cache-${{ env.GITHUB_SHA_SHORT }},auto-delete=yes,size=300GB,type=pd-ssd \ --container-mount-disk mount-path='/zebrad-cache',name=zebrad-cache-${{ env.GITHUB_SHA_SHORT }} \ --machine-type ${{ env.MACHINE_TYPE }} \ --zone ${{ env.ZONE }} \ diff --git a/.github/workflows/deploy-gcp-tests.yml b/.github/workflows/deploy-gcp-tests.yml index 9d43a853c..255b625a9 100644 --- 
a/.github/workflows/deploy-gcp-tests.yml +++ b/.github/workflows/deploy-gcp-tests.yml @@ -139,10 +139,10 @@ jobs: id: create-instance run: | gcloud compute instances create-with-container "${{ inputs.test_id }}-${{ env.GITHUB_REF_SLUG_URL }}-${{ env.GITHUB_SHA_SHORT }}" \ - --boot-disk-size 200GB \ + --boot-disk-size 300GB \ --boot-disk-type pd-ssd \ - --create-disk name="${{ inputs.test_id }}-${{ env.GITHUB_SHA_SHORT }}",device-name="${{ inputs.test_id }}-${{ env.GITHUB_SHA_SHORT }}",size=200GB,type=pd-ssd \ - --container-image debian:bullseye \ + --create-disk name="${{ inputs.test_id }}-${{ env.GITHUB_SHA_SHORT }}",device-name="${{ inputs.test_id }}-${{ env.GITHUB_SHA_SHORT }}",size=300GB,type=pd-ssd \ + --container-image debian-11 \ --container-restart-policy=never \ --machine-type ${{ env.MACHINE_TYPE }} \ --scopes cloud-platform \ @@ -361,10 +361,10 @@ jobs: id: create-instance run: | gcloud compute instances create-with-container "${{ inputs.test_id }}-${{ env.GITHUB_REF_SLUG_URL }}-${{ env.GITHUB_SHA_SHORT }}" \ - --boot-disk-size 200GB \ + --boot-disk-size 300GB \ --boot-disk-type pd-ssd \ - --create-disk image=${{ env.CACHED_DISK_NAME }},name="${{ inputs.test_id }}-${{ env.GITHUB_SHA_SHORT }}",device-name="${{ inputs.test_id }}-${{ env.GITHUB_SHA_SHORT }}",size=200GB,type=pd-ssd \ - --container-image debian:bullseye \ + --create-disk image=${{ env.CACHED_DISK_NAME }},name="${{ inputs.test_id }}-${{ env.GITHUB_SHA_SHORT }}",device-name="${{ inputs.test_id }}-${{ env.GITHUB_SHA_SHORT }}",size=300GB,type=pd-ssd \ + --container-image debian-11 \ --container-restart-policy=never \ --machine-type ${{ env.MACHINE_TYPE }} \ --scopes cloud-platform \ @@ -376,8 +376,7 @@ jobs: # Create a docker volume with the selected cached state. # - # SSH into the just created VM, expand the partition and filesystem to fill the entire disk, - # then create a docker volume with the recently attached disk. 
+ # SSH into the just created VM and create a docker volume with the recently attached disk. # (The cached state and disk are usually the same size, # but the cached state can be smaller if we just increased the disk size.) - name: Create ${{ inputs.test_id }} Docker volume @@ -391,10 +390,6 @@ jobs: --ssh-flag="-o ConnectTimeout=5" \ --command \ "\ - sudo e2fsck -v -f -p /dev/sdb \ - && \ - sudo resize2fs -p /dev/sdb \ - && \ sudo docker volume create --driver local --opt type=ext4 --opt device=/dev/sdb \ ${{ inputs.test_id }}-${{ env.GITHUB_SHA_SHORT }} \ "