fix(ci): Split Docker logs into sprout, other checkpoints, and full validation (#4704)
* Checkout zebra in each job to avoid warnings
But put TODOs where we might be able to skip checkouts
* Split log following into sprout checkpoints, sapling/orchard checkpoints, and full validation
* Make job IDs shorter
* Use /dev/stderr because docker doesn't have a tty
* remove pipefail
* Revert "remove pipefail"
This reverts commit a7ee37bebdc107a4215e7dd307b189d925969234.
* Make tee ignore errors writing to a grep pipe
* Avoid launching multiple docker instances for duplicate jobs
* Ignore broken pipe error messages and statuses
* fix(ci): docker wait not finding container
We had this issue before. I can't recall whether it was a parsing problem between GitHub Actions and gcloud's `--command` handling, but we had to split this into two pieces.
This implementation restores the approach we used before; see 9b9578c999/.github/workflows/test.yml (L235-L243).
* docs: remove pending TODO
We can't remove `actions/checkout` or set `create_credentials_file` to `false`, because the following steps would not be able to authenticate to GCP.
We could remove `actions/checkout` and leave `create_credentials_file` as `true`, but this would raise a warning on each step, and there is no benefit in doing so.
* Show `docker wait` and `gcloud ssh` output
* If `docker wait` fails, get the exit code using `docker inspect`
Co-authored-by: Conrado Gouvea <conrado@zfnd.org>
Co-authored-by: Gustavo Valverde <gustavo@iterativo.do>
Co-authored-by: mergify[bot] <37929162+mergify[bot]@users.noreply.github.com>
This commit is contained in:
parent
c8cdf0617c
commit
67dc26fbb5
|
@ -75,10 +75,19 @@ on:
|
||||||
description: 'Application name for Google Cloud instance metadata'
|
description: 'Application name for Google Cloud instance metadata'
|
||||||
|
|
||||||
env:
|
env:
|
||||||
|
# where we get the Docker image from
|
||||||
IMAGE_NAME: zebrad-test
|
IMAGE_NAME: zebrad-test
|
||||||
GAR_BASE: us-docker.pkg.dev/zealous-zebra/zebra
|
GAR_BASE: us-docker.pkg.dev/zealous-zebra/zebra
|
||||||
|
# what kind of Google Cloud instance we want to launch
|
||||||
ZONE: us-central1-a
|
ZONE: us-central1-a
|
||||||
MACHINE_TYPE: c2d-standard-16
|
MACHINE_TYPE: c2d-standard-16
|
||||||
|
# How many previous log lines we show at the start of each new log job.
|
||||||
|
# Increase this number if some log lines are skipped between jobs
|
||||||
|
#
|
||||||
|
# We want to show all the logs since the last job finished,
|
||||||
|
# but we don't know how long it will be between jobs.
|
||||||
|
# 200 lines is about 6-15 minutes of sync logs, or one panic log.
|
||||||
|
EXTRA_LOG_LINES: 200
|
||||||
|
|
||||||
jobs:
|
jobs:
|
||||||
# set up the test, if it doesn't use any cached state
|
# set up the test, if it doesn't use any cached state
|
||||||
|
@ -94,6 +103,7 @@ jobs:
|
||||||
- uses: actions/checkout@v3.0.2
|
- uses: actions/checkout@v3.0.2
|
||||||
with:
|
with:
|
||||||
persist-credentials: false
|
persist-credentials: false
|
||||||
|
fetch-depth: '2'
|
||||||
|
|
||||||
- name: Inject slug/short variables
|
- name: Inject slug/short variables
|
||||||
uses: rlespinasse/github-slug-action@v4
|
uses: rlespinasse/github-slug-action@v4
|
||||||
|
@ -150,9 +160,8 @@ jobs:
|
||||||
launch-without-cached-state:
|
launch-without-cached-state:
|
||||||
name: Launch ${{ inputs.test_id }} test
|
name: Launch ${{ inputs.test_id }} test
|
||||||
needs: [ setup-without-cached-state ]
|
needs: [ setup-without-cached-state ]
|
||||||
# If the previous job fails, we also want to run and fail this job,
|
# If creating the Google Cloud instance fails, we don't want to launch another docker instance.
|
||||||
# so that the branch protection rule fails in Mergify and GitHub.
|
if: ${{ !cancelled() && !failure() && !inputs.needs_zebra_state }}
|
||||||
if: ${{ !cancelled() && !inputs.needs_zebra_state }}
|
|
||||||
runs-on: ubuntu-latest
|
runs-on: ubuntu-latest
|
||||||
permissions:
|
permissions:
|
||||||
contents: 'read'
|
contents: 'read'
|
||||||
|
@ -161,6 +170,7 @@ jobs:
|
||||||
- uses: actions/checkout@v3.0.2
|
- uses: actions/checkout@v3.0.2
|
||||||
with:
|
with:
|
||||||
persist-credentials: false
|
persist-credentials: false
|
||||||
|
fetch-depth: '2'
|
||||||
|
|
||||||
- name: Inject slug/short variables
|
- name: Inject slug/short variables
|
||||||
uses: rlespinasse/github-slug-action@v4
|
uses: rlespinasse/github-slug-action@v4
|
||||||
|
@ -324,9 +334,8 @@ jobs:
|
||||||
launch-with-cached-state:
|
launch-with-cached-state:
|
||||||
name: Launch ${{ inputs.test_id }} test
|
name: Launch ${{ inputs.test_id }} test
|
||||||
needs: [ setup-with-cached-state ]
|
needs: [ setup-with-cached-state ]
|
||||||
# If the previous job fails, we also want to run and fail this job,
|
# If creating the Google Cloud instance fails, we don't want to launch another docker instance.
|
||||||
# so that the branch protection rule fails in Mergify and GitHub.
|
if: ${{ !cancelled() && !failure() && inputs.needs_zebra_state }}
|
||||||
if: ${{ !cancelled() && inputs.needs_zebra_state }}
|
|
||||||
runs-on: ubuntu-latest
|
runs-on: ubuntu-latest
|
||||||
permissions:
|
permissions:
|
||||||
contents: 'read'
|
contents: 'read'
|
||||||
|
@ -445,13 +454,12 @@ jobs:
|
||||||
"
|
"
|
||||||
|
|
||||||
|
|
||||||
# follow the logs of the test we just launched
|
# follow the logs of the test we just launched, up to Sapling activation (or the test finishing)
|
||||||
follow-logs:
|
logs-sprout:
|
||||||
name: Show logs for ${{ inputs.test_id }} test
|
name: Log ${{ inputs.test_id }} test (sprout)
|
||||||
needs: [ launch-with-cached-state, launch-without-cached-state ]
|
|
||||||
# We run exactly one of without-cached-state or with-cached-state, and we always skip the other one.
|
# We run exactly one of without-cached-state or with-cached-state, and we always skip the other one.
|
||||||
# If the previous job fails, we also want to run and fail this job,
|
needs: [ launch-with-cached-state, launch-without-cached-state ]
|
||||||
# so that the branch protection rule fails in Mergify and GitHub.
|
# If the previous job fails, we still want to show the logs.
|
||||||
if: ${{ !cancelled() }}
|
if: ${{ !cancelled() }}
|
||||||
runs-on: ubuntu-latest
|
runs-on: ubuntu-latest
|
||||||
permissions:
|
permissions:
|
||||||
|
@ -461,6 +469,7 @@ jobs:
|
||||||
- uses: actions/checkout@v3.0.2
|
- uses: actions/checkout@v3.0.2
|
||||||
with:
|
with:
|
||||||
persist-credentials: false
|
persist-credentials: false
|
||||||
|
fetch-depth: '2'
|
||||||
|
|
||||||
- name: Inject slug/short variables
|
- name: Inject slug/short variables
|
||||||
uses: rlespinasse/github-slug-action@v4
|
uses: rlespinasse/github-slug-action@v4
|
||||||
|
@ -481,8 +490,12 @@ jobs:
|
||||||
service_account: 'github-service-account@zealous-zebra.iam.gserviceaccount.com'
|
service_account: 'github-service-account@zealous-zebra.iam.gserviceaccount.com'
|
||||||
token_format: 'access_token'
|
token_format: 'access_token'
|
||||||
|
|
||||||
# Show all the logs since the container launched
|
# Show all the logs since the container launched,
|
||||||
- name: Show logs for ${{ inputs.test_id }} test
|
# following until Sapling activation (or the test finishes).
|
||||||
|
#
|
||||||
|
# The log pipeline ignores the exit status of `docker logs`.
|
||||||
|
# Errors in the tests are caught by the final test status job.
|
||||||
|
- name: Show logs for ${{ inputs.test_id }} test (sprout)
|
||||||
run: |
|
run: |
|
||||||
gcloud compute ssh \
|
gcloud compute ssh \
|
||||||
${{ inputs.test_id }}-${{ env.GITHUB_REF_SLUG_URL }}-${{ env.GITHUB_SHA_SHORT }} \
|
${{ inputs.test_id }}-${{ env.GITHUB_REF_SLUG_URL }}-${{ env.GITHUB_SHA_SHORT }} \
|
||||||
|
@ -494,14 +507,127 @@ jobs:
|
||||||
docker logs \
|
docker logs \
|
||||||
--tail all \
|
--tail all \
|
||||||
--follow \
|
--follow \
|
||||||
${{ inputs.test_id }} \
|
${{ inputs.test_id }} | \
|
||||||
|
tee --output-error=exit /dev/stderr | \
|
||||||
|
grep --max-count=1 --extended-regexp --color=always \
|
||||||
|
'(estimated progress.*network_upgrade.*=.*Sapling)|(test result:.*finished in)' \
|
||||||
"
|
"
|
||||||
|
|
||||||
|
# follow the logs of the test we just launched, up to the last checkpoint (or the test finishing)
|
||||||
|
# TODO: split out sapling logs when the mandatory checkpoint is above NU5 activation
|
||||||
|
logs-checkpoint:
|
||||||
|
name: Log ${{ inputs.test_id }} test (checkpoint)
|
||||||
|
needs: [ logs-sprout ]
|
||||||
|
# If the previous job fails, we still want to show the logs.
|
||||||
|
if: ${{ !cancelled() }}
|
||||||
|
runs-on: ubuntu-latest
|
||||||
|
permissions:
|
||||||
|
contents: 'read'
|
||||||
|
id-token: 'write'
|
||||||
|
steps:
|
||||||
|
- uses: actions/checkout@v3.0.2
|
||||||
|
with:
|
||||||
|
persist-credentials: false
|
||||||
|
fetch-depth: '2'
|
||||||
|
|
||||||
|
- name: Inject slug/short variables
|
||||||
|
uses: rlespinasse/github-slug-action@v4
|
||||||
|
with:
|
||||||
|
short-length: 7
|
||||||
|
|
||||||
|
- name: Downcase network name for disks
|
||||||
|
run: |
|
||||||
|
NETWORK_CAPS=${{ inputs.network }}
|
||||||
|
echo "NETWORK=${NETWORK_CAPS,,}" >> $GITHUB_ENV
|
||||||
|
|
||||||
|
# Setup gcloud CLI
|
||||||
|
- name: Authenticate to Google Cloud
|
||||||
|
id: auth
|
||||||
|
uses: google-github-actions/auth@v0.8.0
|
||||||
|
with:
|
||||||
|
workload_identity_provider: 'projects/143793276228/locations/global/workloadIdentityPools/github-actions/providers/github-oidc'
|
||||||
|
service_account: 'github-service-account@zealous-zebra.iam.gserviceaccount.com'
|
||||||
|
token_format: 'access_token'
|
||||||
|
|
||||||
|
# Show recent logs, following until the last checkpoint (or the test finishes)
|
||||||
|
- name: Show logs for ${{ inputs.test_id }} test (checkpoint)
|
||||||
|
run: |
|
||||||
|
gcloud compute ssh \
|
||||||
|
${{ inputs.test_id }}-${{ env.GITHUB_REF_SLUG_URL }}-${{ env.GITHUB_SHA_SHORT }} \
|
||||||
|
--zone ${{ env.ZONE }} \
|
||||||
|
--quiet \
|
||||||
|
--ssh-flag="-o ServerAliveInterval=5" \
|
||||||
|
--command \
|
||||||
|
"\
|
||||||
|
docker logs \
|
||||||
|
--tail ${{ env.EXTRA_LOG_LINES }} \
|
||||||
|
--follow \
|
||||||
|
${{ inputs.test_id }} | \
|
||||||
|
tee --output-error=exit /dev/stderr | \
|
||||||
|
grep --max-count=1 --extended-regexp --color=always \
|
||||||
|
'(verified final checkpoint)|(test result:.*finished in)' \
|
||||||
|
"
|
||||||
|
|
||||||
|
# follow the logs of the test we just launched, from the last checkpoint until the test finishes
|
||||||
|
logs-end:
|
||||||
|
name: Log ${{ inputs.test_id }} test (end)
|
||||||
|
needs: [ logs-checkpoint ]
|
||||||
|
# If the previous job fails, we still want to show the logs.
|
||||||
|
if: ${{ !cancelled() }}
|
||||||
|
runs-on: ubuntu-latest
|
||||||
|
permissions:
|
||||||
|
contents: 'read'
|
||||||
|
id-token: 'write'
|
||||||
|
steps:
|
||||||
|
- uses: actions/checkout@v3.0.2
|
||||||
|
with:
|
||||||
|
persist-credentials: false
|
||||||
|
fetch-depth: '2'
|
||||||
|
|
||||||
|
- name: Inject slug/short variables
|
||||||
|
uses: rlespinasse/github-slug-action@v4
|
||||||
|
with:
|
||||||
|
short-length: 7
|
||||||
|
|
||||||
|
- name: Downcase network name for disks
|
||||||
|
run: |
|
||||||
|
NETWORK_CAPS=${{ inputs.network }}
|
||||||
|
echo "NETWORK=${NETWORK_CAPS,,}" >> $GITHUB_ENV
|
||||||
|
|
||||||
|
# Setup gcloud CLI
|
||||||
|
- name: Authenticate to Google Cloud
|
||||||
|
id: auth
|
||||||
|
uses: google-github-actions/auth@v0.8.0
|
||||||
|
with:
|
||||||
|
workload_identity_provider: 'projects/143793276228/locations/global/workloadIdentityPools/github-actions/providers/github-oidc'
|
||||||
|
service_account: 'github-service-account@zealous-zebra.iam.gserviceaccount.com'
|
||||||
|
token_format: 'access_token'
|
||||||
|
|
||||||
|
# Show recent logs, following until the test finishes
|
||||||
|
- name: Show logs for ${{ inputs.test_id }} test (end)
|
||||||
|
run: |
|
||||||
|
gcloud compute ssh \
|
||||||
|
${{ inputs.test_id }}-${{ env.GITHUB_REF_SLUG_URL }}-${{ env.GITHUB_SHA_SHORT }} \
|
||||||
|
--zone ${{ env.ZONE }} \
|
||||||
|
--quiet \
|
||||||
|
--ssh-flag="-o ServerAliveInterval=5" \
|
||||||
|
--command \
|
||||||
|
"\
|
||||||
|
docker logs \
|
||||||
|
--tail ${{ env.EXTRA_LOG_LINES }} \
|
||||||
|
--follow \
|
||||||
|
${{ inputs.test_id }} | \
|
||||||
|
tee --output-error=exit /dev/stderr | \
|
||||||
|
grep --max-count=1 --extended-regexp --color=always \
|
||||||
|
'test result:.*finished in' \
|
||||||
|
"
|
||||||
|
|
||||||
|
|
||||||
# wait for the result of the test
|
# wait for the result of the test
|
||||||
test-result:
|
test-result:
|
||||||
# TODO: update the job name here, and in the branch protection rules
|
# TODO: update the job name here, and in the branch protection rules
|
||||||
name: Run ${{ inputs.test_id }} test
|
name: Run ${{ inputs.test_id }} test
|
||||||
needs: [ follow-logs ]
|
needs: [ logs-end ]
|
||||||
# If the previous job fails, we also want to run and fail this job,
|
# If the previous job fails, we also want to run and fail this job,
|
||||||
# so that the branch protection rule fails in Mergify and GitHub.
|
# so that the branch protection rule fails in Mergify and GitHub.
|
||||||
if: ${{ !cancelled() }}
|
if: ${{ !cancelled() }}
|
||||||
|
@ -513,6 +639,7 @@ jobs:
|
||||||
- uses: actions/checkout@v3.0.2
|
- uses: actions/checkout@v3.0.2
|
||||||
with:
|
with:
|
||||||
persist-credentials: false
|
persist-credentials: false
|
||||||
|
fetch-depth: '2'
|
||||||
|
|
||||||
- name: Inject slug/short variables
|
- name: Inject slug/short variables
|
||||||
uses: rlespinasse/github-slug-action@v4
|
uses: rlespinasse/github-slug-action@v4
|
||||||
|
@ -535,8 +662,12 @@ jobs:
|
||||||
|
|
||||||
# Wait for the container to finish, then exit with the test's exit status.
|
# Wait for the container to finish, then exit with the test's exit status.
|
||||||
#
|
#
|
||||||
# `docker wait` prints the container exit status as a string, but we need to exit `ssh` with that status.
|
# If the container has already finished, `docker wait` should return its status.
|
||||||
# `docker wait` can also wait for multiple containers, but we only ever wait for a single container.
|
# But sometimes this doesn't work, so we use `docker inspect` as a fallback.
|
||||||
|
#
|
||||||
|
# `docker wait` prints the container exit status as a string, but we need to exit the `ssh` command
|
||||||
|
# with that status.
|
||||||
|
# (`docker wait` can also wait for multiple containers, but we only ever wait for a single container.)
|
||||||
- name: Result of ${{ inputs.test_id }} test
|
- name: Result of ${{ inputs.test_id }} test
|
||||||
run: |
|
run: |
|
||||||
gcloud compute ssh \
|
gcloud compute ssh \
|
||||||
|
@ -544,10 +675,15 @@ jobs:
|
||||||
--zone ${{ env.ZONE }} \
|
--zone ${{ env.ZONE }} \
|
||||||
--quiet \
|
--quiet \
|
||||||
--ssh-flag="-o ServerAliveInterval=5" \
|
--ssh-flag="-o ServerAliveInterval=5" \
|
||||||
--command \
|
--command=' \
|
||||||
"\
|
EXIT_STATUS=$( \
|
||||||
exit $(docker wait ${{ inputs.test_id }}) \
|
docker wait ${{ inputs.test_id }} || \
|
||||||
"
|
docker inspect --format "{{.State.ExitCode}}" ${{ inputs.test_id }} || \
|
||||||
|
echo "missing container, or missing exit status for container" \
|
||||||
|
); \
|
||||||
|
echo "docker exit status: $EXIT_STATUS"; \
|
||||||
|
exit "$EXIT_STATUS" \
|
||||||
|
'
|
||||||
|
|
||||||
|
|
||||||
# create a state image from the instance's state disk, if requested by the caller
|
# create a state image from the instance's state disk, if requested by the caller
|
||||||
|
@ -563,6 +699,11 @@ jobs:
|
||||||
contents: 'read'
|
contents: 'read'
|
||||||
id-token: 'write'
|
id-token: 'write'
|
||||||
steps:
|
steps:
|
||||||
|
- uses: actions/checkout@v3.0.2
|
||||||
|
with:
|
||||||
|
persist-credentials: false
|
||||||
|
fetch-depth: '2'
|
||||||
|
|
||||||
- name: Inject slug/short variables
|
- name: Inject slug/short variables
|
||||||
uses: rlespinasse/github-slug-action@v4
|
uses: rlespinasse/github-slug-action@v4
|
||||||
with:
|
with:
|
||||||
|
@ -650,6 +791,11 @@ jobs:
|
||||||
contents: 'read'
|
contents: 'read'
|
||||||
id-token: 'write'
|
id-token: 'write'
|
||||||
steps:
|
steps:
|
||||||
|
- uses: actions/checkout@v3.0.2
|
||||||
|
with:
|
||||||
|
persist-credentials: false
|
||||||
|
fetch-depth: '2'
|
||||||
|
|
||||||
- name: Inject slug/short variables
|
- name: Inject slug/short variables
|
||||||
uses: rlespinasse/github-slug-action@v4
|
uses: rlespinasse/github-slug-action@v4
|
||||||
with:
|
with:
|
||||||
|
|
Loading…
Reference in New Issue