From a9fcde3ebfbd12ffb8e5e5aa08d46ca38c97508b Mon Sep 17 00:00:00 2001 From: teor Date: Tue, 26 Jul 2022 08:13:25 +1000 Subject: [PATCH 1/2] 1. add(ci): Add a Zebra cached state update test, fix lightwalletd tests (#4813) * Fix clippy::let_and_return * Increase lightwalletd test timeouts for zebrad slowness * Add a `zebrad_update_sync()` test, that update syncs Zebra without lightwalletd * Run the zebrad-update-sync test in CI * Add extra zebrad time to workaround lightwalletd bugs --- .../continous-integration-docker.patch.yml | 6 + .../continous-integration-docker.yml | 28 ++ docker/entrypoint.sh | 6 + zebra-network/src/peer/handshake.rs | 6 +- zebrad/tests/acceptance.rs | 260 ++++++++++-------- zebrad/tests/common/launch.rs | 14 +- zebrad/tests/common/lightwalletd.rs | 88 ++++-- 7 files changed, 267 insertions(+), 141 deletions(-) diff --git a/.github/workflows/continous-integration-docker.patch.yml b/.github/workflows/continous-integration-docker.patch.yml index 520e08ddc..b96f03e6b 100644 --- a/.github/workflows/continous-integration-docker.patch.yml +++ b/.github/workflows/continous-integration-docker.patch.yml @@ -63,6 +63,12 @@ jobs: steps: - run: 'echo "No build required"' + test-update-sync: + name: Zebra tip update / Run update-to-tip test + runs-on: ubuntu-latest + steps: + - run: 'echo "No build required"' + lightwalletd-rpc-test: name: Zebra tip JSON-RPC / Run fully-synced-rpc test runs-on: ubuntu-latest diff --git a/.github/workflows/continous-integration-docker.yml b/.github/workflows/continous-integration-docker.yml index 800da62d1..14fdd108d 100644 --- a/.github/workflows/continous-integration-docker.yml +++ b/.github/workflows/continous-integration-docker.yml @@ -309,6 +309,33 @@ jobs: disk_suffix: tip height_grep_text: 'current_height.*=.*Height' + # Test that Zebra can sync to the chain tip, using a cached Zebra tip state, + # without launching `lightwalletd`. + # + # Runs: + # - after every PR is merged to `main` + # - on every PR update + # + # If the state version has changed, waits for the new cached state to be created. + # Otherwise, if the state rebuild was skipped, runs immediately after the build job. + test-update-sync: + name: Zebra tip update + needs: test-full-sync + uses: ./.github/workflows/deploy-gcp-tests.yml + if: ${{ !cancelled() && !failure() && github.event.inputs.regenerate-disks != 'true' && github.event.inputs.run-full-sync != 'true' }} + with: + app_name: zebrad + test_id: update-to-tip + test_description: Test syncing to tip with a Zebra tip state + test_variables: '-e TEST_UPDATE_SYNC=1 -e ZEBRA_FORCE_USE_COLOR=1 -e ZEBRA_CACHED_STATE_DIR=/var/cache/zebrad-cache' + needs_zebra_state: true + # TODO: do we want to update the disk on every PR, to increase CI speed? + saves_to_disk: false + disk_suffix: tip + root_state_path: '/var/cache' + # TODO: do we also want to test the `zebrad` part of the `lwd-cache`? (But not update it.) + zebra_state_dir: 'zebrad-cache' + # Test that Zebra can answer a synthetic RPC call, using a cached Zebra tip state # # Runs: @@ -410,6 +437,7 @@ jobs: test_variables: '-e TEST_LWD_UPDATE_SYNC=1 -e ZEBRA_TEST_LIGHTWALLETD=1 -e ZEBRA_FORCE_USE_COLOR=1 -e ZEBRA_CACHED_STATE_DIR=/var/cache/zebrad-cache -e LIGHTWALLETD_DATA_DIR=/var/cache/lwd-cache' needs_zebra_state: true needs_lwd_state: true + # TODO: do we want to update the disk on every PR, to increase CI speed? 
saves_to_disk: false disk_prefix: lwd-cache disk_suffix: tip diff --git a/docker/entrypoint.sh b/docker/entrypoint.sh index 002a3e301..0e4ecf65b 100755 --- a/docker/entrypoint.sh +++ b/docker/entrypoint.sh @@ -38,6 +38,12 @@ case "$1" in cargo test --locked --release --features "test_sync_to_mandatory_checkpoint_${NETWORK,,},lightwalletd-grpc-tests" --package zebrad --test acceptance -- --nocapture --include-ignored "sync_to_mandatory_checkpoint_${NETWORK,,}" # TODO: replace with $ZEBRA_CACHED_STATE_DIR in Rust and workflows ls -lh "/zebrad-cache"/*/* || (echo "No /zebrad-cache/*/*"; ls -lhR "/zebrad-cache" | head -50 || echo "No /zebrad-cache directory") + elif [[ "$TEST_UPDATE_SYNC" -eq "1" ]]; then + # Run a Zebra sync starting at the cached tip, and syncing to the latest tip. + # + # List directory used by test + ls -lh "$ZEBRA_CACHED_STATE_DIR"/*/* || (echo "No $ZEBRA_CACHED_STATE_DIR/*/*"; ls -lhR "$ZEBRA_CACHED_STATE_DIR" | head -50 || echo "No $ZEBRA_CACHED_STATE_DIR directory") + cargo test --locked --release --features lightwalletd-grpc-tests --package zebrad --test acceptance -- --nocapture --include-ignored zebrad_update_sync elif [[ "$TEST_CHECKPOINT_SYNC" -eq "1" ]]; then # Run a Zebra sync starting at the cached mandatory checkpoint, and syncing past it. # diff --git a/zebra-network/src/peer/handshake.rs b/zebra-network/src/peer/handshake.rs index 43e481aae..63e6f3ba6 100644 --- a/zebra-network/src/peer/handshake.rs +++ b/zebra-network/src/peer/handshake.rs @@ -1137,7 +1137,7 @@ async fn send_periodic_heartbeats_with_shutdown_handle( // slow rate, and shutdown is a oneshot. If both futures // are ready, we want the shutdown to take priority over // sending a useless heartbeat. - let result = match future::select(shutdown_rx, heartbeat_run_loop).await { + match future::select(shutdown_rx, heartbeat_run_loop).await { Either::Left((Ok(CancelHeartbeatTask), _unused_run_loop)) => { tracing::trace!("shutting down because Client requested shut down"); handle_heartbeat_shutdown( @@ -1164,9 +1164,7 @@ async fn send_periodic_heartbeats_with_shutdown_handle( result } - }; - - result + } } /// Send periodical heartbeats to `server_tx`, and update the peer status through diff --git a/zebrad/tests/acceptance.rs b/zebrad/tests/acceptance.rs index db4abd618..18f42764a 100644 --- a/zebrad/tests/acceptance.rs +++ b/zebrad/tests/acceptance.rs @@ -1199,10 +1199,17 @@ fn lightwalletd_integration() -> Result<()> { lightwalletd_integration_test(LaunchWithEmptyState) } -/// Make sure `lightwalletd` can sync from Zebra, in update sync mode. +/// Make sure `zebrad` can sync from peers, but don't actually launch `lightwalletd`. /// -/// If `LIGHTWALLETD_DATA_DIR` is set, runs a quick sync, then a full sync. -/// If `LIGHTWALLETD_DATA_DIR` is not set, just runs a full sync. +/// This test only runs when the `ZEBRA_CACHED_STATE_DIR` env var is set. +/// +/// This test might work on Windows. +#[test] +fn zebrad_update_sync() -> Result<()> { + lightwalletd_integration_test(UpdateZebraCachedStateNoRpc) +} + +/// Make sure `lightwalletd` can sync from Zebra, in update sync mode. /// /// This test only runs when the `ZEBRA_TEST_LIGHTWALLETD`, /// `ZEBRA_CACHED_STATE_DIR`, and `LIGHTWALLETD_DATA_DIR` env vars are set. @@ -1249,6 +1256,9 @@ fn lightwalletd_full_sync() -> Result<()> { async fn lightwalletd_test_suite() -> Result<()> { lightwalletd_integration_test(LaunchWithEmptyState)?; + // Only runs when ZEBRA_CACHED_STATE_DIR is set. 
+ lightwalletd_integration_test(UpdateZebraCachedStateNoRpc)?; + // Only runs when ZEBRA_CACHED_STATE_DIR is set. // When manually running the test suite, allow cached state in the full sync test. lightwalletd_integration_test(FullSyncFromGenesis { @@ -1271,26 +1281,29 @@ async fn lightwalletd_test_suite() -> Result<()> { /// Run a lightwalletd integration test with a configuration for `test_type`. /// -/// Set `allow_cached_state_for_full_sync` to speed up manual full sync tests. +/// Set `FullSyncFromGenesis { allow_lightwalletd_cached_state: true }` to speed up manual full sync tests. /// /// The random ports in this test can cause [rare port conflicts.](#Note on port conflict) -#[cfg(not(target_os = "windows"))] fn lightwalletd_integration_test(test_type: LightwalletdTestType) -> Result<()> { zebra_test::init(); // Skip the test unless the user specifically asked for it - if zebra_skip_lightwalletd_tests() { + // + // TODO: pass test_type to zebra_skip_lightwalletd_tests() and check for lightwalletd launch in there + if test_type.launches_lightwalletd() && zebra_skip_lightwalletd_tests() { return Ok(()); } - // Get the zebrad and lightwalletd configs + // TODO: split the zebrad and lightwalletd launches and checks into separate functions? + + // Get the zebrad config // Handle the Zebra state directory based on the test type: // - LaunchWithEmptyState: ignore the state directory - // - FullSyncFromGenesis & UpdateCachedState: + // - FullSyncFromGenesis, UpdateCachedState, UpdateZebraCachedStateNoRpc: // skip the test if it is not available, timeout if it is not populated - // Write a configuration that has RPC listen_addr set. + // Write a configuration that has RPC listen_addr set (if needed). // If the state path env var is set, use it in the config. 
let config = if let Some(config) = test_type.zebrad_config("lightwalletd_integration_test".to_string()) @@ -1301,7 +1314,7 @@ fn lightwalletd_integration_test(test_type: LightwalletdTestType) -> Result<()> }; // Handle the lightwalletd state directory based on the test type: - // - LaunchWithEmptyState: ignore the state directory + // - LaunchWithEmptyState, UpdateZebraCachedStateNoRpc: ignore the state directory // - FullSyncFromGenesis: use it if available, timeout if it is already populated // - UpdateCachedState: skip the test if it is not available, timeout if it is not populated let lightwalletd_state_path = @@ -1321,9 +1334,6 @@ fn lightwalletd_integration_test(test_type: LightwalletdTestType) -> Result<()> // Get the lists of process failure logs let (zebrad_failure_messages, zebrad_ignore_messages) = test_type.zebrad_failure_messages(); - let (lightwalletd_failure_messages, lightwalletd_ignore_messages) = - test_type.lightwalletd_failure_messages(); - // Launch zebrad let zdir = testdir()?.with_exact_config(&config)?; let mut zebrad = zdir @@ -1339,109 +1349,121 @@ fn lightwalletd_integration_test(test_type: LightwalletdTestType) -> Result<()> zebrad.expect_stdout_line_matches("loaded Zebra state cache .*tip.*=.*None")?; } - // Wait until `zebrad` has opened the RPC endpoint - zebrad.expect_stdout_line_matches(regex::escape( - format!("Opened RPC endpoint at {}", config.rpc.listen_addr.unwrap()).as_str(), - ))?; + // Launch lightwalletd, if needed + let mut lightwalletd = if test_type.launches_lightwalletd() { + // Wait until `zebrad` has opened the RPC endpoint + zebrad.expect_stdout_line_matches(regex::escape( + format!("Opened RPC endpoint at {}", config.rpc.listen_addr.unwrap()).as_str(), + ))?; - // Launch lightwalletd + // Write a fake zcashd configuration that has the rpcbind and rpcport options set + let ldir = testdir()?; + let ldir = ldir.with_lightwalletd_config(config.rpc.listen_addr.unwrap())?; - // Write a fake zcashd configuration that has the rpcbind and rpcport options set - let ldir = testdir()?; - let ldir = ldir.with_lightwalletd_config(config.rpc.listen_addr.unwrap())?; + let (lightwalletd_failure_messages, lightwalletd_ignore_messages) = + test_type.lightwalletd_failure_messages(); - // Launch the lightwalletd process - let lightwalletd = if test_type == LaunchWithEmptyState { - ldir.spawn_lightwalletd_child(None, args![])? + // Launch the lightwalletd process + let lightwalletd = if test_type == LaunchWithEmptyState { + ldir.spawn_lightwalletd_child(None, args![])? + } else { + ldir.spawn_lightwalletd_child(lightwalletd_state_path, args![])? 
+ }; + + let mut lightwalletd = lightwalletd + .with_timeout(test_type.lightwalletd_timeout()) + .with_failure_regex_iter(lightwalletd_failure_messages, lightwalletd_ignore_messages); + + // Wait until `lightwalletd` has launched + lightwalletd.expect_stdout_line_matches(regex::escape("Starting gRPC server"))?; + + // Check that `lightwalletd` is calling the expected Zebra RPCs + + // getblockchaininfo + if test_type.needs_zebra_cached_state() { + lightwalletd.expect_stdout_line_matches( + "Got sapling height 419200 block height [0-9]{7} chain main branchID [0-9a-f]{8}", + )?; + } else { + // Timeout the test if we're somehow accidentally using a cached state in our temp dir + lightwalletd.expect_stdout_line_matches( + "Got sapling height 419200 block height [0-9]{1,6} chain main branchID 00000000", + )?; + } + + if test_type.needs_lightwalletd_cached_state() { + lightwalletd.expect_stdout_line_matches("Found [0-9]{7} blocks in cache")?; + } else if !test_type.allow_lightwalletd_cached_state() { + // Timeout the test if we're somehow accidentally using a cached state in our temp dir + lightwalletd.expect_stdout_line_matches("Found 0 blocks in cache")?; + } + + // getblock with the first Sapling block in Zebra's state + // + // zcash/lightwalletd calls getbestblockhash here, but + // adityapk00/lightwalletd calls getblock + // + // The log also depends on what is in Zebra's state: + // + // # Cached Zebra State + // + // lightwalletd ingests blocks into its cache. + // + // # Empty Zebra State + // + // lightwalletd tries to download the Sapling activation block, but it's not in the state. + // + // Until the Sapling activation block has been downloaded, + // lightwalletd will keep retrying getblock. + if !test_type.allow_lightwalletd_cached_state() { + if test_type.needs_zebra_cached_state() { + lightwalletd.expect_stdout_line_matches( + "([Aa]dding block to cache)|([Ww]aiting for block)", + )?; + } else { + lightwalletd.expect_stdout_line_matches(regex::escape( + "Waiting for zcashd height to reach Sapling activation height (419200)", + ))?; + } + } + + Some(lightwalletd) } else { - ldir.spawn_lightwalletd_child(lightwalletd_state_path, args![])? 
+ None }; - let mut lightwalletd = lightwalletd - .with_timeout(test_type.lightwalletd_timeout()) - .with_failure_regex_iter(lightwalletd_failure_messages, lightwalletd_ignore_messages); - - // Wait until `lightwalletd` has launched - lightwalletd.expect_stdout_line_matches(regex::escape("Starting gRPC server"))?; - - // Check that `lightwalletd` is calling the expected Zebra RPCs - - // getblockchaininfo if test_type.needs_zebra_cached_state() { - lightwalletd.expect_stdout_line_matches( - "Got sapling height 419200 block height [0-9]{7} chain main branchID [0-9a-f]{8}", - )?; - } else { - // Timeout the test if we're somehow accidentally using a cached state in our temp dir - lightwalletd.expect_stdout_line_matches( - "Got sapling height 419200 block height [0-9]{1,6} chain main branchID 00000000", - )?; - } - - if test_type.needs_lightwalletd_cached_state() { - lightwalletd.expect_stdout_line_matches("Found [0-9]{7} blocks in cache")?; - } else if !test_type.allow_lightwalletd_cached_state() { - // Timeout the test if we're somehow accidentally using a cached state in our temp dir - lightwalletd.expect_stdout_line_matches("Found 0 blocks in cache")?; - } - - // getblock with the first Sapling block in Zebra's state - // - // zcash/lightwalletd calls getbestblockhash here, but - // adityapk00/lightwalletd calls getblock - // - // The log also depends on what is in Zebra's state: - // - // # Cached Zebra State - // - // lightwalletd ingests blocks into its cache. - // - // # Empty Zebra State - // - // lightwalletd tries to download the Sapling activation block, but it's not in the state. - // - // Until the Sapling activation block has been downloaded, - // lightwalletd will keep retrying getblock. - if !test_type.allow_lightwalletd_cached_state() { - if test_type.needs_zebra_cached_state() { - lightwalletd - .expect_stdout_line_matches("([Aa]dding block to cache)|([Ww]aiting for block)")?; - } else { - lightwalletd.expect_stdout_line_matches(regex::escape( - "Waiting for zcashd height to reach Sapling activation height (419200)", - ))?; - } - } - - if matches!(test_type, UpdateCachedState | FullSyncFromGenesis { .. }) { // Wait for Zebra to sync its cached state to the chain tip zebrad.expect_stdout_line_matches(SYNC_FINISHED_REGEX)?; // Wait for lightwalletd to sync some blocks - lightwalletd - .expect_stdout_line_matches("([Aa]dding block to cache)|([Ww]aiting for block)")?; + if let Some(ref mut lightwalletd) = lightwalletd { + lightwalletd + .expect_stdout_line_matches("([Aa]dding block to cache)|([Ww]aiting for block)")?; - // Wait for lightwalletd to sync to Zebra's tip. - // - // TODO: after the lightwalletd hangs are fixed, fail the test on errors or timeouts - if cfg!(lightwalletd_hang_fix) { - lightwalletd.expect_stdout_line_matches("[Ww]aiting for block")?; - } else { - // To work around a hang bug, we run the test until: - // - lightwalletd starts waiting for blocks (best case scenario) - // - lightwalletd syncs to near the tip (workaround, cached state image is usable) - // - the test times out with an error, but we ignore it - // (workaround, cached state might be usable, slow, or might fail other tests) + // Wait for lightwalletd to sync to Zebra's tip. 
// - // TODO: update the regex to `1[8-9][0-9]{5}` when mainnet reaches block 1_800_000 - let log_result = lightwalletd.expect_stdout_line_matches( - "([Aa]dding block to cache 1[7-9][0-9]{5})|([Ww]aiting for block)", - ); - if log_result.is_err() { - tracing::warn!( - ?log_result, - "ignoring a lightwalletd test failure, to work around a lightwalletd hang bug", + // TODO: after the lightwalletd hangs are fixed, fail the test on errors or timeouts + if cfg!(lightwalletd_hang_fix) { + lightwalletd.expect_stdout_line_matches("[Ww]aiting for block")?; + } else { + // To work around a hang bug, we run the test until: + // - lightwalletd starts waiting for blocks (best case scenario) + // - lightwalletd syncs to near the tip (workaround, cached state image is usable) + // - the test times out with an error, but we ignore it + // (workaround, cached state might be usable, slow, or might fail other tests) + // + // TODO: update the regex to `1[8-9][0-9]{5}` when mainnet reaches block 1_800_000 + let log_result = lightwalletd.expect_stdout_line_matches( + "([Aa]dding block to cache 1[7-9][0-9]{5})|([Ww]aiting for block)", ); + if log_result.is_err() { + // This error takes up about 100 lines, and looks like a panic message + tracing::warn!( + multi_line_error = ?log_result, + "ignoring a lightwalletd test failure, to work around a lightwalletd hang bug", + ); + } } } @@ -1455,28 +1477,36 @@ fn lightwalletd_integration_test(test_type: LightwalletdTestType) -> Result<()> // // TODO: re-enable this code when lightwalletd hangs are fixed if cfg!(lightwalletd_hang_fix) { - lightwalletd.expect_stdout_line_matches(regex::escape( - "Block hash changed, clearing mempool clients", - ))?; - lightwalletd.expect_stdout_line_matches(regex::escape("Adding new mempool txid"))?; + if let Some(ref mut lightwalletd) = lightwalletd { + lightwalletd.expect_stdout_line_matches(regex::escape( + "Block hash changed, clearing mempool clients", + ))?; + lightwalletd + .expect_stdout_line_matches(regex::escape("Adding new mempool txid"))?; + } } } // Cleanup both processes - lightwalletd.kill()?; - zebrad.kill()?; - - let lightwalletd_output = lightwalletd.wait_with_output()?.assert_failure()?; - let zebrad_output = zebrad.wait_with_output()?.assert_failure()?; - + // // If the test fails here, see the [note on port conflict](#Note on port conflict) // // zcash/lightwalletd exits by itself, but // adityapk00/lightwalletd keeps on going, so it gets killed by the test harness. + zebrad.kill()?; + + if let Some(mut lightwalletd) = lightwalletd { + lightwalletd.kill()?; + + let lightwalletd_output = lightwalletd.wait_with_output()?.assert_failure()?; + + lightwalletd_output + .assert_was_killed() + .wrap_err("Possible port conflict. Are there other acceptance tests running?")?; + } + + let zebrad_output = zebrad.wait_with_output()?.assert_failure()?; - lightwalletd_output - .assert_was_killed() - .wrap_err("Possible port conflict. Are there other acceptance tests running?")?; zebrad_output .assert_was_killed() .wrap_err("Possible port conflict. Are there other acceptance tests running?")?; diff --git a/zebrad/tests/common/launch.rs b/zebrad/tests/common/launch.rs index 75e378b44..db8a200e6 100644 --- a/zebrad/tests/common/launch.rs +++ b/zebrad/tests/common/launch.rs @@ -45,7 +45,11 @@ pub const BETWEEN_NODES_DELAY: Duration = Duration::from_secs(2); /// The amount of time we wait for lightwalletd to update to the tip. /// /// The cached tip can be a few days old, and Zebra needs time to activate its mempool. 
-pub const LIGHTWALLETD_UPDATE_TIP_DELAY: Duration = Duration::from_secs(20 * 60); +/// +/// Currently, `zebrad` syncs are slower than `lightwalletd` syncs, so we re-use its timeout. +/// +/// TODO: reduce to 20 minutes when `zebrad` sync performance improves +pub const LIGHTWALLETD_UPDATE_TIP_DELAY: Duration = LIGHTWALLETD_FULL_SYNC_TIP_DELAY; /// The amount of time we wait for lightwalletd to do a full sync to the tip. /// @@ -53,6 +57,14 @@ pub const LIGHTWALLETD_UPDATE_TIP_DELAY: Duration = Duration::from_secs(20 * 60) /// and Zebra needs time to activate its mempool. pub const LIGHTWALLETD_FULL_SYNC_TIP_DELAY: Duration = Duration::from_secs(90 * 60); +/// The amount of extra time we wait for Zebra to sync to the tip, +/// after we ignore a lightwalletd failure. +/// +/// Zebra logs a status entry every minute, so there should be at least 4 in this interval. +/// +/// TODO: remove this extra time when lightwalletd hangs are fixed +pub const ZEBRAD_EXTRA_DELAY_FOR_LIGHTWALLETD_WORKAROUND: Duration = Duration::from_secs(5 * 60); + /// Extension trait for methods on `tempfile::TempDir` for using it as a test /// directory for `zebrad`. pub trait ZebradTestDirExt diff --git a/zebrad/tests/common/lightwalletd.rs b/zebrad/tests/common/lightwalletd.rs index 8581da6fe..cadf6cd12 100644 --- a/zebrad/tests/common/lightwalletd.rs +++ b/zebrad/tests/common/lightwalletd.rs @@ -28,7 +28,7 @@ use super::{ }, launch::{ ZebradTestDirExt, LIGHTWALLETD_DELAY, LIGHTWALLETD_FULL_SYNC_TIP_DELAY, - LIGHTWALLETD_UPDATE_TIP_DELAY, + LIGHTWALLETD_UPDATE_TIP_DELAY, ZEBRAD_EXTRA_DELAY_FOR_LIGHTWALLETD_WORKAROUND, }, }; @@ -67,7 +67,7 @@ pub fn zebra_skip_lightwalletd_tests() -> bool { // TODO: check if the lightwalletd binary is in the PATH? // (this doesn't seem to be implemented in the standard library) // - // See is_command_available in zebra-test/tests/command.rs for one way to do this. + // See is_command_available() in zebra-test/src/tests/command.rs for one way to do this. if env::var_os(ZEBRA_TEST_LIGHTWALLETD).is_none() { // This message is captured by the test runner, use @@ -236,6 +236,14 @@ pub enum LightwalletdTestType { /// /// This test requires a cached Zebra and lightwalletd state. UpdateCachedState, + + /// Launch `zebrad` and sync it to the tip, but don't launch `lightwalletd`. + /// + /// If this test fails, the failure is in `zebrad` without RPCs or `lightwalletd`. + /// If it succeeds, but the RPC tests fail, the problem is caused by RPCs or `lightwalletd`. + /// + /// This test requires a cached Zebra state. + UpdateZebraCachedStateNoRpc, } impl LightwalletdTestType { @@ -243,26 +251,36 @@ impl LightwalletdTestType { pub fn needs_zebra_cached_state(&self) -> bool { match self { LaunchWithEmptyState => false, - FullSyncFromGenesis { .. } | UpdateCachedState => true, + FullSyncFromGenesis { .. } | UpdateCachedState | UpdateZebraCachedStateNoRpc => true, } } - /// Does this test need a lightwalletd cached state? + /// Does this test launch `lightwalletd`? + pub fn launches_lightwalletd(&self) -> bool { + match self { + UpdateZebraCachedStateNoRpc => false, + LaunchWithEmptyState | FullSyncFromGenesis { .. } | UpdateCachedState => true, + } + } + + /// Does this test need a `lightwalletd` cached state? pub fn needs_lightwalletd_cached_state(&self) -> bool { match self { - LaunchWithEmptyState | FullSyncFromGenesis { .. } => false, + LaunchWithEmptyState | FullSyncFromGenesis { .. 
} | UpdateZebraCachedStateNoRpc => {
+                false
+            }
             UpdateCachedState => true,
         }
     }
 
-    /// Does this test allow a lightwalletd cached state, even if it is not required?
+    /// Does this test allow a `lightwalletd` cached state, even if it is not required?
     pub fn allow_lightwalletd_cached_state(&self) -> bool {
         match self {
             LaunchWithEmptyState => false,
             FullSyncFromGenesis {
                 allow_lightwalletd_cached_state,
             } => *allow_lightwalletd_cached_state,
-            UpdateCachedState => true,
+            UpdateCachedState | UpdateZebraCachedStateNoRpc => true,
         }
     }
 
@@ -287,13 +305,19 @@ impl LightwalletdTestType {
     /// Returns `None` if the test should be skipped,
     /// and `Some(Err(_))` if the config could not be created.
     pub fn zebrad_config(&self, test_name: String) -> Option<Result<ZebradConfig>> {
+        let config = if self.launches_lightwalletd() {
+            random_known_rpc_port_config()
+        } else {
+            default_test_config()
+        };
+
         if !self.needs_zebra_cached_state() {
-            return Some(random_known_rpc_port_config());
+            return Some(config);
         }
 
         let zebra_state_path = self.zebrad_state_path(test_name)?;
 
-        let mut config = match random_known_rpc_port_config() {
+        let mut config = match config {
             Ok(config) => config,
             Err(error) => return Some(Err(error)),
         };
@@ -307,8 +331,17 @@ impl LightwalletdTestType {
         Some(Ok(config))
     }
 
-    /// Returns the lightwalletd state path for this test, if set.
+    /// Returns the `lightwalletd` state path for this test, if set, and if allowed for this test.
     pub fn lightwalletd_state_path(&self, test_name: String) -> Option<PathBuf> {
+        if !self.launches_lightwalletd() {
+            tracing::info!(
+                "running {test_name:?} {self:?} lightwalletd test, \
+                 ignoring any cached state in the {LIGHTWALLETD_DATA_DIR:?} environment variable",
+            );
+
+            return None;
+        }
+
         match env::var_os(LIGHTWALLETD_DATA_DIR) {
             Some(path) => Some(path.into()),
             None => {
@@ -331,21 +364,29 @@ impl LightwalletdTestType {
 
     /// Returns the `zebrad` timeout for this test type.
     pub fn zebrad_timeout(&self) -> Duration {
-        // We use the same timeouts as lightwalletd,
+        let base_timeout = match self {
+            LaunchWithEmptyState => LIGHTWALLETD_DELAY,
+            FullSyncFromGenesis { .. } => LIGHTWALLETD_FULL_SYNC_TIP_DELAY,
+            UpdateCachedState | UpdateZebraCachedStateNoRpc => LIGHTWALLETD_UPDATE_TIP_DELAY,
+        };
+
+        // If lightwalletd hangs and times out, Zebra needs a bit of extra time to finish
+        base_timeout + ZEBRAD_EXTRA_DELAY_FOR_LIGHTWALLETD_WORKAROUND
+    }
+
+    /// Returns the `lightwalletd` timeout for this test type.
+    #[track_caller]
+    pub fn lightwalletd_timeout(&self) -> Duration {
+        if !self.launches_lightwalletd() {
+            panic!("lightwalletd must not be launched in the {self:?} test");
+        }
+
+        // We use the same timeouts for zebrad and lightwalletd,
         // because the tests swap between checking zebrad and lightwalletd.
         match self {
             LaunchWithEmptyState => LIGHTWALLETD_DELAY,
             FullSyncFromGenesis { .. } => LIGHTWALLETD_FULL_SYNC_TIP_DELAY,
-            UpdateCachedState => LIGHTWALLETD_UPDATE_TIP_DELAY,
-        }
-    }
-
-    /// Returns the `lightwalletd` timeout for this test type.
-    pub fn lightwalletd_timeout(&self) -> Duration {
-        match self {
-            LaunchWithEmptyState => LIGHTWALLETD_DELAY,
-            FullSyncFromGenesis { .. } => LIGHTWALLETD_FULL_SYNC_TIP_DELAY,
-            UpdateCachedState => LIGHTWALLETD_UPDATE_TIP_DELAY,
+            UpdateCachedState | UpdateZebraCachedStateNoRpc => LIGHTWALLETD_UPDATE_TIP_DELAY,
         }
     }
 
@@ -375,7 +416,12 @@ impl LightwalletdTestType {
 
     /// Returns `lightwalletd` log regexes that indicate the tests have failed,
     /// and regexes of any failures that should be ignored.
+    #[track_caller]
     pub fn lightwalletd_failure_messages(&self) -> (Vec<String>, Vec<String>) {
+        if !self.launches_lightwalletd() {
+            panic!("lightwalletd must not be launched in the {self:?} test");
+        }
+
         let mut lightwalletd_failure_messages: Vec<String> = LIGHTWALLETD_FAILURE_MESSAGES
             .iter()
             .chain(PROCESS_FAILURE_MESSAGES)

From ed553a9ecabcf1f975eb0cc88e6b290db41eed11 Mon Sep 17 00:00:00 2001
From: teor
Date: Tue, 26 Jul 2022 08:33:00 +1000
Subject: [PATCH 2/2] 2. add(log): Log when state requests take a long time (#4815)

* Fix clippy::let_and_return
* Increase lightwalletd test timeouts for zebrad slowness
* Add a `zebrad_update_sync()` test, that update syncs Zebra without lightwalletd
* Run the zebrad-update-sync test in CI
* Add extra zebrad time to workaround lightwalletd bugs
* Add a CodeTimer diagnostic struct for long-running code
* Time state init and each state request, log when it takes too long
* Add code locations to execution timers
* Instrument state futures and functions with tracing spans
* Only log each code timer once
* Make displayed times shorter
---
 zebra-chain/Cargo.toml        |   1 +
 zebra-chain/src/diagnostic.rs | 120 ++++++++++++++++
 zebra-chain/src/fmt.rs        |   3 +-
 zebra-chain/src/fmt/time.rs   |  16 +++
 zebra-chain/src/lib.rs        |   6 +-
 zebra-state/src/service.rs    | 253 +++++++++++++++++++++++++++-------
 6 files changed, 348 insertions(+), 51 deletions(-)
 create mode 100644 zebra-chain/src/diagnostic.rs

diff --git a/zebra-chain/Cargo.toml b/zebra-chain/Cargo.toml
index 6a31534c0..2e15b23d2 100644
--- a/zebra-chain/Cargo.toml
+++ b/zebra-chain/Cargo.toml
@@ -56,6 +56,7 @@ humantime = "2.1.0"
 displaydoc = "0.2.3"
 static_assertions = "1.1.0"
 thiserror = "1.0.31"
+tracing = "0.1.31"
 
 # Serialization
 hex = { version = "0.4.3", features = ["serde"] }
diff --git a/zebra-chain/src/diagnostic.rs b/zebra-chain/src/diagnostic.rs
new file mode 100644
index 000000000..2b41f6476
--- /dev/null
+++ b/zebra-chain/src/diagnostic.rs
@@ -0,0 +1,120 @@
+//! Tracing the execution time of functions.
+//!
+//! TODO: also trace polling time for futures, using a `Future` wrapper
+
+use std::time::{Duration, Instant};
+
+use crate::fmt::duration_short;
+
+/// The default minimum info-level message time.
+pub const DEFAULT_MIN_INFO_TIME: Duration = Duration::from_secs(5);
+
+/// The default minimum warning message time.
+pub const DEFAULT_MIN_WARN_TIME: Duration = Duration::from_secs(20);
+
+/// A guard that logs code execution time when dropped.
+#[derive(Debug)]
+pub struct CodeTimer {
+    /// The time that the code started executing.
+    start: Instant,
+
+    /// The minimum duration for info-level messages.
+    min_info_time: Duration,
+
+    /// The minimum duration for warning messages.
+    min_warn_time: Duration,
+
+    /// `true` if this timer has finished.
+    has_finished: bool,
+}
+
+impl CodeTimer {
+    /// Start timing the execution of a function, method, or other code region.
+    ///
+    /// Returns a guard that finishes timing the code when dropped,
+    /// or when [`CodeTimer::finish()`] is called.
+    #[track_caller]
+    pub fn start() -> Self {
+        let start = Instant::now();
+        trace!(?start, "starting code timer");
+
+        Self {
+            start,
+            min_info_time: DEFAULT_MIN_INFO_TIME,
+            min_warn_time: DEFAULT_MIN_WARN_TIME,
+            has_finished: false,
+        }
+    }
+
+    /// Finish timing the execution of a function, method, or other code region.
+    #[track_caller]
+    pub fn finish<S>(
+        mut self,
+        module_path: &'static str,
+        line: u32,
+        description: impl Into<Option<S>>,
+    ) where
+        S: ToString,
+    {
+        self.finish_inner(Some(module_path), Some(line), description);
+    }
+
+    /// Finish timing the execution of a function, method, or other code region.
+    ///
+    /// This private method can be called from [`CodeTimer::finish()`] or `drop()`.
+    #[track_caller]
+    fn finish_inner<S>(
+        &mut self,
+        module_path: impl Into<Option<&'static str>>,
+        line: impl Into<Option<u32>>,
+        description: impl Into<Option<S>>,
+    ) where
+        S: ToString,
+    {
+        if self.has_finished {
+            return;
+        }
+
+        self.has_finished = true;
+
+        let execution = self.start.elapsed();
+        let execution_time = duration_short(execution);
+
+        let module_path = module_path.into();
+        let line = line.into();
+        let description = description
+            .into()
+            .map(|desc| desc.to_string() + " ")
+            .unwrap_or_default();
+
+        if execution >= self.min_warn_time {
+            warn!(
+                ?execution_time,
+                ?module_path,
+                ?line,
+                "{description}code took a long time to execute",
+            );
+        } else if execution >= self.min_info_time {
+            info!(
+                ?execution_time,
+                ?module_path,
+                ?line,
+                "{description}code took longer than expected to execute",
+            );
+        } else {
+            trace!(
+                ?execution_time,
+                ?module_path,
+                ?line,
+                "{description}code timer finished",
+            );
+        }
+    }
+}
+
+impl Drop for CodeTimer {
+    #[track_caller]
+    fn drop(&mut self) {
+        self.finish_inner(None, None, "(dropped, cancelled, or aborted)")
+    }
+}
diff --git a/zebra-chain/src/fmt.rs b/zebra-chain/src/fmt.rs
index e5fa646c1..aaacf9737 100644
--- a/zebra-chain/src/fmt.rs
+++ b/zebra-chain/src/fmt.rs
@@ -8,7 +8,8 @@ use proptest::prelude::*;
 use proptest_derive::Arbitrary;
 
 pub mod time;
-pub use time::{humantime_milliseconds, humantime_seconds};
+
+pub use time::{duration_short, humantime_milliseconds, humantime_seconds};
 
 /// Wrapper to override `Debug`, redirecting it to only output the type's name.
 #[derive(Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
diff --git a/zebra-chain/src/fmt/time.rs b/zebra-chain/src/fmt/time.rs
index 63a0b0ba8..4e8a3c340 100644
--- a/zebra-chain/src/fmt/time.rs
+++ b/zebra-chain/src/fmt/time.rs
@@ -2,6 +2,22 @@
 
 use std::time::Duration;
 
+/// The minimum amount of time displayed with only seconds (no milliseconds).
+pub const MIN_SECONDS_ONLY_TIME: Duration = Duration::from_secs(5);
+
+/// Returns a human-friendly formatted string for the whole number of seconds in `duration`.
+pub fn duration_short(duration: impl Into<Duration>) -> String {
+    let duration = duration.into();
+
+    if duration >= MIN_SECONDS_ONLY_TIME {
+        humantime_seconds(duration)
+    } else {
+        humantime_milliseconds(duration)
+    }
+}
+
+// TODO: rename these functions to duration_*
+
 /// Returns a human-friendly formatted string for the whole number of seconds in `duration`.
 pub fn humantime_seconds(duration: impl Into<Duration>) -> String {
     let duration = duration.into();
diff --git a/zebra-chain/src/lib.rs b/zebra-chain/src/lib.rs
index a0bd3da12..a9d20dc2e 100644
--- a/zebra-chain/src/lib.rs
+++ b/zebra-chain/src/lib.rs
@@ -9,15 +9,19 @@
 // Required by bitvec!
macro #![recursion_limit = "256"] +#[macro_use] +extern crate bitflags; + #[macro_use] extern crate serde; #[macro_use] -extern crate bitflags; +extern crate tracing; pub mod amount; pub mod block; pub mod chain_tip; +pub mod diagnostic; pub mod fmt; pub mod history_tree; pub mod orchard; diff --git a/zebra-state/src/service.rs b/zebra-state/src/service.rs index d7e478ac6..8d440a25c 100644 --- a/zebra-state/src/service.rs +++ b/zebra-state/src/service.rs @@ -27,13 +27,14 @@ use std::{ use futures::future::FutureExt; use tokio::sync::{oneshot, watch}; use tower::{util::BoxService, Service}; -use tracing::instrument; +use tracing::{instrument, Instrument, Span}; #[cfg(any(test, feature = "proptest-impl"))] use tower::buffer::Buffer; use zebra_chain::{ block, + diagnostic::CodeTimer, parameters::{Network, NetworkUpgrade}, transparent, }; @@ -166,12 +167,19 @@ impl StateService { config: Config, network: Network, ) -> (Self, ReadStateService, LatestChainTip, ChainTipChange) { + let timer = CodeTimer::start(); let disk = FinalizedState::new(&config, network); + timer.finish(module_path!(), line!(), "opening finalized state database"); + + let timer = CodeTimer::start(); let initial_tip = disk .db() .tip_block() .map(FinalizedBlock::from) .map(ChainTipBlock::from); + timer.finish(module_path!(), line!(), "fetching database tip"); + + let timer = CodeTimer::start(); let (chain_tip_sender, latest_chain_tip, chain_tip_change) = ChainTipSender::new(initial_tip, network); @@ -192,8 +200,11 @@ impl StateService { chain_tip_sender, best_chain_sender, }; + timer.finish(module_path!(), line!(), "initializing state service"); tracing::info!("starting legacy chain check"); + let timer = CodeTimer::start(); + if let Some(tip) = state.best_tip() { if let Some(nu5_activation_height) = NetworkUpgrade::Nu5.activation_height(network) { if check::legacy_chain( @@ -216,6 +227,7 @@ impl StateService { } } tracing::info!("no legacy chain found"); + timer.finish(module_path!(), line!(), "legacy chain check"); (state, read_only_service, latest_chain_tip, chain_tip_change) } @@ -754,6 +766,8 @@ impl Service for StateService { "type" => "commit_block", ); + let timer = CodeTimer::start(); + self.assert_block_can_be_validated(&prepared); self.pending_utxos @@ -768,10 +782,16 @@ impl Service for StateService { // Since each block is spawned into its own task, // there shouldn't be any other code running in the same task, // so we don't need to worry about blocking it: - // https://docs.rs/tokio/latest/tokio/task/fn.block_in_place.html# - let rsp_rx = - tokio::task::block_in_place(|| self.queue_and_commit_non_finalized(prepared)); + // https://docs.rs/tokio/latest/tokio/task/fn.block_in_place.html + let span = Span::current(); + let rsp_rx = tokio::task::block_in_place(move || { + span.in_scope(|| self.queue_and_commit_non_finalized(prepared)) + }); + // The work is all done, the future just waits on a channel for the result + timer.finish(module_path!(), line!(), "CommitBlock"); + + let span = Span::current(); async move { rsp_rx .await @@ -784,6 +804,7 @@ impl Service for StateService { .map(Response::Committed) .map_err(Into::into) } + .instrument(span) .boxed() } Request::CommitFinalizedBlock(finalized) => { @@ -794,6 +815,8 @@ impl Service for StateService { "type" => "commit_finalized_block", ); + let timer = CodeTimer::start(); + self.pending_utxos.check_against(&finalized.new_outputs); // # Performance @@ -802,9 +825,15 @@ impl Service for StateService { // and written to disk. 
// // See the note in `CommitBlock` for more details. - let rsp_rx = - tokio::task::block_in_place(|| self.queue_and_commit_finalized(finalized)); + let span = Span::current(); + let rsp_rx = tokio::task::block_in_place(move || { + span.in_scope(|| self.queue_and_commit_finalized(finalized)) + }); + // The work is all done, the future just waits on a channel for the result + timer.finish(module_path!(), line!(), "CommitFinalizedBlock"); + + let span = Span::current(); async move { rsp_rx .await @@ -819,6 +848,7 @@ impl Service for StateService { .map(Response::Committed) .map_err(Into::into) } + .instrument(span) .boxed() } Request::Depth(hash) => { @@ -829,7 +859,14 @@ impl Service for StateService { "type" => "depth", ); + let timer = CodeTimer::start(); + + // TODO: move this work into the future, like Block and Transaction? let rsp = Ok(Response::Depth(self.best_depth(hash))); + + // The work is all done, the future just returns the result. + timer.finish(module_path!(), line!(), "Depth"); + async move { rsp }.boxed() } // TODO: consider spawning small reads into blocking tasks, @@ -842,7 +879,14 @@ impl Service for StateService { "type" => "tip", ); + let timer = CodeTimer::start(); + + // TODO: move this work into the future, like Block and Transaction? let rsp = Ok(Response::Tip(self.best_tip())); + + // The work is all done, the future just returns the result. + timer.finish(module_path!(), line!(), "Tip"); + async move { rsp }.boxed() } Request::BlockLocator => { @@ -853,9 +897,16 @@ impl Service for StateService { "type" => "block_locator", ); + let timer = CodeTimer::start(); + + // TODO: move this work into the future, like Block and Transaction? let rsp = Ok(Response::BlockLocator( self.block_locator().unwrap_or_default(), )); + + // The work is all done, the future just returns the result. + timer.finish(module_path!(), line!(), "BlockLocator"); + async move { rsp }.boxed() } Request::Transaction(hash) => { @@ -866,6 +917,8 @@ impl Service for StateService { "type" => "transaction", ); + let timer = CodeTimer::start(); + // Prepare data for concurrent execution let best_chain = self.mem.best_chain().cloned(); let db = self.disk.db().clone(); @@ -873,10 +926,16 @@ impl Service for StateService { // # Performance // // Allow other async tasks to make progress while the transaction is being read from disk. + let span = Span::current(); tokio::task::spawn_blocking(move || { - let rsp = read::transaction(best_chain, &db, hash); + span.in_scope(|| { + let rsp = read::transaction(best_chain, &db, hash); - Ok(Response::Transaction(rsp.map(|(tx, _height)| tx))) + // The work is done in the future. + timer.finish(module_path!(), line!(), "Transaction"); + + Ok(Response::Transaction(rsp.map(|(tx, _height)| tx))) + }) }) .map(|join_result| join_result.expect("panic in Request::Transaction")) .boxed() @@ -889,6 +948,8 @@ impl Service for StateService { "type" => "block", ); + let timer = CodeTimer::start(); + // Prepare data for concurrent execution let best_chain = self.mem.best_chain().cloned(); let db = self.disk.db().clone(); @@ -896,10 +957,16 @@ impl Service for StateService { // # Performance // // Allow other async tasks to make progress while the block is being read from disk. + let span = Span::current(); tokio::task::spawn_blocking(move || { - let rsp = read::block(best_chain, &db, hash_or_height); + span.in_scope(move || { + let rsp = read::block(best_chain, &db, hash_or_height); - Ok(Response::Block(rsp)) + // The work is done in the future. 
+ timer.finish(module_path!(), line!(), "Block"); + + Ok(Response::Block(rsp)) + }) }) .map(|join_result| join_result.expect("panic in Request::Block")) .boxed() @@ -912,13 +979,19 @@ impl Service for StateService { "type" => "await_utxo", ); + let timer = CodeTimer::start(); + let span = Span::current(); + let fut = self.pending_utxos.queue(outpoint); if let Some(utxo) = self.any_utxo(&outpoint) { self.pending_utxos.respond(&outpoint, utxo); } - fut.boxed() + // The future waits on a channel for a response. + timer.finish(module_path!(), line!(), "AwaitUtxo"); + + fut.instrument(span).boxed() } Request::FindBlockHashes { known_blocks, stop } => { metrics::counter!( @@ -929,8 +1002,16 @@ impl Service for StateService { ); const MAX_FIND_BLOCK_HASHES_RESULTS: usize = 500; + + let timer = CodeTimer::start(); + + // TODO: move this work into the future, like Block and Transaction? let res = self.find_best_chain_hashes(known_blocks, stop, MAX_FIND_BLOCK_HASHES_RESULTS); + + // The work is all done, the future just returns the result. + timer.finish(module_path!(), line!(), "FindBlockHashes"); + async move { Ok(Response::BlockHashes(res)) }.boxed() } Request::FindBlockHeaders { known_blocks, stop } => { @@ -951,6 +1032,11 @@ impl Service for StateService { // // https://github.com/bitcoin/bitcoin/pull/4468/files#r17026905 let count = MAX_FIND_BLOCK_HEADERS_RESULTS - 2; + + let timer = CodeTimer::start(); + + // TODO: move this work into the future, like Block and Transaction? + // return heights instead, to improve lookup performance? let res = self.find_best_chain_hashes(known_blocks, stop, count); // And prepare data for concurrent execution @@ -961,18 +1047,25 @@ impl Service for StateService { // // Now we have the chain hashes, we can read the headers concurrently, // which allows other async tasks to make progress while data is being read from disk. + let span = Span::current(); tokio::task::spawn_blocking(move || { - let res = res - .iter() - .map(|&hash| { - let header = read::block_header(best_chain.clone(), &db, hash.into()) - .expect("block header for found hash is in the best chain"); + span.in_scope(move || { + let res = res + .iter() + .map(|&hash| { + let header = + read::block_header(best_chain.clone(), &db, hash.into()) + .expect("block header for found hash is in the best chain"); - block::CountedHeader { header } - }) - .collect(); + block::CountedHeader { header } + }) + .collect(); - Ok(Response::BlockHeaders(res)) + // Some of the work is done in the future. + timer.finish(module_path!(), line!(), "FindBlockHeaders"); + + Ok(Response::BlockHeaders(res)) + }) }) .map(|join_result| join_result.expect("panic in Request::FindBlockHeaders")) .boxed() @@ -1003,17 +1096,25 @@ impl Service for ReadStateService { "type" => "block", ); + let timer = CodeTimer::start(); + let state = self.clone(); // # Performance // // Allow other async tasks to make progress while concurrently reading blocks from disk. + let span = Span::current(); tokio::task::spawn_blocking(move || { - let block = state.best_chain_receiver.with_watch_data(|best_chain| { - read::block(best_chain, &state.db, hash_or_height) - }); + span.in_scope(move || { + let block = state.best_chain_receiver.with_watch_data(|best_chain| { + read::block(best_chain, &state.db, hash_or_height) + }); - Ok(ReadResponse::Block(block)) + // The work is done in the future. 
+ timer.finish(module_path!(), line!(), "ReadRequest::Block"); + + Ok(ReadResponse::Block(block)) + }) }) .map(|join_result| join_result.expect("panic in ReadRequest::Block")) .boxed() @@ -1028,18 +1129,26 @@ impl Service for ReadStateService { "type" => "transaction", ); + let timer = CodeTimer::start(); + let state = self.clone(); // # Performance // // Allow other async tasks to make progress while concurrently reading transactions from disk. + let span = Span::current(); tokio::task::spawn_blocking(move || { - let transaction_and_height = - state.best_chain_receiver.with_watch_data(|best_chain| { - read::transaction(best_chain, &state.db, hash) - }); + span.in_scope(move || { + let transaction_and_height = + state.best_chain_receiver.with_watch_data(|best_chain| { + read::transaction(best_chain, &state.db, hash) + }); - Ok(ReadResponse::Transaction(transaction_and_height)) + // The work is done in the future. + timer.finish(module_path!(), line!(), "ReadRequest::Transaction"); + + Ok(ReadResponse::Transaction(transaction_and_height)) + }) }) .map(|join_result| join_result.expect("panic in ReadRequest::Transaction")) .boxed() @@ -1053,17 +1162,26 @@ impl Service for ReadStateService { "type" => "sapling_tree", ); + let timer = CodeTimer::start(); + let state = self.clone(); // # Performance // // Allow other async tasks to make progress while concurrently reading trees from disk. + let span = Span::current(); tokio::task::spawn_blocking(move || { - let sapling_tree = state.best_chain_receiver.with_watch_data(|best_chain| { - read::sapling_tree(best_chain, &state.db, hash_or_height) - }); + span.in_scope(move || { + let sapling_tree = + state.best_chain_receiver.with_watch_data(|best_chain| { + read::sapling_tree(best_chain, &state.db, hash_or_height) + }); - Ok(ReadResponse::SaplingTree(sapling_tree)) + // The work is done in the future. + timer.finish(module_path!(), line!(), "ReadRequest::SaplingTree"); + + Ok(ReadResponse::SaplingTree(sapling_tree)) + }) }) .map(|join_result| join_result.expect("panic in ReadRequest::SaplingTree")) .boxed() @@ -1077,17 +1195,26 @@ impl Service for ReadStateService { "type" => "orchard_tree", ); + let timer = CodeTimer::start(); + let state = self.clone(); // # Performance // // Allow other async tasks to make progress while concurrently reading trees from disk. + let span = Span::current(); tokio::task::spawn_blocking(move || { - let orchard_tree = state.best_chain_receiver.with_watch_data(|best_chain| { - read::orchard_tree(best_chain, &state.db, hash_or_height) - }); + span.in_scope(move || { + let orchard_tree = + state.best_chain_receiver.with_watch_data(|best_chain| { + read::orchard_tree(best_chain, &state.db, hash_or_height) + }); - Ok(ReadResponse::OrchardTree(orchard_tree)) + // The work is done in the future. + timer.finish(module_path!(), line!(), "ReadRequest::OrchardTree"); + + Ok(ReadResponse::OrchardTree(orchard_tree)) + }) }) .map(|join_result| join_result.expect("panic in ReadRequest::OrchardTree")) .boxed() @@ -1105,17 +1232,29 @@ impl Service for ReadStateService { "type" => "transaction_ids_by_addresses", ); + let timer = CodeTimer::start(); + let state = self.clone(); // # Performance // // Allow other async tasks to make progress while concurrently reading transaction IDs from disk. 
+ let span = Span::current(); tokio::task::spawn_blocking(move || { - let tx_ids = state.best_chain_receiver.with_watch_data(|best_chain| { - read::transparent_tx_ids(best_chain, &state.db, addresses, height_range) - }); + span.in_scope(move || { + let tx_ids = state.best_chain_receiver.with_watch_data(|best_chain| { + read::transparent_tx_ids(best_chain, &state.db, addresses, height_range) + }); - tx_ids.map(ReadResponse::AddressesTransactionIds) + // The work is done in the future. + timer.finish( + module_path!(), + line!(), + "ReadRequest::TransactionIdsByAddresses", + ); + + tx_ids.map(ReadResponse::AddressesTransactionIds) + }) }) .map(|join_result| { join_result.expect("panic in ReadRequest::TransactionIdsByAddresses") @@ -1132,17 +1271,25 @@ impl Service for ReadStateService { "type" => "address_balance", ); + let timer = CodeTimer::start(); + let state = self.clone(); // # Performance // // Allow other async tasks to make progress while concurrently reading balances from disk. + let span = Span::current(); tokio::task::spawn_blocking(move || { - let balance = state.best_chain_receiver.with_watch_data(|best_chain| { - read::transparent_balance(best_chain, &state.db, addresses) - })?; + span.in_scope(move || { + let balance = state.best_chain_receiver.with_watch_data(|best_chain| { + read::transparent_balance(best_chain, &state.db, addresses) + })?; - Ok(ReadResponse::AddressBalance(balance)) + // The work is done in the future. + timer.finish(module_path!(), line!(), "ReadRequest::AddressBalance"); + + Ok(ReadResponse::AddressBalance(balance)) + }) }) .map(|join_result| join_result.expect("panic in ReadRequest::AddressBalance")) .boxed() @@ -1157,17 +1304,25 @@ impl Service for ReadStateService { "type" => "utxos_by_addresses", ); + let timer = CodeTimer::start(); + let state = self.clone(); // # Performance // // Allow other async tasks to make progress while concurrently reading UTXOs from disk. + let span = Span::current(); tokio::task::spawn_blocking(move || { - let utxos = state.best_chain_receiver.with_watch_data(|best_chain| { - read::transparent_utxos(state.network, best_chain, &state.db, addresses) - }); + span.in_scope(move || { + let utxos = state.best_chain_receiver.with_watch_data(|best_chain| { + read::transparent_utxos(state.network, best_chain, &state.db, addresses) + }); - utxos.map(ReadResponse::Utxos) + // The work is done in the future. + timer.finish(module_path!(), line!(), "ReadRequest::UtxosByAddresses"); + + utxos.map(ReadResponse::Utxos) + }) }) .map(|join_result| join_result.expect("panic in ReadRequest::UtxosByAddresses")) .boxed()
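
The second patch applies one instrumentation pattern to every state request above: start a `CodeTimer`, capture the current tracing `Span`, run the blocking database read inside `tokio::task::spawn_blocking`, and finish the timer where the work actually happens. The following is a minimal sketch of that pattern in isolation, not part of the patch: the `read_block_from_disk` helper and the example function names are hypothetical stand-ins, and it assumes a Cargo.toml with `tokio` (features `rt-multi-thread` and `macros`), `tracing`, and `zebra-chain` (for `CodeTimer`) as dependencies.

// Minimal sketch (assumptions noted above): time a blocking read and keep its
// logs attached to the caller's tracing span, as the patch does for each request.
use tracing::Span;
use zebra_chain::diagnostic::CodeTimer;

/// Hypothetical stand-in for a slow, blocking database read.
fn read_block_from_disk() -> Option<String> {
    Some("block data".to_string())
}

/// Runs the read on the blocking thread pool, inside the caller's span.
async fn timed_blocking_read() -> Option<String> {
    // Start timing: CodeTimer logs at info or warn level if this takes too long,
    // and its Drop impl still logs if the future is dropped before finishing.
    let timer = CodeTimer::start();

    // Capture the request's span so logs from the blocking thread stay in it.
    let span = Span::current();

    tokio::task::spawn_blocking(move || {
        span.in_scope(move || {
            let block = read_block_from_disk();

            // The work happens inside the blocking task, so finish the timer here.
            timer.finish(module_path!(), line!(), "example blocking read");

            block
        })
    })
    .await
    .expect("panic in example blocking read")
}

#[tokio::main]
async fn main() {
    let block = timed_blocking_read().await;
    println!("read: {block:?}");
}

For requests where the heavy work happens before the returned future is awaited (such as `CommitBlock` and `CommitFinalizedBlock`), the patch instead finishes the timer as soon as `block_in_place` returns, because the future only waits on a channel for the result.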