From 47cf0f475fe09992d3b20d9d2aeaedb865593db2 Mon Sep 17 00:00:00 2001
From: Deirdre Connolly <durumcrustulum@gmail.com>
Date: Mon, 13 Mar 2023 14:10:15 -0400
Subject: [PATCH] Halborn 2023 02 20 (#6297)

* Limit version user agents to 256 bytes, rather than 2MB, needs failure tests

* Limit all inv messages to 50,000 entries, existing tests cover this

* Limit reject message strings based on network protocol, needs success and failure tests

* Catch up as fast as possible if inventory rotation is delayed, existing tests cover this

* Truncate inv channel hashes to 1000, needs success and failure tests

* Limit inv registry size to 4 MB, needs over-limit tests for inv and addr

* Test size constraints on version user agent, reject command, and reject reason (#13)

* Test inventory registry memory limits for hashes and peers (#14)

Co-authored-by: Deirdre Connolly <durumcrustulum@gmail.com>

---------

Co-authored-by: teor <teor@riseup.net>
Co-authored-by: Arya <aryasolhi@gmail.com>
---
 zebra-chain/src/serialization.rs              |   5 +-
 .../src/serialization/zcash_deserialize.rs    |  23 ++-
 .../src/peer_set/inventory_registry.rs        | 130 +++++++++++--
 .../inventory_registry/tests/vectors.rs       | 102 +++++++++-
 zebra-network/src/protocol/external/codec.rs  |  85 ++++++++-
 .../protocol/external/codec/tests/vectors.rs  | 179 ++++++++++++++++++
 zebra-network/src/protocol/external/inv.rs    |  16 +-
 .../src/protocol/external/message.rs          |  18 +-
 .../protocol/external/tests/preallocate.rs    |   9 +-
 9 files changed, 531 insertions(+), 36 deletions(-)

diff --git a/zebra-chain/src/serialization.rs b/zebra-chain/src/serialization.rs
index b05f13bec..6a5155bc9 100644
--- a/zebra-chain/src/serialization.rs
+++ b/zebra-chain/src/serialization.rs
@@ -32,8 +32,9 @@ pub use error::SerializationError;
 pub use read_zcash::ReadZcashExt;
 pub use write_zcash::WriteZcashExt;
 pub use zcash_deserialize::{
-    zcash_deserialize_bytes_external_count, zcash_deserialize_external_count, TrustedPreallocate,
-    ZcashDeserialize, ZcashDeserializeInto,
+    zcash_deserialize_bytes_external_count, zcash_deserialize_external_count,
+    zcash_deserialize_string_external_count, TrustedPreallocate, ZcashDeserialize,
+    ZcashDeserializeInto,
 };
 pub use zcash_serialize::{
     zcash_serialize_bytes, zcash_serialize_bytes_external_count, zcash_serialize_empty_list,
diff --git a/zebra-chain/src/serialization/zcash_deserialize.rs b/zebra-chain/src/serialization/zcash_deserialize.rs
index 3f8444567..391b29382 100644
--- a/zebra-chain/src/serialization/zcash_deserialize.rs
+++ b/zebra-chain/src/serialization/zcash_deserialize.rs
@@ -120,11 +120,28 @@ pub fn zcash_deserialize_bytes_external_count<R: io::Read>(
     Ok(vec)
 }
 
+/// `zcash_deserialize_external_count`, specialised for [`String`].
+/// The external count is in bytes. (Not UTF-8 characters.)
+///
+/// This allows us to optimize the inner loop into a single call to `read_exact()`.
+///
+/// This function has a `zcash_` prefix to alert the reader that the
+/// serialization in use is consensus-critical serialization, rather than
+/// some other kind of serialization.
+pub fn zcash_deserialize_string_external_count<R: io::Read>(
+    external_byte_count: usize,
+    reader: R,
+) -> Result<String, SerializationError> {
+    let bytes = zcash_deserialize_bytes_external_count(external_byte_count, reader)?;
+
+    String::from_utf8(bytes).map_err(|_| SerializationError::Parse("invalid utf-8"))
+}
+
 /// Read a Bitcoin-encoded UTF-8 string.
 impl ZcashDeserialize for String {
-    fn zcash_deserialize<R: io::Read>(reader: R) -> Result<Self, SerializationError> {
-        let bytes: Vec<_> = Vec::zcash_deserialize(reader)?;
-        String::from_utf8(bytes).map_err(|_| SerializationError::Parse("invalid utf-8"))
+    fn zcash_deserialize<R: io::Read>(mut reader: R) -> Result<Self, SerializationError> {
+        let byte_count: CompactSizeMessage = (&mut reader).zcash_deserialize_into()?;
+        zcash_deserialize_string_external_count(byte_count.into(), reader)
     }
 }
 
diff --git a/zebra-network/src/peer_set/inventory_registry.rs b/zebra-network/src/peer_set/inventory_registry.rs
index ec7387c5d..4b19a72c0 100644
--- a/zebra-network/src/peer_set/inventory_registry.rs
+++ b/zebra-network/src/peer_set/inventory_registry.rs
@@ -3,14 +3,13 @@
 //! [RFC]: https://zebra.zfnd.org/dev/rfcs/0003-inventory-tracking.html
 
 use std::{
-    collections::HashMap,
-    convert::TryInto,
     net::SocketAddr,
     pin::Pin,
     task::{Context, Poll},
 };
 
 use futures::{FutureExt, Stream, StreamExt};
+use indexmap::IndexMap;
 use tokio::{
     sync::broadcast,
     time::{self, Instant},
@@ -35,6 +34,40 @@ pub mod update;
 #[cfg(test)]
 mod tests;
 
+/// The maximum number of inventory hashes we will track from a single peer.
+///
+/// # Security
+///
+/// This limits known memory denial of service attacks like <https://invdos.net/> to a total of:
+/// ```text
+/// 1000 inventory * 2 maps * 32-64 bytes per inventory = less than 1 MB
+/// 1000 inventory * 70 peers * 2 maps * 6-18 bytes per address = up to 3 MB
+/// ```
+///
+/// Since the inventory registry is an efficiency optimisation, which falls back to a
+/// random peer, we only need to track a small number of hashes for available inventory.
+///
+/// But we want to be able to track a significant amount of missing inventory,
+/// to limit queries for globally missing inventory.
+//
+// TODO: split this into available (25) and missing (1000 or more?)
+pub const MAX_INV_PER_MAP: usize = 1000;
+
+/// The maximum number of peers we will track inventory for.
+///
+/// # Security
+///
+/// This limits known memory denial of service attacks. See [`MAX_INV_PER_MAP`] for details.
+///
+/// Since the inventory registry is an efficiency optimisation, which falls back to a
+/// random peer, we only need to track a small number of peers per inv for available inventory.
+///
+/// But we want to be able to track missing inventory for almost all our peers,
+/// so we only query a few peers for inventory that is genuinely missing from the network.
+//
+// TODO: split this into available (25) and missing (70)
+pub const MAX_PEERS_PER_INV: usize = 70;
+
 /// A peer inventory status, which tracks a hash for both available and missing inventory.
 pub type InventoryStatus<T> = InventoryResponse<T, T>;
 
@@ -59,10 +92,12 @@ type InventoryMarker = InventoryStatus<()>;
 pub struct InventoryRegistry {
     /// Map tracking the latest inventory status from the current interval
     /// period.
-    current: HashMap<InventoryHash, HashMap<SocketAddr, InventoryMarker>>,
+    //
+    // TODO: split maps into available and missing, so we can limit them separately.
+    current: IndexMap<InventoryHash, IndexMap<SocketAddr, InventoryMarker>>,
 
     /// Map tracking inventory statuses from the previous interval period.
-    prev: HashMap<InventoryHash, HashMap<SocketAddr, InventoryMarker>>,
+    prev: IndexMap<InventoryHash, IndexMap<SocketAddr, InventoryMarker>>,
 
     /// Stream of incoming inventory statuses to register.
     inv_stream: Pin<
@@ -99,7 +134,17 @@ impl InventoryChange {
         hashes: impl IntoIterator<Item = &'a InventoryHash>,
         peer: SocketAddr,
     ) -> Option<Self> {
-        let hashes: Vec<InventoryHash> = hashes.into_iter().copied().collect();
+        let mut hashes: Vec<InventoryHash> = hashes.into_iter().copied().collect();
+
+        // # Security
+        //
+        // Don't send more hashes than we're going to store.
+        // It doesn't matter which hashes we choose, because this is an efficiency optimisation.
+        //
+        //  This limits known memory denial of service attacks to:
+        // `1000 hashes * 200 peers/channel capacity * 32-64 bytes = up to 12 MB`
+        hashes.truncate(MAX_INV_PER_MAP);
+
         let hashes = hashes.try_into().ok();
 
         hashes.map(|hashes| InventoryStatus::Available((hashes, peer)))
@@ -110,7 +155,14 @@ impl InventoryChange {
         hashes: impl IntoIterator<Item = &'a InventoryHash>,
         peer: SocketAddr,
     ) -> Option<Self> {
-        let hashes: Vec<InventoryHash> = hashes.into_iter().copied().collect();
+        let mut hashes: Vec<InventoryHash> = hashes.into_iter().copied().collect();
+
+        // # Security
+        //
+        // Don't send more hashes than we're going to store.
+        // It doesn't matter which hashes we choose, because this is an efficiency optimisation.
+        hashes.truncate(MAX_INV_PER_MAP);
+
         let hashes = hashes.try_into().ok();
 
         hashes.map(|hashes| InventoryStatus::Missing((hashes, peer)))
@@ -149,8 +201,15 @@ impl InventoryRegistry {
 
         // Don't do an immediate rotation, current and prev are already empty.
         let mut interval = tokio::time::interval_at(Instant::now() + interval, interval);
-        // SECURITY: if the rotation time is late, delay future rotations by the same amount
-        interval.set_missed_tick_behavior(time::MissedTickBehavior::Delay);
+        // # Security
+        //
+        // If the rotation time is late, execute as many ticks as needed to catch up.
+        // This is a tradeoff between memory usage and quickly accessing remote data
+        // under heavy load. Bursting prioritises lower memory usage.
+        //
+        // Skipping or delaying could keep peer inventory in memory for a longer time,
+        // further increasing memory load or delays due to virtual memory swapping.
+        interval.set_missed_tick_behavior(time::MissedTickBehavior::Burst);
 
         Self {
             current: Default::default(),
@@ -206,6 +265,17 @@ impl InventoryRegistry {
             .is_some()
     }
 
+    /// Returns an iterator over peer inventory status hashes.
+    ///
+    /// Yields current statuses first, then previously rotated statuses.
+    /// This can include multiple statuses for the same hash.
+    #[allow(dead_code)]
+    pub fn status_hashes(
+        &self,
+    ) -> impl Iterator<Item = (&InventoryHash, &IndexMap<SocketAddr, InventoryMarker>)> {
+        self.current.iter().chain(self.prev.iter())
+    }
+
     /// Returns a future that polls once for new registry updates.
     #[allow(dead_code)]
     pub fn update(&mut self) -> Update {
@@ -219,8 +289,19 @@ impl InventoryRegistry {
     /// - rotates HashMaps based on interval events
     /// - drains the inv_stream channel and registers all advertised inventory
     pub fn poll_inventory(&mut self, cx: &mut Context<'_>) -> Result<(), BoxError> {
-        // Correctness: Registers the current task for wakeup when the timer next becomes ready.
-        while Pin::new(&mut self.interval).poll_next(cx).is_ready() {
+        // # Correctness
+        //
+        // Registers the current task for wakeup when the timer next becomes ready.
+        //
+        // # Security
+        //
+        // Only rotate one inventory per peer request, to give the next inventory
+        // time to gather some peer advertisements. This is a tradeoff between
+        // memory usage and quickly accessing remote data under heavy load.
+        //
+        // This prevents a burst edge case where all inventory is emptied after
+        // two interval ticks are delayed.
+        if Pin::new(&mut self.interval).poll_next(cx).is_ready() {
             self.rotate();
         }
 
@@ -274,13 +355,13 @@ impl InventoryRegistry {
                 "unexpected inventory type: {inv:?} from peer: {addr:?}",
             );
 
-            let current = self.current.entry(inv).or_default();
+            let hash_peers = self.current.entry(inv).or_default();
 
             // # Security
             //
             // Prefer `missing` over `advertised`, so malicious peers can't reset their own entries,
             // and funnel multiple failing requests to themselves.
-            if let Some(old_status) = current.get(&addr) {
+            if let Some(old_status) = hash_peers.get(&addr) {
                 if old_status.is_missing() && new_status.is_available() {
                     debug!(?new_status, ?old_status, ?addr, ?inv, "skipping new status");
                     continue;
@@ -295,7 +376,8 @@ impl InventoryRegistry {
                 );
             }
 
-            let replaced_status = current.insert(addr, new_status);
+            let replaced_status = hash_peers.insert(addr, new_status);
+
             debug!(
                 ?new_status,
                 ?replaced_status,
@@ -303,6 +385,28 @@ impl InventoryRegistry {
                 ?inv,
                 "inserted new status"
             );
+
+            // # Security
+            //
+            // Limit the number of stored peers per hash, removing the oldest entries,
+            // because newer entries are likely to be more relevant.
+            //
+            // TODO: do random or weighted random eviction instead?
+            if hash_peers.len() > MAX_PEERS_PER_INV {
+                // Performance: `MAX_PEERS_PER_INV` is small, so O(n) performance is acceptable.
+                hash_peers.shift_remove_index(0);
+            }
+
+            // # Security
+            //
+            // Limit the number of stored inventory hashes, removing the oldest entries,
+            // because newer entries are likely to be more relevant.
+            //
+            // TODO: do random or weighted random eviction instead?
+            if self.current.len() > MAX_INV_PER_MAP {
+                // Performance: `MAX_INV_PER_MAP` is small, so O(n) performance is acceptable.
+                self.current.shift_remove_index(0);
+            }
         }
     }
 
diff --git a/zebra-network/src/peer_set/inventory_registry/tests/vectors.rs b/zebra-network/src/peer_set/inventory_registry/tests/vectors.rs
index 3d9180477..2aafc96d3 100644
--- a/zebra-network/src/peer_set/inventory_registry/tests/vectors.rs
+++ b/zebra-network/src/peer_set/inventory_registry/tests/vectors.rs
@@ -1,9 +1,14 @@
 //! Fixed test vectors for the inventory registry.
 
-use zebra_chain::block;
+use std::{cmp::min, net::SocketAddr};
+
+use zebra_chain::{block, serialization::AtLeastOne, transaction};
 
 use crate::{
-    peer_set::inventory_registry::{tests::new_inv_registry, InventoryStatus},
+    peer_set::inventory_registry::{
+        tests::new_inv_registry, InventoryMarker, InventoryStatus, MAX_INV_PER_MAP,
+        MAX_PEERS_PER_INV,
+    },
     protocol::external::InventoryHash,
 };
 
@@ -182,3 +187,96 @@ async fn inv_registry_prefer_current_order(missing_current: bool) {
         assert_eq!(inv_registry.missing_peers(test_hash).count(), 0);
     }
 }
+
+/// Check inventory registration limits.
+#[tokio::test]
+async fn inv_registry_limit() {
+    inv_registry_limit_for(InventoryMarker::Available(())).await;
+    inv_registry_limit_for(InventoryMarker::Missing(())).await;
+}
+
+/// Check the inventory registration limit for `status`.
+async fn inv_registry_limit_for(status: InventoryMarker) {
+    let single_test_hash = InventoryHash::Block(block::Hash([0xbb; 32]));
+    let single_test_peer = "1.1.1.1:1"
+        .parse()
+        .expect("unexpected invalid peer address");
+
+    let (mut inv_registry, inv_stream_tx) = new_inv_registry();
+
+    // Check hash limit
+    for hash_count in 0..(MAX_INV_PER_MAP + 10) {
+        let mut test_hash = hash_count.to_ne_bytes().to_vec();
+        test_hash.resize(32, 0);
+        let test_hash = InventoryHash::Tx(transaction::Hash(test_hash.try_into().unwrap()));
+
+        let test_change = status.map(|()| (AtLeastOne::from_one(test_hash), single_test_peer));
+
+        let receiver_count = inv_stream_tx
+            .send(test_change)
+            .expect("unexpected failed inventory status send");
+
+        assert_eq!(receiver_count, 1);
+
+        inv_registry
+            .update()
+            .await
+            .expect("unexpected dropped registry sender channel");
+
+        if status.is_available() {
+            assert_eq!(inv_registry.advertising_peers(test_hash).count(), 1);
+            assert_eq!(inv_registry.missing_peers(test_hash).count(), 0);
+        } else {
+            assert_eq!(inv_registry.advertising_peers(test_hash).count(), 0);
+            assert_eq!(inv_registry.missing_peers(test_hash).count(), 1);
+        }
+
+        // SECURITY: limit inventory memory usage
+        assert_eq!(
+            inv_registry.status_hashes().count(),
+            min(hash_count + 1, MAX_INV_PER_MAP),
+        );
+    }
+
+    // Check peer address per hash limit
+    let (mut inv_registry, inv_stream_tx) = new_inv_registry();
+
+    for peer_count in 0..(MAX_PEERS_PER_INV + 10) {
+        let test_peer = SocketAddr::new(
+            "2.2.2.2".parse().unwrap(),
+            peer_count.try_into().expect("fits in u16"),
+        );
+
+        let test_change = status.map(|()| (AtLeastOne::from_one(single_test_hash), test_peer));
+
+        let receiver_count = inv_stream_tx
+            .send(test_change)
+            .expect("unexpected failed inventory status send");
+
+        assert_eq!(receiver_count, 1);
+
+        inv_registry
+            .update()
+            .await
+            .expect("unexpected dropped registry sender channel");
+
+        assert_eq!(inv_registry.status_hashes().count(), 1);
+
+        let limited_count = min(peer_count + 1, MAX_PEERS_PER_INV);
+
+        // SECURITY: limit inventory memory usage
+        if status.is_available() {
+            assert_eq!(
+                inv_registry.advertising_peers(single_test_hash).count(),
+                limited_count,
+            );
+            assert_eq!(inv_registry.missing_peers(single_test_hash).count(), 0);
+        } else {
+            assert_eq!(inv_registry.advertising_peers(single_test_hash).count(), 0);
+            assert_eq!(
+                inv_registry.missing_peers(single_test_hash).count(),
+                limited_count,
+            );
+        }
+    }
+}
diff --git a/zebra-network/src/protocol/external/codec.rs b/zebra-network/src/protocol/external/codec.rs
index 8914a26a1..aec54772c 100644
--- a/zebra-network/src/protocol/external/codec.rs
+++ b/zebra-network/src/protocol/external/codec.rs
@@ -15,9 +15,9 @@ use zebra_chain::{
     block::{self, Block},
     parameters::Network,
     serialization::{
-        sha256d, zcash_deserialize_bytes_external_count, FakeWriter, ReadZcashExt,
-        SerializationError as Error, ZcashDeserialize, ZcashDeserializeInto, ZcashSerialize,
-        MAX_PROTOCOL_MESSAGE_LEN,
+        sha256d, zcash_deserialize_bytes_external_count, zcash_deserialize_string_external_count,
+        CompactSizeMessage, FakeWriter, ReadZcashExt, SerializationError as Error,
+        ZcashDeserialize, ZcashDeserializeInto, ZcashSerialize, MAX_PROTOCOL_MESSAGE_LEN,
     },
     transaction::Transaction,
 };
@@ -26,7 +26,10 @@ use crate::constants;
 
 use super::{
     addr::{AddrInVersion, AddrV1, AddrV2},
-    message::{Message, RejectReason, VersionMessage},
+    message::{
+        Message, RejectReason, VersionMessage, MAX_REJECT_MESSAGE_LENGTH, MAX_REJECT_REASON_LENGTH,
+        MAX_USER_AGENT_LENGTH,
+    },
     types::*,
 };
 
@@ -220,6 +223,14 @@ impl Codec {
                 address_from.zcash_serialize(&mut writer)?;
 
                 writer.write_u64::<LittleEndian>(nonce.0)?;
+
+                if user_agent.as_bytes().len() > MAX_USER_AGENT_LENGTH {
+                    // zcashd won't accept this version message
+                    return Err(Error::Parse(
+                        "user agent too long: must be 256 bytes or less",
+                    ));
+                }
+
                 user_agent.zcash_serialize(&mut writer)?;
                 writer.write_u32::<LittleEndian>(start_height.0)?;
                 writer.write_u8(*relay as u8)?;
@@ -237,8 +248,23 @@ impl Codec {
                 reason,
                 data,
             } => {
+                if message.as_bytes().len() > MAX_REJECT_MESSAGE_LENGTH {
+                    // zcashd won't accept this reject message
+                    return Err(Error::Parse(
+                        "reject message too long: must be 12 bytes or less",
+                    ));
+                }
+
                 message.zcash_serialize(&mut writer)?;
+
                 writer.write_u8(*ccode as u8)?;
+
+                if reason.as_bytes().len() > MAX_REJECT_REASON_LENGTH {
+                    return Err(Error::Parse(
+                        "reject reason too long: must be 111 bytes or less",
+                    ));
+                }
+
                 reason.zcash_serialize(&mut writer)?;
                 if let Some(data) = data {
                     writer.write_all(data)?;
@@ -487,7 +513,24 @@ impl Codec {
             address_recv: AddrInVersion::zcash_deserialize(&mut reader)?,
             address_from: AddrInVersion::zcash_deserialize(&mut reader)?,
             nonce: Nonce(reader.read_u64::<LittleEndian>()?),
-            user_agent: String::zcash_deserialize(&mut reader)?,
+            user_agent: {
+                let byte_count: CompactSizeMessage = (&mut reader).zcash_deserialize_into()?;
+                let byte_count: usize = byte_count.into();
+
+                // # Security
+                //
+                // Limit peer set memory usage, Zebra stores an `Arc<VersionMessage>` per
+                // connected peer.
+                //
+                // Without this check, we can use `200 peers * 2 MB message size limit = 400 MB`.
+                if byte_count > MAX_USER_AGENT_LENGTH {
+                    return Err(Error::Parse(
+                        "user agent too long: must be 256 bytes or less",
+                    ));
+                }
+
+                zcash_deserialize_string_external_count(byte_count, &mut reader)?
+            },
             start_height: block::Height(reader.read_u32::<LittleEndian>()?),
             relay: match reader.read_u8() {
                 Ok(val @ 0..=1) => val == 1,
@@ -513,7 +556,21 @@ impl Codec {
 
     fn read_reject<R: Read>(&self, mut reader: R) -> Result<Message, Error> {
         Ok(Message::Reject {
-            message: String::zcash_deserialize(&mut reader)?,
+            message: {
+                let byte_count: CompactSizeMessage = (&mut reader).zcash_deserialize_into()?;
+                let byte_count: usize = byte_count.into();
+
+                // # Security
+                //
+                // Limit log size on disk, Zebra might print large reject messages to disk.
+                if byte_count > MAX_REJECT_MESSAGE_LENGTH {
+                    return Err(Error::Parse(
+                        "reject message too long: must be 12 bytes or less",
+                    ));
+                }
+
+                zcash_deserialize_string_external_count(byte_count, &mut reader)?
+            },
             ccode: match reader.read_u8()? {
                 0x01 => RejectReason::Malformed,
                 0x10 => RejectReason::Invalid,
@@ -526,7 +583,21 @@ impl Codec {
                 0x50 => RejectReason::Other,
                 _ => return Err(Error::Parse("invalid RejectReason value in ccode field")),
             },
-            reason: String::zcash_deserialize(&mut reader)?,
+            reason: {
+                let byte_count: CompactSizeMessage = (&mut reader).zcash_deserialize_into()?;
+                let byte_count: usize = byte_count.into();
+
+                // # Security
+                //
+                // Limit log size on disk, Zebra might print large reject messages to disk.
+                if byte_count > MAX_REJECT_REASON_LENGTH {
+                    return Err(Error::Parse(
+                        "reject reason too long: must be 111 bytes or less",
+                    ));
+                }
+
+                zcash_deserialize_string_external_count(byte_count, &mut reader)?
+            },
             // Sometimes there's data, sometimes there isn't. There's no length
             // field, this is just implicitly encoded by the body_len.
             // Apparently all existing implementations only supply 32 bytes of
diff --git a/zebra-network/src/protocol/external/codec/tests/vectors.rs b/zebra-network/src/protocol/external/codec/tests/vectors.rs
index 0804c6337..89c6b08f2 100644
--- a/zebra-network/src/protocol/external/codec/tests/vectors.rs
+++ b/zebra-network/src/protocol/external/codec/tests/vectors.rs
@@ -406,3 +406,182 @@ fn version_message_with_relay() {
 
     assert!(!relay, "relay should be false");
 }
+
+/// Check that the codec enforces size limits on `user_agent` field of version messages.
+#[test]
+fn version_user_agent_size_limits() {
+    let _init_guard = zebra_test::init();
+    let codec = Codec::builder().finish();
+    let mut bytes = BytesMut::new();
+    let [valid_version_message, invalid_version_message]: [Message; 2] = {
+        let services = PeerServices::NODE_NETWORK;
+        let timestamp = Utc
+            .timestamp_opt(1_568_000_000, 0)
+            .single()
+            .expect("in-range number of seconds and valid nanosecond");
+
+        [
+            "X".repeat(MAX_USER_AGENT_LENGTH),
+            "X".repeat(MAX_USER_AGENT_LENGTH + 1),
+        ]
+        .map(|user_agent| {
+            VersionMessage {
+                version: crate::constants::CURRENT_NETWORK_PROTOCOL_VERSION,
+                services,
+                timestamp,
+                address_recv: AddrInVersion::new(
+                    SocketAddr::new(IpAddr::V4(Ipv4Addr::new(203, 0, 113, 6)), 8233),
+                    services,
+                ),
+                address_from: AddrInVersion::new(
+                    SocketAddr::new(IpAddr::V4(Ipv4Addr::new(203, 0, 113, 6)), 8233),
+                    services,
+                ),
+                nonce: Nonce(0x9082_4908_8927_9238),
+                user_agent,
+                start_height: block::Height(540_000),
+                relay: true,
+            }
+            .into()
+        })
+    };
+
+    // Check that encoding and decoding will succeed when the user_agent is not longer than MAX_USER_AGENT_LENGTH
+    codec
+        .write_body(&valid_version_message, &mut (&mut bytes).writer())
+        .expect("encoding valid version msg should succeed");
+    codec
+        .read_version(Cursor::new(&bytes))
+        .expect("decoding valid version msg should succeed");
+
+    bytes.clear();
+
+    let mut writer = (&mut bytes).writer();
+
+    // Check that encoding will return an error when the user_agent is longer than MAX_USER_AGENT_LENGTH
+    match codec.write_body(&invalid_version_message, &mut writer) {
+        Err(Error::Parse(error_msg)) if error_msg.contains("user agent too long") => {}
+        result => panic!("expected write error: user agent too long, got: {result:?}"),
+    };
+
+    // Encode the rest of the message onto `bytes` (relay should be optional)
+    {
+        let Message::Version(VersionMessage {
+        user_agent,
+        start_height,
+        ..
+    }) = invalid_version_message else {
+        unreachable!("version_message is a version");
+    };
+
+        user_agent
+            .zcash_serialize(&mut writer)
+            .expect("writing user_agent should succeed");
+        writer
+            .write_u32::<LittleEndian>(start_height.0)
+            .expect("writing start_height should succeed");
+    }
+
+    // Check that decoding will return an error when the user_agent is longer than MAX_USER_AGENT_LENGTH
+    match codec.read_version(Cursor::new(&bytes)) {
+        Err(Error::Parse(error_msg)) if error_msg.contains("user agent too long") => {}
+        result => panic!("expected read error: user agent too long, got: {result:?}"),
+    };
+}
+
+/// Check that the codec enforces size limits on `message` and `reason` fields of reject messages.
+#[test]
+fn reject_command_and_reason_size_limits() {
+    let _init_guard = zebra_test::init();
+    let codec = Codec::builder().finish();
+    let mut bytes = BytesMut::new();
+
+    let valid_message = "X".repeat(MAX_REJECT_MESSAGE_LENGTH);
+    let invalid_message = "X".repeat(MAX_REJECT_MESSAGE_LENGTH + 1);
+    let valid_reason = "X".repeat(MAX_REJECT_REASON_LENGTH);
+    let invalid_reason = "X".repeat(MAX_REJECT_REASON_LENGTH + 1);
+
+    let valid_reject_message = Message::Reject {
+        message: valid_message.clone(),
+        ccode: RejectReason::Invalid,
+        reason: valid_reason.clone(),
+        data: None,
+    };
+
+    // Check that encoding and decoding will succeed when `message` and `reason` fields are within size limits.
+    codec
+        .write_body(&valid_reject_message, &mut (&mut bytes).writer())
+        .expect("encoding valid reject msg should succeed");
+    codec
+        .read_reject(Cursor::new(&bytes))
+        .expect("decoding valid reject msg should succeed");
+
+    let invalid_reject_messages = [
+        (
+            "reject message too long",
+            Message::Reject {
+                message: invalid_message,
+                ccode: RejectReason::Invalid,
+                reason: valid_reason,
+                data: None,
+            },
+        ),
+        (
+            "reject reason too long",
+            Message::Reject {
+                message: valid_message,
+                ccode: RejectReason::Invalid,
+                reason: invalid_reason,
+                data: None,
+            },
+        ),
+    ];
+
+    for (expected_err_msg, invalid_reject_message) in invalid_reject_messages {
+        // Check that encoding will return an error when the reason or message are too long.
+        match codec.write_body(&invalid_reject_message, &mut (&mut bytes).writer()) {
+            Err(Error::Parse(error_msg)) if error_msg.contains(expected_err_msg) => {}
+            result => panic!("expected write error: {expected_err_msg}, got: {result:?}"),
+        };
+
+        bytes.clear();
+
+        // Encode the message onto `bytes` without size checks
+        {
+            let Message::Reject {
+                message,
+                ccode,
+                reason,
+                data,
+            } = invalid_reject_message else {
+                unreachable!("invalid_reject_message is a reject");
+            };
+
+            let mut writer = (&mut bytes).writer();
+
+            message
+                .zcash_serialize(&mut writer)
+                .expect("writing message should succeed");
+
+            writer
+                .write_u8(ccode as u8)
+                .expect("writing ccode should succeed");
+
+            reason
+                .zcash_serialize(&mut writer)
+                .expect("writing reason should succeed");
+
+            if let Some(data) = data {
+                writer
+                    .write_all(&data)
+                    .expect("writing data should succeed");
+            }
+        }
+
+        // Check that decoding will return an error when the reason or message are too long.
+        match codec.read_reject(Cursor::new(&bytes)) {
+            Err(Error::Parse(error_msg)) if error_msg.contains(expected_err_msg) => {}
+            result => panic!("expected read error: {expected_err_msg}, got: {result:?}"),
+        };
+    }
+}
diff --git a/zebra-network/src/protocol/external/inv.rs b/zebra-network/src/protocol/external/inv.rs
index 7f3713391..d112b4991 100644
--- a/zebra-network/src/protocol/external/inv.rs
+++ b/zebra-network/src/protocol/external/inv.rs
@@ -1,6 +1,9 @@
 //! Inventory items for the Zcash network protocol.
 
-use std::io::{Read, Write};
+use std::{
+    cmp::min,
+    io::{Read, Write},
+};
 
 use byteorder::{LittleEndian, ReadBytesExt, WriteBytesExt};
 
@@ -176,11 +179,20 @@ impl ZcashDeserialize for InventoryHash {
 /// The minimum serialized size of an [`InventoryHash`].
 pub(crate) const MIN_INV_HASH_SIZE: usize = 36;
 
+/// The maximum number of transaction inventory items in a network message.
+/// We also use this limit for block inventory, because it is typically much smaller.
+///
+/// Same as `MAX_INV_SZ` in `zcashd`:
+/// <https://github.com/zcash/zcash/blob/adfc7218435faa1c8985a727f997a795dcffa0c7/src/net.h#L50>
+pub const MAX_TX_INV_IN_MESSAGE: u64 = 50_000;
+
 impl TrustedPreallocate for InventoryHash {
     fn max_allocation() -> u64 {
         // An Inventory hash takes at least 36 bytes, and we reserve at least one byte for the
         // Vector length so we can never receive more than ((MAX_PROTOCOL_MESSAGE_LEN - 1) / 36) in
         // a single message
-        ((MAX_PROTOCOL_MESSAGE_LEN - 1) / MIN_INV_HASH_SIZE) as u64
+        let message_size_limit = ((MAX_PROTOCOL_MESSAGE_LEN - 1) / MIN_INV_HASH_SIZE) as u64;
+
+        min(message_size_limit, MAX_TX_INV_IN_MESSAGE)
     }
 }
diff --git a/zebra-network/src/protocol/external/message.rs b/zebra-network/src/protocol/external/message.rs
index 2ea54698a..6a6a1e295 100644
--- a/zebra-network/src/protocol/external/message.rs
+++ b/zebra-network/src/protocol/external/message.rs
@@ -293,6 +293,12 @@ pub enum Message {
     FilterClear,
 }
 
+/// The maximum size of the user agent string.
+///
+/// This is equivalent to `MAX_SUBVERSION_LENGTH` in `zcashd`:
+/// <https://github.com/zcash/zcash/blob/adfc7218435faa1c8985a727f997a795dcffa0c7/src/net.h#L56>
+pub const MAX_USER_AGENT_LENGTH: usize = 256;
+
 /// A `version` message.
 ///
 /// Note that although this is called `version` in Bitcoin, its role is really
@@ -358,13 +364,17 @@ pub struct VersionMessage {
 
 /// The maximum size of the rejection message.
 ///
-/// This is equivalent to `COMMAND_SIZE` in zcashd.
-const MAX_REJECT_MESSAGE_LENGTH: usize = 12;
+/// This is equivalent to `COMMAND_SIZE` in zcashd:
+/// <https://github.com/zcash/zcash/blob/adfc7218435faa1c8985a727f997a795dcffa0c7/src/protocol.h#L33>
+/// <https://github.com/zcash/zcash/blob/c0fbeb809bf2303e30acef0d2b74db11e9177427/src/main.cpp#L7544>
+pub const MAX_REJECT_MESSAGE_LENGTH: usize = 12;
 
 /// The maximum size of the rejection reason.
 ///
-/// This is equivalent to `MAX_REJECT_MESSAGE_LENGTH` in zcashd.
-const MAX_REJECT_REASON_LENGTH: usize = 111;
+/// This is equivalent to `MAX_REJECT_MESSAGE_LENGTH` in zcashd:
+/// <https://github.com/zcash/zcash/blob/adfc7218435faa1c8985a727f997a795dcffa0c7/src/main.h#L126>
+/// <https://github.com/zcash/zcash/blob/c0fbeb809bf2303e30acef0d2b74db11e9177427/src/main.cpp#L7544>
+pub const MAX_REJECT_REASON_LENGTH: usize = 111;
 
 impl From<VersionMessage> for Message {
     fn from(version_message: VersionMessage) -> Self {
diff --git a/zebra-network/src/protocol/external/tests/preallocate.rs b/zebra-network/src/protocol/external/tests/preallocate.rs
index 172072575..132fdb015 100644
--- a/zebra-network/src/protocol/external/tests/preallocate.rs
+++ b/zebra-network/src/protocol/external/tests/preallocate.rs
@@ -1,6 +1,6 @@
 //! Tests for trusted preallocation during deserialization.
 
-use std::env;
+use std::{cmp::min, env};
 
 use proptest::prelude::*;
 
@@ -16,7 +16,7 @@ use crate::{
     meta_addr::MetaAddr,
     protocol::external::{
         addr::{AddrV1, AddrV2, ADDR_V1_SIZE, ADDR_V2_MIN_SIZE},
-        inv::InventoryHash,
+        inv::{InventoryHash, MAX_TX_INV_IN_MESSAGE},
     },
 };
 
@@ -63,7 +63,10 @@ proptest! {
         // Check that our smallest_disallowed_vec is only one item larger than the limit
         prop_assert!(((smallest_disallowed_vec.len() - 1) as u64) == InventoryHash::max_allocation());
         // Check that our smallest_disallowed_vec is too big to fit in a Zcash message.
-        prop_assert!(smallest_disallowed_serialized.len() > MAX_PROTOCOL_MESSAGE_LEN);
+        //
+        // Special case: Zcash has a slightly smaller limit for transaction invs,
+        // so we use it for all invs.
+        prop_assert!(smallest_disallowed_serialized.len() > min(MAX_PROTOCOL_MESSAGE_LEN, usize::try_from(MAX_TX_INV_IN_MESSAGE).expect("fits in usize")));
 
         // Create largest_allowed_vec by removing one element from smallest_disallowed_vec without copying (for efficiency)
         smallest_disallowed_vec.pop();