MultiIteratorScanner - improve banking stage performance with high contention

Andrew Fitzgerald 2022-11-14 13:04:21 -06:00
parent e269fe3383
commit ee2f760d3d
5 changed files with 737 additions and 294 deletions
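
An overview before the per-file diffs: the commit adds a MultiIteratorScanner and rewires the banking stage's consume path to build batches of non-conflicting transactions instead of fixed-size chunks of the priority-ordered buffer, which is what degrades under high contention. A minimal sketch of driving the scanner, not part of the commit, assuming the solana_core crate path and using "equal values conflict" as a stand-in for real account locks:

use solana_core::multi_iterator_scanner::{MultiIteratorScanner, ProcessingDecision};

fn main() {
    // Priority-ordered items; pretend equal values contend for the same lock.
    let slice = [1, 1, 2, 3];

    // Payload: shared mutable state between the scanner and the check below.
    // Here it records which values the in-progress batch has "locked".
    let taken: Vec<i32> = Vec::new();

    let mut scanner = MultiIteratorScanner::new(&slice, 2, taken, |item, taken| {
        if taken.contains(item) {
            ProcessingDecision::Later // conflicts with current batch; retry next pass
        } else {
            taken.push(*item);
            ProcessingDecision::Now // no conflict; include in this batch
        }
    });

    // Yields [1, 2] then [1, 3]: the second `1` is deferred past the batch
    // containing the first `1`, preserving priority order among conflicts.
    while let Some((batch, taken)) = scanner.iterate() {
        println!("batch: {batch:?}");
        taken.clear(); // release the "locks" before the next batch
    }
}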

View File

@@ -104,7 +104,6 @@ fn bench_consume_buffered(bencher: &mut Bencher) {
             &recorder,
             &QosService::new(Arc::new(RwLock::new(CostModel::default())), 1),
             &mut LeaderSlotMetricsTracker::new(0),
-            10,
             None,
         );
     });

View File

@ -17,7 +17,7 @@ use {
tracer_packet_stats::TracerPacketStats, tracer_packet_stats::TracerPacketStats,
unprocessed_packet_batches::*, unprocessed_packet_batches::*,
unprocessed_transaction_storage::{ unprocessed_transaction_storage::{
ThreadType, UnprocessedTransactionStorage, UNPROCESSED_BUFFER_STEP_SIZE, ConsumeScannerPayload, ThreadType, UnprocessedTransactionStorage,
}, },
}, },
core::iter::repeat, core::iter::repeat,
@@ -161,7 +161,7 @@ pub struct BankingStageStats {
     consume_buffered_packets_elapsed: AtomicU64,
     receive_and_buffer_packets_elapsed: AtomicU64,
     filter_pending_packets_elapsed: AtomicU64,
-    packet_conversion_elapsed: AtomicU64,
+    pub(crate) packet_conversion_elapsed: AtomicU64,
     transaction_processing_elapsed: AtomicU64,
 }
@@ -616,8 +616,9 @@ impl BankingStage {
     #[allow(clippy::too_many_arguments)]
     fn do_process_packets(
         max_tx_ingestion_ns: u128,
-        poh_recorder: &Arc<RwLock<PohRecorder>>,
-        slot_metrics_tracker: &mut LeaderSlotMetricsTracker,
+        working_bank: &Arc<Bank>,
+        bank_creation_time: &Arc<Instant>,
+        payload: &mut ConsumeScannerPayload,
         recorder: &TransactionRecorder,
         transaction_status_sender: &Option<TransactionStatusSender>,
         gossip_vote_sender: &ReplayVoteSender,

@@ -626,90 +627,71 @@ impl BankingStage {
         log_messages_bytes_limit: Option<usize>,
         consumed_buffered_packets_count: &mut usize,
         rebuffered_packet_count: &mut usize,
-        reached_end_of_slot: &mut bool,
         test_fn: &Option<impl Fn()>,
         packets_to_process: &Vec<Arc<ImmutableDeserializedPacket>>,
     ) -> Option<Vec<usize>> {
-        // TODO: Right now we iterate through buffer and try the highest weighted transaction once
-        // but we should retry the highest weighted transactions more often.
-        let (bank_start, poh_recorder_lock_time) = measure!(
-            poh_recorder.read().unwrap().bank_start(),
-            "poh_recorder.read",
-        );
-        slot_metrics_tracker.increment_consume_buffered_packets_poh_recorder_lock_us(
-            poh_recorder_lock_time.as_us(),
-        );
+        if payload.reached_end_of_slot {
+            return None;
+        }

         let packets_to_process_len = packets_to_process.len();
-        if let Some(BankStart {
-            working_bank,
-            bank_creation_time,
-        }) = bank_start
-        {
-            let (process_transactions_summary, process_packets_transactions_time) = measure!(
-                Self::process_packets_transactions(
-                    &working_bank,
-                    &bank_creation_time,
-                    recorder,
-                    packets_to_process.iter().map(|p| &**p),
-                    transaction_status_sender,
-                    gossip_vote_sender,
-                    banking_stage_stats,
-                    qos_service,
-                    slot_metrics_tracker,
-                    log_messages_bytes_limit
-                ),
-                "process_packets_transactions",
-            );
-            slot_metrics_tracker.increment_process_packets_transactions_us(
-                process_packets_transactions_time.as_us(),
-            );
+        let (process_transactions_summary, process_packets_transactions_time) = measure!(
+            Self::process_packets_transactions(
+                working_bank,
+                bank_creation_time,
+                recorder,
+                &payload.sanitized_transactions,
+                transaction_status_sender,
+                gossip_vote_sender,
+                banking_stage_stats,
+                qos_service,
+                payload.slot_metrics_tracker,
+                log_messages_bytes_limit
+            ),
+            "process_packets_transactions",
+        );
+        payload
+            .slot_metrics_tracker
+            .increment_process_packets_transactions_us(process_packets_transactions_time.as_us());

-            let ProcessTransactionsSummary {
-                reached_max_poh_height,
-                retryable_transaction_indexes,
-                ..
-            } = process_transactions_summary;
+        // Clear payload for next iteration
+        payload.sanitized_transactions.clear();
+        payload.write_accounts.clear();

-            if reached_max_poh_height
-                || !Bank::should_bank_still_be_processing_txs(
-                    &bank_creation_time,
-                    max_tx_ingestion_ns,
-                )
-            {
-                *reached_end_of_slot = true;
-            }
+        let ProcessTransactionsSummary {
+            reached_max_poh_height,
+            retryable_transaction_indexes,
+            ..
+        } = process_transactions_summary;
+
+        if reached_max_poh_height
+            || !Bank::should_bank_still_be_processing_txs(bank_creation_time, max_tx_ingestion_ns)
+        {
+            payload.reached_end_of_slot = true;
+        }

-            // The difference between all transactions passed to execution and the ones that
-            // are retryable were the ones that were either:
-            // 1) Committed into the block
-            // 2) Dropped without being committed because they had some fatal error (too old,
-            // duplicate signature, etc.)
-            //
-            // Note: This assumes that every packet deserializes into one transaction!
-            *consumed_buffered_packets_count +=
-                packets_to_process_len.saturating_sub(retryable_transaction_indexes.len());
+        // The difference between all transactions passed to execution and the ones that
+        // are retryable were the ones that were either:
+        // 1) Committed into the block
+        // 2) Dropped without being committed because they had some fatal error (too old,
+        // duplicate signature, etc.)
+        //
+        // Note: This assumes that every packet deserializes into one transaction!
+        *consumed_buffered_packets_count +=
+            packets_to_process_len.saturating_sub(retryable_transaction_indexes.len());

-            // Out of the buffered packets just retried, collect any still unprocessed
-            // transactions in this batch for forwarding
-            *rebuffered_packet_count += retryable_transaction_indexes.len();
-            if let Some(test_fn) = test_fn {
-                test_fn();
-            }
+        // Out of the buffered packets just retried, collect any still unprocessed
+        // transactions in this batch for forwarding
+        *rebuffered_packet_count += retryable_transaction_indexes.len();
+        if let Some(test_fn) = test_fn {
+            test_fn();
+        }

-            slot_metrics_tracker
-                .increment_retryable_packets_count(retryable_transaction_indexes.len() as u64);
-
-            Some(retryable_transaction_indexes)
-        } else if *reached_end_of_slot {
-            None
-        } else {
-            // mark as end-of-slot to avoid aggressively locking poh for the remaining
-            // packet batches in the buffer
-            *reached_end_of_slot = true;
-            None
-        }
+        payload
+            .slot_metrics_tracker
+            .increment_retryable_packets_count(retryable_transaction_indexes.len() as u64);
+
+        Some(retryable_transaction_indexes)
     }

     #[allow(clippy::too_many_arguments)]
@@ -725,38 +707,57 @@ impl BankingStage {
         recorder: &TransactionRecorder,
         qos_service: &QosService,
         slot_metrics_tracker: &mut LeaderSlotMetricsTracker,
-        num_packets_to_process_per_iteration: usize,
         log_messages_bytes_limit: Option<usize>,
     ) {
         let mut rebuffered_packet_count = 0;
         let mut consumed_buffered_packets_count = 0;
         let mut proc_start = Measure::start("consume_buffered_process");
-        let mut reached_end_of_slot = false;
+        let reached_end_of_slot;
         let num_packets_to_process = unprocessed_transaction_storage.len();
-        let bank = poh_recorder.read().unwrap().bank();

-        unprocessed_transaction_storage.process_packets(
-            bank,
-            num_packets_to_process_per_iteration,
-            |packets_to_process| {
-                Self::do_process_packets(
-                    max_tx_ingestion_ns,
-                    poh_recorder,
-                    slot_metrics_tracker,
-                    recorder,
-                    transaction_status_sender,
-                    gossip_vote_sender,
-                    banking_stage_stats,
-                    qos_service,
-                    log_messages_bytes_limit,
-                    &mut consumed_buffered_packets_count,
-                    &mut rebuffered_packet_count,
-                    &mut reached_end_of_slot,
-                    &test_fn,
-                    packets_to_process,
-                )
-            },
-        );
+        // TODO: Right now we iterate through the buffer and try the highest weighted transaction once
+        // but we should retry the highest weighted transactions more often.
+        let (bank_start, poh_recorder_lock_time) = measure!(
+            poh_recorder.read().unwrap().bank_start(),
+            "poh_recorder.read",
+        );
+        slot_metrics_tracker.increment_consume_buffered_packets_poh_recorder_lock_us(
+            poh_recorder_lock_time.as_us(),
+        );
+
+        if let Some(BankStart {
+            working_bank,
+            bank_creation_time,
+        }) = bank_start
+        {
+            let returned_payload = unprocessed_transaction_storage.process_packets(
+                working_bank.clone(),
+                banking_stage_stats,
+                slot_metrics_tracker,
+                |packets_to_process, payload| {
+                    Self::do_process_packets(
+                        max_tx_ingestion_ns,
+                        &working_bank,
+                        &bank_creation_time,
+                        payload,
+                        recorder,
+                        transaction_status_sender,
+                        gossip_vote_sender,
+                        banking_stage_stats,
+                        qos_service,
+                        log_messages_bytes_limit,
+                        &mut consumed_buffered_packets_count,
+                        &mut rebuffered_packet_count,
+                        &test_fn,
+                        packets_to_process,
+                    )
+                },
+            );
+            reached_end_of_slot = returned_payload.reached_end_of_slot;
+        } else {
+            reached_end_of_slot = true;
+        }

         if reached_end_of_slot {
             slot_metrics_tracker.set_end_of_slot_unprocessed_buffer_len(
@ -900,7 +901,6 @@ impl BankingStage {
recorder, recorder,
qos_service, qos_service,
slot_metrics_tracker, slot_metrics_tracker,
UNPROCESSED_BUFFER_STEP_SIZE,
log_messages_bytes_limit log_messages_bytes_limit
), ),
"consume_buffered_packets", "consume_buffered_packets",
@@ -1827,27 +1827,21 @@ impl BankingStage {
     /// This function returns a vector containing the index of every valid transaction. A valid
     /// transaction has result Ok() as the value
-    fn filter_valid_transaction_indexes(
-        valid_txs: &[TransactionCheckResult],
-        transaction_indexes: &[usize],
-    ) -> Vec<usize> {
+    fn filter_valid_transaction_indexes(valid_txs: &[TransactionCheckResult]) -> Vec<usize> {
         valid_txs
             .iter()
             .enumerate()
             .filter_map(|(index, (x, _h))| if x.is_ok() { Some(index) } else { None })
-            .map(|x| transaction_indexes[x])
             .collect_vec()
     }

     /// This function filters pending packets that are still valid
     /// # Arguments
     /// * `transactions` - a batch of transactions deserialized from packets
-    /// * `transaction_to_packet_indexes` - maps each transaction to a packet index
     /// * `pending_indexes` - identifies which indexes in the `transactions` list are still pending
     fn filter_pending_packets_from_pending_txs(
         bank: &Arc<Bank>,
         transactions: &[SanitizedTransaction],
-        transaction_to_packet_indexes: &[usize],
         pending_indexes: &[usize],
     ) -> Vec<usize> {
         let filter =

@@ -1859,7 +1853,7 @@ impl BankingStage {
             FORWARD_TRANSACTIONS_TO_LEADER_AT_SLOT_OFFSET,
         );

-        Self::filter_valid_transaction_indexes(&results, transaction_to_packet_indexes)
+        Self::filter_valid_transaction_indexes(&results)
     }

     #[allow(clippy::too_many_arguments)]
@@ -1867,7 +1861,7 @@ impl BankingStage {
         bank: &'a Arc<Bank>,
         bank_creation_time: &Instant,
         poh: &'a TransactionRecorder,
-        deserialized_packets: impl Iterator<Item = &'a ImmutableDeserializedPacket>,
+        sanitized_transactions: &[SanitizedTransaction],
         transaction_status_sender: &Option<TransactionStatusSender>,
         gossip_vote_sender: &'a ReplayVoteSender,
         banking_stage_stats: &'a BankingStageStats,

@@ -1875,39 +1869,12 @@ impl BankingStage {
         slot_metrics_tracker: &'a mut LeaderSlotMetricsTracker,
         log_messages_bytes_limit: Option<usize>,
     ) -> ProcessTransactionsSummary {
-        // Convert packets to transactions
-        let ((transactions, transaction_to_packet_indexes), packet_conversion_time): (
-            (Vec<SanitizedTransaction>, Vec<usize>),
-            _,
-        ) = measure!(
-            deserialized_packets
-                .enumerate()
-                .filter_map(|(i, deserialized_packet)| {
-                    deserialized_packet
-                        .build_sanitized_transaction(
-                            &bank.feature_set,
-                            bank.vote_only_bank(),
-                            bank.as_ref(),
-                        )
-                        .map(|transaction| (transaction, i))
-                })
-                .unzip(),
-            "packet_conversion",
-        );
-        let packet_conversion_us = packet_conversion_time.as_us();
-        slot_metrics_tracker.increment_transactions_from_packets_us(packet_conversion_us);
-        banking_stage_stats
-            .packet_conversion_elapsed
-            .fetch_add(packet_conversion_us, Ordering::Relaxed);
-        inc_new_counter_info!("banking_stage-packet_conversion", 1);
-
         // Process transactions
         let (mut process_transactions_summary, process_transactions_time) = measure!(
             Self::process_transactions(
                 bank,
                 bank_creation_time,
-                &transactions,
+                sanitized_transactions,
                 poh,
                 transaction_status_sender,
                 gossip_vote_sender,
@@ -1938,8 +1905,7 @@ impl BankingStage {
         let (filtered_retryable_transaction_indexes, filter_retryable_packets_time) = measure!(
             Self::filter_pending_packets_from_pending_txs(
                 bank,
-                &transactions,
-                &transaction_to_packet_indexes,
+                sanitized_transactions,
                 retryable_transaction_indexes,
             ),
             "filter_pending_packets_time",
@@ -2164,7 +2130,6 @@ mod tests {
         },
         std::{
             borrow::Cow,
-            collections::HashSet,
             path::Path,
             sync::atomic::{AtomicBool, Ordering},
             thread::sleep,
@@ -2639,33 +2604,27 @@ mod tests {
     #[test]
     fn test_bank_filter_valid_transaction_indexes() {
         assert_eq!(
-            BankingStage::filter_valid_transaction_indexes(
-                &[
-                    (Err(TransactionError::BlockhashNotFound), None),
-                    (Err(TransactionError::BlockhashNotFound), None),
-                    (Ok(()), None),
-                    (Err(TransactionError::BlockhashNotFound), None),
-                    (Ok(()), None),
-                    (Ok(()), None),
-                ],
-                &[2, 4, 5, 9, 11, 13]
-            ),
-            [5, 11, 13]
+            BankingStage::filter_valid_transaction_indexes(&[
+                (Err(TransactionError::BlockhashNotFound), None),
+                (Err(TransactionError::BlockhashNotFound), None),
+                (Ok(()), None),
+                (Err(TransactionError::BlockhashNotFound), None),
+                (Ok(()), None),
+                (Ok(()), None),
+            ]),
+            [2, 4, 5]
         );

         assert_eq!(
-            BankingStage::filter_valid_transaction_indexes(
-                &[
-                    (Ok(()), None),
-                    (Err(TransactionError::BlockhashNotFound), None),
-                    (Err(TransactionError::BlockhashNotFound), None),
-                    (Ok(()), None),
-                    (Ok(()), None),
-                    (Ok(()), None),
-                ],
-                &[1, 6, 7, 9, 31, 43]
-            ),
-            [1, 9, 31, 43]
+            BankingStage::filter_valid_transaction_indexes(&[
+                (Ok(()), None),
+                (Err(TransactionError::BlockhashNotFound), None),
+                (Err(TransactionError::BlockhashNotFound), None),
+                (Ok(()), None),
+                (Ok(()), None),
+                (Ok(()), None),
+            ]),
+            [0, 3, 4, 5]
        );
    }
@@ -3844,36 +3803,27 @@ mod tests {
                 &recorder,
                 &QosService::new(Arc::new(RwLock::new(CostModel::default())), 1),
                 &mut LeaderSlotMetricsTracker::new(0),
-                num_conflicting_transactions,
                 None,
             );
             assert_eq!(buffered_packet_batches.len(), num_conflicting_transactions);

-            // When the poh recorder has a bank, should process all non conflicting buffered packets.
-            // Processes one packet per iteration of the loop
-            let num_packets_to_process_per_iteration = num_conflicting_transactions;
-            for num_expected_unprocessed in (0..num_conflicting_transactions).rev() {
-                poh_recorder.write().unwrap().set_bank(&bank, false);
-                BankingStage::consume_buffered_packets(
-                    &Pubkey::default(),
-                    max_tx_processing_ns,
-                    &poh_recorder,
-                    &mut buffered_packet_batches,
-                    &None,
-                    &gossip_vote_sender,
-                    None::<Box<dyn Fn()>>,
-                    &BankingStageStats::default(),
-                    &recorder,
-                    &QosService::new(Arc::new(RwLock::new(CostModel::default())), 1),
-                    &mut LeaderSlotMetricsTracker::new(0),
-                    num_packets_to_process_per_iteration,
-                    None,
-                );
-                if num_expected_unprocessed == 0 {
-                    assert!(buffered_packet_batches.is_empty())
-                } else {
-                    assert_eq!(buffered_packet_batches.len(), num_expected_unprocessed);
-                }
-            }
+            // When the working bank in poh_recorder is Some, all packets should be processed.
+            // Multi-Iterator will process them 1-by-1 if all txs are conflicting.
+            poh_recorder.write().unwrap().set_bank(&bank, false);
+            BankingStage::consume_buffered_packets(
+                &Pubkey::default(),
+                max_tx_processing_ns,
+                &poh_recorder,
+                &mut buffered_packet_batches,
+                &None,
+                &gossip_vote_sender,
+                None::<Box<dyn Fn()>>,
+                &BankingStageStats::default(),
+                &recorder,
+                &QosService::new(Arc::new(RwLock::new(CostModel::default())), 1),
+                &mut LeaderSlotMetricsTracker::new(0),
+                None,
+            );
+            assert!(buffered_packet_batches.is_empty());
             poh_recorder
                 .read()
                 .unwrap()
@@ -3897,11 +3847,8 @@ mod tests {
                 finished_packet_sender.send(()).unwrap();
                 continue_receiver.recv().unwrap();
             });

-            // When the poh recorder has a bank, it should process all non conflicting buffered packets.
-            // Because each conflicting transaction is in its own `Packet` within a `PacketBatch`, then
-            // each iteration of this loop will process one element of the batch per iteration of the
-            // loop.
-            let interrupted_iteration = 1;
+            // When the poh recorder has a bank, it should process all buffered packets.
+            let num_conflicting_transactions = transactions.len();
             poh_recorder.write().unwrap().set_bank(&bank, false);
             let poh_recorder_ = poh_recorder.clone();
             let recorder = poh_recorder_.read().unwrap().recorder();
@@ -3917,19 +3864,14 @@ mod tests {
                 )
                 .unwrap();
             assert_eq!(deserialized_packets.len(), num_conflicting_transactions);
-            let num_packets_to_process_per_iteration = 1;
             let mut buffered_packet_batches =
                 UnprocessedTransactionStorage::new_transaction_storage(
                     UnprocessedPacketBatches::from_iter(
-                        deserialized_packets.clone().into_iter(),
+                        deserialized_packets.into_iter(),
                         num_conflicting_transactions,
                     ),
                     ThreadType::Transactions,
                 );
-            let all_packet_message_hashes: HashSet<Hash> = buffered_packet_batches
-                .iter()
-                .map(|packet| *packet.immutable_section().message_hash())
-                .collect();
             BankingStage::consume_buffered_packets(
                 &Pubkey::default(),
                 std::u128::MAX,
@@ -3942,40 +3884,28 @@ mod tests {
                 &recorder,
                 &QosService::new(Arc::new(RwLock::new(CostModel::default())), 1),
                 &mut LeaderSlotMetricsTracker::new(0),
-                num_packets_to_process_per_iteration,
                 None,
             );

-            // Check everything is correct. All indexes after `interrupted_iteration`
-            // should still be unprocessed
-            assert_eq!(
-                buffered_packet_batches.len(),
-                deserialized_packets[interrupted_iteration + 1..].len()
-            );
-            for packet in buffered_packet_batches.iter() {
-                assert!(all_packet_message_hashes
-                    .contains(packet.immutable_section().message_hash()));
-            }
+            // Check everything is correct. All valid packets should be processed.
+            assert!(buffered_packet_batches.is_empty());
         })
         .unwrap();

-        for i in 0..=interrupted_iteration {
+        // Should be calling `test_fn` for each non-conflicting batch.
+        // In this case each batch is of size 1.
+        for i in 0..num_conflicting_transactions {
             finished_packet_receiver.recv().unwrap();
-            if i == interrupted_iteration {
+            if i + 1 == num_conflicting_transactions {
                 poh_recorder
-                    .write()
+                    .read()
                     .unwrap()
-                    .schedule_dummy_max_height_reached_failure();
+                    .is_exited
+                    .store(true, Ordering::Relaxed);
             }
             continue_sender.send(()).unwrap();
         }
         t_consume.join().unwrap();
-        poh_recorder
-            .read()
-            .unwrap()
-            .is_exited
-            .store(true, Ordering::Relaxed);
         let _ = poh_simulator.join();
     }
     Blockstore::destroy(ledger_path.path()).unwrap();
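
In the rewritten consume path, sanitization now happens inside the scanner's should_process check (see consume_scan_should_process_packet in the unprocessed transaction storage changes below): a packet whose transaction would write-lock an account already claimed by the in-progress batch is deferred with ProcessingDecision::Later, a packet that fails sanitization is dropped with Never, and everything else is taken Now with its write accounts recorded in the payload. A self-contained toy of just that write-conflict rule; Tx is a hypothetical stand-in for a sanitized transaction, and the real code tracks Pubkey values in ConsumeScannerPayload::write_accounts:

use std::collections::HashSet;

// Hypothetical stand-in for a sanitized transaction: the accounts it write-locks.
struct Tx {
    writes: &'static [&'static str],
}

fn main() {
    let txs = [
        Tx { writes: &["alice", "pool"] },
        Tx { writes: &["bob", "pool"] }, // contends with txs[0] on "pool"
        Tx { writes: &["carol"] },
    ];

    let mut write_accounts: HashSet<&str> = HashSet::new();
    let mut batch = Vec::new();
    let mut deferred = Vec::new();

    for (i, tx) in txs.iter().enumerate() {
        if tx.writes.iter().any(|a| write_accounts.contains(a)) {
            // ProcessingDecision::Later - leave for a later, non-conflicting batch
            deferred.push(i);
        } else {
            // ProcessingDecision::Now - take the write locks and batch it
            write_accounts.extend(tx.writes.iter().copied());
            batch.push(i);
        }
    }

    assert_eq!(batch, [0, 2]);
    assert_eq!(deferred, [1]);
}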

View File

@@ -37,6 +37,7 @@ pub mod leader_slot_banking_stage_metrics;
 pub mod leader_slot_banking_stage_timing_metrics;
 pub mod ledger_cleanup_service;
 pub mod ledger_metric_report_service;
+pub mod multi_iterator_scanner;
 pub mod optimistic_confirmation_verifier;
 pub mod outstanding_requests;
 pub mod packet_deserializer;

View File

@@ -0,0 +1,346 @@
//! Provides an iterator interface that creates non-conflicting batches of elements to process.
//!
//! The problem this structure targets is the following:
//! We have a slice of transactions we want to process in batches where transactions
//! in the same batch do not conflict with each other. This allows us to process them
//! in parallel. The original slice is ordered by priority, and it is often the case
//! that high-priority transactions conflict with each other. This means
//! we cannot simply grab chunks of transactions.
//! The solution is to create a MultiIteratorScanner that will use multiple iterators, up
//! to the desired batch size, and create batches of transactions that do not conflict with
//! each other. The MultiIteratorScanner stores state for the current positions of each iterator,
//! as well as which transactions have already been handled. If a transaction is invalid it can
//! also be skipped without being considered for future batches.
//!
/// Output from the element checker used in `MultiIteratorScanner::iterate`.
pub enum ProcessingDecision {
/// Should be processed by the scanner.
Now,
/// Should be skipped by the scanner on this pass - process later.
Later,
/// Should be skipped and marked as handled so we don't try processing it again.
Never,
}
/// Iterates over a slice, creating valid non-self-conflicting batches of elements to process;
/// elements between batches are not guaranteed to be non-conflicting.
/// Conflicting elements are guaranteed to be processed in the order they appear in the slice,
/// as long as the `should_process` function is appropriately marking resources as used.
/// It is also guaranteed that elements within the batch are in the order they appear in
/// the slice. The batch size is not guaranteed to be `max_iterators` - it can be smaller.
///
/// # Example:
///
/// Assume transactions with same letter conflict with each other. A typical priority ordered
/// buffer might look like:
///
/// // [A, A, B, A, C, D, B, C, D]
///
/// If we want to have batches of size 4, the MultiIteratorScanner will proceed as follows:
///
/// // [A, A, B, A, C, D, B, C, D]
/// //  ^     ^     ^  ^
///
/// // [A, A, B, A, C, D, B, C, D]
/// //     ^              ^  ^  ^
///
/// // [A, A, B, A, C, D, B, C, D]
/// //           ^
///
/// The iterator will iterate with batches:
///
/// // [[A, B, C, D], [A, B, C, D], [A]]
///
pub struct MultiIteratorScanner<'a, T, U, F>
where
F: FnMut(&T, &mut U) -> ProcessingDecision,
{
/// Maximum number of iterators to use.
max_iterators: usize,
/// Slice that we're iterating over
slice: &'a [T],
/// Payload - used to store shared mutable state between scanner and the processing function.
payload: U,
/// Function that checks if an element should be processed. This function is also responsible
/// for marking resources, such as locks, as used.
should_process: F,
/// Store whether an element has already been handled
already_handled: Vec<bool>,
/// Current indices inside `slice` for multiple iterators
current_positions: Vec<usize>,
/// Container to store items for iteration - Should only be used in `get_current_items()`
current_items: Vec<&'a T>,
/// Whether the scanner has been initialized
initialized: bool,
}
impl<'a, T, U, F> MultiIteratorScanner<'a, T, U, F>
where
F: FnMut(&T, &mut U) -> ProcessingDecision,
{
pub fn new(slice: &'a [T], max_iterators: usize, payload: U, should_process: F) -> Self {
assert!(max_iterators > 0);
Self {
max_iterators,
slice,
payload,
should_process,
already_handled: vec![false; slice.len()],
current_positions: Vec::with_capacity(max_iterators),
current_items: Vec::with_capacity(max_iterators),
initialized: false,
}
}
/// Returns a slice of the item references at the current positions of the iterators
/// and a mutable reference to the payload.
///
/// Returns None if the scanner is done iterating.
pub fn iterate(&mut self) -> Option<(&[&'a T], &mut U)> {
if !self.initialized {
self.initialized = true;
self.initialize_current_positions();
} else {
self.advance_current_positions();
}
self.get_current_items()
}
/// Consume the iterator and return the payload.
pub fn finalize(self) -> U {
self.payload
}
/// Initialize the `current_positions` vector for the first batch.
fn initialize_current_positions(&mut self) {
let mut last_index = 0;
for _iterator_index in 0..self.max_iterators {
match self.march_iterator(last_index) {
Some(index) => {
self.current_positions.push(index);
last_index = index.saturating_add(1);
}
None => break,
}
}
}
/// March iterators forward to find the next batch of items.
fn advance_current_positions(&mut self) {
if let Some(mut prev_index) = self.current_positions.first().copied() {
for iterator_index in 0..self.current_positions.len() {
// If the previous iterator has passed this iterator, we should start
// at its position + 1 to avoid duplicate re-traversal.
let start_index = (self.current_positions[iterator_index].saturating_add(1))
.max(prev_index.saturating_add(1));
match self.march_iterator(start_index) {
Some(index) => {
self.current_positions[iterator_index] = index;
prev_index = index;
}
None => {
// Drop current positions that go past the end of the slice
self.current_positions.truncate(iterator_index);
break;
}
}
}
}
}
/// Get the current items from the slice using `self.current_positions`.
/// Returns `None` if there are no more items.
fn get_current_items(&mut self) -> Option<(&[&'a T], &mut U)> {
self.current_items.clear();
for index in &self.current_positions {
self.current_items.push(&self.slice[*index]);
}
(!self.current_items.is_empty()).then_some((&self.current_items, &mut self.payload))
}
/// Moves the iterator to its next position. Returns None if we've reached the end of the slice.
fn march_iterator(&mut self, starting_index: usize) -> Option<usize> {
let mut found = None;
for index in starting_index..self.slice.len() {
if !self.already_handled[index] {
match (self.should_process)(&self.slice[index], &mut self.payload) {
ProcessingDecision::Now => {
self.already_handled[index] = true;
found = Some(index);
break;
}
ProcessingDecision::Later => {
// Do nothing - iterator will try this element in a future batch
}
ProcessingDecision::Never => {
self.already_handled[index] = true;
}
}
}
}
found
}
}
#[cfg(test)]
mod tests {
use {super::MultiIteratorScanner, crate::multi_iterator_scanner::ProcessingDecision};
struct TestScannerPayload {
locks: Vec<bool>,
}
fn test_scanner_locking_should_process(
item: &i32,
payload: &mut TestScannerPayload,
) -> ProcessingDecision {
if payload.locks[*item as usize] {
ProcessingDecision::Later
} else {
payload.locks[*item as usize] = true;
ProcessingDecision::Now
}
}
#[test]
fn test_multi_iterator_scanner_empty() {
let slice: Vec<i32> = vec![];
let mut scanner = MultiIteratorScanner::new(&slice, 2, (), |_, _| ProcessingDecision::Now);
assert!(scanner.iterate().is_none());
}
#[test]
fn test_multi_iterator_scanner_iterate() {
let slice = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11];
let should_process = |_item: &i32, _payload: &mut ()| ProcessingDecision::Now;
let mut scanner = MultiIteratorScanner::new(&slice, 2, (), should_process);
let mut actual_batches = vec![];
while let Some((batch, _payload)) = scanner.iterate() {
actual_batches.push(batch.to_vec());
}
// Batch 1: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]
//           ^  ^
// Batch 2: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]
//                 ^  ^
// Batch 3: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]
//                       ^  ^
// Batch 4: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]
//                             ^  ^
// Batch 5: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]
//                                   ^  ^
// Batch 6: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]
//                                          ^
let expected_batches = vec![
vec![&1, &2],
vec![&3, &4],
vec![&5, &6],
vec![&7, &8],
vec![&9, &10],
vec![&11],
];
assert_eq!(actual_batches, expected_batches);
}
#[test]
fn test_multi_iterator_scanner_iterate_with_gaps() {
let slice = [0, 0, 0, 1, 2, 3, 1];
let payload = TestScannerPayload {
locks: vec![false; 4],
};
let mut scanner =
MultiIteratorScanner::new(&slice, 2, payload, test_scanner_locking_should_process);
let mut actual_batches = vec![];
while let Some((batch, payload)) = scanner.iterate() {
// free the resources
for item in batch {
payload.locks[**item as usize] = false;
}
actual_batches.push(batch.to_vec());
}
// Batch 1: [0, 0, 0, 1, 2, 3, 1]
//           ^        ^
// Batch 2: [0, 0, 0, 1, 2, 3, 1]
//              ^        ^
// Batch 3: [0, 0, 0, 1, 2, 3, 1]
//                 ^        ^
// Batch 4: [0, 0, 0, 1, 2, 3, 1]
//           ------------------^ (--- indicates where the 0th iterator marched from)
let expected_batches = vec![vec![&0, &1], vec![&0, &2], vec![&0, &3], vec![&1]];
assert_eq!(actual_batches, expected_batches);
let TestScannerPayload { locks } = scanner.finalize();
assert_eq!(locks, vec![false; 4]);
}
#[test]
fn test_multi_iterator_scanner_iterate_conflicts_not_at_front() {
let slice = [1, 2, 3, 0, 0, 0, 3, 2, 1];
let payload = TestScannerPayload {
locks: vec![false; 4],
};
let mut scanner =
MultiIteratorScanner::new(&slice, 2, payload, test_scanner_locking_should_process);
let mut actual_batches = vec![];
while let Some((batch, payload)) = scanner.iterate() {
// free the resources
for item in batch {
payload.locks[**item as usize] = false;
}
actual_batches.push(batch.to_vec());
}
// Batch 1: [1, 2, 3, 0, 0, 0, 3, 2, 1]
//           ^  ^
// Batch 2: [1, 2, 3, 0, 0, 0, 3, 2, 1]
//                 ^  ^
// Batch 3: [1, 2, 3, 0, 0, 0, 3, 2, 1]
//                       ^     ^
// Batch 4: [1, 2, 3, 0, 0, 0, 3, 2, 1]
//                          ^     ^
// Batch 5: [1, 2, 3, 0, 0, 0, 3, 2, 1]
//                                   ^
let expected_batches = vec![
vec![&1, &2],
vec![&3, &0],
vec![&0, &3],
vec![&0, &2],
vec![&1],
];
assert_eq!(actual_batches, expected_batches);
let TestScannerPayload { locks } = scanner.finalize();
assert_eq!(locks, vec![false; 4]);
}
#[test]
fn test_multi_iterator_scanner_iterate_with_never_process() {
let slice = [0, 4, 1, 2];
let should_process = |item: &i32, _payload: &mut ()| match item {
4 => ProcessingDecision::Never,
_ => ProcessingDecision::Now,
};
let mut scanner = MultiIteratorScanner::new(&slice, 2, (), should_process);
let mut actual_batches = vec![];
while let Some((batch, _payload)) = scanner.iterate() {
actual_batches.push(batch.to_vec());
}
// Batch 1: [0, 4, 1, 2]
//           ^     ^
// Batch 2: [0, 4, 1, 2]
//                    ^
let expected_batches = vec![vec![&0, &1], vec![&2]];
assert_eq!(actual_batches, expected_batches);
}
}
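
The module-level example can be reproduced against this API directly (again a sketch assuming the solana_core crate path; equal letters model transactions that contend on the same account, and the caller clears the locks after each batch, just as the consume path clears write_accounts between batches):

use solana_core::multi_iterator_scanner::{MultiIteratorScanner, ProcessingDecision};
use std::collections::HashSet;

fn main() {
    // Same buffer as the module-level doc comment; equal letters conflict.
    let slice = ['A', 'A', 'B', 'A', 'C', 'D', 'B', 'C', 'D'];

    // Payload: the letters "write-locked" by the batch being built.
    let locks: HashSet<char> = HashSet::new();

    let mut scanner = MultiIteratorScanner::new(&slice, 4, locks, |item, locks| {
        if locks.contains(item) {
            ProcessingDecision::Later // conflicts with the current batch
        } else {
            locks.insert(*item);
            ProcessingDecision::Now
        }
    });

    let mut batches = Vec::new();
    while let Some((batch, locks)) = scanner.iterate() {
        batches.push(batch.iter().map(|c| **c).collect::<Vec<_>>());
        locks.clear(); // release locks before scanning for the next batch
    }

    // Matches the batches shown in the module docs.
    assert_eq!(
        batches,
        [
            vec!['A', 'B', 'C', 'D'],
            vec!['A', 'B', 'C', 'D'],
            vec!['A'],
        ]
    );
}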

View File

@@ -1,12 +1,14 @@
 use {
     crate::{
-        banking_stage::{FilterForwardingResults, ForwardOption},
+        banking_stage::{BankingStageStats, FilterForwardingResults, ForwardOption},
         forward_packet_batches_by_accounts::ForwardPacketBatchesByAccounts,
         immutable_deserialized_packet::ImmutableDeserializedPacket,
         latest_unprocessed_votes::{
             LatestUnprocessedVotes, LatestValidatorVotePacket, VoteBatchInsertionMetrics,
             VoteSource,
         },
+        leader_slot_banking_stage_metrics::LeaderSlotMetricsTracker,
+        multi_iterator_scanner::{MultiIteratorScanner, ProcessingDecision},
         unprocessed_packet_batches::{
             DeserializedPacket, PacketBatchInsertionMetrics, UnprocessedPacketBatches,
         },
@@ -16,13 +18,19 @@ use {
     solana_measure::measure,
     solana_runtime::bank::Bank,
     solana_sdk::{
-        clock::FORWARD_TRANSACTIONS_TO_LEADER_AT_SLOT_OFFSET, saturating_add_assign,
-        transaction::SanitizedTransaction,
+        clock::FORWARD_TRANSACTIONS_TO_LEADER_AT_SLOT_OFFSET, pubkey::Pubkey,
+        saturating_add_assign, transaction::SanitizedTransaction,
+    },
+    std::{
+        collections::HashSet,
+        sync::{atomic::Ordering, Arc},
     },
-    std::sync::Arc,
 };

-pub const UNPROCESSED_BUFFER_STEP_SIZE: usize = 128;
+// Step-size set to be 64, equal to the maximum batch/entry size. With the
+// multi-iterator change, there's no point in getting larger batches of
+// non-conflicting transactions.
+pub const UNPROCESSED_BUFFER_STEP_SIZE: usize = 64;

 /// Maximum number of votes a single receive call will accept
 const MAX_NUM_VOTES_RECEIVE: usize = 10_000;
@@ -119,6 +127,112 @@ fn filter_processed_packets<'a, F>(
     }
 }

+/// Convenient wrapper for shared-state between banking stage processing and the
+/// multi-iterator checking function.
+pub struct ConsumeScannerPayload<'a> {
+    pub reached_end_of_slot: bool,
+    pub write_accounts: HashSet<Pubkey>,
+    pub sanitized_transactions: Vec<SanitizedTransaction>,
+    pub slot_metrics_tracker: &'a mut LeaderSlotMetricsTracker,
+}
+
+fn consume_scan_should_process_packet(
+    bank: &Bank,
+    banking_stage_stats: &BankingStageStats,
+    packet: &ImmutableDeserializedPacket,
+    payload: &mut ConsumeScannerPayload,
+) -> ProcessingDecision {
+    // If we've already reached the end of the slot, return Now so the scanner
+    // drains quickly; do_process_packets will return None without processing.
+    if payload.reached_end_of_slot {
+        return ProcessingDecision::Now;
+    }
+
+    // Before sanitization, quickly check the static account keys (performance optimization)
+    let message = &packet.transaction().get_message().message;
+    let static_keys = message.static_account_keys();
+    for key in static_keys.iter().enumerate().filter_map(|(idx, key)| {
+        if message.is_maybe_writable(idx) {
+            Some(key)
+        } else {
+            None
+        }
+    }) {
+        if payload.write_accounts.contains(key) {
+            return ProcessingDecision::Later;
+        }
+    }
+
+    // Try to sanitize the packet
+    let (maybe_sanitized_transaction, sanitization_time) = measure!(
+        packet.build_sanitized_transaction(&bank.feature_set, bank.vote_only_bank(), bank)
+    );
+
+    let sanitization_time_us = sanitization_time.as_us();
+    payload
+        .slot_metrics_tracker
+        .increment_transactions_from_packets_us(sanitization_time_us);
+    banking_stage_stats
+        .packet_conversion_elapsed
+        .fetch_add(sanitization_time_us, Ordering::Relaxed);
+
+    if let Some(sanitized_transaction) = maybe_sanitized_transaction {
+        let message = sanitized_transaction.message();
+        let conflicts_with_batch = message.account_keys().iter().enumerate().any(|(idx, key)| {
+            if message.is_writable(idx) {
+                payload.write_accounts.contains(key)
+            } else {
+                false
+            }
+        });
+
+        if conflicts_with_batch {
+            ProcessingDecision::Later
+        } else {
+            message
+                .account_keys()
+                .iter()
+                .enumerate()
+                .for_each(|(idx, key)| {
+                    if message.is_writable(idx) {
+                        payload.write_accounts.insert(*key);
+                    }
+                });
+
+            payload.sanitized_transactions.push(sanitized_transaction);
+            ProcessingDecision::Now
+        }
+    } else {
+        ProcessingDecision::Never
+    }
+}
+
+fn create_consume_multi_iterator<'a, 'b, F>(
+    packets: &'a [Arc<ImmutableDeserializedPacket>],
+    slot_metrics_tracker: &'b mut LeaderSlotMetricsTracker,
+    should_process_packet: F,
+) -> MultiIteratorScanner<'a, Arc<ImmutableDeserializedPacket>, ConsumeScannerPayload<'b>, F>
+where
+    F: FnMut(
+        &Arc<ImmutableDeserializedPacket>,
+        &mut ConsumeScannerPayload<'b>,
+    ) -> ProcessingDecision,
+    'b: 'a,
+{
+    let payload = ConsumeScannerPayload {
+        reached_end_of_slot: false,
+        write_accounts: HashSet::new(),
+        sanitized_transactions: Vec::with_capacity(UNPROCESSED_BUFFER_STEP_SIZE),
+        slot_metrics_tracker,
+    };
+    MultiIteratorScanner::new(
+        packets,
+        UNPROCESSED_BUFFER_STEP_SIZE,
+        payload,
+        should_process_packet,
+    )
+}

 impl UnprocessedTransactionStorage {
     pub fn new_transaction_storage(
         unprocessed_packet_batches: UnprocessedPacketBatches,
@@ -233,22 +347,34 @@ impl UnprocessedTransactionStorage {
     /// The processing function takes a stream of packets ready to process, and returns the indices
     /// of the unprocessed packets that are eligible for retry. A return value of None means that
     /// all packets are unprocessed and eligible for retry.
-    pub fn process_packets<F>(
+    #[must_use]
+    pub fn process_packets<'a, F>(
         &mut self,
-        bank: Option<Arc<Bank>>,
-        batch_size: usize,
+        bank: Arc<Bank>,
+        banking_stage_stats: &BankingStageStats,
+        slot_metrics_tracker: &'a mut LeaderSlotMetricsTracker,
         processing_function: F,
-    ) where
-        F: FnMut(&Vec<Arc<ImmutableDeserializedPacket>>) -> Option<Vec<usize>>,
+    ) -> ConsumeScannerPayload<'a>
+    where
+        F: FnMut(
+            &Vec<Arc<ImmutableDeserializedPacket>>,
+            &mut ConsumeScannerPayload,
+        ) -> Option<Vec<usize>>,
     {
-        match (self, bank) {
-            (Self::LocalTransactionStorage(transaction_storage), _) => {
-                transaction_storage.process_packets(batch_size, processing_function)
-            }
-            (Self::VoteStorage(vote_storage), Some(bank)) => {
-                vote_storage.process_packets(bank, batch_size, processing_function)
-            }
-            _ => {}
+        match self {
+            Self::LocalTransactionStorage(transaction_storage) => transaction_storage
+                .process_packets(
+                    &bank,
+                    banking_stage_stats,
+                    slot_metrics_tracker,
+                    processing_function,
+                ),
+            Self::VoteStorage(vote_storage) => vote_storage.process_packets(
+                bank,
+                banking_stage_stats,
+                slot_metrics_tracker,
+                processing_function,
+            ),
         }
     }
 }
@@ -312,39 +438,62 @@ impl VoteStorage {
         FilterForwardingResults::default()
     }

-    fn process_packets<F>(&mut self, bank: Arc<Bank>, batch_size: usize, mut processing_function: F)
+    fn process_packets<'a, F>(
+        &mut self,
+        bank: Arc<Bank>,
+        banking_stage_stats: &BankingStageStats,
+        slot_metrics_tracker: &'a mut LeaderSlotMetricsTracker,
+        mut processing_function: F,
+    ) -> ConsumeScannerPayload<'a>
     where
-        F: FnMut(&Vec<Arc<ImmutableDeserializedPacket>>) -> Option<Vec<usize>>,
+        F: FnMut(
+            &Vec<Arc<ImmutableDeserializedPacket>>,
+            &mut ConsumeScannerPayload,
+        ) -> Option<Vec<usize>>,
     {
         if matches!(self.vote_source, VoteSource::Gossip) {
             panic!("Gossip vote thread should not be processing transactions");
         }

-        // Insert the retryable votes back in
-        self.latest_unprocessed_votes.insert_batch(
-            // Based on the stake distribution present in the supplied bank, drain the unprocessed votes
-            // from each validator using a weighted random ordering. Votes from validators with
-            // 0 stake are ignored.
-            self.latest_unprocessed_votes
-                .drain_unprocessed(bank)
-                .into_iter()
-                .chunks(batch_size)
-                .into_iter()
-                .flat_map(|vote_packets| {
-                    let vote_packets = vote_packets.into_iter().collect_vec();
-                    if let Some(retryable_vote_indices) = processing_function(&vote_packets) {
-                        retryable_vote_indices
-                            .iter()
-                            .map(|i| vote_packets[*i].clone())
-                            .collect_vec()
-                    } else {
-                        vote_packets
-                    }
-                })
-                .filter_map(|packet| {
-                    LatestValidatorVotePacket::new_from_immutable(packet, self.vote_source).ok()
-                }),
+        let should_process_packet =
+            |packet: &Arc<ImmutableDeserializedPacket>, payload: &mut ConsumeScannerPayload| {
+                consume_scan_should_process_packet(&bank, banking_stage_stats, packet, payload)
+            };
+
+        // Based on the stake distribution present in the supplied bank, drain the unprocessed votes
+        // from each validator using a weighted random ordering. Votes from validators with
+        // 0 stake are ignored.
+        let all_vote_packets = self
+            .latest_unprocessed_votes
+            .drain_unprocessed(bank.clone());
+
+        let mut scanner = create_consume_multi_iterator(
+            &all_vote_packets,
+            slot_metrics_tracker,
+            should_process_packet,
         );
+
+        while let Some((packets, payload)) = scanner.iterate() {
+            let vote_packets = packets.iter().map(|p| (*p).clone()).collect_vec();
+
+            if let Some(retryable_vote_indices) = processing_function(&vote_packets, payload) {
+                self.latest_unprocessed_votes.insert_batch(
+                    retryable_vote_indices.iter().filter_map(|i| {
+                        LatestValidatorVotePacket::new_from_immutable(
+                            vote_packets[*i].clone(),
+                            self.vote_source,
+                        )
+                        .ok()
+                    }),
+                );
+            } else {
+                self.latest_unprocessed_votes
+                    .insert_batch(vote_packets.into_iter().filter_map(|packet| {
+                        LatestValidatorVotePacket::new_from_immutable(packet, self.vote_source).ok()
+                    }));
+            }
+        }
+
+        scanner.finalize()
     }
 }
@@ -711,35 +860,53 @@ impl ThreadLocalUnprocessedPackets {
         )
     }

-    fn process_packets<F>(&mut self, batch_size: usize, mut processing_function: F)
+    fn process_packets<'a, F>(
+        &mut self,
+        bank: &Bank,
+        banking_stage_stats: &BankingStageStats,
+        slot_metrics_tracker: &'a mut LeaderSlotMetricsTracker,
+        mut processing_function: F,
+    ) -> ConsumeScannerPayload<'a>
     where
-        F: FnMut(&Vec<Arc<ImmutableDeserializedPacket>>) -> Option<Vec<usize>>,
+        F: FnMut(
+            &Vec<Arc<ImmutableDeserializedPacket>>,
+            &mut ConsumeScannerPayload,
+        ) -> Option<Vec<usize>>,
     {
         let mut retryable_packets = self.take_priority_queue();
         let original_capacity = retryable_packets.capacity();
         let mut new_retryable_packets = MinMaxHeap::with_capacity(original_capacity);
-        new_retryable_packets.extend(
-            retryable_packets
-                .drain_desc()
-                .chunks(batch_size)
-                .into_iter()
-                .flat_map(|packets_to_process| {
-                    let packets_to_process = packets_to_process.into_iter().collect_vec();
-                    if let Some(retryable_transaction_indexes) =
-                        processing_function(&packets_to_process)
-                    {
-                        self.collect_retained_packets(
-                            &packets_to_process,
-                            &retryable_transaction_indexes,
-                        )
-                    } else {
-                        packets_to_process
-                    }
-                }),
+        let all_packets_to_process = retryable_packets.drain_desc().collect_vec();
+
+        let should_process_packet =
+            |packet: &Arc<ImmutableDeserializedPacket>, payload: &mut ConsumeScannerPayload| {
+                consume_scan_should_process_packet(bank, banking_stage_stats, packet, payload)
+            };
+        let mut scanner = create_consume_multi_iterator(
+            &all_packets_to_process,
+            slot_metrics_tracker,
+            should_process_packet,
         );
+
+        while let Some((packets_to_process, payload)) = scanner.iterate() {
+            let packets_to_process = packets_to_process
+                .iter()
+                .map(|p| (*p).clone())
+                .collect_vec();
+            let retryable_packets = if let Some(retryable_transaction_indexes) =
+                processing_function(&packets_to_process, payload)
+            {
+                self.collect_retained_packets(&packets_to_process, &retryable_transaction_indexes)
+            } else {
+                packets_to_process
+            };
+
+            new_retryable_packets.extend(retryable_packets);
+        }

         self.unprocessed_packet_batches.packet_priority_queue = new_retryable_packets;
         self.verify_priority_queue(original_capacity);
+
+        scanner.finalize()
     }

     /// Prepare a chunk of packets for forwarding, filter out already forwarded packets while