solana/core/src/banking_stage/multi_iterator_scanner.rs

//! Provides an iterator interface that creates non-conflicting batches of elements to process.
//!
//! The problem that this structure is targeting is as follows:
//!     We have a slice of transactions we want to process in batches where transactions
//!     in the same batch do not conflict with each other. This allows us to process them in
//!     parallel. The original slice is ordered by priority, and it is often the case
//!     that transactions with high priority are conflicting with each other. This means
//!     we cannot simply grab chunks of transactions.
//! The solution is to create a MultiIteratorScanner that will use multiple iterators, up
//! to the desired batch size, and create batches of transactions that do not conflict with
//! each other. The MultiIteratorScanner stores state for the current positions of each iterator,
//! as well as which transactions have already been handled. If a transaction is invalid it can
//! also be skipped without being considered for future batches.
//!

/// Output from the element checker used in `MultiIteratorScanner::iterate`, telling the
/// scanner what to do with the element it was just shown.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ProcessingDecision {
    /// Include the element in the batch currently being built and mark it as handled.
    Now,
    /// Skip the element for the batch currently being built. It stays unhandled, so a
    /// later pass that scans over it may offer it to the checker again.
    Later,
    /// Skip the element and mark it as handled so it is never offered to the checker again.
    Never,
}

/// Iterates over a slice, creating batches of elements to process in which no element
/// conflicts with another element of the same batch. Elements in different batches are
/// not guaranteed to be non-conflicting.
///
/// Conflicting elements are guaranteed to be processed in the order they appear in the
/// slice, as long as the `should_process` function is appropriately marking resources as
/// used. It is also guaranteed that elements within a batch are in the order they appear
/// in the slice. The batch size is not guaranteed to be `max_iterators` - it can be smaller.
///
/// The `FnMut(&T, &mut U) -> ProcessingDecision` requirement on `F` is stated on the
/// `impl` block rather than here, so the type can be named (for example as a field of
/// another struct) without repeating the bound.
///
/// # Examples
///
/// Assume transactions with the same letter conflict with each other. A typical priority
/// ordered buffer might look like:
///
/// ```text
/// [A, A, B, A, C, D, B, C, D]
/// ```
///
/// If we want to have batches of size 4, the MultiIteratorScanner will proceed as follows:
///
/// ```text
/// [A, A, B, A, C, D, B, C, D]
///  ^     ^     ^  ^
///
/// [A, A, B, A, C, D, B, C, D]
///     ^              ^  ^  ^
///
/// [A, A, B, A, C, D, B, C, D]
///           ^
/// ```
///
/// The iterator will iterate with batches:
///
/// ```text
/// [[A, B, C, D], [A, B, C, D], [A]]
/// ```
///
pub struct MultiIteratorScanner<'a, T, U, F> {
    /// Maximum number of iterators to use, i.e. the upper bound on the batch size.
    max_iterators: usize,
    /// Slice that we're iterating over.
    slice: &'a [T],
    /// Payload - used to store shared mutable state between scanner and the processing function.
    payload: U,
    /// Function that checks if an element should be processed. This function is also responsible
    /// for marking resources, such as locks, as used.
    should_process: F,
    /// Stores, per index of `slice`, whether that element has already been handled.
    already_handled: Vec<bool>,
    /// Current indices inside `slice` for the active iterators.
    current_positions: Vec<usize>,
    /// Container to store items for iteration - should only be used in `get_current_items()`.
    current_items: Vec<&'a T>,
    /// Whether the first batch has been set up, i.e. whether `iterate` has been called before.
    initialized: bool,
}

/// Final state handed back by `MultiIteratorScanner::finalize` once scanning is over.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct PayloadAndAlreadyHandled<U> {
    /// The payload that was shared between the scanner and the processing function.
    pub payload: U,
    /// One flag per element of the scanned slice, in slice order: `true` if the element
    /// was marked as handled during scanning, `false` otherwise.
    pub already_handled: Vec<bool>,
}

impl<'a, T, U, F> MultiIteratorScanner<'a, T, U, F>
where
    F: FnMut(&T, &mut U) -> ProcessingDecision,
{
    /// Creates a scanner over `slice` that builds batches of at most `max_iterators` elements.
    ///
    /// `payload` is passed mutably to `should_process` on every check and is handed back to
    /// the caller alongside every batch. `should_process` decides, element by element,
    /// whether an element joins the batch being built.
    ///
    /// # Panics
    ///
    /// Panics if `max_iterators` is zero.
    pub fn new(slice: &'a [T], max_iterators: usize, payload: U, should_process: F) -> Self {
        assert!(max_iterators > 0);
        // A batch can never hold more elements than the slice has, so cap the reservation.
        // This avoids over-allocating and keeps a very large `max_iterators` (for example
        // `usize::MAX` meaning "no limit") from overflowing the `Vec` capacity.
        let batch_capacity = max_iterators.min(slice.len());
        Self {
            max_iterators,
            slice,
            payload,
            should_process,
            already_handled: vec![false; slice.len()],
            current_positions: Vec::with_capacity(batch_capacity),
            current_items: Vec::with_capacity(batch_capacity),
            initialized: false,
        }
    }

    /// Builds the next batch and returns a slice of the item references at the current
    /// positions of the iterators together with a mutable reference to the payload.
    ///
    /// Returns `None` if the scanner is done iterating.
    pub fn iterate(&mut self) -> Option<(&[&'a T], &mut U)> {
        if self.initialized {
            self.advance_current_positions();
        } else {
            self.initialized = true;
            self.initialize_current_positions();
        }
        self.get_current_items()
    }

    /// Consumes the scanner. Returns the payload and a vector of booleans
    /// indicating which items have been handled.
    pub fn finalize(self) -> PayloadAndAlreadyHandled<U> {
        let Self {
            payload,
            already_handled,
            ..
        } = self;
        PayloadAndAlreadyHandled {
            payload,
            already_handled,
        }
    }

    /// Initializes the `current_positions` vector for the first batch.
    fn initialize_current_positions(&mut self) {
        let mut search_from = 0;
        for _ in 0..self.max_iterators {
            match self.march_iterator(search_from) {
                Some(index) => {
                    self.current_positions.push(index);
                    // The next iterator starts right behind the one just placed.
                    search_from = index.saturating_add(1);
                }
                // Ran off the end of the slice: the first batch is smaller than the maximum.
                None => break,
            }
        }
    }

    /// Marches the iterators forward to find the next batch of items.
    fn advance_current_positions(&mut self) {
        // With no live iterators there is nothing left to advance.
        let mut prev_index = match self.current_positions.first() {
            Some(&first) => first,
            None => return,
        };

        for iterator_index in 0..self.current_positions.len() {
            // If the previous iterator has passed this iterator, we should start
            // at its position + 1 to avoid duplicate re-traversal.
            let start_index = self.current_positions[iterator_index]
                .max(prev_index)
                .saturating_add(1);
            match self.march_iterator(start_index) {
                Some(index) => {
                    self.current_positions[iterator_index] = index;
                    prev_index = index;
                }
                None => {
                    // Drop this and all following positions: they went past the end of the slice.
                    self.current_positions.truncate(iterator_index);
                    break;
                }
            }
        }
    }

    /// Gets the current items from the slice using `self.current_positions`.
    /// Returns `None` if there are no more items.
    fn get_current_items(&mut self) -> Option<(&[&'a T], &mut U)> {
        let slice = self.slice;
        self.current_items.clear();
        self.current_items.extend(
            self.current_positions
                .iter()
                .map(move |&index| &slice[index]),
        );
        (!self.current_items.is_empty()).then_some((&self.current_items, &mut self.payload))
    }

    /// Moves an iterator to its next position, scanning forward from `starting_index`.
    ///
    /// Elements already handled are skipped without consulting `should_process`. The first
    /// element for which it answers `Now` is marked as handled and its index returned;
    /// elements answered with `Never` are marked as handled and skipped; elements answered
    /// with `Later` are skipped untouched. Returns `None` if the end of the slice is
    /// reached without finding an element to process.
    fn march_iterator(&mut self, starting_index: usize) -> Option<usize> {
        for index in starting_index..self.slice.len() {
            if self.already_handled[index] {
                continue;
            }
            match (self.should_process)(&self.slice[index], &mut self.payload) {
                ProcessingDecision::Now => {
                    self.already_handled[index] = true;
                    return Some(index);
                }
                ProcessingDecision::Later => {
                    // Do nothing - iterator will try this element in a future batch
                }
                ProcessingDecision::Never => {
                    self.already_handled[index] = true;
                }
            }
        }

        None
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    /// Payload for the locking tests: `locks[i]` is `true` while value `i` is in a batch.
    struct TestScannerPayload {
        locks: Vec<bool>,
    }

    /// Checker that treats the item's value as a lock index: defers the item if the lock
    /// is taken, otherwise takes the lock and accepts the item.
    fn test_scanner_locking_should_process(
        item: &i32,
        payload: &mut TestScannerPayload,
    ) -> ProcessingDecision {
        if payload.locks[*item as usize] {
            ProcessingDecision::Later
        } else {
            payload.locks[*item as usize] = true;
            ProcessingDecision::Now
        }
    }

    #[test]
    fn test_multi_iterator_scanner_empty() {
        let slice: Vec<i32> = vec![];
        let mut scanner = MultiIteratorScanner::new(&slice, 2, (), |_, _| ProcessingDecision::Now);
        assert!(scanner.iterate().is_none());
    }

    #[test]
    #[should_panic]
    fn test_multi_iterator_scanner_zero_iterators() {
        // A scanner with no iterators could never produce a batch; `new` rejects it.
        let slice = [0, 1, 2];
        let _scanner = MultiIteratorScanner::new(&slice, 0, (), |_: &i32, _: &mut ()| {
            ProcessingDecision::Now
        });
    }

    #[test]
    fn test_multi_iterator_scanner_stays_exhausted() {
        let slice = [0, 1, 2];
        let mut scanner = MultiIteratorScanner::new(&slice, 2, (), |_: &i32, _: &mut ()| {
            ProcessingDecision::Now
        });
        while scanner.iterate().is_some() {}

        // Once the scanner reported the end, further calls keep reporting the end.
        assert!(scanner.iterate().is_none());
        assert!(scanner.iterate().is_none());
    }

    #[test]
    fn test_multi_iterator_scanner_iterate() {
        let slice = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11];
        let should_process = |_item: &i32, _payload: &mut ()| ProcessingDecision::Now;

        let mut scanner = MultiIteratorScanner::new(&slice, 2, (), should_process);
        let mut actual_batches = vec![];
        while let Some((batch, _payload)) = scanner.iterate() {
            actual_batches.push(batch.to_vec());
        }

        // Batch 1: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]
        //           ^  ^
        // Batch 2: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]
        //                 ^  ^
        // Batch 3: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]
        //                       ^  ^
        // Batch 4: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]
        //                             ^  ^
        // Batch 5: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]
        //                                   ^   ^
        // Batch 6: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]
        //                                           ^
        let expected_batches = vec![
            vec![&1, &2],
            vec![&3, &4],
            vec![&5, &6],
            vec![&7, &8],
            vec![&9, &10],
            vec![&11],
        ];
        assert_eq!(actual_batches, expected_batches);
    }

    #[test]
    fn test_multi_iterator_scanner_iterate_with_gaps() {
        let slice = [0, 0, 0, 1, 2, 3, 1];

        let payload = TestScannerPayload {
            locks: vec![false; 4],
        };

        let mut scanner =
            MultiIteratorScanner::new(&slice, 2, payload, test_scanner_locking_should_process);
        let mut actual_batches = vec![];
        while let Some((batch, payload)) = scanner.iterate() {
            // free the resources
            for item in batch {
                payload.locks[**item as usize] = false;
            }

            actual_batches.push(batch.to_vec());
        }

        // Batch 1: [0, 0, 0, 1, 2, 3, 1]
        //           ^        ^
        // Batch 2: [0, 0, 0, 1, 2, 3, 1]
        //              ^        ^
        // Batch 3: [0, 0, 0, 1, 2, 3, 1]
        //                 ^        ^
        // Batch 4: [0, 0, 0, 1, 2, 3, 1]
        //                  -----------^ (--- indicates where the 0th iterator marched from)
        let expected_batches = vec![vec![&0, &1], vec![&0, &2], vec![&0, &3], vec![&1]];
        assert_eq!(actual_batches, expected_batches);

        let PayloadAndAlreadyHandled {
            payload: TestScannerPayload { locks },
            already_handled,
        } = scanner.finalize();
        assert_eq!(locks, vec![false; 4]);
        assert!(already_handled.into_iter().all(|x| x));
    }

    #[test]
    fn test_multi_iterator_scanner_iterate_conflicts_not_at_front() {
        let slice = [1, 2, 3, 0, 0, 0, 3, 2, 1];

        let payload = TestScannerPayload {
            locks: vec![false; 4],
        };

        let mut scanner =
            MultiIteratorScanner::new(&slice, 2, payload, test_scanner_locking_should_process);
        let mut actual_batches = vec![];
        while let Some((batch, payload)) = scanner.iterate() {
            // free the resources
            for item in batch {
                payload.locks[**item as usize] = false;
            }

            actual_batches.push(batch.to_vec());
        }

        // Batch 1: [1, 2, 3, 0, 0, 0, 3, 2, 1]
        //           ^  ^
        // Batch 2: [1, 2, 3, 0, 0, 0, 3, 2, 1]
        //                 ^  ^
        // Batch 3: [1, 2, 3, 0, 0, 0, 3, 2, 1]
        //                       ^     ^
        // Batch 4: [1, 2, 3, 0, 0, 0, 3, 2, 1]
        //                          ^     ^
        // Batch 5: [1, 2, 3, 0, 0, 0, 3, 2, 1]
        //                                   ^
        let expected_batches = vec![
            vec![&1, &2],
            vec![&3, &0],
            vec![&0, &3],
            vec![&0, &2],
            vec![&1],
        ];
        assert_eq!(actual_batches, expected_batches);

        let PayloadAndAlreadyHandled {
            payload: TestScannerPayload { locks },
            already_handled,
        } = scanner.finalize();
        assert_eq!(locks, vec![false; 4]);
        assert!(already_handled.into_iter().all(|x| x));
    }

    #[test]
    fn test_multi_iterator_scanner_iterate_with_never_process() {
        let slice = [0, 4, 1, 2];
        let should_process = |item: &i32, _payload: &mut ()| match item {
            4 => ProcessingDecision::Never,
            _ => ProcessingDecision::Now,
        };

        let mut scanner = MultiIteratorScanner::new(&slice, 2, (), should_process);
        let mut actual_batches = vec![];
        while let Some((batch, _payload)) = scanner.iterate() {
            actual_batches.push(batch.to_vec());
        }

        // Batch 1: [0, 4, 1, 2]
        //           ^     ^
        // Batch 2: [0, 4, 1, 2]
        //                    ^
        let expected_batches = vec![vec![&0, &1], vec![&2]];
        assert_eq!(actual_batches, expected_batches);
    }

    #[test]
    fn test_multi_iterator_scanner_iterate_not_handled() {
        let slice = [0, 1, 2];

        // 0 and 2 will always be marked as later, and never actually handled
        let should_process = |item: &i32, _payload: &mut ()| match item {
            1 => ProcessingDecision::Now,
            _ => ProcessingDecision::Later,
        };

        let mut scanner = MultiIteratorScanner::new(&slice, 2, (), should_process);
        let mut actual_batches = vec![];
        while let Some((batch, _payload)) = scanner.iterate() {
            actual_batches.push(batch.to_vec());
        }

        // Batch 1: [1]
        let expected_batches = vec![vec![&1]];
        assert_eq!(actual_batches, expected_batches);

        let PayloadAndAlreadyHandled {
            already_handled, ..
        } = scanner.finalize();
        assert_eq!(already_handled, vec![false, true, false]);
    }
}