// Module for cuda-related helper functions and wrappers. // // cudaHostRegister/cudaHostUnregister - // apis for page-pinning memory. Cuda driver/hardware cannot overlap // copies from host memory to GPU memory unless the memory is page-pinned and // cannot be paged to disk. The cuda driver provides these interfaces to pin and unpin memory. use { crate::{ perf_libs, recycler::{RecyclerX, Reset}, }, rand::{seq::SliceRandom, Rng}, rayon::prelude::*, serde::{Deserialize, Serialize}, std::{ ops::{Index, IndexMut}, os::raw::c_int, slice::{Iter, IterMut, SliceIndex}, sync::Weak, }, }; const CUDA_SUCCESS: c_int = 0; fn pin(_mem: &mut Vec) { if let Some(api) = perf_libs::api() { use std::{ffi::c_void, mem::size_of}; let ptr = _mem.as_mut_ptr(); let size = _mem.capacity().saturating_mul(size_of::()); let err = unsafe { (api.cuda_host_register)(ptr as *mut c_void, size, /*flags=*/ 0) }; assert!( err == CUDA_SUCCESS, "cudaHostRegister error: {err} ptr: {ptr:?} bytes: {size}" ); } } fn unpin(_mem: *mut T) { if let Some(api) = perf_libs::api() { use std::ffi::c_void; let err = unsafe { (api.cuda_host_unregister)(_mem as *mut c_void) }; assert!( err == CUDA_SUCCESS, "cudaHostUnregister returned: {err} ptr: {_mem:?}" ); } } // A vector wrapper where the underlying memory can be // page-pinned. Controlled by flags in case user only wants // to pin in certain circumstances. #[derive(Debug, Default, Serialize, Deserialize)] pub struct PinnedVec { x: Vec, pinned: bool, pinnable: bool, #[serde(skip)] recycler: Weak>>, } impl Reset for PinnedVec { fn reset(&mut self) { self.resize(0, T::default()); } fn warm(&mut self, size_hint: usize) { self.set_pinnable(); self.resize(size_hint, T::default()); } fn set_recycler(&mut self, recycler: Weak>) { self.recycler = recycler; } } impl From> for Vec { fn from(mut pinned_vec: PinnedVec) -> Self { if pinned_vec.pinned { // If the vector is pinned and has a recycler, just return a clone // so that the next allocation of a PinnedVec will recycle an // already pinned one. if pinned_vec.recycler.strong_count() != 0 { return pinned_vec.x.clone(); } unpin(pinned_vec.x.as_mut_ptr()); pinned_vec.pinned = false; } pinned_vec.pinnable = false; pinned_vec.recycler = Weak::default(); std::mem::take(&mut pinned_vec.x) } } impl<'a, T: Clone + Default + Sized> IntoIterator for &'a PinnedVec { type Item = &'a T; type IntoIter = Iter<'a, T>; fn into_iter(self) -> Self::IntoIter { self.x.iter() } } impl> Index for PinnedVec { type Output = I::Output; #[inline] fn index(&self, index: I) -> &Self::Output { &self.x[index] } } impl> IndexMut for PinnedVec { #[inline] fn index_mut(&mut self, index: I) -> &mut Self::Output { &mut self.x[index] } } impl PinnedVec { pub fn iter(&self) -> Iter<'_, T> { self.x.iter() } pub fn iter_mut(&mut self) -> IterMut<'_, T> { self.x.iter_mut() } pub fn capacity(&self) -> usize { self.x.capacity() } } impl<'a, T: Clone + Send + Sync + Default + Sized> IntoParallelIterator for &'a PinnedVec { type Iter = rayon::slice::Iter<'a, T>; type Item = &'a T; fn into_par_iter(self) -> Self::Iter { self.x.par_iter() } } impl<'a, T: Clone + Send + Sync + Default + Sized> IntoParallelIterator for &'a mut PinnedVec { type Iter = rayon::slice::IterMut<'a, T>; type Item = &'a mut T; fn into_par_iter(self) -> Self::Iter { self.x.par_iter_mut() } } impl PinnedVec { pub fn reserve(&mut self, size: usize) { self.x.reserve(size); } pub fn reserve_and_pin(&mut self, size: usize) { if self.x.capacity() < size { if self.pinned { unpin(self.x.as_mut_ptr()); self.pinned = false; } self.x.reserve(size); } self.set_pinnable(); if !self.pinned { pin(&mut self.x); self.pinned = true; } } pub fn set_pinnable(&mut self) { self.pinnable = true; } pub fn copy_from_slice(&mut self, data: &[T]) where T: Copy, { self.x.copy_from_slice(data); } pub fn from_vec(source: Vec) -> Self { Self { x: source, pinned: false, pinnable: false, recycler: Weak::default(), } } pub fn with_capacity(capacity: usize) -> Self { Self::from_vec(Vec::with_capacity(capacity)) } pub fn is_empty(&self) -> bool { self.x.is_empty() } pub fn len(&self) -> usize { self.x.len() } pub fn as_ptr(&self) -> *const T { self.x.as_ptr() } pub fn as_mut_ptr(&mut self) -> *mut T { self.x.as_mut_ptr() } fn prepare_realloc(&mut self, new_size: usize) -> (*mut T, usize) { let old_ptr = self.x.as_mut_ptr(); let old_capacity = self.x.capacity(); // Predict realloc and unpin. if self.pinned && self.x.capacity() < new_size { unpin(old_ptr); self.pinned = false; } (old_ptr, old_capacity) } pub fn push(&mut self, x: T) { let (old_ptr, old_capacity) = self.prepare_realloc(self.x.len().saturating_add(1)); self.x.push(x); self.check_ptr(old_ptr, old_capacity, "push"); } pub fn truncate(&mut self, size: usize) { self.x.truncate(size); } pub fn resize(&mut self, size: usize, elem: T) { let (old_ptr, old_capacity) = self.prepare_realloc(size); self.x.resize(size, elem); self.check_ptr(old_ptr, old_capacity, "resize"); } pub fn append(&mut self, other: &mut Vec) { let (old_ptr, old_capacity) = self.prepare_realloc(self.x.len().saturating_add(other.len())); self.x.append(other); self.check_ptr(old_ptr, old_capacity, "resize"); } pub fn append_pinned(&mut self, other: &mut Self) { let (old_ptr, old_capacity) = self.prepare_realloc(self.x.len().saturating_add(other.len())); self.x.append(&mut other.x); self.check_ptr(old_ptr, old_capacity, "resize"); } /// Forces the length of the vector to `new_len`. /// /// This is a low-level operation that maintains none of the normal /// invariants of the type. Normally changing the length of a vector /// is done using one of the safe operations instead, such as /// [`truncate`], [`resize`], [`extend`], or [`clear`]. /// /// [`truncate`]: Vec::truncate /// [`resize`]: Vec::resize /// [`extend`]: Extend::extend /// [`clear`]: Vec::clear /// /// # Safety /// /// - `new_len` must be less than or equal to [`capacity()`]. /// - The elements at `old_len..new_len` must be initialized. /// /// [`capacity()`]: Vec::capacity /// pub unsafe fn set_len(&mut self, size: usize) { self.x.set_len(size); } pub fn shuffle(&mut self, rng: &mut R) { self.x.shuffle(rng) } fn check_ptr(&mut self, _old_ptr: *mut T, _old_capacity: usize, _from: &'static str) { let api = perf_libs::api(); if api.is_some() && self.pinnable && (self.x.as_ptr() != _old_ptr || self.x.capacity() != _old_capacity) { if self.pinned { unpin(_old_ptr); } trace!( "pinning from check_ptr old: {} size: {} from: {}", _old_capacity, self.x.capacity(), _from ); pin(&mut self.x); self.pinned = true; } } } impl Clone for PinnedVec { fn clone(&self) -> Self { let mut x = self.x.clone(); let pinned = if self.pinned { pin(&mut x); true } else { false }; debug!( "clone PinnedVec: size: {} pinned?: {} pinnable?: {}", self.x.capacity(), self.pinned, self.pinnable ); Self { x, pinned, pinnable: self.pinnable, recycler: self.recycler.clone(), } } } impl Drop for PinnedVec { fn drop(&mut self) { if let Some(recycler) = self.recycler.upgrade() { recycler.recycle(std::mem::take(self)); } else if self.pinned { unpin(self.x.as_mut_ptr()); } } } #[cfg(test)] mod tests { use super::*; #[test] fn test_pinned_vec() { let mut mem = PinnedVec::with_capacity(10); mem.set_pinnable(); mem.push(50); mem.resize(2, 10); assert_eq!(mem[0], 50); assert_eq!(mem[1], 10); assert_eq!(mem.len(), 2); assert!(!mem.is_empty()); let mut iter = mem.iter(); assert_eq!(*iter.next().unwrap(), 50); assert_eq!(*iter.next().unwrap(), 10); assert_eq!(iter.next(), None); } }