Use memory map to speed up snapshot untar (#24889)
* mmap * add no_os_memory_reporting_args to ledger-tool * use safe memory map package for unix. use regular file for windows * fallback to untaring with filebuf when mmap fails * update comments
This commit is contained in:
parent
cde15ff687
commit
3367e44671
|
@ -2461,6 +2461,15 @@ dependencies = [
|
|||
"winapi 0.3.9",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "mmarinus"
|
||||
version = "0.4.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "2b8098e6e7a3823fa237ef9569010d3af3894a1ae54c92f0c220b9e6357f9473"
|
||||
dependencies = [
|
||||
"libc",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "modular-bitfield"
|
||||
version = "0.11.2"
|
||||
|
@ -5625,6 +5634,7 @@ dependencies = [
|
|||
"libsecp256k1",
|
||||
"log",
|
||||
"memmap2",
|
||||
"mmarinus",
|
||||
"num-derive",
|
||||
"num-traits",
|
||||
"num_cpus",
|
||||
|
|
|
@ -2191,6 +2191,15 @@ dependencies = [
|
|||
"winapi 0.3.9",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "mmarinus"
|
||||
version = "0.4.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "2b8098e6e7a3823fa237ef9569010d3af3894a1ae54c92f0c220b9e6357f9473"
|
||||
dependencies = [
|
||||
"libc",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "modular-bitfield"
|
||||
version = "0.11.2"
|
||||
|
@ -5002,6 +5011,7 @@ dependencies = [
|
|||
"lazy_static",
|
||||
"log",
|
||||
"memmap2",
|
||||
"mmarinus",
|
||||
"num-derive",
|
||||
"num-traits",
|
||||
"num_cpus",
|
||||
|
|
|
@ -62,6 +62,9 @@ zstd = "0.11.1"
|
|||
crate-type = ["lib"]
|
||||
name = "solana_runtime"
|
||||
|
||||
[target.'cfg(unix)'.dependencies]
|
||||
mmarinus = "0.4.0"
|
||||
|
||||
[dev-dependencies]
|
||||
assert_matches = "1.5.0"
|
||||
ed25519-dalek = "=1.0.1"
|
||||
|
|
|
@ -24,7 +24,7 @@ use {
|
|||
// # bytes allocated and populated by reading ahead
|
||||
const TOTAL_BUFFER_BUDGET_DEFAULT: usize = 2_000_000_000;
|
||||
// data is read-ahead and saved in chunks of this many bytes
|
||||
const CHUNK_SIZE_DEFAULT: usize = 100_000_000;
|
||||
const CHUNK_SIZE_DEFAULT: usize = 50_000_000;
|
||||
|
||||
type OneSharedBuffer = Arc<Vec<u8>>;
|
||||
|
||||
|
|
|
@ -781,8 +781,8 @@ pub struct BankFromArchiveTimings {
|
|||
pub verify_snapshot_bank_us: u64,
|
||||
}
|
||||
|
||||
// From testing, 4 seems to be a sweet spot for ranges of 60M-360M accounts and 16-64 cores. This may need to be tuned later.
|
||||
const PARALLEL_UNTAR_READERS_DEFAULT: usize = 4;
|
||||
// From testing, 8 seems to be a sweet spot for ranges of 60M-360M accounts and 16-64 cores. This may need to be tuned later.
|
||||
const PARALLEL_UNTAR_READERS_DEFAULT: usize = 8;
|
||||
|
||||
/// Rebuild bank from snapshot archives. Handles either just a full snapshot, or both a full
|
||||
/// snapshot and an incremental snapshot.
|
||||
|
@ -1451,6 +1451,7 @@ fn unpack_snapshot_local<T: 'static + Read + std::marker::Send, F: Fn() -> T>(
|
|||
unpack_snapshot(&mut archive, ledger_dir, account_paths, parallel_selector)
|
||||
})
|
||||
.collect::<Vec<_>>();
|
||||
|
||||
let mut unpacked_append_vec_map = UnpackedAppendVecMap::new();
|
||||
for h in all_unpacked_append_vec_map {
|
||||
unpacked_append_vec_map.extend(h?);
|
||||
|
@ -1459,12 +1460,29 @@ fn unpack_snapshot_local<T: 'static + Read + std::marker::Send, F: Fn() -> T>(
|
|||
Ok(unpacked_append_vec_map)
|
||||
}
|
||||
|
||||
#[cfg(not(target_os = "linux"))]
|
||||
fn untar_snapshot_in<P: AsRef<Path>>(
|
||||
snapshot_tar: P,
|
||||
unpack_dir: &Path,
|
||||
account_paths: &[PathBuf],
|
||||
archive_format: ArchiveFormat,
|
||||
parallel_divisions: usize,
|
||||
) -> Result<UnpackedAppendVecMap> {
|
||||
untar_snapshot_file(
|
||||
snapshot_tar.as_ref(),
|
||||
unpack_dir,
|
||||
account_paths,
|
||||
archive_format,
|
||||
parallel_divisions,
|
||||
)
|
||||
}
|
||||
|
||||
fn untar_snapshot_file(
|
||||
snapshot_tar: &Path,
|
||||
unpack_dir: &Path,
|
||||
account_paths: &[PathBuf],
|
||||
archive_format: ArchiveFormat,
|
||||
parallel_divisions: usize,
|
||||
) -> Result<UnpackedAppendVecMap> {
|
||||
let open_file = || File::open(&snapshot_tar).unwrap();
|
||||
let account_paths_map = match archive_format {
|
||||
|
@ -1496,6 +1514,99 @@ fn untar_snapshot_in<P: AsRef<Path>>(
|
|||
Ok(account_paths_map)
|
||||
}
|
||||
|
||||
#[cfg(target_os = "linux")]
|
||||
fn untar_snapshot_in<P: AsRef<Path>>(
|
||||
snapshot_tar: P,
|
||||
unpack_dir: &Path,
|
||||
account_paths: &[PathBuf],
|
||||
archive_format: ArchiveFormat,
|
||||
parallel_divisions: usize,
|
||||
) -> Result<UnpackedAppendVecMap> {
|
||||
let ret = untar_snapshot_mmap(
|
||||
snapshot_tar.as_ref(),
|
||||
unpack_dir,
|
||||
account_paths,
|
||||
archive_format,
|
||||
parallel_divisions,
|
||||
);
|
||||
|
||||
if ret.is_ok() {
|
||||
ret
|
||||
} else {
|
||||
warn!(
|
||||
"Failed to memory map the snapshot file: {}",
|
||||
snapshot_tar.as_ref().display(),
|
||||
);
|
||||
|
||||
untar_snapshot_file(
|
||||
snapshot_tar.as_ref(),
|
||||
unpack_dir,
|
||||
account_paths,
|
||||
archive_format,
|
||||
parallel_divisions,
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(target_os = "linux")]
|
||||
impl<T> From<mmarinus::Error<T>> for SnapshotError {
|
||||
fn from(_: mmarinus::Error<T>) -> SnapshotError {
|
||||
SnapshotError::Io(std::io::Error::new(ErrorKind::Other, "mmap failure"))
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(target_os = "linux")]
|
||||
fn untar_snapshot_mmap(
|
||||
snapshot_tar: &Path,
|
||||
unpack_dir: &Path,
|
||||
account_paths: &[PathBuf],
|
||||
archive_format: ArchiveFormat,
|
||||
parallel_divisions: usize,
|
||||
) -> Result<UnpackedAppendVecMap> {
|
||||
use {
|
||||
mmarinus::{perms, Map, Private},
|
||||
std::slice,
|
||||
};
|
||||
|
||||
let mmap = Map::load(&snapshot_tar, Private, perms::Read)?;
|
||||
|
||||
// `unpack_snapshot_local` takes a BufReader creator, which requires a
|
||||
// static lifetime because of its background reader thread. Therefore, we
|
||||
// can't pass the &mmap. Instead, we construct and pass a a slice
|
||||
// explicitly. However, the following code is guaranteed to be safe because
|
||||
// the lifetime of mmap last till the end of the function while the usage of
|
||||
// mmap, BufReader's lifetime only last within fn unpack_snapshot_local.
|
||||
let len = &mmap[..].len();
|
||||
let ptr = &mmap[0] as *const u8;
|
||||
let slice = unsafe { slice::from_raw_parts(ptr, *len) };
|
||||
|
||||
let account_paths_map = match archive_format {
|
||||
ArchiveFormat::TarBzip2 => unpack_snapshot_local(
|
||||
|| BzDecoder::new(slice),
|
||||
unpack_dir,
|
||||
account_paths,
|
||||
parallel_divisions,
|
||||
)?,
|
||||
ArchiveFormat::TarGzip => unpack_snapshot_local(
|
||||
|| GzDecoder::new(slice),
|
||||
unpack_dir,
|
||||
account_paths,
|
||||
parallel_divisions,
|
||||
)?,
|
||||
ArchiveFormat::TarZstd => unpack_snapshot_local(
|
||||
|| zstd::stream::read::Decoder::new(slice).unwrap(),
|
||||
unpack_dir,
|
||||
account_paths,
|
||||
parallel_divisions,
|
||||
)?,
|
||||
ArchiveFormat::Tar => {
|
||||
unpack_snapshot_local(|| slice, unpack_dir, account_paths, parallel_divisions)?
|
||||
}
|
||||
};
|
||||
|
||||
Ok(account_paths_map)
|
||||
}
|
||||
|
||||
fn verify_unpacked_snapshots_dir_and_version(
|
||||
unpacked_snapshots_dir_and_version: &UnpackedSnapshotsDirAndVersion,
|
||||
) -> Result<(SnapshotVersion, BankSnapshotInfo)> {
|
||||
|
|
Loading…
Reference in New Issue