Use memory map to speed up snapshot untar (#24889)
* mmap * add no_os_memory_reporting_args to ledger-tool * use safe memory map package for unix. use regular file for windows * fallback to untaring with filebuf when mmap fails * update comments
This commit is contained in:
parent
cde15ff687
commit
3367e44671
|
@ -2461,6 +2461,15 @@ dependencies = [
|
||||||
"winapi 0.3.9",
|
"winapi 0.3.9",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "mmarinus"
|
||||||
|
version = "0.4.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "2b8098e6e7a3823fa237ef9569010d3af3894a1ae54c92f0c220b9e6357f9473"
|
||||||
|
dependencies = [
|
||||||
|
"libc",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "modular-bitfield"
|
name = "modular-bitfield"
|
||||||
version = "0.11.2"
|
version = "0.11.2"
|
||||||
|
@ -5625,6 +5634,7 @@ dependencies = [
|
||||||
"libsecp256k1",
|
"libsecp256k1",
|
||||||
"log",
|
"log",
|
||||||
"memmap2",
|
"memmap2",
|
||||||
|
"mmarinus",
|
||||||
"num-derive",
|
"num-derive",
|
||||||
"num-traits",
|
"num-traits",
|
||||||
"num_cpus",
|
"num_cpus",
|
||||||
|
|
|
@ -2191,6 +2191,15 @@ dependencies = [
|
||||||
"winapi 0.3.9",
|
"winapi 0.3.9",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "mmarinus"
|
||||||
|
version = "0.4.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "2b8098e6e7a3823fa237ef9569010d3af3894a1ae54c92f0c220b9e6357f9473"
|
||||||
|
dependencies = [
|
||||||
|
"libc",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "modular-bitfield"
|
name = "modular-bitfield"
|
||||||
version = "0.11.2"
|
version = "0.11.2"
|
||||||
|
@ -5002,6 +5011,7 @@ dependencies = [
|
||||||
"lazy_static",
|
"lazy_static",
|
||||||
"log",
|
"log",
|
||||||
"memmap2",
|
"memmap2",
|
||||||
|
"mmarinus",
|
||||||
"num-derive",
|
"num-derive",
|
||||||
"num-traits",
|
"num-traits",
|
||||||
"num_cpus",
|
"num_cpus",
|
||||||
|
|
|
@ -62,6 +62,9 @@ zstd = "0.11.1"
|
||||||
crate-type = ["lib"]
|
crate-type = ["lib"]
|
||||||
name = "solana_runtime"
|
name = "solana_runtime"
|
||||||
|
|
||||||
|
[target.'cfg(unix)'.dependencies]
|
||||||
|
mmarinus = "0.4.0"
|
||||||
|
|
||||||
[dev-dependencies]
|
[dev-dependencies]
|
||||||
assert_matches = "1.5.0"
|
assert_matches = "1.5.0"
|
||||||
ed25519-dalek = "=1.0.1"
|
ed25519-dalek = "=1.0.1"
|
||||||
|
|
|
@ -24,7 +24,7 @@ use {
|
||||||
// # bytes allocated and populated by reading ahead
|
// # bytes allocated and populated by reading ahead
|
||||||
const TOTAL_BUFFER_BUDGET_DEFAULT: usize = 2_000_000_000;
|
const TOTAL_BUFFER_BUDGET_DEFAULT: usize = 2_000_000_000;
|
||||||
// data is read-ahead and saved in chunks of this many bytes
|
// data is read-ahead and saved in chunks of this many bytes
|
||||||
const CHUNK_SIZE_DEFAULT: usize = 100_000_000;
|
const CHUNK_SIZE_DEFAULT: usize = 50_000_000;
|
||||||
|
|
||||||
type OneSharedBuffer = Arc<Vec<u8>>;
|
type OneSharedBuffer = Arc<Vec<u8>>;
|
||||||
|
|
||||||
|
|
|
@ -781,8 +781,8 @@ pub struct BankFromArchiveTimings {
|
||||||
pub verify_snapshot_bank_us: u64,
|
pub verify_snapshot_bank_us: u64,
|
||||||
}
|
}
|
||||||
|
|
||||||
// From testing, 4 seems to be a sweet spot for ranges of 60M-360M accounts and 16-64 cores. This may need to be tuned later.
|
// From testing, 8 seems to be a sweet spot for ranges of 60M-360M accounts and 16-64 cores. This may need to be tuned later.
|
||||||
const PARALLEL_UNTAR_READERS_DEFAULT: usize = 4;
|
const PARALLEL_UNTAR_READERS_DEFAULT: usize = 8;
|
||||||
|
|
||||||
/// Rebuild bank from snapshot archives. Handles either just a full snapshot, or both a full
|
/// Rebuild bank from snapshot archives. Handles either just a full snapshot, or both a full
|
||||||
/// snapshot and an incremental snapshot.
|
/// snapshot and an incremental snapshot.
|
||||||
|
@ -1451,6 +1451,7 @@ fn unpack_snapshot_local<T: 'static + Read + std::marker::Send, F: Fn() -> T>(
|
||||||
unpack_snapshot(&mut archive, ledger_dir, account_paths, parallel_selector)
|
unpack_snapshot(&mut archive, ledger_dir, account_paths, parallel_selector)
|
||||||
})
|
})
|
||||||
.collect::<Vec<_>>();
|
.collect::<Vec<_>>();
|
||||||
|
|
||||||
let mut unpacked_append_vec_map = UnpackedAppendVecMap::new();
|
let mut unpacked_append_vec_map = UnpackedAppendVecMap::new();
|
||||||
for h in all_unpacked_append_vec_map {
|
for h in all_unpacked_append_vec_map {
|
||||||
unpacked_append_vec_map.extend(h?);
|
unpacked_append_vec_map.extend(h?);
|
||||||
|
@ -1459,12 +1460,29 @@ fn unpack_snapshot_local<T: 'static + Read + std::marker::Send, F: Fn() -> T>(
|
||||||
Ok(unpacked_append_vec_map)
|
Ok(unpacked_append_vec_map)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[cfg(not(target_os = "linux"))]
|
||||||
fn untar_snapshot_in<P: AsRef<Path>>(
|
fn untar_snapshot_in<P: AsRef<Path>>(
|
||||||
snapshot_tar: P,
|
snapshot_tar: P,
|
||||||
unpack_dir: &Path,
|
unpack_dir: &Path,
|
||||||
account_paths: &[PathBuf],
|
account_paths: &[PathBuf],
|
||||||
archive_format: ArchiveFormat,
|
archive_format: ArchiveFormat,
|
||||||
parallel_divisions: usize,
|
parallel_divisions: usize,
|
||||||
|
) -> Result<UnpackedAppendVecMap> {
|
||||||
|
untar_snapshot_file(
|
||||||
|
snapshot_tar.as_ref(),
|
||||||
|
unpack_dir,
|
||||||
|
account_paths,
|
||||||
|
archive_format,
|
||||||
|
parallel_divisions,
|
||||||
|
)
|
||||||
|
}
|
||||||
|
|
||||||
|
fn untar_snapshot_file(
|
||||||
|
snapshot_tar: &Path,
|
||||||
|
unpack_dir: &Path,
|
||||||
|
account_paths: &[PathBuf],
|
||||||
|
archive_format: ArchiveFormat,
|
||||||
|
parallel_divisions: usize,
|
||||||
) -> Result<UnpackedAppendVecMap> {
|
) -> Result<UnpackedAppendVecMap> {
|
||||||
let open_file = || File::open(&snapshot_tar).unwrap();
|
let open_file = || File::open(&snapshot_tar).unwrap();
|
||||||
let account_paths_map = match archive_format {
|
let account_paths_map = match archive_format {
|
||||||
|
@ -1496,6 +1514,99 @@ fn untar_snapshot_in<P: AsRef<Path>>(
|
||||||
Ok(account_paths_map)
|
Ok(account_paths_map)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[cfg(target_os = "linux")]
|
||||||
|
fn untar_snapshot_in<P: AsRef<Path>>(
|
||||||
|
snapshot_tar: P,
|
||||||
|
unpack_dir: &Path,
|
||||||
|
account_paths: &[PathBuf],
|
||||||
|
archive_format: ArchiveFormat,
|
||||||
|
parallel_divisions: usize,
|
||||||
|
) -> Result<UnpackedAppendVecMap> {
|
||||||
|
let ret = untar_snapshot_mmap(
|
||||||
|
snapshot_tar.as_ref(),
|
||||||
|
unpack_dir,
|
||||||
|
account_paths,
|
||||||
|
archive_format,
|
||||||
|
parallel_divisions,
|
||||||
|
);
|
||||||
|
|
||||||
|
if ret.is_ok() {
|
||||||
|
ret
|
||||||
|
} else {
|
||||||
|
warn!(
|
||||||
|
"Failed to memory map the snapshot file: {}",
|
||||||
|
snapshot_tar.as_ref().display(),
|
||||||
|
);
|
||||||
|
|
||||||
|
untar_snapshot_file(
|
||||||
|
snapshot_tar.as_ref(),
|
||||||
|
unpack_dir,
|
||||||
|
account_paths,
|
||||||
|
archive_format,
|
||||||
|
parallel_divisions,
|
||||||
|
)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[cfg(target_os = "linux")]
|
||||||
|
impl<T> From<mmarinus::Error<T>> for SnapshotError {
|
||||||
|
fn from(_: mmarinus::Error<T>) -> SnapshotError {
|
||||||
|
SnapshotError::Io(std::io::Error::new(ErrorKind::Other, "mmap failure"))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[cfg(target_os = "linux")]
|
||||||
|
fn untar_snapshot_mmap(
|
||||||
|
snapshot_tar: &Path,
|
||||||
|
unpack_dir: &Path,
|
||||||
|
account_paths: &[PathBuf],
|
||||||
|
archive_format: ArchiveFormat,
|
||||||
|
parallel_divisions: usize,
|
||||||
|
) -> Result<UnpackedAppendVecMap> {
|
||||||
|
use {
|
||||||
|
mmarinus::{perms, Map, Private},
|
||||||
|
std::slice,
|
||||||
|
};
|
||||||
|
|
||||||
|
let mmap = Map::load(&snapshot_tar, Private, perms::Read)?;
|
||||||
|
|
||||||
|
// `unpack_snapshot_local` takes a BufReader creator, which requires a
|
||||||
|
// static lifetime because of its background reader thread. Therefore, we
|
||||||
|
// can't pass the &mmap. Instead, we construct and pass a slice
|
||||||
|
// explicitly. However, the following code is guaranteed to be safe because
|
||||||
|
// the lifetime of mmap lasts until the end of the function, while the usage of
|
||||||
|
// mmap (the BufReader's lifetime) only lasts within fn unpack_snapshot_local.
|
||||||
|
let len = &mmap[..].len();
|
||||||
|
let ptr = &mmap[0] as *const u8;
|
||||||
|
let slice = unsafe { slice::from_raw_parts(ptr, *len) };
|
||||||
|
|
||||||
|
let account_paths_map = match archive_format {
|
||||||
|
ArchiveFormat::TarBzip2 => unpack_snapshot_local(
|
||||||
|
|| BzDecoder::new(slice),
|
||||||
|
unpack_dir,
|
||||||
|
account_paths,
|
||||||
|
parallel_divisions,
|
||||||
|
)?,
|
||||||
|
ArchiveFormat::TarGzip => unpack_snapshot_local(
|
||||||
|
|| GzDecoder::new(slice),
|
||||||
|
unpack_dir,
|
||||||
|
account_paths,
|
||||||
|
parallel_divisions,
|
||||||
|
)?,
|
||||||
|
ArchiveFormat::TarZstd => unpack_snapshot_local(
|
||||||
|
|| zstd::stream::read::Decoder::new(slice).unwrap(),
|
||||||
|
unpack_dir,
|
||||||
|
account_paths,
|
||||||
|
parallel_divisions,
|
||||||
|
)?,
|
||||||
|
ArchiveFormat::Tar => {
|
||||||
|
unpack_snapshot_local(|| slice, unpack_dir, account_paths, parallel_divisions)?
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
Ok(account_paths_map)
|
||||||
|
}
|
||||||
|
|
||||||
fn verify_unpacked_snapshots_dir_and_version(
|
fn verify_unpacked_snapshots_dir_and_version(
|
||||||
unpacked_snapshots_dir_and_version: &UnpackedSnapshotsDirAndVersion,
|
unpacked_snapshots_dir_and_version: &UnpackedSnapshotsDirAndVersion,
|
||||||
) -> Result<(SnapshotVersion, BankSnapshotInfo)> {
|
) -> Result<(SnapshotVersion, BankSnapshotInfo)> {
|
||||||
|
|
Loading…
Reference in New Issue