Support lz4 for snapshot archives (#25089)
* add lz4
* add cargo package file
* fix tests
* use fast lz4 compression
* report snapshot archive format
* add test
* code review feedback
* add cargo.lock
* fix var name
* refactor archive format parsing and add default compress cli arg
* clippy
* add from_cli_arg test
* update cargo.lock
* add lz4 support for mmap
* cargo.lock
* clippy
This commit is contained in:
parent
f81c5df1f0
commit
3e44046a73
|
@ -2358,6 +2358,26 @@ dependencies = [
|
|||
"hashbrown",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "lz4"
|
||||
version = "1.23.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "4edcb94251b1c375c459e5abe9fb0168c1c826c3370172684844f8f3f8d1a885"
|
||||
dependencies = [
|
||||
"libc",
|
||||
"lz4-sys",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "lz4-sys"
|
||||
version = "1.9.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "d7be8908e2ed6f31c02db8a9fa962f03e36c53fbfde437363eae3306b85d7e17"
|
||||
dependencies = [
|
||||
"cc",
|
||||
"libc",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "maplit"
|
||||
version = "1.0.2"
|
||||
|
@ -5629,6 +5649,7 @@ dependencies = [
|
|||
"lazy_static",
|
||||
"libsecp256k1",
|
||||
"log",
|
||||
"lz4",
|
||||
"memmap2",
|
||||
"num-derive",
|
||||
"num-traits",
|
||||
|
@ -5657,6 +5678,8 @@ dependencies = [
|
|||
"solana-vote-program",
|
||||
"solana-zk-token-proof-program",
|
||||
"solana-zk-token-sdk 1.11.0",
|
||||
"strum",
|
||||
"strum_macros",
|
||||
"symlink",
|
||||
"tar",
|
||||
"tempfile",
|
||||
|
@ -6352,6 +6375,28 @@ version = "0.10.0"
|
|||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "73473c0e59e6d5812c5dfe2a064a6444949f089e20eec9a2e5506596494e4623"
|
||||
|
||||
[[package]]
|
||||
name = "strum"
|
||||
version = "0.24.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "e96acfc1b70604b8b2f1ffa4c57e59176c7dbb05d556c71ecd2f5498a1dee7f8"
|
||||
dependencies = [
|
||||
"strum_macros",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "strum_macros"
|
||||
version = "0.24.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "6878079b17446e4d3eba6192bb0a2950d5b14f0ed8424b852310e5a94345d0ef"
|
||||
dependencies = [
|
||||
"heck 0.4.0",
|
||||
"proc-macro2 1.0.38",
|
||||
"quote 1.0.18",
|
||||
"rustversion",
|
||||
"syn 1.0.93",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "subtle"
|
||||
version = "2.4.1"
|
||||
|
|
|
@ -281,6 +281,7 @@ pub fn download_snapshot_archive<'a, 'b>(
|
|||
ArchiveFormat::TarZstd,
|
||||
ArchiveFormat::TarGzip,
|
||||
ArchiveFormat::TarBzip2,
|
||||
ArchiveFormat::TarLz4,
|
||||
ArchiveFormat::Tar, // `solana-test-validator` creates uncompressed snapshots
|
||||
] {
|
||||
let destination_path = match snapshot_type {
|
||||
|
|
|
@ -44,8 +44,9 @@ use {
|
|||
snapshot_config::SnapshotConfig,
|
||||
snapshot_hash::StartingSnapshotHashes,
|
||||
snapshot_utils::{
|
||||
self, ArchiveFormat, SnapshotVersion, DEFAULT_MAX_FULL_SNAPSHOT_ARCHIVES_TO_RETAIN,
|
||||
DEFAULT_MAX_INCREMENTAL_SNAPSHOT_ARCHIVES_TO_RETAIN,
|
||||
self, ArchiveFormat, SnapshotVersion, DEFAULT_ARCHIVE_COMPRESSION,
|
||||
DEFAULT_MAX_FULL_SNAPSHOT_ARCHIVES_TO_RETAIN,
|
||||
DEFAULT_MAX_INCREMENTAL_SNAPSHOT_ARCHIVES_TO_RETAIN, SUPPORTED_ARCHIVE_COMPRESSION,
|
||||
},
|
||||
},
|
||||
solana_sdk::{
|
||||
|
@ -1524,7 +1525,17 @@ fn main() {
|
|||
base for the incremental snapshot.")
|
||||
.conflicts_with("no_snapshot")
|
||||
)
|
||||
).subcommand(
|
||||
.arg(
|
||||
Arg::with_name("snapshot_archive_format")
|
||||
.long("snapshot-archive-format")
|
||||
.possible_values(SUPPORTED_ARCHIVE_COMPRESSION)
|
||||
.default_value(DEFAULT_ARCHIVE_COMPRESSION)
|
||||
.value_name("ARCHIVE_TYPE")
|
||||
.takes_value(true)
|
||||
.help("Snapshot archive format to use.")
|
||||
.conflicts_with("no_snapshot")
|
||||
)
|
||||
).subcommand(
|
||||
SubCommand::with_name("accounts")
|
||||
.about("Print account stats and contents after processing the ledger")
|
||||
.arg(&no_snapshot_arg)
|
||||
|
@ -2292,6 +2303,14 @@ fn main() {
|
|||
},
|
||||
);
|
||||
|
||||
let snapshot_archive_format = {
|
||||
let archive_format_str =
|
||||
value_t_or_exit!(matches, "snapshot_archive_format", String);
|
||||
ArchiveFormat::from_cli_arg(&archive_format_str).unwrap_or_else(|| {
|
||||
panic!("Archive format not recognized: {}", archive_format_str)
|
||||
})
|
||||
};
|
||||
|
||||
let maximum_full_snapshot_archives_to_retain =
|
||||
value_t_or_exit!(arg_matches, "maximum_full_snapshots_to_retain", usize);
|
||||
let maximum_incremental_snapshot_archives_to_retain = value_t_or_exit!(
|
||||
|
@ -2568,7 +2587,7 @@ fn main() {
|
|||
Some(snapshot_version),
|
||||
output_directory.clone(),
|
||||
output_directory,
|
||||
ArchiveFormat::TarZstd,
|
||||
snapshot_archive_format,
|
||||
maximum_full_snapshot_archives_to_retain,
|
||||
maximum_incremental_snapshot_archives_to_retain,
|
||||
)
|
||||
|
@ -2592,7 +2611,7 @@ fn main() {
|
|||
Some(snapshot_version),
|
||||
output_directory.clone(),
|
||||
output_directory,
|
||||
ArchiveFormat::TarZstd,
|
||||
snapshot_archive_format,
|
||||
maximum_full_snapshot_archives_to_retain,
|
||||
maximum_incremental_snapshot_archives_to_retain,
|
||||
)
|
||||
|
|
|
@ -2088,6 +2088,26 @@ dependencies = [
|
|||
"hashbrown",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "lz4"
|
||||
version = "1.23.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "4edcb94251b1c375c459e5abe9fb0168c1c826c3370172684844f8f3f8d1a885"
|
||||
dependencies = [
|
||||
"libc",
|
||||
"lz4-sys",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "lz4-sys"
|
||||
version = "1.9.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "d7be8908e2ed6f31c02db8a9fa962f03e36c53fbfde437363eae3306b85d7e17"
|
||||
dependencies = [
|
||||
"cc",
|
||||
"libc",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "maplit"
|
||||
version = "1.0.2"
|
||||
|
@ -5001,6 +5021,7 @@ dependencies = [
|
|||
"itertools",
|
||||
"lazy_static",
|
||||
"log",
|
||||
"lz4",
|
||||
"memmap2",
|
||||
"num-derive",
|
||||
"num-traits",
|
||||
|
@ -5027,6 +5048,8 @@ dependencies = [
|
|||
"solana-vote-program",
|
||||
"solana-zk-token-proof-program",
|
||||
"solana-zk-token-sdk 1.11.0",
|
||||
"strum",
|
||||
"strum_macros",
|
||||
"symlink",
|
||||
"tar",
|
||||
"tempfile",
|
||||
|
@ -5604,6 +5627,28 @@ version = "0.10.0"
|
|||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "73473c0e59e6d5812c5dfe2a064a6444949f089e20eec9a2e5506596494e4623"
|
||||
|
||||
[[package]]
|
||||
name = "strum"
|
||||
version = "0.24.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "e96acfc1b70604b8b2f1ffa4c57e59176c7dbb05d556c71ecd2f5498a1dee7f8"
|
||||
dependencies = [
|
||||
"strum_macros",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "strum_macros"
|
||||
version = "0.24.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "6878079b17446e4d3eba6192bb0a2950d5b14f0ed8424b852310e5a94345d0ef"
|
||||
dependencies = [
|
||||
"heck 0.4.0",
|
||||
"proc-macro2 1.0.38",
|
||||
"quote 1.0.18",
|
||||
"rustversion",
|
||||
"syn 1.0.93",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "subtle"
|
||||
version = "2.4.1"
|
||||
|
|
|
@ -57,6 +57,9 @@ tar = "0.4.38"
|
|||
tempfile = "3.3.0"
|
||||
thiserror = "1.0"
|
||||
zstd = "0.11.2"
|
||||
lz4 = "1.23.3"
|
||||
strum_macros = "0.24"
|
||||
strum = { version = "0.24", features = ["derive"] }
|
||||
|
||||
[lib]
|
||||
crate-type = ["lib"]
|
||||
|
|
|
@ -370,6 +370,12 @@ pub fn archive_snapshot_package(
|
|||
do_archive_files(&mut encoder)?;
|
||||
encoder.finish()?;
|
||||
}
|
||||
ArchiveFormat::TarLz4 => {
|
||||
let mut encoder = lz4::EncoderBuilder::new().level(1).build(archive_file)?;
|
||||
do_archive_files(&mut encoder)?;
|
||||
let (_output, result) = encoder.finish();
|
||||
result?
|
||||
}
|
||||
ArchiveFormat::Tar => {
|
||||
do_archive_files(&mut archive_file)?;
|
||||
}
|
||||
|
@ -401,6 +407,11 @@ pub fn archive_snapshot_package(
|
|||
datapoint_info!(
|
||||
"archive-snapshot-package",
|
||||
("slot", snapshot_package.slot(), i64),
|
||||
(
|
||||
"archive_format",
|
||||
snapshot_package.archive_format().to_string(),
|
||||
String
|
||||
),
|
||||
("duration_ms", timer.as_ms(), i64),
|
||||
(
|
||||
if snapshot_package.snapshot_type.is_full_snapshot() {
|
||||
|
@ -1489,6 +1500,12 @@ fn untar_snapshot_file(
|
|||
account_paths,
|
||||
parallel_divisions,
|
||||
)?,
|
||||
ArchiveFormat::TarLz4 => unpack_snapshot_local(
|
||||
|| lz4::Decoder::new(BufReader::new(open_file())).unwrap(),
|
||||
unpack_dir,
|
||||
account_paths,
|
||||
parallel_divisions,
|
||||
)?,
|
||||
ArchiveFormat::Tar => unpack_snapshot_local(
|
||||
|| BufReader::new(open_file()),
|
||||
unpack_dir,
|
||||
|
@ -1579,6 +1596,12 @@ fn untar_snapshot_mmap(
|
|||
account_paths,
|
||||
parallel_divisions,
|
||||
)?,
|
||||
ArchiveFormat::TarLz4 => unpack_snapshot_local(
|
||||
|| lz4::Decoder::new(slice).unwrap(),
|
||||
unpack_dir,
|
||||
account_paths,
|
||||
parallel_divisions,
|
||||
)?,
|
||||
ArchiveFormat::Tar => {
|
||||
unpack_snapshot_local(|| slice, unpack_dir, account_paths, parallel_divisions)?
|
||||
}
|
||||
|
|
|
@ -1,16 +1,24 @@
|
|||
use std::str::FromStr;
|
||||
use {
|
||||
std::{fmt, str::FromStr},
|
||||
strum::Display,
|
||||
};
|
||||
|
||||
/// Command-line values accepted for the snapshot archive format option.
///
/// Each entry is a string that `ArchiveFormat::from_cli_arg` recognizes; "tar" and
/// "none" both select the uncompressed `ArchiveFormat::Tar`. The order matters to
/// `test_from_cli_arg`, which pairs these entries positionally with its expected values.
pub const SUPPORTED_ARCHIVE_COMPRESSION: &[&str] = &["bz2", "gzip", "zstd", "lz4", "tar", "none"];
|
||||
/// Command-line value used when no archive format is given explicitly.
pub const DEFAULT_ARCHIVE_COMPRESSION: &str = "zstd";
|
||||
|
||||
// File extensions associated with each `ArchiveFormat` variant; these are the strings
// returned by `ArchiveFormat::extension()` and matched when parsing an extension back
// into an `ArchiveFormat`.
pub const TAR_BZIP2_EXTENSION: &str = "tar.bz2";
|
||||
pub const TAR_GZIP_EXTENSION: &str = "tar.gz";
|
||||
pub const TAR_ZSTD_EXTENSION: &str = "tar.zst";
|
||||
pub const TAR_LZ4_EXTENSION: &str = "tar.lz4";
|
||||
pub const TAR_EXTENSION: &str = "tar";
|
||||
|
||||
/// The different archive formats used for snapshots
|
||||
#[derive(Copy, Clone, Debug, Eq, PartialEq)]
|
||||
#[derive(Copy, Clone, Debug, Eq, PartialEq, Display)]
|
||||
pub enum ArchiveFormat {
|
||||
TarBzip2,
|
||||
TarGzip,
|
||||
TarZstd,
|
||||
TarLz4,
|
||||
Tar,
|
||||
}
|
||||
|
||||
|
@ -21,9 +29,21 @@ impl ArchiveFormat {
|
|||
ArchiveFormat::TarBzip2 => TAR_BZIP2_EXTENSION,
|
||||
ArchiveFormat::TarGzip => TAR_GZIP_EXTENSION,
|
||||
ArchiveFormat::TarZstd => TAR_ZSTD_EXTENSION,
|
||||
ArchiveFormat::TarLz4 => TAR_LZ4_EXTENSION,
|
||||
ArchiveFormat::Tar => TAR_EXTENSION,
|
||||
}
|
||||
}
|
||||
|
||||
/// Converts a command-line archive format value (e.g. the argument of
/// `--snapshot-archive-format`) into the corresponding `ArchiveFormat`.
///
/// Matching is exact and case-sensitive. Returns `None` for any string that is not
/// one of the recognized values, leaving error reporting to the caller.
pub fn from_cli_arg(archive_format_str: &str) -> Option<ArchiveFormat> {
|
||||
match archive_format_str {
|
||||
"bz2" => Some(ArchiveFormat::TarBzip2),
|
||||
"gzip" => Some(ArchiveFormat::TarGzip),
|
||||
"zstd" => Some(ArchiveFormat::TarZstd),
|
||||
"lz4" => Some(ArchiveFormat::TarLz4),
|
||||
// "tar" and "none" are synonyms: both select an uncompressed tar archive.
"tar" | "none" => Some(ArchiveFormat::Tar),
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Change this to `impl<S: AsRef<str>> TryFrom<S> for ArchiveFormat [...]`
|
||||
|
@ -36,8 +56,9 @@ impl TryFrom<&str> for ArchiveFormat {
|
|||
TAR_BZIP2_EXTENSION => Ok(ArchiveFormat::TarBzip2),
|
||||
TAR_GZIP_EXTENSION => Ok(ArchiveFormat::TarGzip),
|
||||
TAR_ZSTD_EXTENSION => Ok(ArchiveFormat::TarZstd),
|
||||
TAR_LZ4_EXTENSION => Ok(ArchiveFormat::TarLz4),
|
||||
TAR_EXTENSION => Ok(ArchiveFormat::Tar),
|
||||
_ => Err(ParseError::InvalidExtension),
|
||||
_ => Err(ParseError::InvalidExtension(extension.to_string())),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -50,14 +71,24 @@ impl FromStr for ArchiveFormat {
|
|||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Copy, Clone, Eq, PartialEq)]
|
||||
#[derive(Debug, Clone, Eq, PartialEq)]
|
||||
pub enum ParseError {
|
||||
InvalidExtension,
|
||||
InvalidExtension(String),
|
||||
}
|
||||
|
||||
/// Human-readable rendering of a `ParseError`.
impl fmt::Display for ParseError {
|
||||
/// Writes a message that names the rejected extension string carried by the error.
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||
match self {
|
||||
ParseError::InvalidExtension(extension) => {
|
||||
write!(f, "Invalid archive extension: {}", extension)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
use {super::*, std::iter::zip};
|
||||
const INVALID_EXTENSION: &str = "zip";
|
||||
|
||||
#[test]
|
||||
|
@ -65,6 +96,7 @@ mod tests {
|
|||
assert_eq!(ArchiveFormat::TarBzip2.extension(), TAR_BZIP2_EXTENSION);
|
||||
assert_eq!(ArchiveFormat::TarGzip.extension(), TAR_GZIP_EXTENSION);
|
||||
assert_eq!(ArchiveFormat::TarZstd.extension(), TAR_ZSTD_EXTENSION);
|
||||
assert_eq!(ArchiveFormat::TarLz4.extension(), TAR_LZ4_EXTENSION);
|
||||
assert_eq!(ArchiveFormat::Tar.extension(), TAR_EXTENSION);
|
||||
}
|
||||
|
||||
|
@ -82,13 +114,17 @@ mod tests {
|
|||
ArchiveFormat::try_from(TAR_ZSTD_EXTENSION),
|
||||
Ok(ArchiveFormat::TarZstd)
|
||||
);
|
||||
assert_eq!(
|
||||
ArchiveFormat::try_from(TAR_LZ4_EXTENSION),
|
||||
Ok(ArchiveFormat::TarLz4)
|
||||
);
|
||||
assert_eq!(
|
||||
ArchiveFormat::try_from(TAR_EXTENSION),
|
||||
Ok(ArchiveFormat::Tar)
|
||||
);
|
||||
assert_eq!(
|
||||
ArchiveFormat::try_from(INVALID_EXTENSION),
|
||||
Err(ParseError::InvalidExtension)
|
||||
Err(ParseError::InvalidExtension(INVALID_EXTENSION.to_string()))
|
||||
);
|
||||
}
|
||||
|
||||
|
@ -106,13 +142,35 @@ mod tests {
|
|||
ArchiveFormat::from_str(TAR_ZSTD_EXTENSION),
|
||||
Ok(ArchiveFormat::TarZstd)
|
||||
);
|
||||
assert_eq!(
|
||||
ArchiveFormat::from_str(TAR_LZ4_EXTENSION),
|
||||
Ok(ArchiveFormat::TarLz4)
|
||||
);
|
||||
assert_eq!(
|
||||
ArchiveFormat::from_str(TAR_EXTENSION),
|
||||
Ok(ArchiveFormat::Tar)
|
||||
);
|
||||
assert_eq!(
|
||||
ArchiveFormat::from_str(INVALID_EXTENSION),
|
||||
Err(ParseError::InvalidExtension)
|
||||
Err(ParseError::InvalidExtension(INVALID_EXTENSION.to_string()))
|
||||
);
|
||||
}
|
||||
|
||||
// Checks that every entry of SUPPORTED_ARCHIVE_COMPRESSION parses to the expected
// ArchiveFormat via `from_cli_arg`, and that an unknown string yields `None`.
#[test]
|
||||
fn test_from_cli_arg() {
|
||||
// Expected results, listed in the same order as SUPPORTED_ARCHIVE_COMPRESSION.
// The final two entries are both `Tar` because "tar" and "none" are synonyms.
let golden = [
|
||||
Some(ArchiveFormat::TarBzip2),
|
||||
Some(ArchiveFormat::TarGzip),
|
||||
Some(ArchiveFormat::TarZstd),
|
||||
Some(ArchiveFormat::TarLz4),
|
||||
Some(ArchiveFormat::Tar),
|
||||
Some(ArchiveFormat::Tar),
|
||||
];
|
||||
|
||||
// NOTE(review): `zip` stops at the shorter of the two sequences, so an entry added
// to only one of the lists would go unchecked without failing this test — consider
// also asserting that the lengths match.
for (arg, expected) in zip(SUPPORTED_ARCHIVE_COMPRESSION.iter(), golden.into_iter()) {
|
||||
assert_eq!(ArchiveFormat::from_cli_arg(arg), expected);
|
||||
}
|
||||
|
||||
// A value outside the supported list must be rejected.
assert_eq!(ArchiveFormat::from_cli_arg("bad"), None);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -54,10 +54,11 @@ use {
|
|||
runtime_config::RuntimeConfig,
|
||||
snapshot_config::SnapshotConfig,
|
||||
snapshot_utils::{
|
||||
self, ArchiveFormat, SnapshotVersion, DEFAULT_FULL_SNAPSHOT_ARCHIVE_INTERVAL_SLOTS,
|
||||
self, ArchiveFormat, SnapshotVersion, DEFAULT_ARCHIVE_COMPRESSION,
|
||||
DEFAULT_FULL_SNAPSHOT_ARCHIVE_INTERVAL_SLOTS,
|
||||
DEFAULT_INCREMENTAL_SNAPSHOT_ARCHIVE_INTERVAL_SLOTS,
|
||||
DEFAULT_MAX_FULL_SNAPSHOT_ARCHIVES_TO_RETAIN,
|
||||
DEFAULT_MAX_INCREMENTAL_SNAPSHOT_ARCHIVES_TO_RETAIN,
|
||||
DEFAULT_MAX_INCREMENTAL_SNAPSHOT_ARCHIVES_TO_RETAIN, SUPPORTED_ARCHIVE_COMPRESSION,
|
||||
},
|
||||
},
|
||||
solana_sdk::{
|
||||
|
@ -1483,8 +1484,8 @@ pub fn main() {
|
|||
Arg::with_name("snapshot_archive_format")
|
||||
.long("snapshot-archive-format")
|
||||
.alias("snapshot-compression") // Legacy name used by Solana v1.5.x and older
|
||||
.possible_values(&["bz2", "gzip", "zstd", "tar", "none"])
|
||||
.default_value("zstd")
|
||||
.possible_values(SUPPORTED_ARCHIVE_COMPRESSION)
|
||||
.default_value(DEFAULT_ARCHIVE_COMPRESSION)
|
||||
.value_name("ARCHIVE_TYPE")
|
||||
.takes_value(true)
|
||||
.help("Snapshot archive format to use."),
|
||||
|
@ -2653,13 +2654,8 @@ pub fn main() {
|
|||
|
||||
let archive_format = {
|
||||
let archive_format_str = value_t_or_exit!(matches, "snapshot_archive_format", String);
|
||||
match archive_format_str.as_str() {
|
||||
"bz2" => ArchiveFormat::TarBzip2,
|
||||
"gzip" => ArchiveFormat::TarGzip,
|
||||
"zstd" => ArchiveFormat::TarZstd,
|
||||
"tar" | "none" => ArchiveFormat::Tar,
|
||||
_ => panic!("Archive format not recognized: {}", archive_format_str),
|
||||
}
|
||||
ArchiveFormat::from_cli_arg(&archive_format_str)
|
||||
.unwrap_or_else(|| panic!("Archive format not recognized: {}", archive_format_str))
|
||||
};
|
||||
|
||||
let snapshot_version =
|
||||
|
|
Loading…
Reference in New Issue