Support lz4 for snapshot archives (#25089)

* add lz4

* add cargo package file

* fix tests

* use fast lz4 compression

* report snapshot archive format

* add test

* code review feedback

* add cargo.lock

* fix var name

* refactor archive format parsing and add default compress cli arg

* clippy

* add from_cli_arg test

* update cargo.lock

* add lz4 support for mmap

* cargo.lock

* clippy
This commit is contained in:
HaoranYi 2022-05-16 12:44:15 -05:00 committed by GitHub
parent f81c5df1f0
commit 3e44046a73
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
8 changed files with 214 additions and 24 deletions

45
Cargo.lock generated
View File

@ -2358,6 +2358,26 @@ dependencies = [
"hashbrown", "hashbrown",
] ]
[[package]]
name = "lz4"
version = "1.23.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4edcb94251b1c375c459e5abe9fb0168c1c826c3370172684844f8f3f8d1a885"
dependencies = [
"libc",
"lz4-sys",
]
[[package]]
name = "lz4-sys"
version = "1.9.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d7be8908e2ed6f31c02db8a9fa962f03e36c53fbfde437363eae3306b85d7e17"
dependencies = [
"cc",
"libc",
]
[[package]] [[package]]
name = "maplit" name = "maplit"
version = "1.0.2" version = "1.0.2"
@ -5629,6 +5649,7 @@ dependencies = [
"lazy_static", "lazy_static",
"libsecp256k1", "libsecp256k1",
"log", "log",
"lz4",
"memmap2", "memmap2",
"num-derive", "num-derive",
"num-traits", "num-traits",
@ -5657,6 +5678,8 @@ dependencies = [
"solana-vote-program", "solana-vote-program",
"solana-zk-token-proof-program", "solana-zk-token-proof-program",
"solana-zk-token-sdk 1.11.0", "solana-zk-token-sdk 1.11.0",
"strum",
"strum_macros",
"symlink", "symlink",
"tar", "tar",
"tempfile", "tempfile",
@ -6352,6 +6375,28 @@ version = "0.10.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "73473c0e59e6d5812c5dfe2a064a6444949f089e20eec9a2e5506596494e4623" checksum = "73473c0e59e6d5812c5dfe2a064a6444949f089e20eec9a2e5506596494e4623"
[[package]]
name = "strum"
version = "0.24.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e96acfc1b70604b8b2f1ffa4c57e59176c7dbb05d556c71ecd2f5498a1dee7f8"
dependencies = [
"strum_macros",
]
[[package]]
name = "strum_macros"
version = "0.24.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6878079b17446e4d3eba6192bb0a2950d5b14f0ed8424b852310e5a94345d0ef"
dependencies = [
"heck 0.4.0",
"proc-macro2 1.0.38",
"quote 1.0.18",
"rustversion",
"syn 1.0.93",
]
[[package]] [[package]]
name = "subtle" name = "subtle"
version = "2.4.1" version = "2.4.1"

View File

@ -281,6 +281,7 @@ pub fn download_snapshot_archive<'a, 'b>(
ArchiveFormat::TarZstd, ArchiveFormat::TarZstd,
ArchiveFormat::TarGzip, ArchiveFormat::TarGzip,
ArchiveFormat::TarBzip2, ArchiveFormat::TarBzip2,
ArchiveFormat::TarLz4,
ArchiveFormat::Tar, // `solana-test-validator` creates uncompressed snapshots ArchiveFormat::Tar, // `solana-test-validator` creates uncompressed snapshots
] { ] {
let destination_path = match snapshot_type { let destination_path = match snapshot_type {

View File

@ -44,8 +44,9 @@ use {
snapshot_config::SnapshotConfig, snapshot_config::SnapshotConfig,
snapshot_hash::StartingSnapshotHashes, snapshot_hash::StartingSnapshotHashes,
snapshot_utils::{ snapshot_utils::{
self, ArchiveFormat, SnapshotVersion, DEFAULT_MAX_FULL_SNAPSHOT_ARCHIVES_TO_RETAIN, self, ArchiveFormat, SnapshotVersion, DEFAULT_ARCHIVE_COMPRESSION,
DEFAULT_MAX_INCREMENTAL_SNAPSHOT_ARCHIVES_TO_RETAIN, DEFAULT_MAX_FULL_SNAPSHOT_ARCHIVES_TO_RETAIN,
DEFAULT_MAX_INCREMENTAL_SNAPSHOT_ARCHIVES_TO_RETAIN, SUPPORTED_ARCHIVE_COMPRESSION,
}, },
}, },
solana_sdk::{ solana_sdk::{
@ -1524,7 +1525,17 @@ fn main() {
base for the incremental snapshot.") base for the incremental snapshot.")
.conflicts_with("no_snapshot") .conflicts_with("no_snapshot")
) )
).subcommand( .arg(
Arg::with_name("snapshot_archive_format")
.long("snapshot-archive-format")
.possible_values(SUPPORTED_ARCHIVE_COMPRESSION)
.default_value(DEFAULT_ARCHIVE_COMPRESSION)
.value_name("ARCHIVE_TYPE")
.takes_value(true)
.help("Snapshot archive format to use.")
.conflicts_with("no_snapshot")
)
).subcommand(
SubCommand::with_name("accounts") SubCommand::with_name("accounts")
.about("Print account stats and contents after processing the ledger") .about("Print account stats and contents after processing the ledger")
.arg(&no_snapshot_arg) .arg(&no_snapshot_arg)
@ -2292,6 +2303,14 @@ fn main() {
}, },
); );
// Resolve the archive format requested through `--snapshot-archive-format`.
//
// The flag is registered on a subcommand, so it has to be read from that subcommand's
// `arg_matches` (exactly like the retention options read right after this), not from the
// top-level `matches`, which does not define the argument.
let snapshot_archive_format = {
    let archive_format_str =
        value_t_or_exit!(arg_matches, "snapshot_archive_format", String);
    // clap already limits the input to `SUPPORTED_ARCHIVE_COMPRESSION`, so reaching the
    // panic means that list and `ArchiveFormat::from_cli_arg` have drifted apart.
    ArchiveFormat::from_cli_arg(&archive_format_str).unwrap_or_else(|| {
        panic!("Archive format not recognized: {}", archive_format_str)
    })
};
let maximum_full_snapshot_archives_to_retain = let maximum_full_snapshot_archives_to_retain =
value_t_or_exit!(arg_matches, "maximum_full_snapshots_to_retain", usize); value_t_or_exit!(arg_matches, "maximum_full_snapshots_to_retain", usize);
let maximum_incremental_snapshot_archives_to_retain = value_t_or_exit!( let maximum_incremental_snapshot_archives_to_retain = value_t_or_exit!(
@ -2568,7 +2587,7 @@ fn main() {
Some(snapshot_version), Some(snapshot_version),
output_directory.clone(), output_directory.clone(),
output_directory, output_directory,
ArchiveFormat::TarZstd, snapshot_archive_format,
maximum_full_snapshot_archives_to_retain, maximum_full_snapshot_archives_to_retain,
maximum_incremental_snapshot_archives_to_retain, maximum_incremental_snapshot_archives_to_retain,
) )
@ -2592,7 +2611,7 @@ fn main() {
Some(snapshot_version), Some(snapshot_version),
output_directory.clone(), output_directory.clone(),
output_directory, output_directory,
ArchiveFormat::TarZstd, snapshot_archive_format,
maximum_full_snapshot_archives_to_retain, maximum_full_snapshot_archives_to_retain,
maximum_incremental_snapshot_archives_to_retain, maximum_incremental_snapshot_archives_to_retain,
) )

View File

@ -2088,6 +2088,26 @@ dependencies = [
"hashbrown", "hashbrown",
] ]
[[package]]
name = "lz4"
version = "1.23.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4edcb94251b1c375c459e5abe9fb0168c1c826c3370172684844f8f3f8d1a885"
dependencies = [
"libc",
"lz4-sys",
]
[[package]]
name = "lz4-sys"
version = "1.9.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d7be8908e2ed6f31c02db8a9fa962f03e36c53fbfde437363eae3306b85d7e17"
dependencies = [
"cc",
"libc",
]
[[package]] [[package]]
name = "maplit" name = "maplit"
version = "1.0.2" version = "1.0.2"
@ -5001,6 +5021,7 @@ dependencies = [
"itertools", "itertools",
"lazy_static", "lazy_static",
"log", "log",
"lz4",
"memmap2", "memmap2",
"num-derive", "num-derive",
"num-traits", "num-traits",
@ -5027,6 +5048,8 @@ dependencies = [
"solana-vote-program", "solana-vote-program",
"solana-zk-token-proof-program", "solana-zk-token-proof-program",
"solana-zk-token-sdk 1.11.0", "solana-zk-token-sdk 1.11.0",
"strum",
"strum_macros",
"symlink", "symlink",
"tar", "tar",
"tempfile", "tempfile",
@ -5604,6 +5627,28 @@ version = "0.10.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "73473c0e59e6d5812c5dfe2a064a6444949f089e20eec9a2e5506596494e4623" checksum = "73473c0e59e6d5812c5dfe2a064a6444949f089e20eec9a2e5506596494e4623"
[[package]]
name = "strum"
version = "0.24.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e96acfc1b70604b8b2f1ffa4c57e59176c7dbb05d556c71ecd2f5498a1dee7f8"
dependencies = [
"strum_macros",
]
[[package]]
name = "strum_macros"
version = "0.24.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6878079b17446e4d3eba6192bb0a2950d5b14f0ed8424b852310e5a94345d0ef"
dependencies = [
"heck 0.4.0",
"proc-macro2 1.0.38",
"quote 1.0.18",
"rustversion",
"syn 1.0.93",
]
[[package]] [[package]]
name = "subtle" name = "subtle"
version = "2.4.1" version = "2.4.1"

View File

@ -57,6 +57,9 @@ tar = "0.4.38"
tempfile = "3.3.0" tempfile = "3.3.0"
thiserror = "1.0" thiserror = "1.0"
zstd = "0.11.2" zstd = "0.11.2"
lz4 = "1.23.3"
strum_macros = "0.24"
strum = { version = "0.24", features = ["derive"] }
[lib] [lib]
crate-type = ["lib"] crate-type = ["lib"]

View File

@ -370,6 +370,12 @@ pub fn archive_snapshot_package(
do_archive_files(&mut encoder)?; do_archive_files(&mut encoder)?;
encoder.finish()?; encoder.finish()?;
} }
ArchiveFormat::TarLz4 => {
let mut encoder = lz4::EncoderBuilder::new().level(1).build(archive_file)?;
do_archive_files(&mut encoder)?;
let (_output, result) = encoder.finish();
result?
}
ArchiveFormat::Tar => { ArchiveFormat::Tar => {
do_archive_files(&mut archive_file)?; do_archive_files(&mut archive_file)?;
} }
@ -401,6 +407,11 @@ pub fn archive_snapshot_package(
datapoint_info!( datapoint_info!(
"archive-snapshot-package", "archive-snapshot-package",
("slot", snapshot_package.slot(), i64), ("slot", snapshot_package.slot(), i64),
(
"archive_format",
snapshot_package.archive_format().to_string(),
String
),
("duration_ms", timer.as_ms(), i64), ("duration_ms", timer.as_ms(), i64),
( (
if snapshot_package.snapshot_type.is_full_snapshot() { if snapshot_package.snapshot_type.is_full_snapshot() {
@ -1489,6 +1500,12 @@ fn untar_snapshot_file(
account_paths, account_paths,
parallel_divisions, parallel_divisions,
)?, )?,
ArchiveFormat::TarLz4 => unpack_snapshot_local(
|| lz4::Decoder::new(BufReader::new(open_file())).unwrap(),
unpack_dir,
account_paths,
parallel_divisions,
)?,
ArchiveFormat::Tar => unpack_snapshot_local( ArchiveFormat::Tar => unpack_snapshot_local(
|| BufReader::new(open_file()), || BufReader::new(open_file()),
unpack_dir, unpack_dir,
@ -1579,6 +1596,12 @@ fn untar_snapshot_mmap(
account_paths, account_paths,
parallel_divisions, parallel_divisions,
)?, )?,
ArchiveFormat::TarLz4 => unpack_snapshot_local(
|| lz4::Decoder::new(slice).unwrap(),
unpack_dir,
account_paths,
parallel_divisions,
)?,
ArchiveFormat::Tar => { ArchiveFormat::Tar => {
unpack_snapshot_local(|| slice, unpack_dir, account_paths, parallel_divisions)? unpack_snapshot_local(|| slice, unpack_dir, account_paths, parallel_divisions)?
} }

View File

@ -1,16 +1,24 @@
use std::str::FromStr; use {
std::{fmt, str::FromStr},
strum::Display,
};
/// Values accepted by the `--snapshot-archive-format` CLI argument. Each entry is recognized
/// by `ArchiveFormat::from_cli_arg`; "tar" and "none" both map to `ArchiveFormat::Tar`.
pub const SUPPORTED_ARCHIVE_COMPRESSION: &[&str] = &["bz2", "gzip", "zstd", "lz4", "tar", "none"];
/// Value used as the default for the `--snapshot-archive-format` CLI argument.
pub const DEFAULT_ARCHIVE_COMPRESSION: &str = "zstd";
pub const TAR_BZIP2_EXTENSION: &str = "tar.bz2"; pub const TAR_BZIP2_EXTENSION: &str = "tar.bz2";
pub const TAR_GZIP_EXTENSION: &str = "tar.gz"; pub const TAR_GZIP_EXTENSION: &str = "tar.gz";
pub const TAR_ZSTD_EXTENSION: &str = "tar.zst"; pub const TAR_ZSTD_EXTENSION: &str = "tar.zst";
pub const TAR_LZ4_EXTENSION: &str = "tar.lz4";
pub const TAR_EXTENSION: &str = "tar"; pub const TAR_EXTENSION: &str = "tar";
/// The different archive formats used for snapshots /// The different archive formats used for snapshots
#[derive(Copy, Clone, Debug, Eq, PartialEq)] #[derive(Copy, Clone, Debug, Eq, PartialEq, Display)]
pub enum ArchiveFormat { pub enum ArchiveFormat {
TarBzip2, TarBzip2,
TarGzip, TarGzip,
TarZstd, TarZstd,
TarLz4,
Tar, Tar,
} }
@ -21,9 +29,21 @@ impl ArchiveFormat {
ArchiveFormat::TarBzip2 => TAR_BZIP2_EXTENSION, ArchiveFormat::TarBzip2 => TAR_BZIP2_EXTENSION,
ArchiveFormat::TarGzip => TAR_GZIP_EXTENSION, ArchiveFormat::TarGzip => TAR_GZIP_EXTENSION,
ArchiveFormat::TarZstd => TAR_ZSTD_EXTENSION, ArchiveFormat::TarZstd => TAR_ZSTD_EXTENSION,
ArchiveFormat::TarLz4 => TAR_LZ4_EXTENSION,
ArchiveFormat::Tar => TAR_EXTENSION, ArchiveFormat::Tar => TAR_EXTENSION,
} }
} }
/// Maps a `--snapshot-archive-format` command-line value to its [`ArchiveFormat`].
///
/// Recognized values are `"bz2"`, `"gzip"`, `"zstd"`, `"lz4"`, and `"tar"` / `"none"`
/// (both of which select [`ArchiveFormat::Tar`]). Matching is exact and case-sensitive;
/// any other string yields `None`.
pub fn from_cli_arg(archive_format_str: &str) -> Option<ArchiveFormat> {
    let format = match archive_format_str {
        "bz2" => ArchiveFormat::TarBzip2,
        "gzip" => ArchiveFormat::TarGzip,
        "zstd" => ArchiveFormat::TarZstd,
        "lz4" => ArchiveFormat::TarLz4,
        "tar" | "none" => ArchiveFormat::Tar,
        // Unknown value: let the caller decide how to report it.
        _ => return None,
    };
    Some(format)
}
} }
// Change this to `impl<S: AsRef<str>> TryFrom<S> for ArchiveFormat [...]` // Change this to `impl<S: AsRef<str>> TryFrom<S> for ArchiveFormat [...]`
@ -36,8 +56,9 @@ impl TryFrom<&str> for ArchiveFormat {
TAR_BZIP2_EXTENSION => Ok(ArchiveFormat::TarBzip2), TAR_BZIP2_EXTENSION => Ok(ArchiveFormat::TarBzip2),
TAR_GZIP_EXTENSION => Ok(ArchiveFormat::TarGzip), TAR_GZIP_EXTENSION => Ok(ArchiveFormat::TarGzip),
TAR_ZSTD_EXTENSION => Ok(ArchiveFormat::TarZstd), TAR_ZSTD_EXTENSION => Ok(ArchiveFormat::TarZstd),
TAR_LZ4_EXTENSION => Ok(ArchiveFormat::TarLz4),
TAR_EXTENSION => Ok(ArchiveFormat::Tar), TAR_EXTENSION => Ok(ArchiveFormat::Tar),
_ => Err(ParseError::InvalidExtension), _ => Err(ParseError::InvalidExtension(extension.to_string())),
} }
} }
} }
@ -50,14 +71,24 @@ impl FromStr for ArchiveFormat {
} }
} }
#[derive(Debug, Copy, Clone, Eq, PartialEq)] #[derive(Debug, Clone, Eq, PartialEq)]
pub enum ParseError { pub enum ParseError {
InvalidExtension, InvalidExtension(String),
}
impl fmt::Display for ParseError {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
match self {
ParseError::InvalidExtension(extension) => {
write!(f, "Invalid archive extension: {}", extension)
}
}
}
} }
#[cfg(test)] #[cfg(test)]
mod tests { mod tests {
use super::*; use {super::*, std::iter::zip};
const INVALID_EXTENSION: &str = "zip"; const INVALID_EXTENSION: &str = "zip";
#[test] #[test]
@ -65,6 +96,7 @@ mod tests {
assert_eq!(ArchiveFormat::TarBzip2.extension(), TAR_BZIP2_EXTENSION); assert_eq!(ArchiveFormat::TarBzip2.extension(), TAR_BZIP2_EXTENSION);
assert_eq!(ArchiveFormat::TarGzip.extension(), TAR_GZIP_EXTENSION); assert_eq!(ArchiveFormat::TarGzip.extension(), TAR_GZIP_EXTENSION);
assert_eq!(ArchiveFormat::TarZstd.extension(), TAR_ZSTD_EXTENSION); assert_eq!(ArchiveFormat::TarZstd.extension(), TAR_ZSTD_EXTENSION);
assert_eq!(ArchiveFormat::TarLz4.extension(), TAR_LZ4_EXTENSION);
assert_eq!(ArchiveFormat::Tar.extension(), TAR_EXTENSION); assert_eq!(ArchiveFormat::Tar.extension(), TAR_EXTENSION);
} }
@ -82,13 +114,17 @@ mod tests {
ArchiveFormat::try_from(TAR_ZSTD_EXTENSION), ArchiveFormat::try_from(TAR_ZSTD_EXTENSION),
Ok(ArchiveFormat::TarZstd) Ok(ArchiveFormat::TarZstd)
); );
assert_eq!(
ArchiveFormat::try_from(TAR_LZ4_EXTENSION),
Ok(ArchiveFormat::TarLz4)
);
assert_eq!( assert_eq!(
ArchiveFormat::try_from(TAR_EXTENSION), ArchiveFormat::try_from(TAR_EXTENSION),
Ok(ArchiveFormat::Tar) Ok(ArchiveFormat::Tar)
); );
assert_eq!( assert_eq!(
ArchiveFormat::try_from(INVALID_EXTENSION), ArchiveFormat::try_from(INVALID_EXTENSION),
Err(ParseError::InvalidExtension) Err(ParseError::InvalidExtension(INVALID_EXTENSION.to_string()))
); );
} }
@ -106,13 +142,35 @@ mod tests {
ArchiveFormat::from_str(TAR_ZSTD_EXTENSION), ArchiveFormat::from_str(TAR_ZSTD_EXTENSION),
Ok(ArchiveFormat::TarZstd) Ok(ArchiveFormat::TarZstd)
); );
assert_eq!(
ArchiveFormat::from_str(TAR_LZ4_EXTENSION),
Ok(ArchiveFormat::TarLz4)
);
assert_eq!( assert_eq!(
ArchiveFormat::from_str(TAR_EXTENSION), ArchiveFormat::from_str(TAR_EXTENSION),
Ok(ArchiveFormat::Tar) Ok(ArchiveFormat::Tar)
); );
assert_eq!( assert_eq!(
ArchiveFormat::from_str(INVALID_EXTENSION), ArchiveFormat::from_str(INVALID_EXTENSION),
Err(ParseError::InvalidExtension) Err(ParseError::InvalidExtension(INVALID_EXTENSION.to_string()))
); );
} }
#[test]
fn test_from_cli_arg() {
    // Expected parse results, listed in the same order as `SUPPORTED_ARCHIVE_COMPRESSION`.
    let golden = [
        Some(ArchiveFormat::TarBzip2),
        Some(ArchiveFormat::TarGzip),
        Some(ArchiveFormat::TarZstd),
        Some(ArchiveFormat::TarLz4),
        Some(ArchiveFormat::Tar),
        Some(ArchiveFormat::Tar),
    ];

    // `zip` stops at the shorter input, so without this check a value newly added to
    // `SUPPORTED_ARCHIVE_COMPRESSION` would silently go untested.
    assert_eq!(SUPPORTED_ARCHIVE_COMPRESSION.len(), golden.len());

    for (arg, expected) in zip(SUPPORTED_ARCHIVE_COMPRESSION.iter(), golden.into_iter()) {
        assert_eq!(ArchiveFormat::from_cli_arg(arg), expected);
    }

    // Anything outside the supported list must be rejected.
    assert_eq!(ArchiveFormat::from_cli_arg("bad"), None);
}
} }

View File

@ -54,10 +54,11 @@ use {
runtime_config::RuntimeConfig, runtime_config::RuntimeConfig,
snapshot_config::SnapshotConfig, snapshot_config::SnapshotConfig,
snapshot_utils::{ snapshot_utils::{
self, ArchiveFormat, SnapshotVersion, DEFAULT_FULL_SNAPSHOT_ARCHIVE_INTERVAL_SLOTS, self, ArchiveFormat, SnapshotVersion, DEFAULT_ARCHIVE_COMPRESSION,
DEFAULT_FULL_SNAPSHOT_ARCHIVE_INTERVAL_SLOTS,
DEFAULT_INCREMENTAL_SNAPSHOT_ARCHIVE_INTERVAL_SLOTS, DEFAULT_INCREMENTAL_SNAPSHOT_ARCHIVE_INTERVAL_SLOTS,
DEFAULT_MAX_FULL_SNAPSHOT_ARCHIVES_TO_RETAIN, DEFAULT_MAX_FULL_SNAPSHOT_ARCHIVES_TO_RETAIN,
DEFAULT_MAX_INCREMENTAL_SNAPSHOT_ARCHIVES_TO_RETAIN, DEFAULT_MAX_INCREMENTAL_SNAPSHOT_ARCHIVES_TO_RETAIN, SUPPORTED_ARCHIVE_COMPRESSION,
}, },
}, },
solana_sdk::{ solana_sdk::{
@ -1483,8 +1484,8 @@ pub fn main() {
Arg::with_name("snapshot_archive_format") Arg::with_name("snapshot_archive_format")
.long("snapshot-archive-format") .long("snapshot-archive-format")
.alias("snapshot-compression") // Legacy name used by Solana v1.5.x and older .alias("snapshot-compression") // Legacy name used by Solana v1.5.x and older
.possible_values(&["bz2", "gzip", "zstd", "tar", "none"]) .possible_values(SUPPORTED_ARCHIVE_COMPRESSION)
.default_value("zstd") .default_value(DEFAULT_ARCHIVE_COMPRESSION)
.value_name("ARCHIVE_TYPE") .value_name("ARCHIVE_TYPE")
.takes_value(true) .takes_value(true)
.help("Snapshot archive format to use."), .help("Snapshot archive format to use."),
@ -2653,13 +2654,8 @@ pub fn main() {
let archive_format = { let archive_format = {
let archive_format_str = value_t_or_exit!(matches, "snapshot_archive_format", String); let archive_format_str = value_t_or_exit!(matches, "snapshot_archive_format", String);
match archive_format_str.as_str() { ArchiveFormat::from_cli_arg(&archive_format_str)
"bz2" => ArchiveFormat::TarBzip2, .unwrap_or_else(|| panic!("Archive format not recognized: {}", archive_format_str))
"gzip" => ArchiveFormat::TarGzip,
"zstd" => ArchiveFormat::TarZstd,
"tar" | "none" => ArchiveFormat::Tar,
_ => panic!("Archive format not recognized: {}", archive_format_str),
}
}; };
let snapshot_version = let snapshot_version =