Support lz4 for snapshot archives (#25089)

* add lz4

* add cargo package file

* fix tests

* use fast lz4 compression

* report snapshot archive format

* add test

* code review feedback

* add cargo.lock

* fix var name

* refactor archive format parsing and add default compress cli arg

* clippy

* add from_cli_arg test

* update cargo.lock

* add lz4 support for mmap

* cargo.lock

* clippy
This commit is contained in:
HaoranYi 2022-05-16 12:44:15 -05:00 committed by GitHub
parent f81c5df1f0
commit 3e44046a73
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
8 changed files with 214 additions and 24 deletions

45
Cargo.lock generated
View File

@ -2358,6 +2358,26 @@ dependencies = [
"hashbrown",
]
[[package]]
name = "lz4"
version = "1.23.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4edcb94251b1c375c459e5abe9fb0168c1c826c3370172684844f8f3f8d1a885"
dependencies = [
"libc",
"lz4-sys",
]
[[package]]
name = "lz4-sys"
version = "1.9.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d7be8908e2ed6f31c02db8a9fa962f03e36c53fbfde437363eae3306b85d7e17"
dependencies = [
"cc",
"libc",
]
[[package]]
name = "maplit"
version = "1.0.2"
@ -5629,6 +5649,7 @@ dependencies = [
"lazy_static",
"libsecp256k1",
"log",
"lz4",
"memmap2",
"num-derive",
"num-traits",
@ -5657,6 +5678,8 @@ dependencies = [
"solana-vote-program",
"solana-zk-token-proof-program",
"solana-zk-token-sdk 1.11.0",
"strum",
"strum_macros",
"symlink",
"tar",
"tempfile",
@ -6352,6 +6375,28 @@ version = "0.10.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "73473c0e59e6d5812c5dfe2a064a6444949f089e20eec9a2e5506596494e4623"
[[package]]
name = "strum"
version = "0.24.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e96acfc1b70604b8b2f1ffa4c57e59176c7dbb05d556c71ecd2f5498a1dee7f8"
dependencies = [
"strum_macros",
]
[[package]]
name = "strum_macros"
version = "0.24.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6878079b17446e4d3eba6192bb0a2950d5b14f0ed8424b852310e5a94345d0ef"
dependencies = [
"heck 0.4.0",
"proc-macro2 1.0.38",
"quote 1.0.18",
"rustversion",
"syn 1.0.93",
]
[[package]]
name = "subtle"
version = "2.4.1"

View File

@ -281,6 +281,7 @@ pub fn download_snapshot_archive<'a, 'b>(
ArchiveFormat::TarZstd,
ArchiveFormat::TarGzip,
ArchiveFormat::TarBzip2,
ArchiveFormat::TarLz4,
ArchiveFormat::Tar, // `solana-test-validator` creates uncompressed snapshots
] {
let destination_path = match snapshot_type {

View File

@ -44,8 +44,9 @@ use {
snapshot_config::SnapshotConfig,
snapshot_hash::StartingSnapshotHashes,
snapshot_utils::{
self, ArchiveFormat, SnapshotVersion, DEFAULT_MAX_FULL_SNAPSHOT_ARCHIVES_TO_RETAIN,
DEFAULT_MAX_INCREMENTAL_SNAPSHOT_ARCHIVES_TO_RETAIN,
self, ArchiveFormat, SnapshotVersion, DEFAULT_ARCHIVE_COMPRESSION,
DEFAULT_MAX_FULL_SNAPSHOT_ARCHIVES_TO_RETAIN,
DEFAULT_MAX_INCREMENTAL_SNAPSHOT_ARCHIVES_TO_RETAIN, SUPPORTED_ARCHIVE_COMPRESSION,
},
},
solana_sdk::{
@ -1524,7 +1525,17 @@ fn main() {
base for the incremental snapshot.")
.conflicts_with("no_snapshot")
)
).subcommand(
.arg(
Arg::with_name("snapshot_archive_format")
.long("snapshot-archive-format")
.possible_values(SUPPORTED_ARCHIVE_COMPRESSION)
.default_value(DEFAULT_ARCHIVE_COMPRESSION)
.value_name("ARCHIVE_TYPE")
.takes_value(true)
.help("Snapshot archive format to use.")
.conflicts_with("no_snapshot")
)
).subcommand(
SubCommand::with_name("accounts")
.about("Print account stats and contents after processing the ledger")
.arg(&no_snapshot_arg)
@ -2292,6 +2303,14 @@ fn main() {
},
);
// Resolve the archive format requested for this subcommand.
//
// The `snapshot_archive_format` argument is declared on the subcommand, so it
// has to be read from the subcommand's `arg_matches` (like the retention
// arguments just below) and not from the top-level `matches`, where the
// lookup finds nothing and `value_t_or_exit!` terminates the process.
let snapshot_archive_format = {
    let archive_format_str =
        value_t_or_exit!(arg_matches, "snapshot_archive_format", String);
    // The argument is restricted to SUPPORTED_ARCHIVE_COMPRESSION, so this
    // panic only guards against the CLI list and the parser drifting apart.
    ArchiveFormat::from_cli_arg(&archive_format_str).unwrap_or_else(|| {
        panic!("Archive format not recognized: {}", archive_format_str)
    })
};
let maximum_full_snapshot_archives_to_retain =
value_t_or_exit!(arg_matches, "maximum_full_snapshots_to_retain", usize);
let maximum_incremental_snapshot_archives_to_retain = value_t_or_exit!(
@ -2568,7 +2587,7 @@ fn main() {
Some(snapshot_version),
output_directory.clone(),
output_directory,
ArchiveFormat::TarZstd,
snapshot_archive_format,
maximum_full_snapshot_archives_to_retain,
maximum_incremental_snapshot_archives_to_retain,
)
@ -2592,7 +2611,7 @@ fn main() {
Some(snapshot_version),
output_directory.clone(),
output_directory,
ArchiveFormat::TarZstd,
snapshot_archive_format,
maximum_full_snapshot_archives_to_retain,
maximum_incremental_snapshot_archives_to_retain,
)

View File

@ -2088,6 +2088,26 @@ dependencies = [
"hashbrown",
]
[[package]]
name = "lz4"
version = "1.23.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4edcb94251b1c375c459e5abe9fb0168c1c826c3370172684844f8f3f8d1a885"
dependencies = [
"libc",
"lz4-sys",
]
[[package]]
name = "lz4-sys"
version = "1.9.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d7be8908e2ed6f31c02db8a9fa962f03e36c53fbfde437363eae3306b85d7e17"
dependencies = [
"cc",
"libc",
]
[[package]]
name = "maplit"
version = "1.0.2"
@ -5001,6 +5021,7 @@ dependencies = [
"itertools",
"lazy_static",
"log",
"lz4",
"memmap2",
"num-derive",
"num-traits",
@ -5027,6 +5048,8 @@ dependencies = [
"solana-vote-program",
"solana-zk-token-proof-program",
"solana-zk-token-sdk 1.11.0",
"strum",
"strum_macros",
"symlink",
"tar",
"tempfile",
@ -5604,6 +5627,28 @@ version = "0.10.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "73473c0e59e6d5812c5dfe2a064a6444949f089e20eec9a2e5506596494e4623"
[[package]]
name = "strum"
version = "0.24.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e96acfc1b70604b8b2f1ffa4c57e59176c7dbb05d556c71ecd2f5498a1dee7f8"
dependencies = [
"strum_macros",
]
[[package]]
name = "strum_macros"
version = "0.24.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6878079b17446e4d3eba6192bb0a2950d5b14f0ed8424b852310e5a94345d0ef"
dependencies = [
"heck 0.4.0",
"proc-macro2 1.0.38",
"quote 1.0.18",
"rustversion",
"syn 1.0.93",
]
[[package]]
name = "subtle"
version = "2.4.1"

View File

@ -57,6 +57,9 @@ tar = "0.4.38"
tempfile = "3.3.0"
thiserror = "1.0"
zstd = "0.11.2"
lz4 = "1.23.3"
strum_macros = "0.24"
strum = { version = "0.24", features = ["derive"] }
[lib]
crate-type = ["lib"]

View File

@ -370,6 +370,12 @@ pub fn archive_snapshot_package(
do_archive_files(&mut encoder)?;
encoder.finish()?;
}
ArchiveFormat::TarLz4 => {
let mut encoder = lz4::EncoderBuilder::new().level(1).build(archive_file)?;
do_archive_files(&mut encoder)?;
let (_output, result) = encoder.finish();
result?
}
ArchiveFormat::Tar => {
do_archive_files(&mut archive_file)?;
}
@ -401,6 +407,11 @@ pub fn archive_snapshot_package(
datapoint_info!(
"archive-snapshot-package",
("slot", snapshot_package.slot(), i64),
(
"archive_format",
snapshot_package.archive_format().to_string(),
String
),
("duration_ms", timer.as_ms(), i64),
(
if snapshot_package.snapshot_type.is_full_snapshot() {
@ -1489,6 +1500,12 @@ fn untar_snapshot_file(
account_paths,
parallel_divisions,
)?,
ArchiveFormat::TarLz4 => unpack_snapshot_local(
|| lz4::Decoder::new(BufReader::new(open_file())).unwrap(),
unpack_dir,
account_paths,
parallel_divisions,
)?,
ArchiveFormat::Tar => unpack_snapshot_local(
|| BufReader::new(open_file()),
unpack_dir,
@ -1579,6 +1596,12 @@ fn untar_snapshot_mmap(
account_paths,
parallel_divisions,
)?,
ArchiveFormat::TarLz4 => unpack_snapshot_local(
|| lz4::Decoder::new(slice).unwrap(),
unpack_dir,
account_paths,
parallel_divisions,
)?,
ArchiveFormat::Tar => {
unpack_snapshot_local(|| slice, unpack_dir, account_paths, parallel_divisions)?
}

View File

@ -1,16 +1,24 @@
use std::str::FromStr;
use {
std::{fmt, str::FromStr},
strum::Display,
};
/// Values accepted by the `--snapshot-archive-format` command-line argument.
///
/// Every entry is understood by `ArchiveFormat::from_cli_arg`; "tar" and "none"
/// both select the uncompressed `ArchiveFormat::Tar`.
pub const SUPPORTED_ARCHIVE_COMPRESSION: &[&str] = &["bz2", "gzip", "zstd", "lz4", "tar", "none"];
/// Value used for `--snapshot-archive-format` when the argument is not given.
pub const DEFAULT_ARCHIVE_COMPRESSION: &str = "zstd";
/// File extension of a bzip2-compressed tar archive (`ArchiveFormat::TarBzip2`).
pub const TAR_BZIP2_EXTENSION: &str = "tar.bz2";
/// File extension of a gzip-compressed tar archive (`ArchiveFormat::TarGzip`).
pub const TAR_GZIP_EXTENSION: &str = "tar.gz";
/// File extension of a zstd-compressed tar archive (`ArchiveFormat::TarZstd`).
pub const TAR_ZSTD_EXTENSION: &str = "tar.zst";
/// File extension of an lz4-compressed tar archive (`ArchiveFormat::TarLz4`).
pub const TAR_LZ4_EXTENSION: &str = "tar.lz4";
/// File extension of an uncompressed tar archive (`ArchiveFormat::Tar`).
pub const TAR_EXTENSION: &str = "tar";
/// The different archive formats used for snapshots
#[derive(Copy, Clone, Debug, Eq, PartialEq)]
#[derive(Copy, Clone, Debug, Eq, PartialEq, Display)]
pub enum ArchiveFormat {
TarBzip2,
TarGzip,
TarZstd,
TarLz4,
Tar,
}
@ -21,9 +29,21 @@ impl ArchiveFormat {
ArchiveFormat::TarBzip2 => TAR_BZIP2_EXTENSION,
ArchiveFormat::TarGzip => TAR_GZIP_EXTENSION,
ArchiveFormat::TarZstd => TAR_ZSTD_EXTENSION,
ArchiveFormat::TarLz4 => TAR_LZ4_EXTENSION,
ArchiveFormat::Tar => TAR_EXTENSION,
}
}
/// Maps a `--snapshot-archive-format` command-line value to its archive format.
///
/// Recognizes "bz2", "gzip", "zstd", "lz4", and "tar"/"none" (both meaning an
/// uncompressed tar). Matching is exact and case-sensitive; any other string
/// yields `None`.
pub fn from_cli_arg(archive_format_str: &str) -> Option<ArchiveFormat> {
    let format = match archive_format_str {
        "bz2" => ArchiveFormat::TarBzip2,
        "gzip" => ArchiveFormat::TarGzip,
        "zstd" => ArchiveFormat::TarZstd,
        "lz4" => ArchiveFormat::TarLz4,
        "tar" | "none" => ArchiveFormat::Tar,
        // Unknown value: let the caller decide how to report it.
        _ => return None,
    };
    Some(format)
}
}
// Change this to `impl<S: AsRef<str>> TryFrom<S> for ArchiveFormat [...]`
@ -36,8 +56,9 @@ impl TryFrom<&str> for ArchiveFormat {
TAR_BZIP2_EXTENSION => Ok(ArchiveFormat::TarBzip2),
TAR_GZIP_EXTENSION => Ok(ArchiveFormat::TarGzip),
TAR_ZSTD_EXTENSION => Ok(ArchiveFormat::TarZstd),
TAR_LZ4_EXTENSION => Ok(ArchiveFormat::TarLz4),
TAR_EXTENSION => Ok(ArchiveFormat::Tar),
_ => Err(ParseError::InvalidExtension),
_ => Err(ParseError::InvalidExtension(extension.to_string())),
}
}
}
@ -50,14 +71,24 @@ impl FromStr for ArchiveFormat {
}
}
#[derive(Debug, Copy, Clone, Eq, PartialEq)]
#[derive(Debug, Clone, Eq, PartialEq)]
pub enum ParseError {
InvalidExtension,
InvalidExtension(String),
}
impl fmt::Display for ParseError {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
match self {
ParseError::InvalidExtension(extension) => {
write!(f, "Invalid archive extension: {}", extension)
}
}
}
}
#[cfg(test)]
mod tests {
use super::*;
use {super::*, std::iter::zip};
const INVALID_EXTENSION: &str = "zip";
#[test]
@ -65,6 +96,7 @@ mod tests {
assert_eq!(ArchiveFormat::TarBzip2.extension(), TAR_BZIP2_EXTENSION);
assert_eq!(ArchiveFormat::TarGzip.extension(), TAR_GZIP_EXTENSION);
assert_eq!(ArchiveFormat::TarZstd.extension(), TAR_ZSTD_EXTENSION);
assert_eq!(ArchiveFormat::TarLz4.extension(), TAR_LZ4_EXTENSION);
assert_eq!(ArchiveFormat::Tar.extension(), TAR_EXTENSION);
}
@ -82,13 +114,17 @@ mod tests {
ArchiveFormat::try_from(TAR_ZSTD_EXTENSION),
Ok(ArchiveFormat::TarZstd)
);
assert_eq!(
ArchiveFormat::try_from(TAR_LZ4_EXTENSION),
Ok(ArchiveFormat::TarLz4)
);
assert_eq!(
ArchiveFormat::try_from(TAR_EXTENSION),
Ok(ArchiveFormat::Tar)
);
assert_eq!(
ArchiveFormat::try_from(INVALID_EXTENSION),
Err(ParseError::InvalidExtension)
Err(ParseError::InvalidExtension(INVALID_EXTENSION.to_string()))
);
}
@ -106,13 +142,35 @@ mod tests {
ArchiveFormat::from_str(TAR_ZSTD_EXTENSION),
Ok(ArchiveFormat::TarZstd)
);
assert_eq!(
ArchiveFormat::from_str(TAR_LZ4_EXTENSION),
Ok(ArchiveFormat::TarLz4)
);
assert_eq!(
ArchiveFormat::from_str(TAR_EXTENSION),
Ok(ArchiveFormat::Tar)
);
assert_eq!(
ArchiveFormat::from_str(INVALID_EXTENSION),
Err(ParseError::InvalidExtension)
Err(ParseError::InvalidExtension(INVALID_EXTENSION.to_string()))
);
}
#[test]
fn test_from_cli_arg() {
    // Expected result for each entry of SUPPORTED_ARCHIVE_COMPRESSION, in the
    // same order as that list.
    let golden = [
        Some(ArchiveFormat::TarBzip2),
        Some(ArchiveFormat::TarGzip),
        Some(ArchiveFormat::TarZstd),
        Some(ArchiveFormat::TarLz4),
        Some(ArchiveFormat::Tar),
        Some(ArchiveFormat::Tar),
    ];

    // `zip` stops at the shorter input, so without this check a value added to
    // SUPPORTED_ARCHIVE_COMPRESSION but not to `golden` would never be tested.
    assert_eq!(SUPPORTED_ARCHIVE_COMPRESSION.len(), golden.len());

    for (arg, expected) in zip(SUPPORTED_ARCHIVE_COMPRESSION.iter(), golden.into_iter()) {
        assert_eq!(ArchiveFormat::from_cli_arg(arg), expected);
    }

    // Anything outside the supported list is rejected.
    assert_eq!(ArchiveFormat::from_cli_arg("bad"), None);
}
}

View File

@ -54,10 +54,11 @@ use {
runtime_config::RuntimeConfig,
snapshot_config::SnapshotConfig,
snapshot_utils::{
self, ArchiveFormat, SnapshotVersion, DEFAULT_FULL_SNAPSHOT_ARCHIVE_INTERVAL_SLOTS,
self, ArchiveFormat, SnapshotVersion, DEFAULT_ARCHIVE_COMPRESSION,
DEFAULT_FULL_SNAPSHOT_ARCHIVE_INTERVAL_SLOTS,
DEFAULT_INCREMENTAL_SNAPSHOT_ARCHIVE_INTERVAL_SLOTS,
DEFAULT_MAX_FULL_SNAPSHOT_ARCHIVES_TO_RETAIN,
DEFAULT_MAX_INCREMENTAL_SNAPSHOT_ARCHIVES_TO_RETAIN,
DEFAULT_MAX_INCREMENTAL_SNAPSHOT_ARCHIVES_TO_RETAIN, SUPPORTED_ARCHIVE_COMPRESSION,
},
},
solana_sdk::{
@ -1483,8 +1484,8 @@ pub fn main() {
Arg::with_name("snapshot_archive_format")
.long("snapshot-archive-format")
.alias("snapshot-compression") // Legacy name used by Solana v1.5.x and older
.possible_values(&["bz2", "gzip", "zstd", "tar", "none"])
.default_value("zstd")
.possible_values(SUPPORTED_ARCHIVE_COMPRESSION)
.default_value(DEFAULT_ARCHIVE_COMPRESSION)
.value_name("ARCHIVE_TYPE")
.takes_value(true)
.help("Snapshot archive format to use."),
@ -2653,13 +2654,8 @@ pub fn main() {
let archive_format = {
let archive_format_str = value_t_or_exit!(matches, "snapshot_archive_format", String);
match archive_format_str.as_str() {
"bz2" => ArchiveFormat::TarBzip2,
"gzip" => ArchiveFormat::TarGzip,
"zstd" => ArchiveFormat::TarZstd,
"tar" | "none" => ArchiveFormat::Tar,
_ => panic!("Archive format not recognized: {}", archive_format_str),
}
ArchiveFormat::from_cli_arg(&archive_format_str)
.unwrap_or_else(|| panic!("Archive format not recognized: {}", archive_format_str))
};
let snapshot_version =