From 3e44046a73d6b1876bf10c71d6051f4e4ce4240c Mon Sep 17 00:00:00 2001 From: HaoranYi Date: Mon, 16 May 2022 12:44:15 -0500 Subject: [PATCH] Support lz4 for snapshot archives (#25089) * add lz4 * add cargo package file * fix tests * use fast lz4 compression * report snapshot archive format * add test * code review feedback * add cargo.lock * fix var name * refactor archive format parsing and add default compress cli arg * clippy * add from_cli_arg test * update cargo.lock * add lz4 support for mmap * cargo.lock * clippy --- Cargo.lock | 45 ++++++++++++ download-utils/src/lib.rs | 1 + ledger-tool/src/main.rs | 29 ++++++-- programs/bpf/Cargo.lock | 45 ++++++++++++ runtime/Cargo.toml | 3 + runtime/src/snapshot_utils.rs | 23 ++++++ runtime/src/snapshot_utils/archive_format.rs | 74 +++++++++++++++++--- validator/src/main.rs | 18 ++--- 8 files changed, 214 insertions(+), 24 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 0ec202701..3c592a991 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2358,6 +2358,26 @@ dependencies = [ "hashbrown", ] +[[package]] +name = "lz4" +version = "1.23.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4edcb94251b1c375c459e5abe9fb0168c1c826c3370172684844f8f3f8d1a885" +dependencies = [ + "libc", + "lz4-sys", +] + +[[package]] +name = "lz4-sys" +version = "1.9.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d7be8908e2ed6f31c02db8a9fa962f03e36c53fbfde437363eae3306b85d7e17" +dependencies = [ + "cc", + "libc", +] + [[package]] name = "maplit" version = "1.0.2" @@ -5629,6 +5649,7 @@ dependencies = [ "lazy_static", "libsecp256k1", "log", + "lz4", "memmap2", "num-derive", "num-traits", @@ -5657,6 +5678,8 @@ dependencies = [ "solana-vote-program", "solana-zk-token-proof-program", "solana-zk-token-sdk 1.11.0", + "strum", + "strum_macros", "symlink", "tar", "tempfile", @@ -6352,6 +6375,28 @@ version = "0.10.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum 
= "73473c0e59e6d5812c5dfe2a064a6444949f089e20eec9a2e5506596494e4623" +[[package]] +name = "strum" +version = "0.24.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e96acfc1b70604b8b2f1ffa4c57e59176c7dbb05d556c71ecd2f5498a1dee7f8" +dependencies = [ + "strum_macros", +] + +[[package]] +name = "strum_macros" +version = "0.24.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6878079b17446e4d3eba6192bb0a2950d5b14f0ed8424b852310e5a94345d0ef" +dependencies = [ + "heck 0.4.0", + "proc-macro2 1.0.38", + "quote 1.0.18", + "rustversion", + "syn 1.0.93", +] + [[package]] name = "subtle" version = "2.4.1" diff --git a/download-utils/src/lib.rs b/download-utils/src/lib.rs index f05f7cf58..f607e42e5 100644 --- a/download-utils/src/lib.rs +++ b/download-utils/src/lib.rs @@ -281,6 +281,7 @@ pub fn download_snapshot_archive<'a, 'b>( ArchiveFormat::TarZstd, ArchiveFormat::TarGzip, ArchiveFormat::TarBzip2, + ArchiveFormat::TarLz4, ArchiveFormat::Tar, // `solana-test-validator` creates uncompressed snapshots ] { let destination_path = match snapshot_type { diff --git a/ledger-tool/src/main.rs b/ledger-tool/src/main.rs index 8881f6c04..33e6a9db4 100644 --- a/ledger-tool/src/main.rs +++ b/ledger-tool/src/main.rs @@ -44,8 +44,9 @@ use { snapshot_config::SnapshotConfig, snapshot_hash::StartingSnapshotHashes, snapshot_utils::{ - self, ArchiveFormat, SnapshotVersion, DEFAULT_MAX_FULL_SNAPSHOT_ARCHIVES_TO_RETAIN, - DEFAULT_MAX_INCREMENTAL_SNAPSHOT_ARCHIVES_TO_RETAIN, + self, ArchiveFormat, SnapshotVersion, DEFAULT_ARCHIVE_COMPRESSION, + DEFAULT_MAX_FULL_SNAPSHOT_ARCHIVES_TO_RETAIN, + DEFAULT_MAX_INCREMENTAL_SNAPSHOT_ARCHIVES_TO_RETAIN, SUPPORTED_ARCHIVE_COMPRESSION, }, }, solana_sdk::{ @@ -1524,7 +1525,17 @@ fn main() { base for the incremental snapshot.") .conflicts_with("no_snapshot") ) - ).subcommand( + .arg( + Arg::with_name("snapshot_archive_format") + .long("snapshot-archive-format") + 
.possible_values(SUPPORTED_ARCHIVE_COMPRESSION) + .default_value(DEFAULT_ARCHIVE_COMPRESSION) + .value_name("ARCHIVE_TYPE") + .takes_value(true) + .help("Snapshot archive format to use.") + .conflicts_with("no_snapshot") + ) + ).subcommand( SubCommand::with_name("accounts") .about("Print account stats and contents after processing the ledger") .arg(&no_snapshot_arg) @@ -2292,6 +2303,14 @@ fn main() { }, ); + let snapshot_archive_format = { + let archive_format_str = + value_t_or_exit!(matches, "snapshot_archive_format", String); + ArchiveFormat::from_cli_arg(&archive_format_str).unwrap_or_else(|| { + panic!("Archive format not recognized: {}", archive_format_str) + }) + }; + let maximum_full_snapshot_archives_to_retain = value_t_or_exit!(arg_matches, "maximum_full_snapshots_to_retain", usize); let maximum_incremental_snapshot_archives_to_retain = value_t_or_exit!( @@ -2568,7 +2587,7 @@ fn main() { Some(snapshot_version), output_directory.clone(), output_directory, - ArchiveFormat::TarZstd, + snapshot_archive_format, maximum_full_snapshot_archives_to_retain, maximum_incremental_snapshot_archives_to_retain, ) @@ -2592,7 +2611,7 @@ fn main() { Some(snapshot_version), output_directory.clone(), output_directory, - ArchiveFormat::TarZstd, + snapshot_archive_format, maximum_full_snapshot_archives_to_retain, maximum_incremental_snapshot_archives_to_retain, ) diff --git a/programs/bpf/Cargo.lock b/programs/bpf/Cargo.lock index aea6112d4..3935d955a 100644 --- a/programs/bpf/Cargo.lock +++ b/programs/bpf/Cargo.lock @@ -2088,6 +2088,26 @@ dependencies = [ "hashbrown", ] +[[package]] +name = "lz4" +version = "1.23.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4edcb94251b1c375c459e5abe9fb0168c1c826c3370172684844f8f3f8d1a885" +dependencies = [ + "libc", + "lz4-sys", +] + +[[package]] +name = "lz4-sys" +version = "1.9.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"d7be8908e2ed6f31c02db8a9fa962f03e36c53fbfde437363eae3306b85d7e17" +dependencies = [ + "cc", + "libc", +] + [[package]] name = "maplit" version = "1.0.2" @@ -5001,6 +5021,7 @@ dependencies = [ "itertools", "lazy_static", "log", + "lz4", "memmap2", "num-derive", "num-traits", @@ -5027,6 +5048,8 @@ dependencies = [ "solana-vote-program", "solana-zk-token-proof-program", "solana-zk-token-sdk 1.11.0", + "strum", + "strum_macros", "symlink", "tar", "tempfile", @@ -5604,6 +5627,28 @@ version = "0.10.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "73473c0e59e6d5812c5dfe2a064a6444949f089e20eec9a2e5506596494e4623" +[[package]] +name = "strum" +version = "0.24.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e96acfc1b70604b8b2f1ffa4c57e59176c7dbb05d556c71ecd2f5498a1dee7f8" +dependencies = [ + "strum_macros", +] + +[[package]] +name = "strum_macros" +version = "0.24.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6878079b17446e4d3eba6192bb0a2950d5b14f0ed8424b852310e5a94345d0ef" +dependencies = [ + "heck 0.4.0", + "proc-macro2 1.0.38", + "quote 1.0.18", + "rustversion", + "syn 1.0.93", +] + [[package]] name = "subtle" version = "2.4.1" diff --git a/runtime/Cargo.toml b/runtime/Cargo.toml index 14f0bdb8b..2c71e8355 100644 --- a/runtime/Cargo.toml +++ b/runtime/Cargo.toml @@ -57,6 +57,9 @@ tar = "0.4.38" tempfile = "3.3.0" thiserror = "1.0" zstd = "0.11.2" +lz4 = "1.23.3" +strum_macros = "0.24" +strum = { version = "0.24", features = ["derive"] } [lib] crate-type = ["lib"] diff --git a/runtime/src/snapshot_utils.rs b/runtime/src/snapshot_utils.rs index 45e6a928a..52830e03b 100644 --- a/runtime/src/snapshot_utils.rs +++ b/runtime/src/snapshot_utils.rs @@ -370,6 +370,12 @@ pub fn archive_snapshot_package( do_archive_files(&mut encoder)?; encoder.finish()?; } + ArchiveFormat::TarLz4 => { + let mut encoder = lz4::EncoderBuilder::new().level(1).build(archive_file)?; + 
do_archive_files(&mut encoder)?; + let (_output, result) = encoder.finish(); + result? + } ArchiveFormat::Tar => { do_archive_files(&mut archive_file)?; } @@ -401,6 +407,11 @@ pub fn archive_snapshot_package( datapoint_info!( "archive-snapshot-package", ("slot", snapshot_package.slot(), i64), + ( + "archive_format", + snapshot_package.archive_format().to_string(), + String + ), ("duration_ms", timer.as_ms(), i64), ( if snapshot_package.snapshot_type.is_full_snapshot() { @@ -1489,6 +1500,12 @@ fn untar_snapshot_file( account_paths, parallel_divisions, )?, + ArchiveFormat::TarLz4 => unpack_snapshot_local( + || lz4::Decoder::new(BufReader::new(open_file())).unwrap(), + unpack_dir, + account_paths, + parallel_divisions, + )?, ArchiveFormat::Tar => unpack_snapshot_local( || BufReader::new(open_file()), unpack_dir, @@ -1579,6 +1596,12 @@ fn untar_snapshot_mmap( account_paths, parallel_divisions, )?, + ArchiveFormat::TarLz4 => unpack_snapshot_local( + || lz4::Decoder::new(slice).unwrap(), + unpack_dir, + account_paths, + parallel_divisions, + )?, ArchiveFormat::Tar => { unpack_snapshot_local(|| slice, unpack_dir, account_paths, parallel_divisions)? 
 } diff --git a/runtime/src/snapshot_utils/archive_format.rs b/runtime/src/snapshot_utils/archive_format.rs index ef0102e47..3151a5a51 100644 --- a/runtime/src/snapshot_utils/archive_format.rs +++ b/runtime/src/snapshot_utils/archive_format.rs @@ -1,16 +1,24 @@ -use std::str::FromStr; +use { + std::{fmt, str::FromStr}, + strum::Display, +}; + +pub const SUPPORTED_ARCHIVE_COMPRESSION: &[&str] = &["bz2", "gzip", "zstd", "lz4", "tar", "none"]; +pub const DEFAULT_ARCHIVE_COMPRESSION: &str = "zstd"; pub const TAR_BZIP2_EXTENSION: &str = "tar.bz2"; pub const TAR_GZIP_EXTENSION: &str = "tar.gz"; pub const TAR_ZSTD_EXTENSION: &str = "tar.zst"; +pub const TAR_LZ4_EXTENSION: &str = "tar.lz4"; pub const TAR_EXTENSION: &str = "tar"; /// The different archive formats used for snapshots -#[derive(Copy, Clone, Debug, Eq, PartialEq)] +#[derive(Copy, Clone, Debug, Eq, PartialEq, Display)] pub enum ArchiveFormat { TarBzip2, TarGzip, TarZstd, + TarLz4, Tar, } @@ -21,9 +29,21 @@ impl ArchiveFormat { ArchiveFormat::TarBzip2 => TAR_BZIP2_EXTENSION, ArchiveFormat::TarGzip => TAR_GZIP_EXTENSION, ArchiveFormat::TarZstd => TAR_ZSTD_EXTENSION, + ArchiveFormat::TarLz4 => TAR_LZ4_EXTENSION, ArchiveFormat::Tar => TAR_EXTENSION, } } + + pub fn from_cli_arg(archive_format_str: &str) -> Option<ArchiveFormat> { + match archive_format_str { + "bz2" => Some(ArchiveFormat::TarBzip2), + "gzip" => Some(ArchiveFormat::TarGzip), + "zstd" => Some(ArchiveFormat::TarZstd), + "lz4" => Some(ArchiveFormat::TarLz4), + "tar" | "none" => Some(ArchiveFormat::Tar), + _ => None, + } + } } // Change this to `impl<S: AsRef<str>> TryFrom<S> for ArchiveFormat [...]` @@ -36,8 +56,9 @@ impl TryFrom<&str> for ArchiveFormat { TAR_BZIP2_EXTENSION => Ok(ArchiveFormat::TarBzip2), TAR_GZIP_EXTENSION => Ok(ArchiveFormat::TarGzip), TAR_ZSTD_EXTENSION => Ok(ArchiveFormat::TarZstd), + TAR_LZ4_EXTENSION => Ok(ArchiveFormat::TarLz4), TAR_EXTENSION => Ok(ArchiveFormat::Tar), - _ => Err(ParseError::InvalidExtension), + _ => 
Err(ParseError::InvalidExtension(extension.to_string())), } } } @@ -50,14 +71,24 @@ impl FromStr for ArchiveFormat { } } -#[derive(Debug, Copy, Clone, Eq, PartialEq)] +#[derive(Debug, Clone, Eq, PartialEq)] pub enum ParseError { - InvalidExtension, + InvalidExtension(String), +} + +impl fmt::Display for ParseError { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match self { + ParseError::InvalidExtension(extension) => { + write!(f, "Invalid archive extension: {}", extension) + } + } + } } #[cfg(test)] mod tests { - use super::*; + use {super::*, std::iter::zip}; const INVALID_EXTENSION: &str = "zip"; #[test] @@ -65,6 +96,7 @@ mod tests { assert_eq!(ArchiveFormat::TarBzip2.extension(), TAR_BZIP2_EXTENSION); assert_eq!(ArchiveFormat::TarGzip.extension(), TAR_GZIP_EXTENSION); assert_eq!(ArchiveFormat::TarZstd.extension(), TAR_ZSTD_EXTENSION); + assert_eq!(ArchiveFormat::TarLz4.extension(), TAR_LZ4_EXTENSION); assert_eq!(ArchiveFormat::Tar.extension(), TAR_EXTENSION); } @@ -82,13 +114,17 @@ mod tests { ArchiveFormat::try_from(TAR_ZSTD_EXTENSION), Ok(ArchiveFormat::TarZstd) ); + assert_eq!( + ArchiveFormat::try_from(TAR_LZ4_EXTENSION), + Ok(ArchiveFormat::TarLz4) + ); assert_eq!( ArchiveFormat::try_from(TAR_EXTENSION), Ok(ArchiveFormat::Tar) ); assert_eq!( ArchiveFormat::try_from(INVALID_EXTENSION), - Err(ParseError::InvalidExtension) + Err(ParseError::InvalidExtension(INVALID_EXTENSION.to_string())) ); } @@ -106,13 +142,35 @@ mod tests { ArchiveFormat::from_str(TAR_ZSTD_EXTENSION), Ok(ArchiveFormat::TarZstd) ); + assert_eq!( + ArchiveFormat::from_str(TAR_LZ4_EXTENSION), + Ok(ArchiveFormat::TarLz4) + ); assert_eq!( ArchiveFormat::from_str(TAR_EXTENSION), Ok(ArchiveFormat::Tar) ); assert_eq!( ArchiveFormat::from_str(INVALID_EXTENSION), - Err(ParseError::InvalidExtension) + Err(ParseError::InvalidExtension(INVALID_EXTENSION.to_string())) ); } + + #[test] + fn test_from_cli_arg() { + let golden = [ + Some(ArchiveFormat::TarBzip2), + 
Some(ArchiveFormat::TarGzip), + Some(ArchiveFormat::TarZstd), + Some(ArchiveFormat::TarLz4), + Some(ArchiveFormat::Tar), + Some(ArchiveFormat::Tar), + ]; + + for (arg, expected) in zip(SUPPORTED_ARCHIVE_COMPRESSION.iter(), golden.into_iter()) { + assert_eq!(ArchiveFormat::from_cli_arg(arg), expected); + } + + assert_eq!(ArchiveFormat::from_cli_arg("bad"), None); + } } diff --git a/validator/src/main.rs b/validator/src/main.rs index 8c1f8ac25..4eef70a4c 100644 --- a/validator/src/main.rs +++ b/validator/src/main.rs @@ -54,10 +54,11 @@ use { runtime_config::RuntimeConfig, snapshot_config::SnapshotConfig, snapshot_utils::{ - self, ArchiveFormat, SnapshotVersion, DEFAULT_FULL_SNAPSHOT_ARCHIVE_INTERVAL_SLOTS, + self, ArchiveFormat, SnapshotVersion, DEFAULT_ARCHIVE_COMPRESSION, + DEFAULT_FULL_SNAPSHOT_ARCHIVE_INTERVAL_SLOTS, DEFAULT_INCREMENTAL_SNAPSHOT_ARCHIVE_INTERVAL_SLOTS, DEFAULT_MAX_FULL_SNAPSHOT_ARCHIVES_TO_RETAIN, - DEFAULT_MAX_INCREMENTAL_SNAPSHOT_ARCHIVES_TO_RETAIN, + DEFAULT_MAX_INCREMENTAL_SNAPSHOT_ARCHIVES_TO_RETAIN, SUPPORTED_ARCHIVE_COMPRESSION, }, }, solana_sdk::{ @@ -1483,8 +1484,8 @@ pub fn main() { Arg::with_name("snapshot_archive_format") .long("snapshot-archive-format") .alias("snapshot-compression") // Legacy name used by Solana v1.5.x and older - .possible_values(&["bz2", "gzip", "zstd", "tar", "none"]) - .default_value("zstd") + .possible_values(SUPPORTED_ARCHIVE_COMPRESSION) + .default_value(DEFAULT_ARCHIVE_COMPRESSION) .value_name("ARCHIVE_TYPE") .takes_value(true) .help("Snapshot archive format to use."), @@ -2653,13 +2654,8 @@ pub fn main() { let archive_format = { let archive_format_str = value_t_or_exit!(matches, "snapshot_archive_format", String); - match archive_format_str.as_str() { - "bz2" => ArchiveFormat::TarBzip2, - "gzip" => ArchiveFormat::TarGzip, - "zstd" => ArchiveFormat::TarZstd, - "tar" | "none" => ArchiveFormat::Tar, - _ => panic!("Archive format not recognized: {}", archive_format_str), - } + 
ArchiveFormat::from_cli_arg(&archive_format_str) + .unwrap_or_else(|| panic!("Archive format not recognized: {}", archive_format_str)) }; let snapshot_version =