From af17800bba8d8fd39817b6ad7bbe804cf3b7ceba Mon Sep 17 00:00:00 2001 From: GroovieGermanikus Date: Fri, 7 Jun 2024 14:11:12 +0200 Subject: [PATCH] xor+lz4 --- Cargo.lock | 26 +++++++++++++++++++++ Cargo.toml | 2 ++ examples/bench_geyser_grpc_accounts.rs | 32 ++++++++++++++++++++++++-- 3 files changed, 58 insertions(+), 2 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index e906d14..13a96b5 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1372,6 +1372,7 @@ dependencies = [ "futures", "itertools 0.10.5", "log", + "lz4_flex", "merge-streams", "regex", "solana-account-decoder", @@ -1846,6 +1847,15 @@ version = "0.4.21" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "90ed8c1e510134f979dbc4f070f87d4313098b704861a105fe34231c70a3901c" +[[package]] +name = "lz4_flex" +version = "0.11.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "75761162ae2b0e580d7e7c390558127e5f01b4194debd6221fd8c207fc80e3f5" +dependencies = [ + "twox-hash", +] + [[package]] name = "matchit" version = "0.7.3" @@ -3493,6 +3503,12 @@ dependencies = [ "spl-program-error", ] +[[package]] +name = "static_assertions" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a2eb9349b6444b326872e140eb1cf5e7c522154d69e7a0ffb0fb81c06b37543f" + [[package]] name = "strsim" version = "0.10.0" @@ -3902,6 +3918,16 @@ version = "0.2.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e421abadd41a4225275504ea4d6566923418b7f05506fbc9c0fe86ba7396114b" +[[package]] +name = "twox-hash" +version = "1.6.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "97fee6b57c6a41524a810daee9286c02d7752c4253064d0b05472833a438f675" +dependencies = [ + "cfg-if", + "static_assertions", +] + [[package]] name = "typenum" version = "1.17.0" diff --git a/Cargo.toml b/Cargo.toml index 177d7cd..f5699ea 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -38,6 +38,8 @@ tonic-health = "0.10.2" regex = "1.10.4" clap = { version = "4.2", features = ["derive"] } +lz4_flex = "0.11.3" + [dev-dependencies] tracing-subscriber = "0.3.16" solana-logger = "1" diff --git a/examples/bench_geyser_grpc_accounts.rs b/examples/bench_geyser_grpc_accounts.rs index 3b88d78..c82e2d5 100644 --- a/examples/bench_geyser_grpc_accounts.rs +++ b/examples/bench_geyser_grpc_accounts.rs @@ -1,3 +1,4 @@ +use std::cmp::min; use std::collections::{HashMap, VecDeque}; use futures::{Stream, StreamExt}; use log::{debug, info}; @@ -225,10 +226,15 @@ fn start_tracking_account_consumer(mut geyser_messages_rx: Receiver, cu info!("got account update!!! {} - {:?} - {} bytes - {}", slot, account_pk, account_info.data.len(), account_info.lamports); - if let Some(data) = last_account_data { - let hash1 = hash(&data); + if let Some(prev_data) = last_account_data { + let hash1 = hash(&prev_data); let hash2 = hash(&account_info.data); info!("diff: {} {}", hash1, hash2); + + if hash1 != hash2 { + delta_compress(&prev_data, &account_info.data); + } + } last_account_data = Some(account_info.data.clone()); @@ -302,6 +308,28 @@ fn start_tracking_account_consumer(mut geyser_messages_rx: Receiver, cu }); } +fn delta_compress(prev_data: &Vec, data: &Vec) { + + let xor_region = min(prev_data.len(), data.len()); + let mut xor_diff = vec![0u8; xor_region]; + + for i in 0..xor_region { + xor_diff[i] = prev_data[i] ^ data[i]; + } + + // TODO https://users.rust-lang.org/t/how-to-find-common-prefix-of-two-byte-slices-effectively/25815/3 + let count_non_zero = xor_diff.iter().filter(|&x| *x != 0).count(); + info!("count_non_zero={} xor_region={}", count_non_zero, xor_region); + // info!("hex {:02X?}", xor_data); + + let compressed_xor = lz4_flex::compress_prepend_size(&xor_diff); + info!("compressed size of xor: {} (was {})", compressed_xor.len(), xor_diff.len()); + + let compressed_data = lz4_flex::compress_prepend_size(&data); + info!("compressed size of data: {} (was {})", compressed_data.len(), data.len()); + +} + fn get_epoch_sec() -> UnixTimestamp { SystemTime::now() .duration_since(UNIX_EPOCH)