From 8120d313968d5d006dfaf31e5d5b6fadcf0e0ac6 Mon Sep 17 00:00:00 2001 From: Yueh-Hsuan Chiang <93241502+yhchiang-sol@users.noreply.github.com> Date: Sun, 7 May 2023 13:18:10 -0700 Subject: [PATCH] Implementation of the footer for tiered account storage (#31161) #### Summary of Changes This PR includes the implementation of the footer for the tiered account storage. Tiered account storage proposal: #30551 The prototype implementation of the tiered account storage: #30626. --- Cargo.lock | 1 + programs/sbf/Cargo.lock | 2 + programs/sbf/Cargo.toml | 1 + runtime/Cargo.toml | 2 + runtime/src/lib.rs | 1 + runtime/src/tiered_storage.rs | 3 + runtime/src/tiered_storage/file.rs | 79 ++++++ runtime/src/tiered_storage/footer.rs | 314 +++++++++++++++++++++++ runtime/src/tiered_storage/mmap_utils.rs | 37 +++ 9 files changed, 440 insertions(+) create mode 100644 runtime/src/tiered_storage.rs create mode 100644 runtime/src/tiered_storage/file.rs create mode 100644 runtime/src/tiered_storage/footer.rs create mode 100644 runtime/src/tiered_storage/mmap_utils.rs diff --git a/Cargo.lock b/Cargo.lock index a9e81c7037..ba8d468283 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -6696,6 +6696,7 @@ dependencies = [ "num-derive", "num-traits", "num_cpus", + "num_enum 0.6.1", "once_cell", "ouroboros", "percentage", diff --git a/programs/sbf/Cargo.lock b/programs/sbf/Cargo.lock index 12dc628a70..b7c44e77cb 100644 --- a/programs/sbf/Cargo.lock +++ b/programs/sbf/Cargo.lock @@ -5614,6 +5614,7 @@ dependencies = [ "num-derive", "num-traits", "num_cpus", + "num_enum 0.6.1", "once_cell", "ouroboros", "percentage", @@ -5642,6 +5643,7 @@ dependencies = [ "solana-vote-program", "solana-zk-token-proof-program", "solana-zk-token-sdk 1.16.0", + "static_assertions", "strum", "strum_macros", "symlink", diff --git a/programs/sbf/Cargo.toml b/programs/sbf/Cargo.toml index 99c2bae1e5..b8146b76ff 100644 --- a/programs/sbf/Cargo.toml +++ b/programs/sbf/Cargo.toml @@ -48,6 +48,7 @@ solana-sdk = { path = 
"../../sdk", version = "=1.16.0" } solana-transaction-status = { path = "../../transaction-status", version = "=1.16.0" } solana-validator = { path = "../../validator", version = "=1.16.0" } solana-zk-token-sdk = { path = "../../zk-token-sdk", version = "=1.16.0" } +static_assertions = "1.1.0" thiserror = "1.0" [package] diff --git a/runtime/Cargo.toml b/runtime/Cargo.toml index 06d7d8cf6f..003bf38300 100644 --- a/runtime/Cargo.toml +++ b/runtime/Cargo.toml @@ -34,6 +34,7 @@ modular-bitfield = { workspace = true } num-derive = { workspace = true } num-traits = { workspace = true } num_cpus = { workspace = true } +num_enum = { workspace = true } once_cell = { workspace = true } ouroboros = { workspace = true } percentage = { workspace = true } @@ -61,6 +62,7 @@ solana-system-program = { workspace = true } solana-vote-program = { workspace = true } solana-zk-token-proof-program = { workspace = true } solana-zk-token-sdk = { workspace = true } +static_assertions = { workspace = true } strum = { workspace = true, features = ["derive"] } strum_macros = { workspace = true } symlink = { workspace = true } diff --git a/runtime/src/lib.rs b/runtime/src/lib.rs index 6b8d56714b..27f288ce8e 100644 --- a/runtime/src/lib.rs +++ b/runtime/src/lib.rs @@ -75,6 +75,7 @@ pub mod stakes; pub mod static_ids; pub mod status_cache; mod storable_accounts; +pub mod tiered_storage; pub mod transaction_batch; pub mod transaction_error_metrics; pub mod transaction_priority_details; diff --git a/runtime/src/tiered_storage.rs b/runtime/src/tiered_storage.rs new file mode 100644 index 0000000000..03c96d6ac4 --- /dev/null +++ b/runtime/src/tiered_storage.rs @@ -0,0 +1,3 @@ +pub mod file; +pub mod footer; +pub mod mmap_utils; diff --git a/runtime/src/tiered_storage/file.rs b/runtime/src/tiered_storage/file.rs new file mode 100644 index 0000000000..36d1076c79 --- /dev/null +++ b/runtime/src/tiered_storage/file.rs @@ -0,0 +1,79 @@ +use std::{ + fs::{File, OpenOptions}, + io::{Read, Seek, SeekFrom, 
Write}, + mem, + path::Path, +}; + +#[derive(Debug)] +pub struct TieredStorageFile(pub File); + +impl TieredStorageFile { + pub fn new_readonly(file_path: impl AsRef) -> Self { + Self( + OpenOptions::new() + .read(true) + .create(false) + .open(&file_path) + .unwrap_or_else(|e| { + panic!( + "[TieredStorageError] Unable to open {:?} as read-only: {:?}", + file_path.as_ref().display(), + e + ); + }), + ) + } + + pub fn new_writable(file_path: impl AsRef) -> Self { + Self( + OpenOptions::new() + .write(true) + .create(true) + .open(&file_path) + .unwrap_or_else(|e| { + panic!( + "[TieredStorageError] Unable to create {:?} as writable: {:?}", + file_path.as_ref().display(), + e, + ); + }), + ) + } + + pub fn write_type(&self, value: &T) -> Result { + let ptr = value as *const _ as *const u8; + let slice = unsafe { std::slice::from_raw_parts(ptr, mem::size_of::()) }; + (&self.0).write_all(slice)?; + + Ok(std::mem::size_of::()) + } + + pub fn read_type(&self, value: &mut T) -> Result<(), std::io::Error> { + let ptr = value as *mut _ as *mut u8; + let slice = unsafe { std::slice::from_raw_parts_mut(ptr, mem::size_of::()) }; + (&self.0).read_exact(slice)?; + + Ok(()) + } + + pub fn seek(&self, offset: u64) -> Result { + (&self.0).seek(SeekFrom::Start(offset)) + } + + pub fn seek_from_end(&self, offset: i64) -> Result { + (&self.0).seek(SeekFrom::End(offset)) + } + + pub fn write_bytes(&self, bytes: &[u8]) -> Result { + (&self.0).write_all(bytes)?; + + Ok(bytes.len()) + } + + pub fn read_bytes(&self, buffer: &mut [u8]) -> Result<(), std::io::Error> { + (&self.0).read_exact(buffer)?; + + Ok(()) + } +} diff --git a/runtime/src/tiered_storage/footer.rs b/runtime/src/tiered_storage/footer.rs new file mode 100644 index 0000000000..7a74c308fc --- /dev/null +++ b/runtime/src/tiered_storage/footer.rs @@ -0,0 +1,314 @@ +use { + crate::tiered_storage::{file::TieredStorageFile, mmap_utils::get_type}, + memmap2::Mmap, + solana_sdk::{hash::Hash, pubkey::Pubkey}, + std::{mem, 
path::Path}, +}; + +pub const FOOTER_FORMAT_VERSION: u64 = 1; + +/// The size of the footer struct + the magic number at the end. +pub const FOOTER_SIZE: usize = + mem::size_of::() + mem::size_of::(); +static_assertions::const_assert_eq!(mem::size_of::(), 160); + +/// The size of the ending part of the footer. This size should remain unchanged +/// even when the footer's format changes. +pub const FOOTER_TAIL_SIZE: usize = 24; + +/// The ending 8 bytes of a valid tiered account storage file. +pub const FOOTER_MAGIC_NUMBER: u64 = 0x502A2AB5; // SOLALABS -> SOLANA LABS + +#[derive(Debug, PartialEq, Eq)] +#[repr(C)] +pub struct TieredStorageMagicNumber(pub u64); + +impl Default for TieredStorageMagicNumber { + fn default() -> Self { + Self(FOOTER_MAGIC_NUMBER) + } +} + +#[repr(u16)] +#[derive( + Clone, + Copy, + Debug, + Default, + Eq, + Hash, + PartialEq, + num_enum::IntoPrimitive, + num_enum::TryFromPrimitive, +)] +pub enum AccountMetaFormat { + #[default] + Hot = 0, + Cold = 1, +} + +#[repr(u16)] +#[derive( + Clone, + Copy, + Debug, + Default, + Eq, + Hash, + PartialEq, + num_enum::IntoPrimitive, + num_enum::TryFromPrimitive, +)] +pub enum AccountBlockFormat { + #[default] + AlignedRaw = 0, + Lz4 = 1, +} + +#[repr(u16)] +#[derive( + Clone, + Copy, + Debug, + Default, + Eq, + Hash, + PartialEq, + num_enum::IntoPrimitive, + num_enum::TryFromPrimitive, +)] +pub enum OwnersBlockFormat { + #[default] + LocalIndex = 0, +} + +#[repr(u16)] +#[derive( + Clone, + Copy, + Debug, + Default, + Eq, + Hash, + PartialEq, + num_enum::IntoPrimitive, + num_enum::TryFromPrimitive, +)] +pub enum AccountIndexFormat { + // This format does not support any fast lookup. + // Any query from account hash to account meta requires linear search. + #[default] + Linear = 0, + // Similar to index, but this format also stores the offset of each account + // meta in the index block. 
+ LinearIndex = 1, +} + +#[derive(Debug, PartialEq, Eq, Clone)] +#[repr(C)] +pub struct TieredStorageFooter { + // formats + /// The format of the account meta entry. + pub account_meta_format: AccountMetaFormat, + /// The format of the owners block. + pub owners_block_format: OwnersBlockFormat, + /// The format of the account index block. + pub account_index_format: AccountIndexFormat, + /// The format of the account block. + pub account_block_format: AccountBlockFormat, + + // Account-block related + /// The number of account entries. + pub account_entry_count: u32, + /// The size of each account meta entry in bytes. + pub account_meta_entry_size: u32, + /// The default size of an account block before compression. + /// + /// If the size of one account (meta + data + optional fields) before + /// compression is bigger than this number, then it is considered a + /// blob account and it will have its own account block. + pub account_block_size: u64, + + // Owner-related + /// The number of owners. + pub owner_count: u32, + /// The size of each owner entry. + pub owner_entry_size: u32, + + // Offsets + // Note that offset to the account blocks is omitted as it's always 0. + /// The offset pointing to the first byte of the account index block. + pub account_index_offset: u64, + /// The offset pointing to the first byte of the owners block. + pub owners_offset: u64, + + // account range + /// The smallest account address in this file. + pub min_account_address: Pubkey, + /// The largest account address in this file. + pub max_account_address: Pubkey, + + /// A hash that represents a tiered accounts file for consistency check. + pub hash: Hash, + + // The below fields belong to footer tail. + // The sum of their sizes should match FOOTER_TAIL_SIZE. + /// The size of the footer including the magic number. + pub footer_size: u64, + /// The format version of the tiered accounts file. 
+ pub format_version: u64, + // This field is persisted in the storage but not in this struct. + // The number should match FOOTER_MAGIC_NUMBER. + // pub magic_number: u64, +} + +impl Default for TieredStorageFooter { + fn default() -> Self { + Self { + account_meta_format: AccountMetaFormat::default(), + owners_block_format: OwnersBlockFormat::default(), + account_index_format: AccountIndexFormat::default(), + account_block_format: AccountBlockFormat::default(), + account_entry_count: 0, + account_meta_entry_size: 0, + account_block_size: 0, + owner_count: 0, + owner_entry_size: 0, + account_index_offset: 0, + owners_offset: 0, + hash: Hash::new_unique(), + min_account_address: Pubkey::default(), + max_account_address: Pubkey::default(), + footer_size: FOOTER_SIZE as u64, + format_version: FOOTER_FORMAT_VERSION, + } + } +} + +impl TieredStorageFooter { + pub fn new_from_path(path: impl AsRef) -> std::io::Result { + let file = TieredStorageFile::new_readonly(path); + Self::new_from_footer_block(&file) + } + + pub fn write_footer_block(&self, file: &TieredStorageFile) -> std::io::Result<()> { + file.write_type(self)?; + file.write_type(&TieredStorageMagicNumber::default())?; + + Ok(()) + } + + pub fn new_from_footer_block(file: &TieredStorageFile) -> std::io::Result { + let mut footer_size: u64 = 0; + let mut footer_version: u64 = 0; + let mut magic_number = TieredStorageMagicNumber(0); + + file.seek_from_end(-(FOOTER_TAIL_SIZE as i64))?; + file.read_type(&mut footer_size)?; + file.read_type(&mut footer_version)?; + file.read_type(&mut magic_number)?; + + if magic_number != TieredStorageMagicNumber::default() { + return Err(std::io::Error::new( + std::io::ErrorKind::Other, + "TieredStorageError: Magic mumber mismatch", + )); + } + + let mut footer = Self::default(); + file.seek_from_end(-(footer_size as i64))?; + file.read_type(&mut footer)?; + + Ok(footer) + } + + pub fn new_from_mmap(map: &Mmap) -> std::io::Result<&TieredStorageFooter> { + let offset = 
map.len().saturating_sub(FOOTER_TAIL_SIZE); + let (footer_size, offset) = get_type::(map, offset)?; + let (_footer_version, offset) = get_type::(map, offset)?; + let (magic_number, _offset) = get_type::(map, offset)?; + + if *magic_number != TieredStorageMagicNumber::default() { + return Err(std::io::Error::new( + std::io::ErrorKind::Other, + "TieredStorageError: Magic mumber mismatch", + )); + } + + let (footer, _offset) = + get_type::(map, map.len().saturating_sub(*footer_size as usize))?; + + Ok(footer) + } +} + +#[cfg(test)] +mod tests { + use { + super::*, + crate::{ + append_vec::test_utils::get_append_vec_path, tiered_storage::file::TieredStorageFile, + }, + memoffset::offset_of, + solana_sdk::hash::Hash, + }; + + #[test] + fn test_footer() { + let path = get_append_vec_path("test_file_footer"); + let expected_footer = TieredStorageFooter { + account_meta_format: AccountMetaFormat::Hot, + owners_block_format: OwnersBlockFormat::LocalIndex, + account_index_format: AccountIndexFormat::Linear, + account_block_format: AccountBlockFormat::AlignedRaw, + account_entry_count: 300, + account_meta_entry_size: 24, + account_block_size: 4096, + owner_count: 250, + owner_entry_size: 32, + account_index_offset: 1069600, + owners_offset: 1081200, + hash: Hash::new_unique(), + min_account_address: Pubkey::default(), + max_account_address: Pubkey::new_unique(), + footer_size: FOOTER_SIZE as u64, + format_version: FOOTER_FORMAT_VERSION, + }; + + // Persist the expected footer. + { + let file = TieredStorageFile::new_writable(&path.path); + expected_footer.write_footer_block(&file).unwrap(); + } + + // Reopen the same storage, and expect the persisted footer is + // the same as what we have written. 
+ { + let footer = TieredStorageFooter::new_from_path(&path.path).unwrap(); + assert_eq!(expected_footer, footer); + } + } + + #[test] + fn test_footer_layout() { + assert_eq!(offset_of!(TieredStorageFooter, account_meta_format), 0x00); + assert_eq!(offset_of!(TieredStorageFooter, owners_block_format), 0x02); + assert_eq!(offset_of!(TieredStorageFooter, account_index_format), 0x04); + assert_eq!(offset_of!(TieredStorageFooter, account_block_format), 0x06); + assert_eq!(offset_of!(TieredStorageFooter, account_entry_count), 0x08); + assert_eq!( + offset_of!(TieredStorageFooter, account_meta_entry_size), + 0x0C + ); + assert_eq!(offset_of!(TieredStorageFooter, account_block_size), 0x10); + assert_eq!(offset_of!(TieredStorageFooter, owner_count), 0x18); + assert_eq!(offset_of!(TieredStorageFooter, owner_entry_size), 0x1C); + assert_eq!(offset_of!(TieredStorageFooter, account_index_offset), 0x20); + assert_eq!(offset_of!(TieredStorageFooter, owners_offset), 0x28); + assert_eq!(offset_of!(TieredStorageFooter, min_account_address), 0x30); + assert_eq!(offset_of!(TieredStorageFooter, max_account_address), 0x50); + assert_eq!(offset_of!(TieredStorageFooter, hash), 0x70); + assert_eq!(offset_of!(TieredStorageFooter, footer_size), 0x90); + assert_eq!(offset_of!(TieredStorageFooter, format_version), 0x98); + } +} diff --git a/runtime/src/tiered_storage/mmap_utils.rs b/runtime/src/tiered_storage/mmap_utils.rs new file mode 100644 index 0000000000..5fac10f474 --- /dev/null +++ b/runtime/src/tiered_storage/mmap_utils.rs @@ -0,0 +1,37 @@ +use { + crate::{accounts_file::ALIGN_BOUNDARY_OFFSET, u64_align}, + log::*, + memmap2::Mmap, +}; + +pub fn get_type(map: &Mmap, offset: usize) -> std::io::Result<(&T, usize)> { + let (data, next) = get_slice(map, offset, std::mem::size_of::())?; + let ptr = data.as_ptr() as *const T; + debug_assert!(ptr as usize % std::mem::align_of::() == 0); + Ok((unsafe { &*ptr }, next)) +} + +/// Get a reference to the data at `offset` of `size` bytes if that 
slice +/// doesn't overrun the internal buffer. Otherwise return an Error. +/// Also return the offset of the first byte after the requested data that +/// falls on a 64-byte boundary. +pub fn get_slice(map: &Mmap, offset: usize, size: usize) -> std::io::Result<(&[u8], usize)> { + let (next, overflow) = offset.overflowing_add(size); + if overflow || next > map.len() { + error!( + "Requested offset {} and size {} while mmap only has length {}", + offset, + size, + map.len() + ); + return Err(std::io::Error::new( + std::io::ErrorKind::AddrNotAvailable, + "Requested offset and data length exceeds the mmap slice", + )); + } + let data = &map[offset..next]; + let next = u64_align!(next); + let ptr = data.as_ptr() as *const u8; + + Ok((unsafe { std::slice::from_raw_parts(ptr, size) }, next)) +}