Implementation of the footer for tiered account storage (#31161)
#### Summary of Changes
This PR implements the footer for the tiered account storage.
- Tiered account storage proposal: #30551
- Prototype implementation of the tiered account storage: #30626
This commit is contained in:
parent
1f91a90a53
commit
8120d31396
|
@ -6696,6 +6696,7 @@ dependencies = [
|
|||
"num-derive",
|
||||
"num-traits",
|
||||
"num_cpus",
|
||||
"num_enum 0.6.1",
|
||||
"once_cell",
|
||||
"ouroboros",
|
||||
"percentage",
|
||||
|
|
|
@ -5614,6 +5614,7 @@ dependencies = [
|
|||
"num-derive",
|
||||
"num-traits",
|
||||
"num_cpus",
|
||||
"num_enum 0.6.1",
|
||||
"once_cell",
|
||||
"ouroboros",
|
||||
"percentage",
|
||||
|
@ -5642,6 +5643,7 @@ dependencies = [
|
|||
"solana-vote-program",
|
||||
"solana-zk-token-proof-program",
|
||||
"solana-zk-token-sdk 1.16.0",
|
||||
"static_assertions",
|
||||
"strum",
|
||||
"strum_macros",
|
||||
"symlink",
|
||||
|
|
|
@ -48,6 +48,7 @@ solana-sdk = { path = "../../sdk", version = "=1.16.0" }
|
|||
solana-transaction-status = { path = "../../transaction-status", version = "=1.16.0" }
|
||||
solana-validator = { path = "../../validator", version = "=1.16.0" }
|
||||
solana-zk-token-sdk = { path = "../../zk-token-sdk", version = "=1.16.0" }
|
||||
static_assertions = "1.1.0"
|
||||
thiserror = "1.0"
|
||||
|
||||
[package]
|
||||
|
|
|
@ -34,6 +34,7 @@ modular-bitfield = { workspace = true }
|
|||
num-derive = { workspace = true }
|
||||
num-traits = { workspace = true }
|
||||
num_cpus = { workspace = true }
|
||||
num_enum = { workspace = true }
|
||||
once_cell = { workspace = true }
|
||||
ouroboros = { workspace = true }
|
||||
percentage = { workspace = true }
|
||||
|
@ -61,6 +62,7 @@ solana-system-program = { workspace = true }
|
|||
solana-vote-program = { workspace = true }
|
||||
solana-zk-token-proof-program = { workspace = true }
|
||||
solana-zk-token-sdk = { workspace = true }
|
||||
static_assertions = { workspace = true }
|
||||
strum = { workspace = true, features = ["derive"] }
|
||||
strum_macros = { workspace = true }
|
||||
symlink = { workspace = true }
|
||||
|
|
|
@ -75,6 +75,7 @@ pub mod stakes;
|
|||
pub mod static_ids;
|
||||
pub mod status_cache;
|
||||
mod storable_accounts;
|
||||
pub mod tiered_storage;
|
||||
pub mod transaction_batch;
|
||||
pub mod transaction_error_metrics;
|
||||
pub mod transaction_priority_details;
|
||||
|
|
|
@ -0,0 +1,3 @@
|
|||
pub mod file;
|
||||
pub mod footer;
|
||||
pub mod mmap_utils;
|
|
@ -0,0 +1,79 @@
|
|||
use std::{
|
||||
fs::{File, OpenOptions},
|
||||
io::{Read, Seek, SeekFrom, Write},
|
||||
mem,
|
||||
path::Path,
|
||||
};
|
||||
|
||||
/// Thin wrapper around a [`File`] that exposes the raw-byte read, write and
/// seek helpers used by the tiered account storage.
///
/// All methods take `&self`; the I/O goes through `&File`, which implements
/// `Read`, `Write` and `Seek`.
#[derive(Debug)]
pub struct TieredStorageFile(pub File);

impl TieredStorageFile {
    /// Opens the existing file at `file_path` for reading only.
    ///
    /// # Panics
    ///
    /// Panics if the file cannot be opened.
    pub fn new_readonly(file_path: impl AsRef<Path>) -> Self {
        match OpenOptions::new().read(true).create(false).open(&file_path) {
            Ok(file) => Self(file),
            Err(e) => panic!(
                "[TieredStorageError] Unable to open {:?} as read-only: {:?}",
                file_path.as_ref().display(),
                e
            ),
        }
    }

    /// Opens the file at `file_path` for writing, creating it if it does not
    /// exist yet. An existing file is opened as-is (it is not truncated).
    ///
    /// # Panics
    ///
    /// Panics if the file cannot be opened or created.
    pub fn new_writable(file_path: impl AsRef<Path>) -> Self {
        match OpenOptions::new().write(true).create(true).open(&file_path) {
            Ok(file) => Self(file),
            Err(e) => panic!(
                "[TieredStorageError] Unable to create {:?} as writable: {:?}",
                file_path.as_ref().display(),
                e,
            ),
        }
    }

    /// Writes the in-memory bytes of `value` at the current file position and
    /// returns the number of bytes written (`size_of::<T>()`).
    pub fn write_type<T>(&self, value: &T) -> Result<usize, std::io::Error> {
        let byte_len = mem::size_of::<T>();
        // SAFETY: `value` is a valid reference, so it points to `byte_len`
        // readable bytes that stay borrowed for as long as `raw` is alive.
        // NOTE(review): any padding bytes inside `T` are read as well, so
        // callers should only pass padding-free plain-data types.
        let raw = unsafe { std::slice::from_raw_parts((value as *const T).cast::<u8>(), byte_len) };

        self.write_bytes(raw)
    }

    /// Fills `value` with the next `size_of::<T>()` bytes of the file.
    pub fn read_type<T>(&self, value: &mut T) -> Result<(), std::io::Error> {
        let byte_len = mem::size_of::<T>();
        // SAFETY: `value` is a valid exclusive reference, so it points to
        // `byte_len` writable bytes that nothing else can access while `raw`
        // is alive.
        // NOTE(review): the bytes come straight from the file, so `T` must be
        // a type for which every bit pattern is valid — confirm at call sites.
        let raw =
            unsafe { std::slice::from_raw_parts_mut((value as *mut T).cast::<u8>(), byte_len) };

        self.read_bytes(raw)
    }

    /// Moves the file cursor to `offset` bytes from the start of the file and
    /// returns the new position.
    pub fn seek(&self, offset: u64) -> Result<u64, std::io::Error> {
        let mut file = &self.0;
        file.seek(SeekFrom::Start(offset))
    }

    /// Moves the file cursor to `offset` bytes relative to the end of the
    /// file (negative values point inside the file) and returns the new
    /// position measured from the start.
    pub fn seek_from_end(&self, offset: i64) -> Result<u64, std::io::Error> {
        let mut file = &self.0;
        file.seek(SeekFrom::End(offset))
    }

    /// Writes all of `bytes` at the current file position and returns
    /// `bytes.len()`.
    pub fn write_bytes(&self, bytes: &[u8]) -> Result<usize, std::io::Error> {
        let mut file = &self.0;
        file.write_all(bytes).map(|()| bytes.len())
    }

    /// Reads exactly `buffer.len()` bytes from the current file position into
    /// `buffer`, failing if the file ends first.
    pub fn read_bytes(&self, buffer: &mut [u8]) -> Result<(), std::io::Error> {
        let mut file = &self.0;
        file.read_exact(buffer)
    }
}
|
|
@ -0,0 +1,314 @@
|
|||
use {
|
||||
crate::tiered_storage::{file::TieredStorageFile, mmap_utils::get_type},
|
||||
memmap2::Mmap,
|
||||
solana_sdk::{hash::Hash, pubkey::Pubkey},
|
||||
std::{mem, path::Path},
|
||||
};
|
||||
|
||||
pub const FOOTER_FORMAT_VERSION: u64 = 1;
|
||||
|
||||
/// The size of the footer struct + the magic number at the end.
|
||||
pub const FOOTER_SIZE: usize =
|
||||
mem::size_of::<TieredStorageFooter>() + mem::size_of::<TieredStorageMagicNumber>();
|
||||
static_assertions::const_assert_eq!(mem::size_of::<TieredStorageFooter>(), 160);
|
||||
|
||||
/// The size of the ending part of the footer. This size should remain unchanged
|
||||
/// even when the footer's format changes.
|
||||
pub const FOOTER_TAIL_SIZE: usize = 24;
|
||||
|
||||
/// The ending 8 bytes of a valid tiered account storage file.
|
||||
pub const FOOTER_MAGIC_NUMBER: u64 = 0x502A2AB5; // SOLALABS -> SOLANA LABS
|
||||
|
||||
#[derive(Debug, PartialEq, Eq)]
|
||||
#[repr(C)]
|
||||
pub struct TieredStorageMagicNumber(pub u64);
|
||||
|
||||
impl Default for TieredStorageMagicNumber {
|
||||
fn default() -> Self {
|
||||
Self(FOOTER_MAGIC_NUMBER)
|
||||
}
|
||||
}
|
||||
|
||||
#[repr(u16)]
|
||||
#[derive(
|
||||
Clone,
|
||||
Copy,
|
||||
Debug,
|
||||
Default,
|
||||
Eq,
|
||||
Hash,
|
||||
PartialEq,
|
||||
num_enum::IntoPrimitive,
|
||||
num_enum::TryFromPrimitive,
|
||||
)]
|
||||
pub enum AccountMetaFormat {
|
||||
#[default]
|
||||
Hot = 0,
|
||||
Cold = 1,
|
||||
}
|
||||
|
||||
#[repr(u16)]
|
||||
#[derive(
|
||||
Clone,
|
||||
Copy,
|
||||
Debug,
|
||||
Default,
|
||||
Eq,
|
||||
Hash,
|
||||
PartialEq,
|
||||
num_enum::IntoPrimitive,
|
||||
num_enum::TryFromPrimitive,
|
||||
)]
|
||||
pub enum AccountBlockFormat {
|
||||
#[default]
|
||||
AlignedRaw = 0,
|
||||
Lz4 = 1,
|
||||
}
|
||||
|
||||
#[repr(u16)]
|
||||
#[derive(
|
||||
Clone,
|
||||
Copy,
|
||||
Debug,
|
||||
Default,
|
||||
Eq,
|
||||
Hash,
|
||||
PartialEq,
|
||||
num_enum::IntoPrimitive,
|
||||
num_enum::TryFromPrimitive,
|
||||
)]
|
||||
pub enum OwnersBlockFormat {
|
||||
#[default]
|
||||
LocalIndex = 0,
|
||||
}
|
||||
|
||||
#[repr(u16)]
|
||||
#[derive(
|
||||
Clone,
|
||||
Copy,
|
||||
Debug,
|
||||
Default,
|
||||
Eq,
|
||||
Hash,
|
||||
PartialEq,
|
||||
num_enum::IntoPrimitive,
|
||||
num_enum::TryFromPrimitive,
|
||||
)]
|
||||
pub enum AccountIndexFormat {
|
||||
// This format does not support any fast lookup.
|
||||
// Any query from account hash to account meta requires linear search.
|
||||
#[default]
|
||||
Linear = 0,
|
||||
// Similar to index, but this format also stores the offset of each account
|
||||
// meta in the index block.
|
||||
LinearIndex = 1,
|
||||
}
|
||||
|
||||
#[derive(Debug, PartialEq, Eq, Clone)]
|
||||
#[repr(C)]
|
||||
pub struct TieredStorageFooter {
|
||||
// formats
|
||||
/// The format of the account meta entry.
|
||||
pub account_meta_format: AccountMetaFormat,
|
||||
/// The format of the owners block.
|
||||
pub owners_block_format: OwnersBlockFormat,
|
||||
/// The format of the account index block.
|
||||
pub account_index_format: AccountIndexFormat,
|
||||
/// The format of the account block.
|
||||
pub account_block_format: AccountBlockFormat,
|
||||
|
||||
// Account-block related
|
||||
/// The number of account entries.
|
||||
pub account_entry_count: u32,
|
||||
/// The size of each account meta entry in bytes.
|
||||
pub account_meta_entry_size: u32,
|
||||
/// The default size of an account block before compression.
|
||||
///
|
||||
/// If the size of one account (meta + data + optional fields) before
|
||||
/// compression is bigger than this number, than it is considered a
|
||||
/// blob account and it will have its own account block.
|
||||
pub account_block_size: u64,
|
||||
|
||||
// Owner-related
|
||||
/// The number of owners.
|
||||
pub owner_count: u32,
|
||||
/// The size of each owner entry.
|
||||
pub owner_entry_size: u32,
|
||||
|
||||
// Offsets
|
||||
// Note that offset to the account blocks is omitted as it's always 0.
|
||||
/// The offset pointing to the first byte of the account index block.
|
||||
pub account_index_offset: u64,
|
||||
/// The offset pointing to the first byte of the owners block.
|
||||
pub owners_offset: u64,
|
||||
|
||||
// account range
|
||||
/// The smallest account address in this file.
|
||||
pub min_account_address: Pubkey,
|
||||
/// The largest account address in this file.
|
||||
pub max_account_address: Pubkey,
|
||||
|
||||
/// A hash that represents a tiered accounts file for consistency check.
|
||||
pub hash: Hash,
|
||||
|
||||
// The below fields belong to footer tail.
|
||||
// The sum of their sizes should match FOOTER_TAIL_SIZE.
|
||||
/// The size of the footer including the magic number.
|
||||
pub footer_size: u64,
|
||||
/// The format version of the tiered accounts file.
|
||||
pub format_version: u64,
|
||||
// This field is persisted in the storage but not in this struct.
|
||||
// The number should match FOOTER_MAGIC_NUMBER.
|
||||
// pub magic_number: u64,
|
||||
}
|
||||
|
||||
impl Default for TieredStorageFooter {
|
||||
fn default() -> Self {
|
||||
Self {
|
||||
account_meta_format: AccountMetaFormat::default(),
|
||||
owners_block_format: OwnersBlockFormat::default(),
|
||||
account_index_format: AccountIndexFormat::default(),
|
||||
account_block_format: AccountBlockFormat::default(),
|
||||
account_entry_count: 0,
|
||||
account_meta_entry_size: 0,
|
||||
account_block_size: 0,
|
||||
owner_count: 0,
|
||||
owner_entry_size: 0,
|
||||
account_index_offset: 0,
|
||||
owners_offset: 0,
|
||||
hash: Hash::new_unique(),
|
||||
min_account_address: Pubkey::default(),
|
||||
max_account_address: Pubkey::default(),
|
||||
footer_size: FOOTER_SIZE as u64,
|
||||
format_version: FOOTER_FORMAT_VERSION,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl TieredStorageFooter {
|
||||
pub fn new_from_path(path: impl AsRef<Path>) -> std::io::Result<Self> {
|
||||
let file = TieredStorageFile::new_readonly(path);
|
||||
Self::new_from_footer_block(&file)
|
||||
}
|
||||
|
||||
pub fn write_footer_block(&self, file: &TieredStorageFile) -> std::io::Result<()> {
|
||||
file.write_type(self)?;
|
||||
file.write_type(&TieredStorageMagicNumber::default())?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub fn new_from_footer_block(file: &TieredStorageFile) -> std::io::Result<Self> {
|
||||
let mut footer_size: u64 = 0;
|
||||
let mut footer_version: u64 = 0;
|
||||
let mut magic_number = TieredStorageMagicNumber(0);
|
||||
|
||||
file.seek_from_end(-(FOOTER_TAIL_SIZE as i64))?;
|
||||
file.read_type(&mut footer_size)?;
|
||||
file.read_type(&mut footer_version)?;
|
||||
file.read_type(&mut magic_number)?;
|
||||
|
||||
if magic_number != TieredStorageMagicNumber::default() {
|
||||
return Err(std::io::Error::new(
|
||||
std::io::ErrorKind::Other,
|
||||
"TieredStorageError: Magic mumber mismatch",
|
||||
));
|
||||
}
|
||||
|
||||
let mut footer = Self::default();
|
||||
file.seek_from_end(-(footer_size as i64))?;
|
||||
file.read_type(&mut footer)?;
|
||||
|
||||
Ok(footer)
|
||||
}
|
||||
|
||||
pub fn new_from_mmap(map: &Mmap) -> std::io::Result<&TieredStorageFooter> {
|
||||
let offset = map.len().saturating_sub(FOOTER_TAIL_SIZE);
|
||||
let (footer_size, offset) = get_type::<u64>(map, offset)?;
|
||||
let (_footer_version, offset) = get_type::<u64>(map, offset)?;
|
||||
let (magic_number, _offset) = get_type::<TieredStorageMagicNumber>(map, offset)?;
|
||||
|
||||
if *magic_number != TieredStorageMagicNumber::default() {
|
||||
return Err(std::io::Error::new(
|
||||
std::io::ErrorKind::Other,
|
||||
"TieredStorageError: Magic mumber mismatch",
|
||||
));
|
||||
}
|
||||
|
||||
let (footer, _offset) =
|
||||
get_type::<TieredStorageFooter>(map, map.len().saturating_sub(*footer_size as usize))?;
|
||||
|
||||
Ok(footer)
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use {
        super::*,
        crate::{
            append_vec::test_utils::get_append_vec_path, tiered_storage::file::TieredStorageFile,
        },
        memoffset::offset_of,
        solana_sdk::hash::Hash,
    };

    /// Writes a footer to a fresh file, reads it back through
    /// `new_from_path`, and expects an identical value.
    #[test]
    fn test_footer() {
        let path = get_append_vec_path("test_file_footer");
        let expected_footer = TieredStorageFooter {
            account_meta_format: AccountMetaFormat::Hot,
            owners_block_format: OwnersBlockFormat::LocalIndex,
            account_index_format: AccountIndexFormat::Linear,
            account_block_format: AccountBlockFormat::AlignedRaw,
            account_entry_count: 300,
            account_meta_entry_size: 24,
            account_block_size: 4096,
            owner_count: 250,
            owner_entry_size: 32,
            account_index_offset: 1069600,
            owners_offset: 1081200,
            hash: Hash::new_unique(),
            min_account_address: Pubkey::default(),
            max_account_address: Pubkey::new_unique(),
            footer_size: FOOTER_SIZE as u64,
            format_version: FOOTER_FORMAT_VERSION,
        };

        // Persist the expected footer, then drop the writer so the file is
        // closed before it is reopened below.
        let writer = TieredStorageFile::new_writable(&path.path);
        expected_footer.write_footer_block(&writer).unwrap();
        drop(writer);

        // Reopen the same storage, and expect the persisted footer is
        // the same as what we have written.
        let loaded_footer = TieredStorageFooter::new_from_path(&path.path).unwrap();
        assert_eq!(expected_footer, loaded_footer);
    }

    /// Pins the byte offset of every footer field.
    #[test]
    fn test_footer_layout() {
        let expected_layout = [
            (
                "account_meta_format",
                offset_of!(TieredStorageFooter, account_meta_format),
                0x00,
            ),
            (
                "owners_block_format",
                offset_of!(TieredStorageFooter, owners_block_format),
                0x02,
            ),
            (
                "account_index_format",
                offset_of!(TieredStorageFooter, account_index_format),
                0x04,
            ),
            (
                "account_block_format",
                offset_of!(TieredStorageFooter, account_block_format),
                0x06,
            ),
            (
                "account_entry_count",
                offset_of!(TieredStorageFooter, account_entry_count),
                0x08,
            ),
            (
                "account_meta_entry_size",
                offset_of!(TieredStorageFooter, account_meta_entry_size),
                0x0C,
            ),
            (
                "account_block_size",
                offset_of!(TieredStorageFooter, account_block_size),
                0x10,
            ),
            (
                "owner_count",
                offset_of!(TieredStorageFooter, owner_count),
                0x18,
            ),
            (
                "owner_entry_size",
                offset_of!(TieredStorageFooter, owner_entry_size),
                0x1C,
            ),
            (
                "account_index_offset",
                offset_of!(TieredStorageFooter, account_index_offset),
                0x20,
            ),
            (
                "owners_offset",
                offset_of!(TieredStorageFooter, owners_offset),
                0x28,
            ),
            (
                "min_account_address",
                offset_of!(TieredStorageFooter, min_account_address),
                0x30,
            ),
            (
                "max_account_address",
                offset_of!(TieredStorageFooter, max_account_address),
                0x50,
            ),
            ("hash", offset_of!(TieredStorageFooter, hash), 0x70),
            (
                "footer_size",
                offset_of!(TieredStorageFooter, footer_size),
                0x90,
            ),
            (
                "format_version",
                offset_of!(TieredStorageFooter, format_version),
                0x98,
            ),
        ];

        for (field, actual, expected) in expected_layout {
            assert_eq!(actual, expected, "unexpected offset for field `{}`", field);
        }
    }
}
|
|
@ -0,0 +1,37 @@
|
|||
use {
|
||||
crate::{accounts_file::ALIGN_BOUNDARY_OFFSET, u64_align},
|
||||
log::*,
|
||||
memmap2::Mmap,
|
||||
};
|
||||
|
||||
pub fn get_type<T>(map: &Mmap, offset: usize) -> std::io::Result<(&T, usize)> {
|
||||
let (data, next) = get_slice(map, offset, std::mem::size_of::<T>())?;
|
||||
let ptr = data.as_ptr() as *const T;
|
||||
debug_assert!(ptr as usize % std::mem::align_of::<T>() == 0);
|
||||
Ok((unsafe { &*ptr }, next))
|
||||
}
|
||||
|
||||
/// Get a reference to the data at `offset` of `size` bytes if that slice
|
||||
/// doesn't overrun the internal buffer. Otherwise return an Error.
|
||||
/// Also return the offset of the first byte after the requested data that
|
||||
/// falls on a 64-byte boundary.
|
||||
pub fn get_slice(map: &Mmap, offset: usize, size: usize) -> std::io::Result<(&[u8], usize)> {
|
||||
let (next, overflow) = offset.overflowing_add(size);
|
||||
if overflow || next > map.len() {
|
||||
error!(
|
||||
"Requested offset {} and size {} while mmap only has length {}",
|
||||
offset,
|
||||
size,
|
||||
map.len()
|
||||
);
|
||||
return Err(std::io::Error::new(
|
||||
std::io::ErrorKind::AddrNotAvailable,
|
||||
"Requested offset and data length exceeds the mmap slice",
|
||||
));
|
||||
}
|
||||
let data = &map[offset..next];
|
||||
let next = u64_align!(next);
|
||||
let ptr = data.as_ptr() as *const u8;
|
||||
|
||||
Ok((unsafe { std::slice::from_raw_parts(ptr, size) }, next))
|
||||
}
|
Loading…
Reference in New Issue