Implementation of the footer for tiered account storage (#31161)

#### Summary of Changes
This PR includes the implementation of the footer for the tiered account storage.

Tiered account storage proposal: #30551
The prototype implementation of the tiered account storage: #30626.
This commit is contained in:
Yueh-Hsuan Chiang 2023-05-07 13:18:10 -07:00 committed by GitHub
parent 1f91a90a53
commit 8120d31396
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
9 changed files with 440 additions and 0 deletions

1
Cargo.lock generated
View File

@ -6696,6 +6696,7 @@ dependencies = [
"num-derive",
"num-traits",
"num_cpus",
"num_enum 0.6.1",
"once_cell",
"ouroboros",
"percentage",

View File

@ -5614,6 +5614,7 @@ dependencies = [
"num-derive",
"num-traits",
"num_cpus",
"num_enum 0.6.1",
"once_cell",
"ouroboros",
"percentage",
@ -5642,6 +5643,7 @@ dependencies = [
"solana-vote-program",
"solana-zk-token-proof-program",
"solana-zk-token-sdk 1.16.0",
"static_assertions",
"strum",
"strum_macros",
"symlink",

View File

@ -48,6 +48,7 @@ solana-sdk = { path = "../../sdk", version = "=1.16.0" }
solana-transaction-status = { path = "../../transaction-status", version = "=1.16.0" }
solana-validator = { path = "../../validator", version = "=1.16.0" }
solana-zk-token-sdk = { path = "../../zk-token-sdk", version = "=1.16.0" }
static_assertions = "1.1.0"
thiserror = "1.0"
[package]

View File

@ -34,6 +34,7 @@ modular-bitfield = { workspace = true }
num-derive = { workspace = true }
num-traits = { workspace = true }
num_cpus = { workspace = true }
num_enum = { workspace = true }
once_cell = { workspace = true }
ouroboros = { workspace = true }
percentage = { workspace = true }
@ -61,6 +62,7 @@ solana-system-program = { workspace = true }
solana-vote-program = { workspace = true }
solana-zk-token-proof-program = { workspace = true }
solana-zk-token-sdk = { workspace = true }
static_assertions = { workspace = true }
strum = { workspace = true, features = ["derive"] }
strum_macros = { workspace = true }
symlink = { workspace = true }

View File

@ -75,6 +75,7 @@ pub mod stakes;
pub mod static_ids;
pub mod status_cache;
mod storable_accounts;
pub mod tiered_storage;
pub mod transaction_batch;
pub mod transaction_error_metrics;
pub mod transaction_priority_details;

View File

@ -0,0 +1,3 @@
pub mod file;
pub mod footer;
pub mod mmap_utils;

View File

@ -0,0 +1,79 @@
use std::{
fs::{File, OpenOptions},
io::{Read, Seek, SeekFrom, Write},
mem,
path::Path,
};
/// A thin wrapper around a [`File`] that adds typed and raw-byte read/write
/// helpers as well as absolute and end-relative seeking.
///
/// All methods take `&self`; they operate on the shared file handle through
/// the `Read` / `Write` / `Seek` implementations for `&File`.
#[derive(Debug)]
pub struct TieredStorageFile(pub File);

impl TieredStorageFile {
    /// Opens the existing file at `file_path` for reading only.
    ///
    /// # Panics
    /// Panics if the file cannot be opened.
    pub fn new_readonly(file_path: impl AsRef<Path>) -> Self {
        let opened = OpenOptions::new()
            .read(true)
            .create(false)
            .open(&file_path);
        match opened {
            Ok(file) => Self(file),
            Err(e) => panic!(
                "[TieredStorageError] Unable to open {:?} as read-only: {:?}",
                file_path.as_ref().display(),
                e
            ),
        }
    }

    /// Opens the file at `file_path` for writing, creating it if it does not
    /// exist. An existing file is not truncated.
    ///
    /// # Panics
    /// Panics if the file cannot be opened or created.
    pub fn new_writable(file_path: impl AsRef<Path>) -> Self {
        let opened = OpenOptions::new()
            .write(true)
            .create(true)
            .open(&file_path);
        match opened {
            Ok(file) => Self(file),
            Err(e) => panic!(
                "[TieredStorageError] Unable to create {:?} as writable: {:?}",
                file_path.as_ref().display(),
                e,
            ),
        }
    }

    /// Writes the in-memory bytes of `value` at the current file position and
    /// returns the number of bytes written (`size_of::<T>()`).
    ///
    /// NOTE(review): the bytes written are the native in-memory representation
    /// of `T`; callers should only pass plain-data types with a fixed layout.
    pub fn write_type<T>(&self, value: &T) -> Result<usize, std::io::Error> {
        let len = mem::size_of::<T>();
        let start = (value as *const T).cast::<u8>();
        // SAFETY: `value` is a valid reference, so `start` is non-null and
        // points to `len` bytes that stay borrowed for the duration of this
        // call. NOTE(review): if `T` has padding, those bytes are
        // uninitialized; callers are expected to use padding-free types.
        let bytes = unsafe { std::slice::from_raw_parts(start, len) };
        let mut file = &self.0;
        file.write_all(bytes)?;
        Ok(len)
    }

    /// Fills `value` with the next `size_of::<T>()` bytes read from the
    /// current file position.
    ///
    /// NOTE(review): the bytes are copied verbatim over `value`; `T` must be
    /// a type for which every bit pattern is valid, otherwise the result is
    /// undefined behavior.
    pub fn read_type<T>(&self, value: &mut T) -> Result<(), std::io::Error> {
        let len = mem::size_of::<T>();
        let start = (value as *mut T).cast::<u8>();
        // SAFETY: `value` is an exclusive reference, so `start` points to
        // `len` writable bytes that nothing else accesses during this call.
        let bytes = unsafe { std::slice::from_raw_parts_mut(start, len) };
        let mut file = &self.0;
        file.read_exact(bytes)
    }

    /// Moves the file cursor to `offset` bytes from the start of the file and
    /// returns the new position.
    pub fn seek(&self, offset: u64) -> Result<u64, std::io::Error> {
        let mut file = &self.0;
        file.seek(SeekFrom::Start(offset))
    }

    /// Moves the file cursor to `offset` bytes relative to the end of the
    /// file (negative values point inside the file) and returns the new
    /// position measured from the start.
    pub fn seek_from_end(&self, offset: i64) -> Result<u64, std::io::Error> {
        let mut file = &self.0;
        file.seek(SeekFrom::End(offset))
    }

    /// Writes all of `bytes` at the current file position and returns the
    /// number of bytes written.
    pub fn write_bytes(&self, bytes: &[u8]) -> Result<usize, std::io::Error> {
        let mut file = &self.0;
        file.write_all(bytes)?;
        Ok(bytes.len())
    }

    /// Reads exactly `buffer.len()` bytes from the current file position into
    /// `buffer`, failing if the file ends first.
    pub fn read_bytes(&self, buffer: &mut [u8]) -> Result<(), std::io::Error> {
        let mut file = &self.0;
        file.read_exact(buffer)
    }
}

View File

@ -0,0 +1,314 @@
use {
crate::tiered_storage::{file::TieredStorageFile, mmap_utils::get_type},
memmap2::Mmap,
solana_sdk::{hash::Hash, pubkey::Pubkey},
std::{mem, path::Path},
};
/// The footer format version written by this implementation; it is the value
/// `TieredStorageFooter::default()` stores in `format_version`.
pub const FOOTER_FORMAT_VERSION: u64 = 1;
/// The size of the footer struct + the magic number at the end.
pub const FOOTER_SIZE: usize =
mem::size_of::<TieredStorageFooter>() + mem::size_of::<TieredStorageMagicNumber>();
// Compile-time guard: the footer struct is persisted as raw bytes, so any
// change to its size (160 bytes) must be a deliberate format change.
static_assertions::const_assert_eq!(mem::size_of::<TieredStorageFooter>(), 160);
/// The size of the ending part of the footer. This size should remain unchanged
/// even when the footer's format changes.
///
/// The tail is read as three consecutive 8-byte values: the footer size, the
/// format version and the magic number.
pub const FOOTER_TAIL_SIZE: usize = 24;
/// The ending 8 bytes of a valid tiered account storage file.
pub const FOOTER_MAGIC_NUMBER: u64 = 0x502A2AB5; // SOLALABS -> SOLANA LABS
/// Newtype for the 8-byte magic number persisted right after the footer.
///
/// A footer is only accepted when the value read back from storage equals
/// `TieredStorageMagicNumber::default()`.
#[derive(Debug, PartialEq, Eq)]
#[repr(C)]
pub struct TieredStorageMagicNumber(pub u64);

impl Default for TieredStorageMagicNumber {
    /// Returns the expected magic number, `FOOTER_MAGIC_NUMBER`.
    fn default() -> Self {
        TieredStorageMagicNumber(FOOTER_MAGIC_NUMBER)
    }
}
/// The format of the account meta entries, stored in the footer as a `u16`.
///
/// The `num_enum` derives provide conversion to `u16` (`IntoPrimitive`) and
/// fallible conversion from `u16` (`TryFromPrimitive`).
///
/// NOTE(review): the variants presumably correspond to the hot and cold
/// storage tiers; this file does not define their layouts.
#[repr(u16)]
#[derive(
Clone,
Copy,
Debug,
Default,
Eq,
Hash,
PartialEq,
num_enum::IntoPrimitive,
num_enum::TryFromPrimitive,
)]
pub enum AccountMetaFormat {
#[default]
Hot = 0,
Cold = 1,
}
/// The format of the account blocks, stored in the footer as a `u16`.
///
/// The `num_enum` derives provide conversion to `u16` (`IntoPrimitive`) and
/// fallible conversion from `u16` (`TryFromPrimitive`).
///
/// NOTE(review): judging by the names, `AlignedRaw` is presumably an
/// uncompressed aligned layout and `Lz4` an LZ4-compressed one; confirm
/// against the reader/writer implementation.
#[repr(u16)]
#[derive(
Clone,
Copy,
Debug,
Default,
Eq,
Hash,
PartialEq,
num_enum::IntoPrimitive,
num_enum::TryFromPrimitive,
)]
pub enum AccountBlockFormat {
#[default]
AlignedRaw = 0,
Lz4 = 1,
}
/// The format of the owners block, stored in the footer as a `u16`.
///
/// The `num_enum` derives provide conversion to `u16` (`IntoPrimitive`) and
/// fallible conversion from `u16` (`TryFromPrimitive`). `LocalIndex` is
/// currently the only (and default) format.
#[repr(u16)]
#[derive(
Clone,
Copy,
Debug,
Default,
Eq,
Hash,
PartialEq,
num_enum::IntoPrimitive,
num_enum::TryFromPrimitive,
)]
pub enum OwnersBlockFormat {
#[default]
LocalIndex = 0,
}
/// The format of the account index block, stored in the footer as a `u16`.
///
/// The `num_enum` derives provide conversion to `u16` (`IntoPrimitive`) and
/// fallible conversion from `u16` (`TryFromPrimitive`).
#[repr(u16)]
#[derive(
Clone,
Copy,
Debug,
Default,
Eq,
Hash,
PartialEq,
num_enum::IntoPrimitive,
num_enum::TryFromPrimitive,
)]
pub enum AccountIndexFormat {
// This format does not support any fast lookup.
// Any query from account hash to account meta requires linear search.
#[default]
Linear = 0,
// Similar to `Linear` (presumably; the original note said "index"), but
// this format also stores the offset of each account meta in the index
// block.
LinearIndex = 1,
}
/// The footer of a tiered account storage file.
///
/// The struct is `#[repr(C)]` and is written to / read from storage as its
/// raw in-memory bytes, followed by an 8-byte magic number that is persisted
/// but is not a field of this struct. The last two fields (`footer_size`,
/// `format_version`) together with that magic number form the fixed-size
/// footer tail.
#[derive(Debug, PartialEq, Eq, Clone)]
#[repr(C)]
pub struct TieredStorageFooter {
// formats
/// The format of the account meta entry.
pub account_meta_format: AccountMetaFormat,
/// The format of the owners block.
pub owners_block_format: OwnersBlockFormat,
/// The format of the account index block.
pub account_index_format: AccountIndexFormat,
/// The format of the account block.
pub account_block_format: AccountBlockFormat,
// Account-block related
/// The number of account entries.
pub account_entry_count: u32,
/// The size of each account meta entry in bytes.
pub account_meta_entry_size: u32,
/// The default size of an account block before compression.
///
/// If the size of one account (meta + data + optional fields) before
/// compression is bigger than this number, then it is considered a
/// blob account and it will have its own account block.
pub account_block_size: u64,
// Owner-related
/// The number of owners.
pub owner_count: u32,
/// The size of each owner entry.
pub owner_entry_size: u32,
// Offsets
// Note that offset to the account blocks is omitted as it's always 0.
/// The offset pointing to the first byte of the account index block.
pub account_index_offset: u64,
/// The offset pointing to the first byte of the owners block.
pub owners_offset: u64,
// account range
/// The smallest account address in this file.
pub min_account_address: Pubkey,
/// The largest account address in this file.
pub max_account_address: Pubkey,
/// A hash that represents a tiered accounts file for consistency check.
pub hash: Hash,
// The below fields belong to footer tail.
// The sum of their sizes (plus the 8-byte magic number that follows the
// struct in storage) should match FOOTER_TAIL_SIZE.
/// The size of the footer including the magic number.
pub footer_size: u64,
/// The format version of the tiered accounts file.
pub format_version: u64,
// This field is persisted in the storage but not in this struct.
// The number should match FOOTER_MAGIC_NUMBER.
// pub magic_number: u64,
}
impl Default for TieredStorageFooter {
fn default() -> Self {
Self {
account_meta_format: AccountMetaFormat::default(),
owners_block_format: OwnersBlockFormat::default(),
account_index_format: AccountIndexFormat::default(),
account_block_format: AccountBlockFormat::default(),
account_entry_count: 0,
account_meta_entry_size: 0,
account_block_size: 0,
owner_count: 0,
owner_entry_size: 0,
account_index_offset: 0,
owners_offset: 0,
hash: Hash::new_unique(),
min_account_address: Pubkey::default(),
max_account_address: Pubkey::default(),
footer_size: FOOTER_SIZE as u64,
format_version: FOOTER_FORMAT_VERSION,
}
}
}
impl TieredStorageFooter {
    /// Opens the file at `path` read-only and loads the footer stored at its
    /// end.
    ///
    /// # Errors
    /// Returns any error produced by [`Self::new_from_footer_block`].
    ///
    /// # Panics
    /// `TieredStorageFile::new_readonly` panics when the file cannot be
    /// opened.
    pub fn new_from_path(path: impl AsRef<Path>) -> std::io::Result<Self> {
        let file = TieredStorageFile::new_readonly(path);
        Self::new_from_footer_block(&file)
    }

    /// Writes this footer followed by the magic number at the current
    /// position of `file`.
    ///
    /// # Errors
    /// Returns the underlying I/O error if either write fails.
    pub fn write_footer_block(&self, file: &TieredStorageFile) -> std::io::Result<()> {
        file.write_type(self)?;
        file.write_type(&TieredStorageMagicNumber::default())?;
        Ok(())
    }

    /// Reads the footer from the end of `file`.
    ///
    /// The fixed-size tail (footer size, format version, magic number) is
    /// read first; the persisted footer size then locates the start of the
    /// footer.
    ///
    /// # Errors
    /// * `ErrorKind::Other` if the magic number does not match.
    /// * `ErrorKind::InvalidData` if the persisted footer size cannot be used
    ///   as a seek offset.
    /// * Any I/O error raised while seeking or reading.
    ///
    /// NOTE(review): the footer bytes are copied verbatim into the struct, so
    /// the format fields are not validated against their enum ranges.
    pub fn new_from_footer_block(file: &TieredStorageFile) -> std::io::Result<Self> {
        let mut footer_size: u64 = 0;
        let mut footer_version: u64 = 0;
        let mut magic_number = TieredStorageMagicNumber(0);

        file.seek_from_end(-(FOOTER_TAIL_SIZE as i64))?;
        file.read_type(&mut footer_size)?;
        file.read_type(&mut footer_version)?;
        file.read_type(&mut magic_number)?;

        if magic_number != TieredStorageMagicNumber::default() {
            return Err(Self::magic_number_mismatch());
        }

        // `footer_size` comes from storage and is untrusted: a value above
        // `i64::MAX` would wrap when cast (and `1 << 63` would overflow on
        // negation), so reject it instead of seeking to a bogus position.
        if footer_size > i64::MAX as u64 {
            return Err(Self::invalid_footer_size(footer_size));
        }

        let mut footer = Self::default();
        file.seek_from_end(-(footer_size as i64))?;
        file.read_type(&mut footer)?;
        Ok(footer)
    }

    /// Returns a reference to the footer located at the end of `map`.
    ///
    /// # Errors
    /// * `ErrorKind::Other` if the magic number does not match.
    /// * `ErrorKind::InvalidData` if the persisted footer size is larger than
    ///   the mapped region.
    /// * Any error returned by `get_type` for out-of-range reads.
    ///
    /// NOTE(review): the returned reference reinterprets the mapped bytes in
    /// place; the format fields are not validated against their enum ranges.
    pub fn new_from_mmap(map: &Mmap) -> std::io::Result<&TieredStorageFooter> {
        // A map shorter than the tail saturates to offset 0; the three reads
        // below then fail the bounds check inside `get_type`.
        let offset = map.len().saturating_sub(FOOTER_TAIL_SIZE);
        let (stored_footer_size, offset) = get_type::<u64>(map, offset)?;
        let (_footer_version, offset) = get_type::<u64>(map, offset)?;
        let (magic_number, _offset) = get_type::<TieredStorageMagicNumber>(map, offset)?;

        if *magic_number != TieredStorageMagicNumber::default() {
            return Err(Self::magic_number_mismatch());
        }

        // The persisted size is untrusted. If it exceeds the map length, fail
        // explicitly rather than clamping to offset 0 and returning whatever
        // bytes happen to sit at the start of the file.
        let map_len = map.len();
        if *stored_footer_size > map_len as u64 {
            return Err(Self::invalid_footer_size(*stored_footer_size));
        }
        // Lossless: the value was just checked to be <= `map_len`.
        let footer_offset = map_len - *stored_footer_size as usize;

        let (footer, _offset) = get_type::<TieredStorageFooter>(map, footer_offset)?;
        Ok(footer)
    }

    /// Builds the error returned when the trailing magic number is wrong.
    fn magic_number_mismatch() -> std::io::Error {
        std::io::Error::new(
            std::io::ErrorKind::Other,
            "TieredStorageError: Magic number mismatch",
        )
    }

    /// Builds the error returned when the persisted footer size is unusable.
    fn invalid_footer_size(footer_size: u64) -> std::io::Error {
        std::io::Error::new(
            std::io::ErrorKind::InvalidData,
            format!("TieredStorageError: Invalid footer size {}", footer_size),
        )
    }
}
#[cfg(test)]
mod tests {
    use {
        super::*,
        crate::{
            append_vec::test_utils::get_append_vec_path, tiered_storage::file::TieredStorageFile,
        },
        memoffset::offset_of,
        solana_sdk::hash::Hash,
    };

    /// Writes a footer to a fresh file and checks that reading it back by
    /// path yields an identical footer.
    #[test]
    fn test_footer() {
        let path = get_append_vec_path("test_file_footer");
        let expected_footer = TieredStorageFooter {
            account_meta_format: AccountMetaFormat::Hot,
            owners_block_format: OwnersBlockFormat::LocalIndex,
            account_index_format: AccountIndexFormat::Linear,
            account_block_format: AccountBlockFormat::AlignedRaw,
            account_entry_count: 300,
            account_meta_entry_size: 24,
            account_block_size: 4096,
            owner_count: 250,
            owner_entry_size: 32,
            account_index_offset: 1069600,
            owners_offset: 1081200,
            hash: Hash::new_unique(),
            min_account_address: Pubkey::default(),
            max_account_address: Pubkey::new_unique(),
            footer_size: FOOTER_SIZE as u64,
            format_version: FOOTER_FORMAT_VERSION,
        };

        // Persist the expected footer, then close the writer before the
        // file is reopened for reading.
        let writer = TieredStorageFile::new_writable(&path.path);
        expected_footer.write_footer_block(&writer).unwrap();
        drop(writer);

        // Reopen the same storage; the persisted footer must equal what was
        // written.
        let footer = TieredStorageFooter::new_from_path(&path.path).unwrap();
        assert_eq!(expected_footer, footer);
    }

    /// Pins the byte offset of every footer field, since the struct is
    /// persisted as raw bytes.
    #[test]
    fn test_footer_layout() {
        macro_rules! assert_field_offset {
            ($field:ident, $expected:expr) => {
                assert_eq!(offset_of!(TieredStorageFooter, $field), $expected)
            };
        }

        assert_field_offset!(account_meta_format, 0x00);
        assert_field_offset!(owners_block_format, 0x02);
        assert_field_offset!(account_index_format, 0x04);
        assert_field_offset!(account_block_format, 0x06);
        assert_field_offset!(account_entry_count, 0x08);
        assert_field_offset!(account_meta_entry_size, 0x0C);
        assert_field_offset!(account_block_size, 0x10);
        assert_field_offset!(owner_count, 0x18);
        assert_field_offset!(owner_entry_size, 0x1C);
        assert_field_offset!(account_index_offset, 0x20);
        assert_field_offset!(owners_offset, 0x28);
        assert_field_offset!(min_account_address, 0x30);
        assert_field_offset!(max_account_address, 0x50);
        assert_field_offset!(hash, 0x70);
        assert_field_offset!(footer_size, 0x90);
        assert_field_offset!(format_version, 0x98);
    }
}

View File

@ -0,0 +1,37 @@
use {
crate::{accounts_file::ALIGN_BOUNDARY_OFFSET, u64_align},
log::*,
memmap2::Mmap,
};
/// Reinterprets the `size_of::<T>()` bytes at `offset` in `map` as a `&T`.
///
/// Returns the reference together with the follow-up offset reported by
/// [`get_slice`], or the error from `get_slice` when the requested range does
/// not fit inside `map`.
///
/// In debug builds this asserts that the resulting address is suitably
/// aligned for `T`.
///
/// NOTE(review): the bytes are not validated; callers should only use types
/// for which every bit pattern is a valid value.
pub fn get_type<T>(map: &Mmap, offset: usize) -> std::io::Result<(&T, usize)> {
    let (bytes, next_offset) = get_slice(map, offset, std::mem::size_of::<T>())?;
    let typed = bytes.as_ptr().cast::<T>();
    debug_assert!(typed as usize % std::mem::align_of::<T>() == 0);
    // SAFETY: `bytes` is an in-bounds slice of `map` that is exactly
    // `size_of::<T>()` long, so the pointer is valid for reads of `T` for as
    // long as `map` is borrowed. Alignment is only checked in debug builds
    // and bit-validity of `T` is the caller's responsibility.
    let value = unsafe { &*typed };
    Ok((value, next_offset))
}
/// Get a reference to the data at `offset` of `size` bytes if that slice
/// doesn't overrun the internal buffer. Otherwise log an error and return an
/// `AddrNotAvailable` error.
///
/// Also return the offset of the first byte after the requested data, rounded
/// up by `u64_align!`.
///
/// NOTE(review): the original comment described this as a "64-byte boundary";
/// given the macro's name it is presumably a 64-bit (8-byte) boundary.
/// Confirm against the `u64_align!` definition.
pub fn get_slice(map: &Mmap, offset: usize, size: usize) -> std::io::Result<(&[u8], usize)> {
    // Reject both arithmetic overflow and ranges that end past the mapping.
    let end = match offset.checked_add(size) {
        Some(end) if end <= map.len() => end,
        _ => {
            error!(
                "Requested offset {} and size {} while mmap only has length {}",
                offset,
                size,
                map.len()
            );
            return Err(std::io::Error::new(
                std::io::ErrorKind::AddrNotAvailable,
                "Requested offset and data length exceeds the mmap slice",
            ));
        }
    };

    // The bounds-checked sub-slice already has the right length and
    // lifetime, so there is no need to rebuild it from a raw pointer.
    let data = &map[offset..end];
    Ok((data, u64_align!(end)))
}