(LedgerStore) Enable performance sampling in column family get() (#23834)

#### Summary of Changes
This PR enables RocksDB read side performance metrics to report to blockstore_rocksdb_read_perf.
The sampling rate is controlled by an env arg `SOLANA_METRICS_ROCKSDB_PERF_SAMPLES_IN_1K`,
specifies the number of perf samples for every 1000 operations.  The default value is set to 10, meaning
we will report 10 out of 1000 (or 1/100) reads.

The metrics are based on the RocksDB [PerfContext](https://github.com/facebook/rocksdb/blob/main/include/rocksdb/perf_context.h).
It includes many useful metrics including block read time, cache hit rate, and time spent on decompressing the block.
This commit is contained in:
Yueh-Hsuan Chiang 2022-04-01 13:13:32 -07:00 committed by GitHub
parent c9a476e24d
commit 0b5ed87220
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 363 additions and 5 deletions

View File

@ -13,7 +13,9 @@ use {
}, },
}; };
// Determines how often we report blockstore metrics. // Determines how often we report blockstore metrics under
// LedgerMetricReportService. Note that there're other blockstore
// metrics that are reported outside LedgerMetricReportService.
const BLOCKSTORE_METRICS_REPORT_PERIOD_MILLIS: u64 = 10000; const BLOCKSTORE_METRICS_REPORT_PERIOD_MILLIS: u64 = 10000;
pub struct LedgerMetricReportService { pub struct LedgerMetricReportService {

View File

@ -5,16 +5,19 @@ use {
byteorder::{BigEndian, ByteOrder}, byteorder::{BigEndian, ByteOrder},
log::*, log::*,
prost::Message, prost::Message,
rand::{thread_rng, Rng},
rocksdb::{ rocksdb::{
self, self,
compaction_filter::CompactionFilter, compaction_filter::CompactionFilter,
compaction_filter_factory::{CompactionFilterContext, CompactionFilterFactory}, compaction_filter_factory::{CompactionFilterContext, CompactionFilterFactory},
perf::{set_perf_stats, PerfMetric, PerfStatsLevel},
properties as RocksProperties, ColumnFamily, ColumnFamilyDescriptor, CompactionDecision, properties as RocksProperties, ColumnFamily, ColumnFamilyDescriptor, CompactionDecision,
DBCompactionStyle, DBCompressionType as RocksCompressionType, DBIterator, DBRawIterator, DBCompactionStyle, DBCompressionType as RocksCompressionType, DBIterator, DBRawIterator,
DBRecoveryMode, FifoCompactOptions, IteratorMode as RocksIteratorMode, Options, DBRecoveryMode, FifoCompactOptions, IteratorMode as RocksIteratorMode, Options,
WriteBatch as RWriteBatch, DB, PerfContext, WriteBatch as RWriteBatch, DB,
}, },
serde::{de::DeserializeOwned, Serialize}, serde::{de::DeserializeOwned, Serialize},
solana_metrics::datapoint_info,
solana_runtime::hardened_unpack::UnpackError, solana_runtime::hardened_unpack::UnpackError,
solana_sdk::{ solana_sdk::{
clock::{Slot, UnixTimestamp}, clock::{Slot, UnixTimestamp},
@ -23,6 +26,7 @@ use {
}, },
solana_storage_proto::convert::generated, solana_storage_proto::convert::generated,
std::{ std::{
cell::RefCell,
collections::{HashMap, HashSet}, collections::{HashMap, HashSet},
ffi::{CStr, CString}, ffi::{CStr, CString},
fs, fs,
@ -56,6 +60,9 @@ const DEFAULT_FIFO_COMPACTION_DATA_CF_SIZE: u64 = 125 * 1024 * 1024 * 1024;
// used by coding shreds. // used by coding shreds.
const DEFAULT_FIFO_COMPACTION_CODING_CF_SIZE: u64 = 100 * 1024 * 1024 * 1024; const DEFAULT_FIFO_COMPACTION_CODING_CF_SIZE: u64 = 100 * 1024 * 1024 * 1024;
// Thread local instance of RocksDB's PerfContext.
thread_local! {static PER_THREAD_ROCKS_PERF_CONTEXT: RefCell<PerfContext> = RefCell::new(PerfContext::default());}
// Column family for metadata about a leader slot // Column family for metadata about a leader slot
const META_CF: &str = "meta"; const META_CF: &str = "meta";
// Column family for slots that have been marked as dead // Column family for slots that have been marked as dead
@ -778,6 +785,7 @@ pub trait ColumnMetrics {
cf_metrics: BlockstoreRocksDbColumnFamilyMetrics, cf_metrics: BlockstoreRocksDbColumnFamilyMetrics,
column_options: &Arc<LedgerColumnOptions>, column_options: &Arc<LedgerColumnOptions>,
); );
fn rocksdb_get_perf_metric_header(column_options: &Arc<LedgerColumnOptions>) -> &'static str;
} }
pub trait ColumnName { pub trait ColumnName {
@ -875,6 +883,13 @@ impl ColumnMetrics for columns::TransactionStatus {
column_options column_options
)); ));
} }
fn rocksdb_get_perf_metric_header(column_options: &Arc<LedgerColumnOptions>) -> &'static str {
rocksdb_metric_header!(
"blockstore_rocksdb_read_perf,op=get",
"transaction_status",
column_options
)
}
} }
impl ColumnName for columns::TransactionStatus { impl ColumnName for columns::TransactionStatus {
const NAME: &'static str = TRANSACTION_STATUS_CF; const NAME: &'static str = TRANSACTION_STATUS_CF;
@ -927,6 +942,13 @@ impl ColumnMetrics for columns::AddressSignatures {
column_options column_options
)); ));
} }
fn rocksdb_get_perf_metric_header(column_options: &Arc<LedgerColumnOptions>) -> &'static str {
rocksdb_metric_header!(
"blockstore_rocksdb_read_perf,op=get",
"address_signatures",
column_options
)
}
} }
impl ColumnName for columns::AddressSignatures { impl ColumnName for columns::AddressSignatures {
const NAME: &'static str = ADDRESS_SIGNATURES_CF; const NAME: &'static str = ADDRESS_SIGNATURES_CF;
@ -969,6 +991,13 @@ impl ColumnMetrics for columns::TransactionMemos {
column_options column_options
)); ));
} }
fn rocksdb_get_perf_metric_header(column_options: &Arc<LedgerColumnOptions>) -> &'static str {
rocksdb_metric_header!(
"blockstore_rocksdb_read_perf,op=get",
"transaction_memos",
column_options
)
}
} }
impl ColumnName for columns::TransactionMemos { impl ColumnName for columns::TransactionMemos {
const NAME: &'static str = TRANSACTION_MEMOS_CF; const NAME: &'static str = TRANSACTION_MEMOS_CF;
@ -1011,6 +1040,13 @@ impl ColumnMetrics for columns::TransactionStatusIndex {
column_options column_options
)); ));
} }
fn rocksdb_get_perf_metric_header(column_options: &Arc<LedgerColumnOptions>) -> &'static str {
rocksdb_metric_header!(
"blockstore_rocksdb_read_perf,op=get",
"transaction_status_index",
column_options
)
}
} }
impl ColumnName for columns::TransactionStatusIndex { impl ColumnName for columns::TransactionStatusIndex {
const NAME: &'static str = TRANSACTION_STATUS_INDEX_CF; const NAME: &'static str = TRANSACTION_STATUS_INDEX_CF;
@ -1028,6 +1064,13 @@ impl ColumnMetrics for columns::Rewards {
column_options column_options
)); ));
} }
fn rocksdb_get_perf_metric_header(column_options: &Arc<LedgerColumnOptions>) -> &'static str {
rocksdb_metric_header!(
"blockstore_rocksdb_read_perf,op=get",
"rewards",
column_options
)
}
} }
impl ColumnName for columns::Rewards { impl ColumnName for columns::Rewards {
const NAME: &'static str = REWARDS_CF; const NAME: &'static str = REWARDS_CF;
@ -1048,6 +1091,13 @@ impl ColumnMetrics for columns::Blocktime {
column_options column_options
)); ));
} }
fn rocksdb_get_perf_metric_header(column_options: &Arc<LedgerColumnOptions>) -> &'static str {
rocksdb_metric_header!(
"blockstore_rocksdb_read_perf,op=get",
"blocktime",
column_options
)
}
} }
impl ColumnName for columns::Blocktime { impl ColumnName for columns::Blocktime {
const NAME: &'static str = BLOCKTIME_CF; const NAME: &'static str = BLOCKTIME_CF;
@ -1068,6 +1118,13 @@ impl ColumnMetrics for columns::PerfSamples {
column_options column_options
)); ));
} }
fn rocksdb_get_perf_metric_header(column_options: &Arc<LedgerColumnOptions>) -> &'static str {
rocksdb_metric_header!(
"blockstore_rocksdb_read_perf,op=get",
"perf_samples",
column_options
)
}
} }
impl ColumnName for columns::PerfSamples { impl ColumnName for columns::PerfSamples {
const NAME: &'static str = PERF_SAMPLES_CF; const NAME: &'static str = PERF_SAMPLES_CF;
@ -1088,6 +1145,13 @@ impl ColumnMetrics for columns::BlockHeight {
column_options column_options
)); ));
} }
fn rocksdb_get_perf_metric_header(column_options: &Arc<LedgerColumnOptions>) -> &'static str {
rocksdb_metric_header!(
"blockstore_rocksdb_read_perf,op=get",
"block_height",
column_options
)
}
} }
impl ColumnName for columns::BlockHeight { impl ColumnName for columns::BlockHeight {
const NAME: &'static str = BLOCK_HEIGHT_CF; const NAME: &'static str = BLOCK_HEIGHT_CF;
@ -1107,6 +1171,13 @@ impl ColumnMetrics for columns::ProgramCosts {
column_options column_options
)); ));
} }
fn rocksdb_get_perf_metric_header(column_options: &Arc<LedgerColumnOptions>) -> &'static str {
rocksdb_metric_header!(
"blockstore_rocksdb_read_perf,op=get",
"program_costs",
column_options
)
}
} }
impl ColumnName for columns::ProgramCosts { impl ColumnName for columns::ProgramCosts {
@ -1173,6 +1244,13 @@ impl ColumnMetrics for columns::ShredCode {
column_options column_options
)); ));
} }
fn rocksdb_get_perf_metric_header(column_options: &Arc<LedgerColumnOptions>) -> &'static str {
rocksdb_metric_header!(
"blockstore_rocksdb_read_perf,op=get",
"shred_code",
column_options
)
}
} }
impl ColumnName for columns::ShredCode { impl ColumnName for columns::ShredCode {
const NAME: &'static str = CODE_SHRED_CF; const NAME: &'static str = CODE_SHRED_CF;
@ -1214,6 +1292,13 @@ impl ColumnMetrics for columns::ShredData {
column_options column_options
)); ));
} }
fn rocksdb_get_perf_metric_header(column_options: &Arc<LedgerColumnOptions>) -> &'static str {
rocksdb_metric_header!(
"blockstore_rocksdb_read_perf,op=get",
"shred_data",
column_options
)
}
} }
impl ColumnName for columns::ShredData { impl ColumnName for columns::ShredData {
const NAME: &'static str = DATA_SHRED_CF; const NAME: &'static str = DATA_SHRED_CF;
@ -1231,6 +1316,13 @@ impl ColumnMetrics for columns::Index {
column_options column_options
)); ));
} }
fn rocksdb_get_perf_metric_header(column_options: &Arc<LedgerColumnOptions>) -> &'static str {
rocksdb_metric_header!(
"blockstore_rocksdb_read_perf,op=get",
"index",
column_options
)
}
} }
impl ColumnName for columns::Index { impl ColumnName for columns::Index {
const NAME: &'static str = INDEX_CF; const NAME: &'static str = INDEX_CF;
@ -1251,6 +1343,13 @@ impl ColumnMetrics for columns::DeadSlots {
column_options column_options
)); ));
} }
fn rocksdb_get_perf_metric_header(column_options: &Arc<LedgerColumnOptions>) -> &'static str {
rocksdb_metric_header!(
"blockstore_rocksdb_read_perf,op=get",
"dead_slots",
column_options
)
}
} }
impl ColumnName for columns::DeadSlots { impl ColumnName for columns::DeadSlots {
const NAME: &'static str = DEAD_SLOTS_CF; const NAME: &'static str = DEAD_SLOTS_CF;
@ -1271,6 +1370,13 @@ impl ColumnMetrics for columns::DuplicateSlots {
column_options column_options
)); ));
} }
fn rocksdb_get_perf_metric_header(column_options: &Arc<LedgerColumnOptions>) -> &'static str {
rocksdb_metric_header!(
"blockstore_rocksdb_read_perf,op=get",
"duplicate_slots",
column_options
)
}
} }
impl ColumnName for columns::DuplicateSlots { impl ColumnName for columns::DuplicateSlots {
const NAME: &'static str = DUPLICATE_SLOTS_CF; const NAME: &'static str = DUPLICATE_SLOTS_CF;
@ -1291,6 +1397,13 @@ impl ColumnMetrics for columns::Orphans {
column_options column_options
)); ));
} }
fn rocksdb_get_perf_metric_header(column_options: &Arc<LedgerColumnOptions>) -> &'static str {
rocksdb_metric_header!(
"blockstore_rocksdb_read_perf,op=get",
"orphans",
column_options
)
}
} }
impl ColumnName for columns::Orphans { impl ColumnName for columns::Orphans {
const NAME: &'static str = ORPHANS_CF; const NAME: &'static str = ORPHANS_CF;
@ -1311,6 +1424,13 @@ impl ColumnMetrics for columns::BankHash {
column_options column_options
)); ));
} }
fn rocksdb_get_perf_metric_header(column_options: &Arc<LedgerColumnOptions>) -> &'static str {
rocksdb_metric_header!(
"blockstore_rocksdb_read_perf,op=get",
"bank_hash",
column_options
)
}
} }
impl ColumnName for columns::BankHash { impl ColumnName for columns::BankHash {
const NAME: &'static str = BANK_HASH_CF; const NAME: &'static str = BANK_HASH_CF;
@ -1331,6 +1451,13 @@ impl ColumnMetrics for columns::Root {
column_options column_options
)); ));
} }
fn rocksdb_get_perf_metric_header(column_options: &Arc<LedgerColumnOptions>) -> &'static str {
rocksdb_metric_header!(
"blockstore_rocksdb_read_perf,op=get",
"root",
column_options
)
}
} }
impl ColumnName for columns::Root { impl ColumnName for columns::Root {
const NAME: &'static str = ROOT_CF; const NAME: &'static str = ROOT_CF;
@ -1351,6 +1478,13 @@ impl ColumnMetrics for columns::SlotMeta {
column_options column_options
)); ));
} }
fn rocksdb_get_perf_metric_header(column_options: &Arc<LedgerColumnOptions>) -> &'static str {
rocksdb_metric_header!(
"blockstore_rocksdb_read_perf,op=get",
"slot_meta",
column_options
)
}
} }
impl ColumnName for columns::SlotMeta { impl ColumnName for columns::SlotMeta {
const NAME: &'static str = META_CF; const NAME: &'static str = META_CF;
@ -1396,6 +1530,13 @@ impl ColumnMetrics for columns::ErasureMeta {
column_options column_options
)); ));
} }
fn rocksdb_get_perf_metric_header(column_options: &Arc<LedgerColumnOptions>) -> &'static str {
rocksdb_metric_header!(
"blockstore_rocksdb_read_perf,op=get",
"erasure_meta",
column_options
)
}
} }
impl ColumnName for columns::ErasureMeta { impl ColumnName for columns::ErasureMeta {
const NAME: &'static str = ERASURE_META_CF; const NAME: &'static str = ERASURE_META_CF;
@ -1793,18 +1934,37 @@ where
} }
} }
const METRIC_SAMPLES_1K: i32 = 1000;
// The default number of rocksdb perf samples in 1K
const ROCKSDB_PERF_CONTEXT_SAMPLES_IN_1K_DEFAULT: i32 = 10;
lazy_static! {
// The number of RocksDB performance counter samples in 1000.
static ref ROCKSDB_PERF_CONTEXT_SAMPLES_IN_1K: i32 =
std::env::var("SOLANA_METRICS_ROCKSDB_PERF_SAMPLES_IN_1K")
.map(|x| {
x.parse().expect("Failed to parse SOLANA_METRICS_ROCKSDB_PERF_SAMPLES_IN_1K")
}).unwrap_or(ROCKSDB_PERF_CONTEXT_SAMPLES_IN_1K_DEFAULT);
}
impl<C> LedgerColumn<C> impl<C> LedgerColumn<C>
where where
C: TypedColumn + ColumnName + ColumnMetrics, C: TypedColumn + ColumnName + ColumnMetrics,
{ {
pub fn get(&self, key: C::Index) -> Result<Option<C::Type>> { pub fn get(&self, key: C::Index) -> Result<Option<C::Type>> {
let mut result = Ok(None);
let is_perf_context_enabled = Self::maybe_collect_perf_context();
if let Some(serialized_value) = self.backend.get_cf(self.handle(), &C::key(key))? { if let Some(serialized_value) = self.backend.get_cf(self.handle(), &C::key(key))? {
let value = deserialize(&serialized_value)?; let value = deserialize(&serialized_value)?;
Ok(Some(value)) result = Ok(Some(value))
} else {
Ok(None)
} }
if is_perf_context_enabled {
self.report_read_perf_context(C::rocksdb_get_perf_metric_header(&self.column_options));
}
result
} }
pub fn put(&self, key: C::Index, value: &C::Type) -> Result<()> { pub fn put(&self, key: C::Index, value: &C::Type) -> Result<()> {
@ -1817,6 +1977,202 @@ where
pub fn delete(&self, key: C::Index) -> Result<()> { pub fn delete(&self, key: C::Index) -> Result<()> {
self.backend.delete_cf(self.handle(), &C::key(key)) self.backend.delete_cf(self.handle(), &C::key(key))
} }
/// The function enables RocksDB's PerfContext in N out of 1000
/// where N is ROCKSDB_PERF_CONTEXT_SAMPLES_IN_1K.
///
/// Returns true if the PerfContext is enabled.
fn maybe_collect_perf_context() -> bool {
if thread_rng().gen_range(0, METRIC_SAMPLES_1K) > *ROCKSDB_PERF_CONTEXT_SAMPLES_IN_1K {
return false;
}
set_perf_stats(PerfStatsLevel::EnableTime);
PER_THREAD_ROCKS_PERF_CONTEXT.with(|perf_context| {
perf_context.borrow_mut().reset();
});
true
}
/// Reports the collected PerfContext and disables the PerfContext after
/// reporting.
fn report_read_perf_context(&self, metric_header: &'static str) {
PER_THREAD_ROCKS_PERF_CONTEXT.with(|perf_context_cell| {
set_perf_stats(PerfStatsLevel::Disable);
let perf_context = perf_context_cell.borrow();
datapoint_info!(
metric_header,
(
"user_key_comparison_count",
perf_context.metric(PerfMetric::UserKeyComparisonCount) as i64,
i64
),
(
"block_cache_hit_count",
perf_context.metric(PerfMetric::BlockCacheHitCount) as i64,
i64
),
(
"block_read_count",
perf_context.metric(PerfMetric::BlockReadCount) as i64,
i64
),
(
"block_read_byte",
perf_context.metric(PerfMetric::BlockReadByte) as i64,
i64
),
(
"block_read_nanos",
perf_context.metric(PerfMetric::BlockReadTime) as i64,
i64
),
(
"block_checksum_nanos",
perf_context.metric(PerfMetric::BlockChecksumTime) as i64,
i64
),
(
"block_decompress_nanos",
perf_context.metric(PerfMetric::BlockDecompressTime) as i64,
i64
),
(
"get_read_bytes",
perf_context.metric(PerfMetric::GetReadBytes) as i64,
i64
),
(
"multiget_read_bytes",
perf_context.metric(PerfMetric::MultigetReadBytes) as i64,
i64
),
(
"get_snapshot_nanos",
perf_context.metric(PerfMetric::GetSnapshotTime) as i64,
i64
),
(
"get_from_memtable_nanos",
perf_context.metric(PerfMetric::GetFromMemtableTime) as i64,
i64
),
(
"get_from_memtable_count",
perf_context.metric(PerfMetric::GetFromMemtableCount) as i64,
i64
),
(
// total nanos spent after Get() finds a key
"get_post_process_nanos",
perf_context.metric(PerfMetric::GetPostProcessTime) as i64,
i64
),
(
// total nanos reading from output files
"get_from_output_files_nanos",
perf_context.metric(PerfMetric::GetFromOutputFilesTime) as i64,
i64
),
(
// time spent on acquiring DB mutex
"db_mutex_lock_nanos",
perf_context.metric(PerfMetric::DbMutexLockNanos) as i64,
i64
),
(
// time spent on waiting with a condition variable created with DB mutex.
"db_condition_wait_nanos",
perf_context.metric(PerfMetric::DbConditionWaitNanos) as i64,
i64
),
(
"merge_operator_nanos",
perf_context.metric(PerfMetric::MergeOperatorTimeNanos) as i64,
i64
),
(
"read_index_block_nanos",
perf_context.metric(PerfMetric::ReadIndexBlockNanos) as i64,
i64
),
(
"read_filter_block_nanos",
perf_context.metric(PerfMetric::ReadFilterBlockNanos) as i64,
i64
),
(
"new_table_block_iter_nanos",
perf_context.metric(PerfMetric::NewTableBlockIterNanos) as i64,
i64
),
(
"block_seek_nanos",
perf_context.metric(PerfMetric::BlockSeekNanos) as i64,
i64
),
(
"find_table_nanos",
perf_context.metric(PerfMetric::FindTableNanos) as i64,
i64
),
(
"bloom_memtable_hit_count",
perf_context.metric(PerfMetric::BloomMemtableHitCount) as i64,
i64
),
(
"bloom_memtable_miss_count",
perf_context.metric(PerfMetric::BloomMemtableMissCount) as i64,
i64
),
(
"bloom_sst_hit_count",
perf_context.metric(PerfMetric::BloomSstHitCount) as i64,
i64
),
(
"bloom_sst_miss_count",
perf_context.metric(PerfMetric::BloomSstMissCount) as i64,
i64
),
(
"key_lock_wait_time",
perf_context.metric(PerfMetric::KeyLockWaitTime) as i64,
i64
),
(
"key_lock_wait_count",
perf_context.metric(PerfMetric::KeyLockWaitCount) as i64,
i64
),
(
"env_file_exists_nanos",
perf_context.metric(PerfMetric::EnvFileExistsNanos) as i64,
i64
),
(
"env_get_children_nanos",
perf_context.metric(PerfMetric::EnvGetChildrenNanos) as i64,
i64
),
(
"env_lock_file_nanos",
perf_context.metric(PerfMetric::EnvLockFileNanos) as i64,
i64
),
(
"env_unlock_file_nanos",
perf_context.metric(PerfMetric::EnvUnlockFileNanos) as i64,
i64
),
(
"total_metric_count",
perf_context.metric(PerfMetric::TotalMetricCount) as i64,
i64
),
);
});
}
} }
impl<C> LedgerColumn<C> impl<C> LedgerColumn<C>