From 0b5ed872203fdaece1740f326647fff1b9edcea0 Mon Sep 17 00:00:00 2001 From: Yueh-Hsuan Chiang <93241502+yhchiang-sol@users.noreply.github.com> Date: Fri, 1 Apr 2022 13:13:32 -0700 Subject: [PATCH] (LedgerStore) Enable performance sampling in column family get() (#23834) #### Summary of Changes This PR enables RocksDB read side performance metrics to report to blockstore_rocksdb_read_perf. The sampling rate is controlled by an env arg `SOLANA_METRICS_ROCKSDB_PERF_SAMPLES_IN_1K`, specifies the number of perf samples for every 1000 operations. The default value is set to 10, meaning we will report 10 out of 1000 (or 1/100) reads. The metrics are based on the RocksDB [PerfContext](https://github.com/facebook/rocksdb/blob/main/include/rocksdb/perf_context.h). It includes many useful metrics including block read time, cache hit rate, and time spent on decompressing the block. --- core/src/ledger_metric_report_service.rs | 4 +- ledger/src/blockstore_db.rs | 364 ++++++++++++++++++++++- 2 files changed, 363 insertions(+), 5 deletions(-) diff --git a/core/src/ledger_metric_report_service.rs b/core/src/ledger_metric_report_service.rs index 2493d0e1d..8d0b96d28 100644 --- a/core/src/ledger_metric_report_service.rs +++ b/core/src/ledger_metric_report_service.rs @@ -13,7 +13,9 @@ use { }, }; -// Determines how often we report blockstore metrics. +// Determines how often we report blockstore metrics under +// LedgerMetricReportService. Note that there're other blockstore +// metrics that are reported outside LedgerMetricReportService. const BLOCKSTORE_METRICS_REPORT_PERIOD_MILLIS: u64 = 10000; pub struct LedgerMetricReportService { diff --git a/ledger/src/blockstore_db.rs b/ledger/src/blockstore_db.rs index c2618c210..43a7c6361 100644 --- a/ledger/src/blockstore_db.rs +++ b/ledger/src/blockstore_db.rs @@ -5,16 +5,19 @@ use { byteorder::{BigEndian, ByteOrder}, log::*, prost::Message, + rand::{thread_rng, Rng}, rocksdb::{ self, compaction_filter::CompactionFilter, compaction_filter_factory::{CompactionFilterContext, CompactionFilterFactory}, + perf::{set_perf_stats, PerfMetric, PerfStatsLevel}, properties as RocksProperties, ColumnFamily, ColumnFamilyDescriptor, CompactionDecision, DBCompactionStyle, DBCompressionType as RocksCompressionType, DBIterator, DBRawIterator, DBRecoveryMode, FifoCompactOptions, IteratorMode as RocksIteratorMode, Options, - WriteBatch as RWriteBatch, DB, + PerfContext, WriteBatch as RWriteBatch, DB, }, serde::{de::DeserializeOwned, Serialize}, + solana_metrics::datapoint_info, solana_runtime::hardened_unpack::UnpackError, solana_sdk::{ clock::{Slot, UnixTimestamp}, @@ -23,6 +26,7 @@ use { }, solana_storage_proto::convert::generated, std::{ + cell::RefCell, collections::{HashMap, HashSet}, ffi::{CStr, CString}, fs, @@ -56,6 +60,9 @@ const DEFAULT_FIFO_COMPACTION_DATA_CF_SIZE: u64 = 125 * 1024 * 1024 * 1024; // used by coding shreds. const DEFAULT_FIFO_COMPACTION_CODING_CF_SIZE: u64 = 100 * 1024 * 1024 * 1024; +// Thread local instance of RocksDB's PerfContext. +thread_local! {static PER_THREAD_ROCKS_PERF_CONTEXT: RefCell = RefCell::new(PerfContext::default());} + // Column family for metadata about a leader slot const META_CF: &str = "meta"; // Column family for slots that have been marked as dead @@ -778,6 +785,7 @@ pub trait ColumnMetrics { cf_metrics: BlockstoreRocksDbColumnFamilyMetrics, column_options: &Arc, ); + fn rocksdb_get_perf_metric_header(column_options: &Arc) -> &'static str; } pub trait ColumnName { @@ -875,6 +883,13 @@ impl ColumnMetrics for columns::TransactionStatus { column_options )); } + fn rocksdb_get_perf_metric_header(column_options: &Arc) -> &'static str { + rocksdb_metric_header!( + "blockstore_rocksdb_read_perf,op=get", + "transaction_status", + column_options + ) + } } impl ColumnName for columns::TransactionStatus { const NAME: &'static str = TRANSACTION_STATUS_CF; @@ -927,6 +942,13 @@ impl ColumnMetrics for columns::AddressSignatures { column_options )); } + fn rocksdb_get_perf_metric_header(column_options: &Arc) -> &'static str { + rocksdb_metric_header!( + "blockstore_rocksdb_read_perf,op=get", + "address_signatures", + column_options + ) + } } impl ColumnName for columns::AddressSignatures { const NAME: &'static str = ADDRESS_SIGNATURES_CF; @@ -969,6 +991,13 @@ impl ColumnMetrics for columns::TransactionMemos { column_options )); } + fn rocksdb_get_perf_metric_header(column_options: &Arc) -> &'static str { + rocksdb_metric_header!( + "blockstore_rocksdb_read_perf,op=get", + "transaction_memos", + column_options + ) + } } impl ColumnName for columns::TransactionMemos { const NAME: &'static str = TRANSACTION_MEMOS_CF; @@ -1011,6 +1040,13 @@ impl ColumnMetrics for columns::TransactionStatusIndex { column_options )); } + fn rocksdb_get_perf_metric_header(column_options: &Arc) -> &'static str { + rocksdb_metric_header!( + "blockstore_rocksdb_read_perf,op=get", + "transaction_status_index", + column_options + ) + } } impl ColumnName for columns::TransactionStatusIndex { const NAME: &'static str = TRANSACTION_STATUS_INDEX_CF; @@ -1028,6 +1064,13 @@ impl ColumnMetrics for columns::Rewards { column_options )); } + fn rocksdb_get_perf_metric_header(column_options: &Arc) -> &'static str { + rocksdb_metric_header!( + "blockstore_rocksdb_read_perf,op=get", + "rewards", + column_options + ) + } } impl ColumnName for columns::Rewards { const NAME: &'static str = REWARDS_CF; @@ -1048,6 +1091,13 @@ impl ColumnMetrics for columns::Blocktime { column_options )); } + fn rocksdb_get_perf_metric_header(column_options: &Arc) -> &'static str { + rocksdb_metric_header!( + "blockstore_rocksdb_read_perf,op=get", + "blocktime", + column_options + ) + } } impl ColumnName for columns::Blocktime { const NAME: &'static str = BLOCKTIME_CF; @@ -1068,6 +1118,13 @@ impl ColumnMetrics for columns::PerfSamples { column_options )); } + fn rocksdb_get_perf_metric_header(column_options: &Arc) -> &'static str { + rocksdb_metric_header!( + "blockstore_rocksdb_read_perf,op=get", + "perf_samples", + column_options + ) + } } impl ColumnName for columns::PerfSamples { const NAME: &'static str = PERF_SAMPLES_CF; @@ -1088,6 +1145,13 @@ impl ColumnMetrics for columns::BlockHeight { column_options )); } + fn rocksdb_get_perf_metric_header(column_options: &Arc) -> &'static str { + rocksdb_metric_header!( + "blockstore_rocksdb_read_perf,op=get", + "block_height", + column_options + ) + } } impl ColumnName for columns::BlockHeight { const NAME: &'static str = BLOCK_HEIGHT_CF; @@ -1107,6 +1171,13 @@ impl ColumnMetrics for columns::ProgramCosts { column_options )); } + fn rocksdb_get_perf_metric_header(column_options: &Arc) -> &'static str { + rocksdb_metric_header!( + "blockstore_rocksdb_read_perf,op=get", + "program_costs", + column_options + ) + } } impl ColumnName for columns::ProgramCosts { @@ -1173,6 +1244,13 @@ impl ColumnMetrics for columns::ShredCode { column_options )); } + fn rocksdb_get_perf_metric_header(column_options: &Arc) -> &'static str { + rocksdb_metric_header!( + "blockstore_rocksdb_read_perf,op=get", + "shred_code", + column_options + ) + } } impl ColumnName for columns::ShredCode { const NAME: &'static str = CODE_SHRED_CF; @@ -1214,6 +1292,13 @@ impl ColumnMetrics for columns::ShredData { column_options )); } + fn rocksdb_get_perf_metric_header(column_options: &Arc) -> &'static str { + rocksdb_metric_header!( + "blockstore_rocksdb_read_perf,op=get", + "shred_data", + column_options + ) + } } impl ColumnName for columns::ShredData { const NAME: &'static str = DATA_SHRED_CF; @@ -1231,6 +1316,13 @@ impl ColumnMetrics for columns::Index { column_options )); } + fn rocksdb_get_perf_metric_header(column_options: &Arc) -> &'static str { + rocksdb_metric_header!( + "blockstore_rocksdb_read_perf,op=get", + "index", + column_options + ) + } } impl ColumnName for columns::Index { const NAME: &'static str = INDEX_CF; @@ -1251,6 +1343,13 @@ impl ColumnMetrics for columns::DeadSlots { column_options )); } + fn rocksdb_get_perf_metric_header(column_options: &Arc) -> &'static str { + rocksdb_metric_header!( + "blockstore_rocksdb_read_perf,op=get", + "dead_slots", + column_options + ) + } } impl ColumnName for columns::DeadSlots { const NAME: &'static str = DEAD_SLOTS_CF; @@ -1271,6 +1370,13 @@ impl ColumnMetrics for columns::DuplicateSlots { column_options )); } + fn rocksdb_get_perf_metric_header(column_options: &Arc) -> &'static str { + rocksdb_metric_header!( + "blockstore_rocksdb_read_perf,op=get", + "duplicate_slots", + column_options + ) + } } impl ColumnName for columns::DuplicateSlots { const NAME: &'static str = DUPLICATE_SLOTS_CF; @@ -1291,6 +1397,13 @@ impl ColumnMetrics for columns::Orphans { column_options )); } + fn rocksdb_get_perf_metric_header(column_options: &Arc) -> &'static str { + rocksdb_metric_header!( + "blockstore_rocksdb_read_perf,op=get", + "orphans", + column_options + ) + } } impl ColumnName for columns::Orphans { const NAME: &'static str = ORPHANS_CF; @@ -1311,6 +1424,13 @@ impl ColumnMetrics for columns::BankHash { column_options )); } + fn rocksdb_get_perf_metric_header(column_options: &Arc) -> &'static str { + rocksdb_metric_header!( + "blockstore_rocksdb_read_perf,op=get", + "bank_hash", + column_options + ) + } } impl ColumnName for columns::BankHash { const NAME: &'static str = BANK_HASH_CF; @@ -1331,6 +1451,13 @@ impl ColumnMetrics for columns::Root { column_options )); } + fn rocksdb_get_perf_metric_header(column_options: &Arc) -> &'static str { + rocksdb_metric_header!( + "blockstore_rocksdb_read_perf,op=get", + "root", + column_options + ) + } } impl ColumnName for columns::Root { const NAME: &'static str = ROOT_CF; @@ -1351,6 +1478,13 @@ impl ColumnMetrics for columns::SlotMeta { column_options )); } + fn rocksdb_get_perf_metric_header(column_options: &Arc) -> &'static str { + rocksdb_metric_header!( + "blockstore_rocksdb_read_perf,op=get", + "slot_meta", + column_options + ) + } } impl ColumnName for columns::SlotMeta { const NAME: &'static str = META_CF; @@ -1396,6 +1530,13 @@ impl ColumnMetrics for columns::ErasureMeta { column_options )); } + fn rocksdb_get_perf_metric_header(column_options: &Arc) -> &'static str { + rocksdb_metric_header!( + "blockstore_rocksdb_read_perf,op=get", + "erasure_meta", + column_options + ) + } } impl ColumnName for columns::ErasureMeta { const NAME: &'static str = ERASURE_META_CF; @@ -1793,18 +1934,37 @@ where } } +const METRIC_SAMPLES_1K: i32 = 1000; +// The default number of rocksdb perf samples in 1K +const ROCKSDB_PERF_CONTEXT_SAMPLES_IN_1K_DEFAULT: i32 = 10; +lazy_static! { +// The number of RocksDB performance counter samples in 1000. +static ref ROCKSDB_PERF_CONTEXT_SAMPLES_IN_1K: i32 = +std::env::var("SOLANA_METRICS_ROCKSDB_PERF_SAMPLES_IN_1K") + .map(|x| { + x.parse().expect("Failed to parse SOLANA_METRICS_ROCKSDB_PERF_SAMPLES_IN_1K") + + }).unwrap_or(ROCKSDB_PERF_CONTEXT_SAMPLES_IN_1K_DEFAULT); + +} + impl LedgerColumn where C: TypedColumn + ColumnName + ColumnMetrics, { pub fn get(&self, key: C::Index) -> Result> { + let mut result = Ok(None); + let is_perf_context_enabled = Self::maybe_collect_perf_context(); if let Some(serialized_value) = self.backend.get_cf(self.handle(), &C::key(key))? { let value = deserialize(&serialized_value)?; - Ok(Some(value)) - } else { - Ok(None) + result = Ok(Some(value)) } + + if is_perf_context_enabled { + self.report_read_perf_context(C::rocksdb_get_perf_metric_header(&self.column_options)); + } + result } pub fn put(&self, key: C::Index, value: &C::Type) -> Result<()> { @@ -1817,6 +1977,202 @@ where pub fn delete(&self, key: C::Index) -> Result<()> { self.backend.delete_cf(self.handle(), &C::key(key)) } + + /// The function enables RocksDB's PerfContext in N out of 1000 + /// where N is ROCKSDB_PERF_CONTEXT_SAMPLES_IN_1K. + /// + /// Returns true if the PerfContext is enabled. + fn maybe_collect_perf_context() -> bool { + if thread_rng().gen_range(0, METRIC_SAMPLES_1K) > *ROCKSDB_PERF_CONTEXT_SAMPLES_IN_1K { + return false; + } + set_perf_stats(PerfStatsLevel::EnableTime); + PER_THREAD_ROCKS_PERF_CONTEXT.with(|perf_context| { + perf_context.borrow_mut().reset(); + }); + true + } + + /// Reports the collected PerfContext and disables the PerfContext after + /// reporting. + fn report_read_perf_context(&self, metric_header: &'static str) { + PER_THREAD_ROCKS_PERF_CONTEXT.with(|perf_context_cell| { + set_perf_stats(PerfStatsLevel::Disable); + let perf_context = perf_context_cell.borrow(); + datapoint_info!( + metric_header, + ( + "user_key_comparison_count", + perf_context.metric(PerfMetric::UserKeyComparisonCount) as i64, + i64 + ), + ( + "block_cache_hit_count", + perf_context.metric(PerfMetric::BlockCacheHitCount) as i64, + i64 + ), + ( + "block_read_count", + perf_context.metric(PerfMetric::BlockReadCount) as i64, + i64 + ), + ( + "block_read_byte", + perf_context.metric(PerfMetric::BlockReadByte) as i64, + i64 + ), + ( + "block_read_nanos", + perf_context.metric(PerfMetric::BlockReadTime) as i64, + i64 + ), + ( + "block_checksum_nanos", + perf_context.metric(PerfMetric::BlockChecksumTime) as i64, + i64 + ), + ( + "block_decompress_nanos", + perf_context.metric(PerfMetric::BlockDecompressTime) as i64, + i64 + ), + ( + "get_read_bytes", + perf_context.metric(PerfMetric::GetReadBytes) as i64, + i64 + ), + ( + "multiget_read_bytes", + perf_context.metric(PerfMetric::MultigetReadBytes) as i64, + i64 + ), + ( + "get_snapshot_nanos", + perf_context.metric(PerfMetric::GetSnapshotTime) as i64, + i64 + ), + ( + "get_from_memtable_nanos", + perf_context.metric(PerfMetric::GetFromMemtableTime) as i64, + i64 + ), + ( + "get_from_memtable_count", + perf_context.metric(PerfMetric::GetFromMemtableCount) as i64, + i64 + ), + ( + // total nanos spent after Get() finds a key + "get_post_process_nanos", + perf_context.metric(PerfMetric::GetPostProcessTime) as i64, + i64 + ), + ( + // total nanos reading from output files + "get_from_output_files_nanos", + perf_context.metric(PerfMetric::GetFromOutputFilesTime) as i64, + i64 + ), + ( + // time spent on acquiring DB mutex + "db_mutex_lock_nanos", + perf_context.metric(PerfMetric::DbMutexLockNanos) as i64, + i64 + ), + ( + // time spent on waiting with a condition variable created with DB mutex. + "db_condition_wait_nanos", + perf_context.metric(PerfMetric::DbConditionWaitNanos) as i64, + i64 + ), + ( + "merge_operator_nanos", + perf_context.metric(PerfMetric::MergeOperatorTimeNanos) as i64, + i64 + ), + ( + "read_index_block_nanos", + perf_context.metric(PerfMetric::ReadIndexBlockNanos) as i64, + i64 + ), + ( + "read_filter_block_nanos", + perf_context.metric(PerfMetric::ReadFilterBlockNanos) as i64, + i64 + ), + ( + "new_table_block_iter_nanos", + perf_context.metric(PerfMetric::NewTableBlockIterNanos) as i64, + i64 + ), + ( + "block_seek_nanos", + perf_context.metric(PerfMetric::BlockSeekNanos) as i64, + i64 + ), + ( + "find_table_nanos", + perf_context.metric(PerfMetric::FindTableNanos) as i64, + i64 + ), + ( + "bloom_memtable_hit_count", + perf_context.metric(PerfMetric::BloomMemtableHitCount) as i64, + i64 + ), + ( + "bloom_memtable_miss_count", + perf_context.metric(PerfMetric::BloomMemtableMissCount) as i64, + i64 + ), + ( + "bloom_sst_hit_count", + perf_context.metric(PerfMetric::BloomSstHitCount) as i64, + i64 + ), + ( + "bloom_sst_miss_count", + perf_context.metric(PerfMetric::BloomSstMissCount) as i64, + i64 + ), + ( + "key_lock_wait_time", + perf_context.metric(PerfMetric::KeyLockWaitTime) as i64, + i64 + ), + ( + "key_lock_wait_count", + perf_context.metric(PerfMetric::KeyLockWaitCount) as i64, + i64 + ), + ( + "env_file_exists_nanos", + perf_context.metric(PerfMetric::EnvFileExistsNanos) as i64, + i64 + ), + ( + "env_get_children_nanos", + perf_context.metric(PerfMetric::EnvGetChildrenNanos) as i64, + i64 + ), + ( + "env_lock_file_nanos", + perf_context.metric(PerfMetric::EnvLockFileNanos) as i64, + i64 + ), + ( + "env_unlock_file_nanos", + perf_context.metric(PerfMetric::EnvUnlockFileNanos) as i64, + i64 + ), + ( + "total_metric_count", + perf_context.metric(PerfMetric::TotalMetricCount) as i64, + i64 + ), + ); + }); + } } impl LedgerColumn