(LedgerStore) Rate-limit RocksDB perf sample by a minimum time interval (#25100)

#### Problem
The current RocksDB read/write perf metrics do not include the total operation nanos
and thus we have to include all fields that might contribute to the total operation nanos.

#### Summary of Changes
This PR includes the total operation nanos in RocksDB's read/write perf and reduces the
number of reported fields in its perf metric.
This commit is contained in:
Yueh-Hsuan Chiang 2022-05-21 16:42:33 -07:00 committed by GitHub
parent d121a06826
commit d3dc2db9fb
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 66 additions and 51 deletions

View File

@ -542,17 +542,20 @@ impl Rocks {
}
fn write(&self, batch: RWriteBatch) -> Result<()> {
let is_perf_enabled = maybe_enable_rocksdb_perf(
let op_start_instant = maybe_enable_rocksdb_perf(
self.column_options.rocks_perf_sample_interval,
&self.write_batch_perf_status,
);
let result = self.db.write(batch);
if is_perf_enabled {
report_rocksdb_write_perf(rocksdb_metric_header!(
"blockstore_rocksdb_write_perf,op=write_batch",
"write_batch",
self.column_options
));
if let Some(op_start_instant) = op_start_instant {
report_rocksdb_write_perf(
&op_start_instant.elapsed(),
rocksdb_metric_header!(
"blockstore_rocksdb_write_perf,op=write_batch",
"write_batch",
self.column_options
),
);
}
match result {
Ok(_) => Ok(()),
@ -1321,8 +1324,11 @@ where
&self.read_perf_status,
);
let result = self.backend.get_cf(self.handle(), &C::key(key));
if is_perf_enabled {
report_rocksdb_read_perf(C::rocksdb_get_perf_metric_header(&self.column_options));
if let Some(op_start_instant) = is_perf_enabled {
report_rocksdb_read_perf(
&op_start_instant.elapsed(),
C::rocksdb_get_perf_metric_header(&self.column_options),
);
}
result
}
@ -1399,8 +1405,11 @@ where
&self.write_perf_status,
);
let result = self.backend.put_cf(self.handle(), &C::key(key), value);
if is_perf_enabled {
report_rocksdb_write_perf(C::rocksdb_put_perf_metric_header(&self.column_options));
if let Some(op_start_instant) = is_perf_enabled {
report_rocksdb_write_perf(
&op_start_instant.elapsed(),
C::rocksdb_put_perf_metric_header(&self.column_options),
);
}
result
}
@ -1431,8 +1440,11 @@ where
result = Ok(Some(value))
}
if is_perf_enabled {
report_rocksdb_read_perf(C::rocksdb_get_perf_metric_header(&self.column_options));
if let Some(op_start_instant) = is_perf_enabled {
report_rocksdb_read_perf(
&op_start_instant.elapsed(),
C::rocksdb_get_perf_metric_header(&self.column_options),
);
}
result
}
@ -1448,8 +1460,11 @@ where
.backend
.put_cf(self.handle(), &C::key(key), &serialized_value);
if is_perf_enabled {
report_rocksdb_write_perf(C::rocksdb_put_perf_metric_header(&self.column_options));
if let Some(op_start_instant) = is_perf_enabled {
report_rocksdb_write_perf(
&op_start_instant.elapsed(),
C::rocksdb_put_perf_metric_header(&self.column_options),
);
}
result
}
@ -1460,8 +1475,11 @@ where
&self.write_perf_status,
);
let result = self.backend.delete_cf(self.handle(), &C::key(key));
if is_perf_enabled {
report_rocksdb_write_perf(C::rocksdb_delete_perf_metric_header(&self.column_options));
if let Some(op_start_instant) = is_perf_enabled {
report_rocksdb_write_perf(
&op_start_instant.elapsed(),
C::rocksdb_delete_perf_metric_header(&self.column_options),
);
}
result
}
@ -1480,8 +1498,11 @@ where
&self.read_perf_status,
);
let result = self.backend.get_cf(self.handle(), &C::key(key));
if is_perf_enabled {
report_rocksdb_read_perf(C::rocksdb_get_perf_metric_header(&self.column_options));
if let Some(op_start_instant) = is_perf_enabled {
report_rocksdb_read_perf(
&op_start_instant.elapsed(),
C::rocksdb_get_perf_metric_header(&self.column_options),
);
}
if let Some(serialized_value) = result? {
@ -1501,8 +1522,11 @@ where
&self.read_perf_status,
);
let result = self.backend.get_cf(self.handle(), &C::key(key));
if is_perf_enabled {
report_rocksdb_read_perf(C::rocksdb_get_perf_metric_header(&self.column_options));
if let Some(op_start_instant) = is_perf_enabled {
report_rocksdb_read_perf(
&op_start_instant.elapsed(),
C::rocksdb_get_perf_metric_header(&self.column_options),
);
}
if let Some(serialized_value) = result? {
@ -1521,8 +1545,11 @@ where
&self.write_perf_status,
);
let result = self.backend.put_cf(self.handle(), &C::key(key), &buf);
if is_perf_enabled {
report_rocksdb_write_perf(C::rocksdb_put_perf_metric_header(&self.column_options));
if let Some(op_start_instant) = is_perf_enabled {
report_rocksdb_write_perf(
&op_start_instant.elapsed(),
C::rocksdb_put_perf_metric_header(&self.column_options),
);
}
result

View File

@ -15,7 +15,7 @@ use {
atomic::{AtomicU64, AtomicUsize, Ordering},
Arc,
},
time::Duration,
time::{Duration, Instant},
},
};
@ -251,25 +251,27 @@ const PERF_SAMPLING_MIN_DURATION: Duration = Duration::from_secs(1);
pub(crate) fn maybe_enable_rocksdb_perf(
sample_interval: usize,
perf_status: &PerfSamplingStatus,
) -> bool {
) -> Option<Instant> {
if perf_status.should_sample(sample_interval) {
set_perf_stats(PerfStatsLevel::EnableTime);
PER_THREAD_ROCKS_PERF_CONTEXT.with(|perf_context| {
perf_context.borrow_mut().reset();
});
return true;
return Some(Instant::now());
}
false
None
}
/// Reports the collected PerfContext and disables the PerfContext after
/// reporting.
pub(crate) fn report_rocksdb_read_perf(metric_header: &'static str) {
pub(crate) fn report_rocksdb_read_perf(total_op_duration: &Duration, metric_header: &'static str) {
PER_THREAD_ROCKS_PERF_CONTEXT.with(|perf_context_cell| {
set_perf_stats(PerfStatsLevel::Disable);
let perf_context = perf_context_cell.borrow();
datapoint_info!(
metric_header,
// total nanos spent on the entire operation.
("total_op_nanos", total_op_duration.as_nanos() as i64, i64),
(
"user_key_comparison_count",
perf_context.metric(PerfMetric::UserKeyComparisonCount) as i64,
@ -414,29 +416,13 @@ pub(crate) fn report_rocksdb_read_perf(metric_header: &'static str) {
perf_context.metric(PerfMetric::KeyLockWaitCount) as i64,
i64
),
// nanos spent on file/directory operations.
(
"env_file_exists_nanos",
perf_context.metric(PerfMetric::EnvFileExistsNanos) as i64,
i64
),
(
"env_get_children_nanos",
perf_context.metric(PerfMetric::EnvGetChildrenNanos) as i64,
i64
),
(
"env_lock_file_nanos",
perf_context.metric(PerfMetric::EnvLockFileNanos) as i64,
i64
),
(
"env_unlock_file_nanos",
perf_context.metric(PerfMetric::EnvUnlockFileNanos) as i64,
i64
),
(
"total_metric_count",
perf_context.metric(PerfMetric::TotalMetricCount) as i64,
"env_file_ops_nanos",
(perf_context.metric(PerfMetric::EnvFileExistsNanos)
+ perf_context.metric(PerfMetric::EnvGetChildrenNanos)
+ perf_context.metric(PerfMetric::EnvLockFileNanos)
+ perf_context.metric(PerfMetric::EnvUnlockFileNanos)) as i64,
i64
),
);
@ -444,12 +430,14 @@ pub(crate) fn report_rocksdb_read_perf(metric_header: &'static str) {
}
/// Reports the collected PerfContext and disables the PerfContext after
/// reporting.
pub(crate) fn report_rocksdb_write_perf(metric_header: &'static str) {
pub(crate) fn report_rocksdb_write_perf(total_op_duration: &Duration, metric_header: &'static str) {
PER_THREAD_ROCKS_PERF_CONTEXT.with(|perf_context_cell| {
set_perf_stats(PerfStatsLevel::Disable);
let perf_context = perf_context_cell.borrow();
datapoint_info!(
metric_header,
// total nanos spent on the entire operation.
("total_op_nanos", total_op_duration.as_nanos() as i64, i64),
// total nanos spent on writing to WAL
(
"write_wal_nanos",