Entry verify cleanup and gossip counters (#10632)
* Add prune message counter * Switch to us verification time to match other counters * Add separate transaction/poh verify timing
This commit is contained in:
parent
1b7ef69bce
commit
1eca9b19ab
|
@ -226,6 +226,8 @@ struct GossipStats {
|
||||||
process_prune: Counter,
|
process_prune: Counter,
|
||||||
process_push_message: Counter,
|
process_push_message: Counter,
|
||||||
prune_received_cache: Counter,
|
prune_received_cache: Counter,
|
||||||
|
prune_message_count: Counter,
|
||||||
|
prune_message_len: Counter,
|
||||||
purge: Counter,
|
purge: Counter,
|
||||||
epoch_slots_lookup: Counter,
|
epoch_slots_lookup: Counter,
|
||||||
epoch_slots_push: Counter,
|
epoch_slots_push: Counter,
|
||||||
|
@ -1641,7 +1643,7 @@ impl ClusterInfo {
|
||||||
}
|
}
|
||||||
ret
|
ret
|
||||||
});
|
});
|
||||||
let rsp = Self::handle_push_message(self, recycler, &from, data, stakes);
|
let rsp = self.handle_push_message(recycler, &from, data, stakes);
|
||||||
if let Some(rsp) = rsp {
|
if let Some(rsp) = rsp {
|
||||||
let _ignore_disconnect = response_sender.send(rsp);
|
let _ignore_disconnect = response_sender.send(rsp);
|
||||||
}
|
}
|
||||||
|
@ -1653,11 +1655,10 @@ impl ClusterInfo {
|
||||||
Protocol::PruneMessage(from, data) => {
|
Protocol::PruneMessage(from, data) => {
|
||||||
let start = allocated.get();
|
let start = allocated.get();
|
||||||
if data.verify() {
|
if data.verify() {
|
||||||
inc_new_counter_debug!("cluster_info-prune_message", 1);
|
self.stats.prune_message_count.add_relaxed(1);
|
||||||
inc_new_counter_debug!(
|
self.stats
|
||||||
"cluster_info-prune_message-size",
|
.prune_message_len
|
||||||
data.prunes.len()
|
.add_relaxed(data.prunes.len() as u64);
|
||||||
);
|
|
||||||
match self
|
match self
|
||||||
.time_gossip_write_lock("process_prune", &self.stats.process_prune)
|
.time_gossip_write_lock("process_prune", &self.stats.process_prune)
|
||||||
.process_prune_msg(
|
.process_prune_msg(
|
||||||
|
@ -2241,6 +2242,16 @@ impl ClusterInfo {
|
||||||
self.stats.new_pull_requests_count.clear(),
|
self.stats.new_pull_requests_count.clear(),
|
||||||
i64
|
i64
|
||||||
),
|
),
|
||||||
|
(
|
||||||
|
"prune_message_count",
|
||||||
|
self.stats.prune_message_count.clear(),
|
||||||
|
i64
|
||||||
|
),
|
||||||
|
(
|
||||||
|
"prune_message_len",
|
||||||
|
self.stats.prune_message_len.clear(),
|
||||||
|
i64
|
||||||
|
),
|
||||||
);
|
);
|
||||||
|
|
||||||
*last_print = Instant::now();
|
*last_print = Instant::now();
|
||||||
|
|
|
@ -42,7 +42,16 @@ impl ReplaySlotStats {
|
||||||
self.fetch_fail_elapsed as i64,
|
self.fetch_fail_elapsed as i64,
|
||||||
i64
|
i64
|
||||||
),
|
),
|
||||||
("entry_verification_time", self.verify_elapsed as i64, i64),
|
(
|
||||||
|
"entry_poh_verification_time",
|
||||||
|
self.poh_verify_elapsed as i64,
|
||||||
|
i64
|
||||||
|
),
|
||||||
|
(
|
||||||
|
"entry_transaction_verification_time",
|
||||||
|
self.transaction_verify_elapsed as i64,
|
||||||
|
i64
|
||||||
|
),
|
||||||
("replay_time", self.replay_elapsed as i64, i64),
|
("replay_time", self.replay_elapsed as i64, i64),
|
||||||
(
|
(
|
||||||
"replay_total_elapsed",
|
"replay_total_elapsed",
|
||||||
|
|
|
@ -484,7 +484,8 @@ fn confirm_full_slot(
|
||||||
pub struct ConfirmationTiming {
|
pub struct ConfirmationTiming {
|
||||||
pub started: Instant,
|
pub started: Instant,
|
||||||
pub replay_elapsed: u64,
|
pub replay_elapsed: u64,
|
||||||
pub verify_elapsed: u64,
|
pub poh_verify_elapsed: u64,
|
||||||
|
pub transaction_verify_elapsed: u64,
|
||||||
pub fetch_elapsed: u64,
|
pub fetch_elapsed: u64,
|
||||||
pub fetch_fail_elapsed: u64,
|
pub fetch_fail_elapsed: u64,
|
||||||
}
|
}
|
||||||
|
@ -494,7 +495,8 @@ impl Default for ConfirmationTiming {
|
||||||
Self {
|
Self {
|
||||||
started: Instant::now(),
|
started: Instant::now(),
|
||||||
replay_elapsed: 0,
|
replay_elapsed: 0,
|
||||||
verify_elapsed: 0,
|
poh_verify_elapsed: 0,
|
||||||
|
transaction_verify_elapsed: 0,
|
||||||
fetch_elapsed: 0,
|
fetch_elapsed: 0,
|
||||||
fetch_fail_elapsed: 0,
|
fetch_fail_elapsed: 0,
|
||||||
}
|
}
|
||||||
|
@ -599,11 +601,13 @@ pub fn confirm_slot(
|
||||||
timing.replay_elapsed += replay_elapsed.as_us();
|
timing.replay_elapsed += replay_elapsed.as_us();
|
||||||
|
|
||||||
if let Some(mut verifier) = verifier {
|
if let Some(mut verifier) = verifier {
|
||||||
if !verifier.finish_verify(&entries) {
|
let verified = verifier.finish_verify(&entries);
|
||||||
|
timing.poh_verify_elapsed += verifier.poh_duration_us();
|
||||||
|
timing.transaction_verify_elapsed += verifier.transaction_duration_us();
|
||||||
|
if !verified {
|
||||||
warn!("Ledger proof of history failed at slot: {}", bank.slot());
|
warn!("Ledger proof of history failed at slot: {}", bank.slot());
|
||||||
return Err(BlockError::InvalidEntryHash.into());
|
return Err(BlockError::InvalidEntryHash.into());
|
||||||
}
|
}
|
||||||
timing.verify_elapsed += verifier.duration_ms();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
process_result?;
|
process_result?;
|
||||||
|
|
|
@ -151,12 +151,22 @@ pub fn next_hash(start_hash: &Hash, num_hashes: u64, transactions: &[Transaction
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
pub struct VerificationData {
|
pub struct GpuVerificationData {
|
||||||
thread_h: Option<JoinHandle<u64>>,
|
thread_h: Option<JoinHandle<u64>>,
|
||||||
verification_status: EntryVerificationStatus,
|
|
||||||
hashes: Option<Arc<Mutex<PinnedVec<Hash>>>>,
|
hashes: Option<Arc<Mutex<PinnedVec<Hash>>>>,
|
||||||
tx_hashes: Vec<Option<Hash>>,
|
tx_hashes: Vec<Option<Hash>>,
|
||||||
duration_ms: u64,
|
}
|
||||||
|
|
||||||
|
pub enum DeviceVerificationData {
|
||||||
|
CPU(),
|
||||||
|
GPU(GpuVerificationData),
|
||||||
|
}
|
||||||
|
|
||||||
|
pub struct EntryVerificationState {
|
||||||
|
verification_status: EntryVerificationStatus,
|
||||||
|
poh_duration_us: u64,
|
||||||
|
transaction_duration_us: u64,
|
||||||
|
device_verification_data: DeviceVerificationData,
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Default, Clone)]
|
#[derive(Default, Clone)]
|
||||||
|
@ -172,33 +182,30 @@ pub enum EntryVerificationStatus {
|
||||||
Pending,
|
Pending,
|
||||||
}
|
}
|
||||||
|
|
||||||
pub enum EntryVerificationState {
|
|
||||||
CPU(VerificationData),
|
|
||||||
GPU(VerificationData),
|
|
||||||
}
|
|
||||||
|
|
||||||
impl EntryVerificationState {
|
impl EntryVerificationState {
|
||||||
pub fn status(&self) -> EntryVerificationStatus {
|
pub fn status(&self) -> EntryVerificationStatus {
|
||||||
match self {
|
self.verification_status
|
||||||
EntryVerificationState::CPU(state) => state.verification_status,
|
|
||||||
EntryVerificationState::GPU(state) => state.verification_status,
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn duration_ms(&self) -> u64 {
|
pub fn poh_duration_us(&self) -> u64 {
|
||||||
match self {
|
self.poh_duration_us
|
||||||
EntryVerificationState::CPU(state) => state.duration_ms,
|
}
|
||||||
EntryVerificationState::GPU(state) => state.duration_ms,
|
|
||||||
}
|
pub fn set_transaction_duration_us(&mut self, new: u64) {
|
||||||
|
self.transaction_duration_us = new;
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn transaction_duration_us(&self) -> u64 {
|
||||||
|
self.transaction_duration_us
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn finish_verify(&mut self, entries: &[Entry]) -> bool {
|
pub fn finish_verify(&mut self, entries: &[Entry]) -> bool {
|
||||||
match self {
|
match &mut self.device_verification_data {
|
||||||
EntryVerificationState::GPU(verification_state) => {
|
DeviceVerificationData::GPU(verification_state) => {
|
||||||
let gpu_time_ms = verification_state.thread_h.take().unwrap().join().unwrap();
|
let gpu_time_us = verification_state.thread_h.take().unwrap().join().unwrap();
|
||||||
|
|
||||||
let mut verify_check_time = Measure::start("verify_check");
|
let mut verify_check_time = Measure::start("verify_check");
|
||||||
let hashes = verification_state.hashes.take().expect("hashes.as_ref");
|
let hashes = verification_state.hashes.take().unwrap();
|
||||||
let hashes = Arc::try_unwrap(hashes)
|
let hashes = Arc::try_unwrap(hashes)
|
||||||
.expect("unwrap Arc")
|
.expect("unwrap Arc")
|
||||||
.into_inner()
|
.into_inner()
|
||||||
|
@ -225,21 +232,17 @@ impl EntryVerificationState {
|
||||||
});
|
});
|
||||||
|
|
||||||
verify_check_time.stop();
|
verify_check_time.stop();
|
||||||
verification_state.duration_ms += gpu_time_ms + verify_check_time.as_ms();
|
self.poh_duration_us += gpu_time_us + verify_check_time.as_us();
|
||||||
inc_new_counter_info!(
|
|
||||||
"entry_verify-duration",
|
|
||||||
verification_state.duration_ms as usize
|
|
||||||
);
|
|
||||||
|
|
||||||
verification_state.verification_status = if res {
|
self.verification_status = if res {
|
||||||
EntryVerificationStatus::Success
|
EntryVerificationStatus::Success
|
||||||
} else {
|
} else {
|
||||||
EntryVerificationStatus::Failure
|
EntryVerificationStatus::Failure
|
||||||
};
|
};
|
||||||
res
|
res
|
||||||
}
|
}
|
||||||
EntryVerificationState::CPU(verification_state) => {
|
DeviceVerificationData::CPU() => {
|
||||||
verification_state.verification_status == EntryVerificationStatus::Success
|
self.verification_status == EntryVerificationStatus::Success
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -290,19 +293,17 @@ impl EntrySlice for [Entry] {
|
||||||
})
|
})
|
||||||
})
|
})
|
||||||
});
|
});
|
||||||
let duration_ms = timing::duration_as_ms(&now.elapsed());
|
let poh_duration_us = timing::duration_as_us(&now.elapsed());
|
||||||
inc_new_counter_warn!("entry_verify-duration", duration_ms as usize);
|
EntryVerificationState {
|
||||||
EntryVerificationState::CPU(VerificationData {
|
|
||||||
thread_h: None,
|
|
||||||
verification_status: if res {
|
verification_status: if res {
|
||||||
EntryVerificationStatus::Success
|
EntryVerificationStatus::Success
|
||||||
} else {
|
} else {
|
||||||
EntryVerificationStatus::Failure
|
EntryVerificationStatus::Failure
|
||||||
},
|
},
|
||||||
hashes: None,
|
poh_duration_us,
|
||||||
tx_hashes: vec![],
|
transaction_duration_us: 0,
|
||||||
duration_ms,
|
device_verification_data: DeviceVerificationData::CPU(),
|
||||||
})
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
fn verify_transaction_signatures(&self) -> bool {
|
fn verify_transaction_signatures(&self) -> bool {
|
||||||
|
@ -324,19 +325,22 @@ impl EntrySlice for [Entry] {
|
||||||
) -> EntryVerificationState {
|
) -> EntryVerificationState {
|
||||||
let start = Instant::now();
|
let start = Instant::now();
|
||||||
let res = self.verify_transaction_signatures();
|
let res = self.verify_transaction_signatures();
|
||||||
|
let transaction_duration_us = timing::duration_as_us(&start.elapsed());
|
||||||
if !res {
|
if !res {
|
||||||
return EntryVerificationState::CPU(VerificationData {
|
return EntryVerificationState {
|
||||||
thread_h: None,
|
|
||||||
verification_status: EntryVerificationStatus::Failure,
|
verification_status: EntryVerificationStatus::Failure,
|
||||||
duration_ms: timing::duration_as_ms(&start.elapsed()),
|
transaction_duration_us,
|
||||||
hashes: None,
|
poh_duration_us: 0,
|
||||||
tx_hashes: vec![],
|
device_verification_data: DeviceVerificationData::CPU(),
|
||||||
});
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
|
let start = Instant::now();
|
||||||
let api = perf_libs::api();
|
let api = perf_libs::api();
|
||||||
if api.is_none() {
|
if api.is_none() {
|
||||||
return self.verify_cpu(start_hash);
|
let mut res: EntryVerificationState = self.verify_cpu(start_hash);
|
||||||
|
res.set_transaction_duration_us(transaction_duration_us);
|
||||||
|
return res;
|
||||||
}
|
}
|
||||||
let api = api.unwrap();
|
let api = api.unwrap();
|
||||||
inc_new_counter_warn!("entry_verify-num_entries", self.len() as usize);
|
inc_new_counter_warn!("entry_verify-num_entries", self.len() as usize);
|
||||||
|
@ -386,11 +390,11 @@ impl EntrySlice for [Entry] {
|
||||||
if res != 0 {
|
if res != 0 {
|
||||||
panic!("GPU PoH verify many failed");
|
panic!("GPU PoH verify many failed");
|
||||||
}
|
}
|
||||||
inc_new_counter_warn!(
|
inc_new_counter_info!(
|
||||||
"entry_verify-gpu_thread",
|
"entry_verify-gpu_thread",
|
||||||
timing::duration_as_ms(&gpu_wait.elapsed()) as usize
|
timing::duration_as_us(&gpu_wait.elapsed()) as usize
|
||||||
);
|
);
|
||||||
timing::duration_as_ms(&gpu_wait.elapsed())
|
timing::duration_as_us(&gpu_wait.elapsed())
|
||||||
});
|
});
|
||||||
|
|
||||||
let tx_hashes = PAR_THREAD_POOL.with(|thread_pool| {
|
let tx_hashes = PAR_THREAD_POOL.with(|thread_pool| {
|
||||||
|
@ -407,13 +411,17 @@ impl EntrySlice for [Entry] {
|
||||||
})
|
})
|
||||||
});
|
});
|
||||||
|
|
||||||
EntryVerificationState::GPU(VerificationData {
|
let device_verification_data = DeviceVerificationData::GPU(GpuVerificationData {
|
||||||
thread_h: Some(gpu_verify_thread),
|
thread_h: Some(gpu_verify_thread),
|
||||||
verification_status: EntryVerificationStatus::Pending,
|
|
||||||
tx_hashes,
|
tx_hashes,
|
||||||
duration_ms: timing::duration_as_ms(&start.elapsed()),
|
|
||||||
hashes: Some(hashes),
|
hashes: Some(hashes),
|
||||||
})
|
});
|
||||||
|
EntryVerificationState {
|
||||||
|
verification_status: EntryVerificationStatus::Pending,
|
||||||
|
poh_duration_us: timing::duration_as_us(&start.elapsed()),
|
||||||
|
transaction_duration_us,
|
||||||
|
device_verification_data,
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
fn verify_tick_hash_count(&self, tick_hash_count: &mut u64, hashes_per_tick: u64) -> bool {
|
fn verify_tick_hash_count(&self, tick_hash_count: &mut u64, hashes_per_tick: u64) -> bool {
|
||||||
|
|
Loading…
Reference in New Issue