From 4522e85ac408ffdfb9ee3aa734bc6a7aab77344f Mon Sep 17 00:00:00 2001 From: carllin Date: Wed, 8 Apr 2020 14:35:24 -0700 Subject: [PATCH] Add Metrics/Dashboards tracking block production (#9342) * Add metric tracking blocks/dropped blocks Co-authored-by: Carl --- core/src/consensus.rs | 11 +- core/src/progress_map.rs | 62 +- core/src/replay_stage.rs | 172 ++- ledger/src/bank_forks.rs | 18 + .../dashboards/cluster-monitor.json | 1305 +++++++++-------- runtime/src/epoch_stakes.rs | 2 +- 6 files changed, 903 insertions(+), 667 deletions(-) diff --git a/core/src/consensus.rs b/core/src/consensus.rs index 22d4dfe035..f574529f72 100644 --- a/core/src/consensus.rs +++ b/core/src/consensus.rs @@ -566,9 +566,9 @@ pub mod test { .expect("parent bank must exist") .clone(); info!("parent of {} is {}", missing_slot, parent_bank.slot(),); - progress - .entry(missing_slot) - .or_insert_with(|| ForkProgress::new(parent_bank.last_blockhash(), None, None)); + progress.entry(missing_slot).or_insert_with(|| { + ForkProgress::new(parent_bank.last_blockhash(), None, None, 0, 0) + }); // Create the missing bank let new_bank = @@ -719,7 +719,10 @@ pub mod test { bank0.freeze(); let mut progress = ProgressMap::default(); - progress.insert(0, ForkProgress::new(bank0.last_blockhash(), None, None)); + progress.insert( + 0, + ForkProgress::new(bank0.last_blockhash(), None, None, 0, 0), + ); (BankForks::new(0, bank0), progress) } diff --git a/core/src/progress_map.rs b/core/src/progress_map.rs index edf5deeeea..6ff4809661 100644 --- a/core/src/progress_map.rs +++ b/core/src/progress_map.rs @@ -85,6 +85,12 @@ pub(crate) struct ForkProgress { pub(crate) propagated_stats: PropagatedStats, pub(crate) replay_stats: ReplaySlotStats, pub(crate) replay_progress: ConfirmationProgress, + // Note `num_blocks_on_fork` and `num_dropped_blocks_on_fork` only + // count new blocks replayed since last restart, which won't include + // blocks already existing in the ledger/before snapshot at start, + // so these stats do not span all of time + pub(crate) num_blocks_on_fork: u64, + pub(crate) num_dropped_blocks_on_fork: u64, } impl ForkProgress { @@ -92,6 +98,8 @@ impl ForkProgress { last_entry: Hash, prev_leader_slot: Option, validator_stake_info: Option, + num_blocks_on_fork: u64, + num_dropped_blocks_on_fork: u64, ) -> Self { let ( is_leader_slot, @@ -124,6 +132,8 @@ impl ForkProgress { fork_stats: ForkStats::default(), replay_stats: ReplaySlotStats::default(), replay_progress: ConfirmationProgress::new(last_entry), + num_blocks_on_fork, + num_dropped_blocks_on_fork, propagated_stats: PropagatedStats { prev_leader_slot, is_leader_slot, @@ -141,6 +151,8 @@ impl ForkProgress { my_pubkey: &Pubkey, voting_pubkey: &Pubkey, prev_leader_slot: Option, + num_blocks_on_fork: u64, + num_dropped_blocks_on_fork: u64, ) -> Self { let validator_fork_info = { if bank.collector_id() == my_pubkey { @@ -155,7 +167,13 @@ impl ForkProgress { } }; - Self::new(bank.last_blockhash(), prev_leader_slot, validator_fork_info) + Self::new( + bank.last_blockhash(), + prev_leader_slot, + validator_fork_info, + num_blocks_on_fork, + num_dropped_blocks_on_fork, + ) } } @@ -352,6 +370,26 @@ impl ProgressMap { self.progress_map .retain(|k, _| bank_forks.get(*k).is_some()); } + + pub fn log_propagated_stats(&self, slot: Slot, bank_forks: &RwLock) { + if let Some(stats) = self.get_propagated_stats(slot) { + info!( + "Propagated stats: + total staked: {}, + observed staked: {}, + vote pubkeys: {:?}, + node_pubkeys: {:?}, + slot: {}, + epoch: {:?}", + stats.total_epoch_stake, + stats.propagated_validators_stake, + stats.propagated_validators, + stats.propagated_node_ids, + slot, + bank_forks.read().unwrap().get(slot).map(|x| x.epoch()), + ); + } + } } #[cfg(test)] @@ -476,7 +514,7 @@ mod test { fn test_is_propagated_status_on_construction() { // If the given ValidatorStakeInfo == None, then this is not // a leader slot and is_propagated == false - let progress = ForkProgress::new(Hash::default(), Some(9), None); + let progress = ForkProgress::new(Hash::default(), Some(9), None, 0, 0); assert!(!progress.propagated_stats.is_propagated); // If the stake is zero, then threshold is always achieved @@ -487,6 +525,8 @@ mod test { total_epoch_stake: 0, ..ValidatorStakeInfo::default() }), + 0, + 0, ); assert!(progress.propagated_stats.is_propagated); @@ -499,6 +539,8 @@ mod test { total_epoch_stake: 2, ..ValidatorStakeInfo::default() }), + 0, + 0, ); assert!(!progress.propagated_stats.is_propagated); @@ -511,6 +553,8 @@ mod test { total_epoch_stake: 2, ..ValidatorStakeInfo::default() }), + 0, + 0, ); assert!(progress.propagated_stats.is_propagated); @@ -521,6 +565,8 @@ mod test { Hash::default(), Some(9), Some(ValidatorStakeInfo::default()), + 0, + 0, ); assert!(!progress.propagated_stats.is_propagated); } @@ -531,10 +577,16 @@ mod test { // Insert new ForkProgress for slot 10 (not a leader slot) and its // previous leader slot 9 (leader slot) - progress_map.insert(10, ForkProgress::new(Hash::default(), Some(9), None)); + progress_map.insert(10, ForkProgress::new(Hash::default(), Some(9), None, 0, 0)); progress_map.insert( 9, - ForkProgress::new(Hash::default(), None, Some(ValidatorStakeInfo::default())), + ForkProgress::new( + Hash::default(), + None, + Some(ValidatorStakeInfo::default()), + 0, + 0, + ), ); // None of these slot have parents which are confirmed @@ -545,7 +597,7 @@ mod test { // The previous leader before 8, slot 7, does not exist in // progress map, so is_propagated(8) should return true as // this implies the parent is rooted - progress_map.insert(8, ForkProgress::new(Hash::default(), Some(7), None)); + progress_map.insert(8, ForkProgress::new(Hash::default(), Some(7), None, 0, 0)); assert!(progress_map.is_propagated(8)); // If we set the is_propagated = true, is_propagated should return true diff --git a/core/src/replay_stage.rs b/core/src/replay_stage.rs index 101a3e078d..1a28947905 100644 --- a/core/src/replay_stage.rs +++ b/core/src/replay_stage.rs @@ -171,6 +171,8 @@ impl ReplayStage { &my_pubkey, &vote_account, prev_leader_slot, + 0, + 0, ), ); } @@ -281,22 +283,7 @@ impl ReplayStage { for r in failure_reasons { if let HeaviestForkFailures::NoPropagatedConfirmation(slot) = r { - if let Some(latest_leader_slot) = progress.get_latest_leader_slot(slot) - { - if let Some(stats) = - progress.get_propagated_stats(latest_leader_slot) - { - info!( - "total staked: {}, observed staked: {}, vote pubkeys: {:?}, node_pubkeys: {:?}, latest_leader_slot: {}, epoch: {:?}", - stats.total_epoch_stake, - stats.propagated_validators_stake, - stats.propagated_validators, - stats.propagated_node_ids, - latest_leader_slot, - bank_forks.read().unwrap().get(latest_leader_slot).map(|x| x.epoch()), - ); - } - } + progress.log_propagated_stats(slot, &bank_forks); } } } @@ -305,9 +292,12 @@ impl ReplayStage { // Vote on a fork if let Some(ref vote_bank) = vote_bank { - subscriptions.notify_subscribers(block_commitment_cache.read().unwrap().slot(), &bank_forks); - if let Some(votable_leader) = leader_schedule_cache - .slot_leader_at(vote_bank.slot(), Some(vote_bank)) + subscriptions.notify_subscribers( + block_commitment_cache.read().unwrap().slot(), + &bank_forks, + ); + if let Some(votable_leader) = + leader_schedule_cache.slot_leader_at(vote_bank.slot(), Some(vote_bank)) { Self::log_leader_change( &my_pubkey, @@ -341,13 +331,24 @@ impl ReplayStage { // Reset onto a fork if let Some(reset_bank) = reset_bank { - if last_reset != reset_bank.last_blockhash() - { + if last_reset != reset_bank.last_blockhash() { info!( "vote bank: {:?} reset bank: {:?}", vote_bank.as_ref().map(|b| b.slot()), reset_bank.slot(), ); + let fork_progress = progress + .get(&reset_bank.slot()) + .expect("bank to reset to must exist in progress map"); + datapoint_info!( + "blocks_produced", + ("num_blocks_on_fork", fork_progress.num_blocks_on_fork, i64), + ( + "num_dropped_blocks_on_fork", + fork_progress.num_dropped_blocks_on_fork, + i64 + ), + ); Self::reset_poh_recorder( &my_pubkey, &blockstore, @@ -575,17 +576,22 @@ impl ReplayStage { ); if !Self::check_propagation_for_start_leader(poh_slot, parent_slot, progress_map) { - let latest_leader_slot = progress_map.get_latest_leader_slot(parent_slot).expect("In order for propagated check to fail, latest leader must exist in progress map"); + let latest_unconfirmed_leader_slot = progress_map.get_latest_leader_slot(parent_slot).expect("In order for propagated check to fail, latest leader must exist in progress map"); if poh_slot != skipped_slots_info.last_skipped_slot { datapoint_info!( "replay_stage-skip_leader_slot", ("slot", poh_slot, i64), ("parent_slot", parent_slot, i64), - ("latest_unconfirmed_leader", latest_leader_slot, i64) + ( + "latest_unconfirmed_leader_slot", + latest_unconfirmed_leader_slot, + i64 + ) ); + progress_map.log_propagated_stats(latest_unconfirmed_leader_slot, bank_forks); skipped_slots_info.last_skipped_slot = poh_slot; } - let bank = bank_forks.read().unwrap().get(latest_leader_slot) + let bank = bank_forks.read().unwrap().get(latest_unconfirmed_leader_slot) .expect("In order for propagated check to fail, latest leader must exist in progress map, and thus also in BankForks").clone(); // Signal retransmit @@ -599,6 +605,7 @@ impl ReplayStage { } let root_slot = bank_forks.read().unwrap().root(); + datapoint_info!("replay_stage-my_leader_slot", ("slot", poh_slot, i64),); info!( "new fork:{} parent:{} (leader) root:{}", poh_slot, parent_slot, root_slot @@ -893,13 +900,30 @@ impl ReplayStage { } let bank = bank_forks.read().unwrap().get(*bank_slot).unwrap().clone(); + let parent_slot = bank.parent_slot(); let prev_leader_slot = progress.get_bank_prev_leader_slot(&bank); - + let (num_blocks_on_fork, num_dropped_blocks_on_fork) = { + let stats = progress + .get(&parent_slot) + .expect("parent of active bank must exist in progress map"); + let num_blocks_on_fork = stats.num_blocks_on_fork + 1; + let new_dropped_blocks = bank.slot() - parent_slot - 1; + let num_dropped_blocks_on_fork = + stats.num_dropped_blocks_on_fork + new_dropped_blocks; + (num_blocks_on_fork, num_dropped_blocks_on_fork) + }; // Insert a progress entry even for slots this node is the leader for, so that // 1) confirm_forks can report confirmation, 2) we can cache computations about // this bank in `select_forks()` let bank_progress = &mut progress.entry(bank.slot()).or_insert_with(|| { - ForkProgress::new_from_bank(&bank, &my_pubkey, vote_account, prev_leader_slot) + ForkProgress::new_from_bank( + &bank, + &my_pubkey, + vote_account, + prev_leader_slot, + num_blocks_on_fork, + num_dropped_blocks_on_fork, + ) }); if bank.collector_id() != my_pubkey { let replay_result = Self::replay_blockstore_into_bank( @@ -1740,7 +1764,7 @@ pub(crate) mod tests { let bank = &bank_forks.banks[&0]; fork_progress .entry(neutral_fork.fork[0]) - .or_insert_with(|| ForkProgress::new(bank.last_blockhash(), None, None)); + .or_insert_with(|| ForkProgress::new(bank.last_blockhash(), None, None, 0, 0)); } for index in 1..neutral_fork.fork.len() { @@ -1766,7 +1790,7 @@ pub(crate) mod tests { let bank = &bank_forks.banks[&neutral_fork.fork[index]]; fork_progress .entry(bank_forks.banks[&neutral_fork.fork[index]].slot()) - .or_insert_with(|| ForkProgress::new(bank.last_blockhash(), None, None)); + .or_insert_with(|| ForkProgress::new(bank.last_blockhash(), None, None, 0, 0)); } } @@ -1806,7 +1830,9 @@ pub(crate) mod tests { let bank = &bank_forks.banks[&fork_info.fork[index]]; fork_progress .entry(bank_forks.banks[&fork_info.fork[index]].slot()) - .or_insert_with(|| ForkProgress::new(bank.last_blockhash(), None, None)); + .or_insert_with(|| { + ForkProgress::new(bank.last_blockhash(), None, None, 0, 0) + }); } } } @@ -1942,7 +1968,14 @@ pub(crate) mod tests { let mut progress = ProgressMap::default(); progress.insert( 0, - ForkProgress::new_from_bank(&bank0, bank0.collector_id(), &Pubkey::default(), None), + ForkProgress::new_from_bank( + &bank0, + bank0.collector_id(), + &Pubkey::default(), + None, + 0, + 0, + ), ); let leader_schedule_cache = Arc::new(LeaderScheduleCache::new_from_bank(&bank0)); let exit = Arc::new(AtomicBool::new(false)); @@ -1966,6 +1999,8 @@ pub(crate) mod tests { bank1.collector_id(), &validator_voting_keys.get(&bank1.collector_id()).unwrap(), Some(0), + 0, + 0, ), ); assert!(progress.get_propagated_stats(1).unwrap().is_leader_slot); @@ -2060,7 +2095,7 @@ pub(crate) mod tests { bank_forks.write().unwrap().insert(root_bank); let mut progress = ProgressMap::default(); for i in 0..=root { - progress.insert(i, ForkProgress::new(Hash::default(), None, None)); + progress.insert(i, ForkProgress::new(Hash::default(), None, None, 0, 0)); } ReplayStage::handle_new_root(root, &bank_forks, &mut progress, &None, &mut HashSet::new()); assert_eq!(bank_forks.read().unwrap().root(), root); @@ -2298,7 +2333,7 @@ pub(crate) mod tests { let last_blockhash = bank0.last_blockhash(); let mut bank0_progress = progress .entry(bank0.slot()) - .or_insert_with(|| ForkProgress::new(last_blockhash, None, None)); + .or_insert_with(|| ForkProgress::new(last_blockhash, None, None, 0, 0)); let shreds = shred_to_insert(&mint_keypair, bank0.clone()); blockstore.insert_shreds(shreds, None, false).unwrap(); let res = ReplayStage::replay_blockstore_into_bank( @@ -2626,7 +2661,10 @@ pub(crate) mod tests { // Insert the bank that contains a vote for slot 0, which confirms slot 0 bank_forks.write().unwrap().insert(bank1); - progress.insert(1, ForkProgress::new(bank0.last_blockhash(), None, None)); + progress.insert( + 1, + ForkProgress::new(bank0.last_blockhash(), None, None, 0, 0), + ); let ancestors = bank_forks.read().unwrap().ancestors(); let mut frozen_banks: Vec<_> = bank_forks .read() @@ -2999,6 +3037,8 @@ pub(crate) mod tests { total_epoch_stake, ..ValidatorStakeInfo::default() }), + 0, + 0, ), ); progress_map.insert( @@ -3010,6 +3050,8 @@ pub(crate) mod tests { total_epoch_stake, ..ValidatorStakeInfo::default() }), + 0, + 0, ), ); @@ -3085,16 +3127,22 @@ pub(crate) mod tests { bank_forks.insert(Bank::new_from_parent(&parent_bank, &Pubkey::default(), i)); progress_map.insert( i, - ForkProgress::new(Hash::default(), Some(prev_leader_slot), { - if i % 2 == 0 { - Some(ValidatorStakeInfo { - total_epoch_stake, - ..ValidatorStakeInfo::default() - }) - } else { - None - } - }), + ForkProgress::new( + Hash::default(), + Some(prev_leader_slot), + { + if i % 2 == 0 { + Some(ValidatorStakeInfo { + total_epoch_stake, + ..ValidatorStakeInfo::default() + }) + } else { + None + } + }, + 0, + 0, + ), ); } @@ -3167,6 +3215,8 @@ pub(crate) mod tests { total_epoch_stake, ..ValidatorStakeInfo::default() }), + 0, + 0, ); let end_range = { @@ -3222,7 +3272,7 @@ pub(crate) mod tests { // If there is no previous leader slot (previous leader slot is None), // should succeed - progress_map.insert(3, ForkProgress::new(Hash::default(), None, None)); + progress_map.insert(3, ForkProgress::new(Hash::default(), None, None, 0, 0)); assert!(ReplayStage::check_propagation_for_start_leader( poh_slot, parent_slot, @@ -3232,7 +3282,13 @@ pub(crate) mod tests { // If the parent was itself the leader, then requires propagation confirmation progress_map.insert( 3, - ForkProgress::new(Hash::default(), None, Some(ValidatorStakeInfo::default())), + ForkProgress::new( + Hash::default(), + None, + Some(ValidatorStakeInfo::default()), + 0, + 0, + ), ); assert!(!ReplayStage::check_propagation_for_start_leader( poh_slot, @@ -3252,10 +3308,16 @@ pub(crate) mod tests { // Now, set up the progress map to show that the previous leader slot of 5 is // 2 (even though the parent is 3), so 2 needs to see propagation confirmation // before we can start a leader for block 5 - progress_map.insert(3, ForkProgress::new(Hash::default(), Some(2), None)); + progress_map.insert(3, ForkProgress::new(Hash::default(), Some(2), None, 0, 0)); progress_map.insert( 2, - ForkProgress::new(Hash::default(), None, Some(ValidatorStakeInfo::default())), + ForkProgress::new( + Hash::default(), + None, + Some(ValidatorStakeInfo::default()), + 0, + 0, + ), ); // Last leader slot has not seen propagation threshold, so should fail @@ -3308,11 +3370,23 @@ pub(crate) mod tests { // which means 3 and 4 are consecutiive leader slots progress_map.insert( 3, - ForkProgress::new(Hash::default(), None, Some(ValidatorStakeInfo::default())), + ForkProgress::new( + Hash::default(), + None, + Some(ValidatorStakeInfo::default()), + 0, + 0, + ), ); progress_map.insert( 2, - ForkProgress::new(Hash::default(), None, Some(ValidatorStakeInfo::default())), + ForkProgress::new( + Hash::default(), + None, + Some(ValidatorStakeInfo::default()), + 0, + 0, + ), ); // If the last leader slot has not seen propagation threshold, but diff --git a/ledger/src/bank_forks.rs b/ledger/src/bank_forks.rs index 097fd5f7d8..4a221d7810 100644 --- a/ledger/src/bank_forks.rs +++ b/ledger/src/bank_forks.rs @@ -180,12 +180,30 @@ impl BankForks { root: Slot, snapshot_package_sender: &Option, ) { + let old_epoch = self.root_bank().epoch(); self.root = root; let set_root_start = Instant::now(); let root_bank = self .banks .get(&root) .expect("root bank didn't exist in bank_forks"); + let new_epoch = root_bank.epoch(); + if old_epoch != new_epoch { + info!( + "Root entering + epoch: {}, + next_epoch_start_slot: {}, + epoch_stakes: {:#?}", + new_epoch, + root_bank + .epoch_schedule() + .get_first_slot_in_epoch(new_epoch + 1), + root_bank + .epoch_stakes(new_epoch) + .unwrap() + .node_id_to_vote_accounts() + ); + } let root_tx_count = root_bank .parents() .last() diff --git a/metrics/scripts/grafana-provisioning/dashboards/cluster-monitor.json b/metrics/scripts/grafana-provisioning/dashboards/cluster-monitor.json index 901bc429fa..6c46bf7b60 100644 --- a/metrics/scripts/grafana-provisioning/dashboards/cluster-monitor.json +++ b/metrics/scripts/grafana-provisioning/dashboards/cluster-monitor.json @@ -4618,7 +4618,7 @@ }, "yaxes": [ { - "format": "\u00b5s", + "format": "µs", "label": null, "logBase": 1, "max": null, @@ -5385,7 +5385,7 @@ }, "yaxes": [ { - "format": "\u00b5s", + "format": "µs", "label": null, "logBase": 1, "max": null, @@ -5752,7 +5752,7 @@ }, "yaxes": [ { - "format": "\u00b5s", + "format": "µs", "label": null, "logBase": 1, "max": null, @@ -5793,7 +5793,7 @@ "x": 16, "y": 62 }, - "id": 45, + "id": 71, "legend": { "alignAsTable": false, "avg": false, @@ -5812,7 +5812,16 @@ "pointradius": 2, "points": false, "renderer": "flot", - "seriesOverrides": [], + "seriesOverrides": [ + { + "alias": "replay-slot-stats.total_shreds", + "yaxis": 2 + }, + { + "alias": "replay-slot-stats.total_entries", + "yaxis": 2 + } + ], "spaceLength": 10, "stack": false, "steppedLine": false, @@ -5836,7 +5845,7 @@ "measurement": "cluster_info-vote-count", "orderByTime": "ASC", "policy": "autogen", - "query": "SELECT mean(\"count\") FROM \"$testnet\".\"autogen\".\"bank-forks_set_root_ms\" WHERE host_id::tag =~ /$hostid/ AND $timeFilter GROUP BY time($__interval)", + "query": "SELECT mean(\"repair-total\") AS \"repair-total\" FROM \"$testnet\".\"autogen\".\"serve_repair-repair\" WHERE host_id::tag =~ /$hostid/ AND $timeFilter GROUP BY time($__interval)", "rawQuery": true, "refId": "A", "resultFormat": "time_series", @@ -5875,7 +5884,7 @@ "measurement": "cluster_info-vote-count", "orderByTime": "ASC", "policy": "autogen", - "query": "SELECT mean(\"squash_accounts_ms\") AS \"squash_account\" FROM \"$testnet\".\"autogen\".\"tower-observed\" WHERE host_id::tag =~ /$hostid/ AND $timeFilter GROUP BY time($__interval)", + "query": "SELECT mean(\"shred-count\") AS \"shred-count\" FROM \"$testnet\".\"autogen\".\"serve_repair-repair\" WHERE host_id::tag =~ /$hostid/ AND $timeFilter GROUP BY time($__interval)", "rawQuery": true, "refId": "B", "resultFormat": "time_series", @@ -5914,7 +5923,7 @@ "measurement": "cluster_info-vote-count", "orderByTime": "ASC", "policy": "autogen", - "query": "SELECT mean(\"count\") AS \"serialize_bank\" FROM \"$testnet\".\"autogen\".\"bank-serialize-ms\" WHERE host_id::tag =~ /$hostid/ AND $timeFilter GROUP BY time($__interval)", + "query": "SELECT mean(\"highest-shred-count\") AS \"highest-shred-count\" FROM \"$testnet\".\"autogen\".\"serve_repair-repair\" WHERE host_id::tag =~ /$hostid/ AND $timeFilter GROUP BY time($__interval)", "rawQuery": true, "refId": "C", "resultFormat": "time_series", @@ -5953,7 +5962,7 @@ "measurement": "cluster_info-vote-count", "orderByTime": "ASC", "policy": "autogen", - "query": "SELECT mean(\"count\") AS \"add_snapshot_ms\" FROM \"$testnet\".\"autogen\".\"add-snapshot-ms\" WHERE host_id::tag =~ /$hostid/ AND $timeFilter GROUP BY time($__interval)", + "query": "SELECT mean(\"orphan-count\") AS \"orphan-count\" FROM \"$testnet\".\"autogen\".\"serve_repair-repair\" WHERE host_id::tag =~ /$hostid/ AND $timeFilter GROUP BY time($__interval)", "rawQuery": true, "refId": "C", "resultFormat": "time_series", @@ -5992,7 +6001,7 @@ "measurement": "cluster_info-vote-count", "orderByTime": "ASC", "policy": "autogen", - "query": "SELECT mean(\"duration\") AS \"serialize_account_storage\" FROM \"$testnet\".\"autogen\".\"serialize_account_storage_ms\" WHERE host_id::tag =~ /$hostid/ AND $timeFilter GROUP BY time($__interval)", + "query": "SELECT mean(\"repair-highest-slot\") AS \"repair-highest-slot\" FROM \"$testnet\".\"autogen\".\"serve_repair-repair\" WHERE host_id::tag =~ /$hostid/ AND $timeFilter GROUP BY time($__interval)", "rawQuery": true, "refId": "C", "resultFormat": "time_series", @@ -6031,9 +6040,9 @@ "measurement": "cluster_info-vote-count", "orderByTime": "ASC", "policy": "autogen", - "query": "SELECT mean(\"squash_cache_ms\") AS \"squash_cache\" FROM \"$testnet\".\"autogen\".\"tower-observed\" WHERE host_id::tag =~ /$hostid/ AND $timeFilter GROUP BY time($__interval)", + "query": "SELECT mean(\"repair-orphan\") AS \"repair-orphan\" FROM \"$testnet\".\"autogen\".\"serve_repair-repair\" WHERE host_id::tag =~ /$hostid/ AND $timeFilter GROUP BY time($__interval)", "rawQuery": true, - "refId": "C", + "refId": "D", "resultFormat": "time_series", "select": [ [ @@ -6055,7 +6064,7 @@ "thresholds": [], "timeFrom": null, "timeShift": null, - "title": "Time spent in squashing ($hostid)", + "title": "Repair Stats", "tooltip": { "shared": true, "sort": 0, @@ -6071,7 +6080,7 @@ }, "yaxes": [ { - "format": "ms", + "format": "none", "label": null, "logBase": 1, "max": null, @@ -6727,7 +6736,7 @@ }, "yaxes": [ { - "format": "\u00b5s", + "format": "µs", "label": null, "logBase": 1, "max": null, @@ -6748,126 +6757,12 @@ "alignLevel": null } }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "gridPos": { - "h": 5, - "w": 8, - "x": 16, - "y": 68 - }, - "id": 48, - "legend": { - "alignAsTable": false, - "avg": false, - "current": false, - "max": false, - "min": false, - "rightSide": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [], - "nullPointMode": "null", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "groupBy": [ - { - "params": [ - "$__interval" - ], - "type": "time" - }, - { - "params": [ - "null" - ], - "type": "fill" - } - ], - "measurement": "cluster_info-vote-count", - "orderByTime": "ASC", - "policy": "autogen", - "query": "SELECT sum(\"recovered\") AS \"recovered\" FROM \"$testnet\".\"autogen\".\"blockstore-erasure\" WHERE host_id::tag =~ /$hostid/ AND $timeFilter GROUP BY time($__interval) FILL(0)", - "rawQuery": true, - "refId": "B", - "resultFormat": "time_series", - "select": [ - [ - { - "params": [ - "count" - ], - "type": "field" - }, - { - "params": [], - "type": "sum" - } - ] - ], - "tags": [] - } - ], - "thresholds": [], - "timeFrom": null, - "timeShift": null, - "title": "Erasure Recovery ($hostid)", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, { "aliasColors": { "cluster-info.repair": "#ba43a9", + "replay_stage-new_leader.last": "#00ffbb", + "tower-observed.squash_account": "#0a437c", + "tower-observed.squash_cache": "#ea6460", "window-service.receive": "#b7dbab", "window-stage.consumed": "#5195ce" }, @@ -6876,450 +6771,13 @@ "dashes": false, "datasource": "$datasource", "fill": 1, - "gridPos": { - "h": 5, - "w": 8, - "x": 16, - "y": 73 - }, - "id": 49, - "legend": { - "alignAsTable": false, - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": false, - "linewidth": 1, - "links": [], - "nullPointMode": "connected", - "percentage": false, - "pointradius": 1, - "points": true, - "renderer": "flot", - "seriesOverrides": [ - { - "alias": "serve_repair-repair_highest.ix", - "yaxis": 2 - } - ], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "groupBy": [ - { - "params": [ - "$__interval" - ], - "type": "time" - }, - { - "params": [ - "null" - ], - "type": "fill" - } - ], - "orderByTime": "ASC", - "policy": "default", - "query": "SELECT last(\"repair-highest-slot\") AS \"slot\" FROM \"$testnet\".\"autogen\".\"serve_repair-repair\" WHERE host_id::tag =~ /$hostid/ AND $timeFilter GROUP BY time($__interval)", - "rawQuery": true, - "refId": "C", - "resultFormat": "time_series", - "select": [ - [ - { - "params": [ - "value" - ], - "type": "field" - }, - { - "params": [], - "type": "mean" - } - ] - ], - "tags": [] - }, - { - "groupBy": [ - { - "params": [ - "$__interval" - ], - "type": "time" - }, - { - "params": [ - "null" - ], - "type": "fill" - } - ], - "orderByTime": "ASC", - "policy": "default", - "query": "SELECT last(\"repair-highest-ix\") AS \"ix\" FROM \"$testnet\".\"autogen\".\"serve_repair-repair\" WHERE host_id::tag =~ /$hostid/ AND $timeFilter GROUP BY time($__interval)", - "rawQuery": true, - "refId": "A", - "resultFormat": "time_series", - "select": [ - [ - { - "params": [ - "value" - ], - "type": "field" - }, - { - "params": [], - "type": "mean" - } - ] - ], - "tags": [] - } - ], - "thresholds": [], - "timeFrom": null, - "timeShift": null, - "title": "Repair highest index in slot ($hostid)", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": { - "cluster-info.repair": "#ba43a9", - "window-service.receive": "#b7dbab", - "window-stage.consumed": "#5195ce" - }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "gridPos": { - "h": 5, - "w": 8, - "x": 0, - "y": 74 - }, - "id": 50, - "legend": { - "alignAsTable": false, - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": false, - "linewidth": 1, - "links": [], - "nullPointMode": "connected", - "percentage": false, - "pointradius": 1, - "points": true, - "renderer": "flot", - "seriesOverrides": [ - { - "alias": "serve_repair-repair.repair-ix", - "yaxis": 2 - } - ], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "groupBy": [ - { - "params": [ - "$__interval" - ], - "type": "time" - }, - { - "params": [ - "null" - ], - "type": "fill" - } - ], - "orderByTime": "ASC", - "policy": "default", - "query": "SELECT last(\"repair-ix\") AS \"repair-ix\" FROM \"$testnet\".\"autogen\".\"serve_repair-repair\" WHERE host_id::tag =~ /$hostid/ AND $timeFilter GROUP BY time($__interval)", - "rawQuery": true, - "refId": "C", - "resultFormat": "time_series", - "select": [ - [ - { - "params": [ - "value" - ], - "type": "field" - }, - { - "params": [], - "type": "mean" - } - ] - ], - "tags": [] - }, - { - "groupBy": [ - { - "params": [ - "$__interval" - ], - "type": "time" - }, - { - "params": [ - "null" - ], - "type": "fill" - } - ], - "orderByTime": "ASC", - "policy": "default", - "query": "SELECT last(\"repair-slot\") AS \"repair-slot\" FROM \"$testnet\".\"autogen\".\"serve_repair-repair\" WHERE host_id::tag =~ /$hostid/ AND $timeFilter GROUP BY time($__interval)", - "rawQuery": true, - "refId": "A", - "resultFormat": "time_series", - "select": [ - [ - { - "params": [ - "value" - ], - "type": "field" - }, - { - "params": [], - "type": "mean" - } - ] - ], - "tags": [] - } - ], - "thresholds": [], - "timeFrom": null, - "timeShift": null, - "title": "Repair slot and index ($hostid)", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": { - "cluster-info.repair": "#ba43a9", - "window-service.receive": "#b7dbab", - "window-stage.consumed": "#5195ce" - }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "gridPos": { - "h": 5, - "w": 8, - "x": 8, - "y": 74 - }, - "id": 51, - "legend": { - "alignAsTable": false, - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": false, - "linewidth": 1, - "links": [], - "nullPointMode": "connected", - "percentage": false, - "pointradius": 1, - "points": true, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "groupBy": [ - { - "params": [ - "$__interval" - ], - "type": "time" - }, - { - "params": [ - "null" - ], - "type": "fill" - } - ], - "orderByTime": "ASC", - "policy": "default", - "query": "SELECT last(\"repair-orphan\") AS \"slot\" FROM \"$testnet\".\"autogen\".\"serve_repair-repair\" WHERE host_id::tag =~ /$hostid/ AND $timeFilter GROUP BY time($__interval)", - "rawQuery": true, - "refId": "C", - "resultFormat": "time_series", - "select": [ - [ - { - "params": [ - "value" - ], - "type": "field" - }, - { - "params": [], - "type": "mean" - } - ] - ], - "tags": [] - } - ], - "thresholds": [], - "timeFrom": null, - "timeShift": null, - "title": "Repair detached heads ($hostid)", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, "gridPos": { "h": 6, "w": 8, "x": 16, - "y": 78 + "y": 68 }, - "id": 52, + "id": 45, "legend": { "alignAsTable": false, "avg": false, @@ -7333,9 +6791,9 @@ "lines": true, "linewidth": 1, "links": [], - "nullPointMode": "null", + "nullPointMode": "connected", "percentage": false, - "pointradius": 5, + "pointradius": 2, "points": false, "renderer": "flot", "seriesOverrides": [], @@ -7358,10 +6816,11 @@ "type": "fill" } ], + "hide": false, "measurement": "cluster_info-vote-count", "orderByTime": "ASC", "policy": "autogen", - "query": "SELECT sum(\"count\") AS \"retransmit\" FROM \"$testnet\".\"autogen\".\"streamer-recv_window-retransmit\" WHERE $timeFilter GROUP BY time($__interval) FILL(0)\n", + "query": "SELECT mean(\"count\") FROM \"$testnet\".\"autogen\".\"bank-forks_set_root_ms\" WHERE host_id::tag =~ /$hostid/ AND $timeFilter GROUP BY time($__interval)", "rawQuery": true, "refId": "A", "resultFormat": "time_series", @@ -7396,9 +6855,11 @@ "type": "fill" } ], + "hide": false, + "measurement": "cluster_info-vote-count", "orderByTime": "ASC", - "policy": "default", - "query": "SELECT sum(\"count\") AS \"window receive\" FROM \"$testnet\".\"autogen\".\"streamer-recv_window-recv\" WHERE $timeFilter GROUP BY time($__interval) FILL(0)\n", + "policy": "autogen", + "query": "SELECT mean(\"squash_accounts_ms\") AS \"squash_account\" FROM \"$testnet\".\"autogen\".\"tower-observed\" WHERE host_id::tag =~ /$hostid/ AND $timeFilter GROUP BY time($__interval)", "rawQuery": true, "refId": "B", "resultFormat": "time_series", @@ -7406,13 +6867,13 @@ [ { "params": [ - "value" + "count" ], "type": "field" }, { "params": [], - "type": "mean" + "type": "sum" } ] ], @@ -7433,9 +6894,11 @@ "type": "fill" } ], + "hide": false, + "measurement": "cluster_info-vote-count", "orderByTime": "ASC", - "policy": "default", - "query": "SELECT sum(\"count\") AS \"broadcast sent\" FROM \"$testnet\".\"autogen\".\"streamer-broadcast-sent\" WHERE $timeFilter GROUP BY time($__interval) FILL(0)\n", + "policy": "autogen", + "query": "SELECT mean(\"count\") AS \"serialize_bank\" FROM \"$testnet\".\"autogen\".\"bank-serialize-ms\" WHERE host_id::tag =~ /$hostid/ AND $timeFilter GROUP BY time($__interval)", "rawQuery": true, "refId": "C", "resultFormat": "time_series", @@ -7443,13 +6906,130 @@ [ { "params": [ - "value" + "count" ], "type": "field" }, { "params": [], - "type": "mean" + "type": "sum" + } + ] + ], + "tags": [] + }, + { + "groupBy": [ + { + "params": [ + "$__interval" + ], + "type": "time" + }, + { + "params": [ + "null" + ], + "type": "fill" + } + ], + "hide": false, + "measurement": "cluster_info-vote-count", + "orderByTime": "ASC", + "policy": "autogen", + "query": "SELECT mean(\"count\") AS \"add_snapshot_ms\" FROM \"$testnet\".\"autogen\".\"add-snapshot-ms\" WHERE host_id::tag =~ /$hostid/ AND $timeFilter GROUP BY time($__interval)", + "rawQuery": true, + "refId": "C", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "count" + ], + "type": "field" + }, + { + "params": [], + "type": "sum" + } + ] + ], + "tags": [] + }, + { + "groupBy": [ + { + "params": [ + "$__interval" + ], + "type": "time" + }, + { + "params": [ + "null" + ], + "type": "fill" + } + ], + "hide": false, + "measurement": "cluster_info-vote-count", + "orderByTime": "ASC", + "policy": "autogen", + "query": "SELECT mean(\"duration\") AS \"serialize_account_storage\" FROM \"$testnet\".\"autogen\".\"serialize_account_storage_ms\" WHERE host_id::tag =~ /$hostid/ AND $timeFilter GROUP BY time($__interval)", + "rawQuery": true, + "refId": "C", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "count" + ], + "type": "field" + }, + { + "params": [], + "type": "sum" + } + ] + ], + "tags": [] + }, + { + "groupBy": [ + { + "params": [ + "$__interval" + ], + "type": "time" + }, + { + "params": [ + "null" + ], + "type": "fill" + } + ], + "hide": false, + "measurement": "cluster_info-vote-count", + "orderByTime": "ASC", + "policy": "autogen", + "query": "SELECT mean(\"squash_cache_ms\") AS \"squash_cache\" FROM \"$testnet\".\"autogen\".\"tower-observed\" WHERE host_id::tag =~ /$hostid/ AND $timeFilter GROUP BY time($__interval)", + "rawQuery": true, + "refId": "C", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "count" + ], + "type": "field" + }, + { + "params": [], + "type": "sum" } ] ], @@ -7459,7 +7039,7 @@ "thresholds": [], "timeFrom": null, "timeShift": null, - "title": "Send/Receive/Retransmit", + "title": "Time spent in squashing ($hostid)", "tooltip": { "shared": true, "sort": 0, @@ -7475,7 +7055,7 @@ }, "yaxes": [ { - "format": "short", + "format": "ms", "label": null, "logBase": 1, "max": null, @@ -7521,7 +7101,7 @@ "h": 5, "w": 8, "x": 0, - "y": 79 + "y": 74 }, "id": 53, "legend": { @@ -7823,7 +7403,7 @@ "h": 5, "w": 8, "x": 8, - "y": 79 + "y": 74 }, "id": 54, "legend": { @@ -7965,13 +7545,521 @@ "alignLevel": null } }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "gridPos": { + "h": 5, + "w": 8, + "x": 16, + "y": 74 + }, + "id": 48, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "groupBy": [ + { + "params": [ + "$__interval" + ], + "type": "time" + }, + { + "params": [ + "null" + ], + "type": "fill" + } + ], + "measurement": "cluster_info-vote-count", + "orderByTime": "ASC", + "policy": "autogen", + "query": "SELECT sum(\"recovered\") AS \"recovered\" FROM \"$testnet\".\"autogen\".\"blockstore-erasure\" WHERE host_id::tag =~ /$hostid/ AND $timeFilter GROUP BY time($__interval) FILL(0)", + "rawQuery": true, + "refId": "B", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "count" + ], + "type": "field" + }, + { + "params": [], + "type": "sum" + } + ] + ], + "tags": [] + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Erasure Recovery ($hostid)", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "gridPos": { + "h": 6, + "w": 8, + "x": 0, + "y": 79 + }, + "id": 61, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "hideEmpty": true, + "hideZero": true, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 1, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "groupBy": [ + { + "params": [ + "$__interval" + ], + "type": "time" + }, + { + "params": [ + "null" + ], + "type": "fill" + } + ], + "measurement": "cluster_info-vote-count", + "orderByTime": "ASC", + "policy": "autogen", + "query": "SELECT max(\"num_blocks_on_fork\") as \"num_blocks\" FROM \"$testnet\".\"autogen\".\"blocks_produced\" WHERE host_id::tag =~ /$hostid/ AND $timeFilter GROUP BY time(1s) fill(null)\n\n\n\n", + "rawQuery": true, + "refId": "A", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "count" + ], + "type": "field" + }, + { + "params": [], + "type": "sum" + } + ] + ], + "tags": [] + }, + { + "groupBy": [ + { + "params": [ + "$__interval" + ], + "type": "time" + }, + { + "params": [ + "null" + ], + "type": "fill" + } + ], + "measurement": "cluster_info-vote-count", + "orderByTime": "ASC", + "policy": "autogen", + "query": "SELECT max(\"num_dropped_blocks_on_fork\") as \"num_dropped_blocks\" FROM \"$testnet\".\"autogen\".\"blocks_produced\" WHERE host_id::tag =~ /$hostid/ AND $timeFilter GROUP BY time(1s) fill(null)\n\n\n\n", + "rawQuery": true, + "refId": "B", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "count" + ], + "type": "field" + }, + { + "params": [], + "type": "sum" + } + ] + ], + "tags": [] + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Block Production ($hostid)", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "decimals": 0, + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "description": "(Must pick a host id for this to make sense)", + "fill": 1, + "gridPos": { + "h": 6, + "w": 8, + "x": 8, + "y": 79 + }, + "id": 73, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "groupBy": [ + { + "params": [ + "$__interval" + ], + "type": "time" + }, + { + "params": [ + "null" + ], + "type": "fill" + } + ], + "measurement": "cluster_info-vote-count", + "orderByTime": "ASC", + "policy": "autogen", + "query": "SELECT count(\"slot\") AS \"num_my_leader_slots\" FROM \"$testnet\".\"autogen\".\"replay_stage-my_leader_slot\" WHERE host_id::tag =~ /$hostid/ AND $timeFilter GROUP BY time(1s) fill(null)\n", + "rawQuery": true, + "refId": "A", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "count" + ], + "type": "field" + }, + { + "params": [], + "type": "sum" + } + ] + ], + "tags": [] + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "My Leader Slots ($hostid)", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "gridPos": { + "h": 6, + "w": 8, + "x": 16, + "y": 79 + }, + "id": 52, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "groupBy": [ + { + "params": [ + "$__interval" + ], + "type": "time" + }, + { + "params": [ + "null" + ], + "type": "fill" + } + ], + "measurement": "cluster_info-vote-count", + "orderByTime": "ASC", + "policy": "autogen", + "query": "SELECT count(\"slot\") AS \"num_skipped\" FROM \"$testnet\".\"autogen\".\"replay_stage-skip_leader_slot\" WHERE host_id::tag =~ /$hostid/ AND $timeFilter GROUP BY time(1s) fill(null)\n", + "rawQuery": true, + "refId": "A", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "count" + ], + "type": "field" + }, + { + "params": [], + "type": "sum" + } + ] + ], + "tags": [] + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Skipped Leader Slots ($hostid)", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "none", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "none", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, { "collapsed": false, "gridPos": { "h": 1, "w": 24, "x": 0, - "y": 84 + "y": 85 }, "id": 55, "panels": [], @@ -7994,7 +8082,7 @@ "h": 5, "w": 8, "x": 0, - "y": 85 + "y": 86 }, "id": 56, "legend": { @@ -8154,7 +8242,7 @@ "h": 5, "w": 8, "x": 8, - "y": 85 + "y": 86 }, "id": 57, "legend": { @@ -8314,7 +8402,7 @@ "h": 5, "w": 8, "x": 16, - "y": 85 + "y": 86 }, "id": 58, "legend": { @@ -8499,7 +8587,7 @@ "h": 1, "w": 24, "x": 0, - "y": 90 + "y": 91 }, "id": 59, "panels": [], @@ -8518,7 +8606,7 @@ "h": 5, "w": 12, "x": 0, - "y": 91 + "y": 92 }, "id": 60, "legend": { @@ -8751,9 +8839,9 @@ "h": 5, "w": 12, "x": 12, - "y": 91 + "y": 92 }, - "id": 61, + "id": 72, "legend": { "alignAsTable": false, "avg": false, @@ -8798,7 +8886,7 @@ "measurement": "cluster_info-vote-count", "orderByTime": "ASC", "policy": "autogen", - "query": "SELECT mean(\"in_octets\") as \"recv\" FROM \"$testnet\".\"autogen\".\"net-stats\" WHERE $timeFilter GROUP BY time(1s) fill(null)\n\n\n\n", + "query": "SELECT mean(\"in_octets\") as \"recv\" FROM \"$testnet\".\"autogen\".\"net-stats\" WHERE $timeFilter GROUP BY time(1s) fill(null)\n\n", "rawQuery": true, "refId": "A", "resultFormat": "time_series", @@ -8836,7 +8924,7 @@ "measurement": "cluster_info-vote-count", "orderByTime": "ASC", "policy": "autogen", - "query": "SELECT mean(\"out_octets\") as \"sent\" FROM \"$testnet\".\"autogen\".\"net-stats\" WHERE $timeFilter GROUP BY time(1s) fill(null)\n\n\n\n", + "query": "SELECT mean(\"out_octets\") as \"sent\" FROM \"$testnet\".\"autogen\".\"net-stats\" WHERE $timeFilter GROUP BY time(1s) fill(null)\n\n", "rawQuery": true, "refId": "B", "resultFormat": "time_series", @@ -8904,7 +8992,7 @@ "h": 1, "w": 24, "x": 0, - "y": 96 + "y": 97 }, "id": 62, "panels": [], @@ -8922,7 +9010,7 @@ "h": 5, "w": 12, "x": 0, - "y": 97 + "y": 98 }, "id": 63, "legend": { @@ -9124,7 +9212,7 @@ "h": 5, "w": 12, "x": 12, - "y": 97 + "y": 98 }, "id": 64, "legend": { @@ -9273,7 +9361,7 @@ "h": 1, "w": 24, "x": 0, - "y": 102 + "y": 103 }, "id": 65, "panels": [], @@ -9291,7 +9379,7 @@ "h": 6, "w": 8, "x": 0, - "y": 103 + "y": 104 }, "id": 66, "legend": { @@ -9483,7 +9571,7 @@ "h": 6, "w": 8, "x": 8, - "y": 103 + "y": 104 }, "id": 67, "legend": { @@ -9751,7 +9839,7 @@ "h": 6, "w": 8, "x": 16, - "y": 103 + "y": 104 }, "id": 68, "legend": { @@ -9940,7 +10028,7 @@ "h": 1, "w": 24, "x": 0, - "y": 109 + "y": 110 }, "id": 69, "panels": [], @@ -9958,7 +10046,7 @@ "h": 4, "w": 8, "x": 0, - "y": 110 + "y": 111 }, "id": 70, "legend": { @@ -10181,8 +10269,8 @@ "list": [ { "current": { - "text": "$datasource", - "value": "$datasource" + "text": "Solana Metrics (read-only)", + "value": "Solana Metrics (read-only)" }, "hide": 1, "label": "Data Source", @@ -10218,6 +10306,7 @@ }, { "allValue": ".*", + "current": {}, "datasource": "$datasource", "hide": 0, "includeAll": true, @@ -10270,4 +10359,4 @@ "title": "Cluster Telemetry (edge)", "uid": "monitor-edge", "version": 2 -} +} \ No newline at end of file diff --git a/runtime/src/epoch_stakes.rs b/runtime/src/epoch_stakes.rs index 2d0d33e2d6..7b95ecabe1 100644 --- a/runtime/src/epoch_stakes.rs +++ b/runtime/src/epoch_stakes.rs @@ -7,7 +7,7 @@ use std::{collections::HashMap, sync::Arc}; pub type NodeIdToVoteAccounts = HashMap; pub type EpochAuthorizedVoters = HashMap; -#[derive(Clone, Serialize, Deserialize, Default, PartialEq)] +#[derive(Clone, Serialize, Debug, Deserialize, Default, PartialEq)] pub struct NodeVoteAccounts { pub vote_accounts: Vec, pub total_stake: u64,