From 87598c761271fdf497006c89e5230221ec1c0bf0 Mon Sep 17 00:00:00 2001 From: Justin Starry Date: Fri, 17 Jan 2020 15:26:50 +0800 Subject: [PATCH] Consolidate tx error counters and update metrics dashboard (#7724) automerge --- core/src/poh_recorder.rs | 2 +- .../dashboards/testnet-monitor.json | 129 ++++++++++++++++-- runtime/benches/bank.rs | 5 +- runtime/src/accounts.rs | 39 +----- runtime/src/accounts_db.rs | 14 +- runtime/src/bank.rs | 100 +++++++++----- runtime/src/blockhash_queue.rs | 26 ++-- 7 files changed, 223 insertions(+), 92 deletions(-) diff --git a/core/src/poh_recorder.rs b/core/src/poh_recorder.rs index 25e7fcf1c..a54708ee5 100644 --- a/core/src/poh_recorder.rs +++ b/core/src/poh_recorder.rs @@ -1397,7 +1397,7 @@ mod tests { poh_recorder.tick(); } poh_recorder.set_bank(&bank.clone()); - assert!(!bank.check_hash_age(&genesis_hash, 1)); + assert_eq!(Some(false), bank.check_hash_age(&genesis_hash, 1)); } } diff --git a/metrics/scripts/grafana-provisioning/dashboards/testnet-monitor.json b/metrics/scripts/grafana-provisioning/dashboards/testnet-monitor.json index edda3cc56..5b2487e57 100644 --- a/metrics/scripts/grafana-provisioning/dashboards/testnet-monitor.json +++ b/metrics/scripts/grafana-provisioning/dashboards/testnet-monitor.json @@ -3599,7 +3599,7 @@ "policy": "default", "query": "SELECT sum(\"count\") AS \"total_errors\" FROM \"$testnet\".\"autogen\".\"bank-process_transactions-error_count\" WHERE $timeFilter GROUP BY time(1s) FILL(0)\n", "rawQuery": true, - "refId": "B", + "refId": "A", "resultFormat": "time_series", "select": [ [ @@ -3634,9 +3634,9 @@ ], "orderByTime": "ASC", "policy": "default", - "query": "SELECT sum(\"count\") AS \"blockhash_too_old\" FROM \"$testnet\".\"autogen\".\"bank-process_transactions-error-reserve_blockhash\" WHERE $timeFilter GROUP BY time(1s) FILL(0)\n", + "query": "SELECT sum(\"count\") AS \"account_in_use\" FROM \"$testnet\".\"autogen\".\"bank-process_transactions-account_in_use\" WHERE $timeFilter GROUP BY time(1s) FILL(0)\n\n", "rawQuery": true, - "refId": "A", + "refId": "B", "resultFormat": "time_series", "select": [ [ @@ -3708,7 +3708,7 @@ ], "orderByTime": "ASC", "policy": "default", - "query": "SELECT sum(\"count\") AS \"blockhash_not_found\" FROM \"$testnet\".\"autogen\".\"bank-process_transactions-error-blockhash_not_found\" WHERE $timeFilter GROUP BY time(1s) FILL(0)\n", + "query": "SELECT sum(\"count\") AS \"account_not_found\" FROM \"$testnet\".\"autogen\".\"bank-process_transactions-account_not_found\" WHERE $timeFilter GROUP BY time(1s) FILL(0)\n\n\n", "rawQuery": true, "refId": "D", "resultFormat": "time_series", @@ -3745,7 +3745,7 @@ ], "orderByTime": "ASC", "policy": "default", - "query": "SELECT sum(\"count\") AS \"duplicate_signature\" FROM \"$testnet\".\"autogen\".\"bank-process_transactions-error-duplicate_signature\" WHERE $timeFilter GROUP BY time(1s) FILL(0)\n", + "query": "SELECT sum(\"count\") AS \"blockhash_not_found\" FROM \"$testnet\".\"autogen\".\"bank-process_transactions-error-blockhash_not_found\" WHERE $timeFilter GROUP BY time(1s) FILL(0)\n", "rawQuery": true, "refId": "E", "resultFormat": "time_series", @@ -3782,7 +3782,7 @@ ], "orderByTime": "ASC", "policy": "default", - "query": "SELECT sum(\"count\") AS \"insufficient_funds\" FROM \"$testnet\".\"autogen\".\"bank-process_transactions-error-insufficient_funds\" WHERE $timeFilter GROUP BY time(1s) FILL(0)\n", + "query": "SELECT sum(\"count\") AS \"blockhash_too_old\" FROM \"$testnet\".\"autogen\".\"bank-process_transactions-error-blockhash_too_old\" WHERE $timeFilter GROUP BY time(1s) FILL(0)\n", "rawQuery": true, "refId": "F", "resultFormat": "time_series", @@ -3819,7 +3819,7 @@ ], "orderByTime": "ASC", "policy": "default", - "query": "SELECT sum(\"count\") AS \"account_in_use\" FROM \"$testnet\".\"autogen\".\"bank-process_transactions-account_in_use\" WHERE $timeFilter GROUP BY time(1s) FILL(0)\n", + "query": "SELECT sum(\"count\") AS \"duplicate_signature\" FROM \"$testnet\".\"autogen\".\"bank-process_transactions-error-duplicate_signature\" WHERE $timeFilter GROUP BY time(1s) FILL(0)\n\n\n\n", "rawQuery": true, "refId": "G", "resultFormat": "time_series", @@ -3856,7 +3856,7 @@ ], "orderByTime": "ASC", "policy": "default", - "query": "SELECT sum(\"count\") AS \"account_not_found\" FROM \"$testnet\".\"autogen\".\"bank-process_transactions-account_not_found\" WHERE $timeFilter GROUP BY time(1s) FILL(0)\n\n", + "query": "SELECT sum(\"count\") AS \"instruction_error\" FROM \"$testnet\".\"autogen\".\"bank-process_transactions-error-instruction_error\" WHERE $timeFilter GROUP BY time(1s) FILL(0)\n", "rawQuery": true, "refId": "H", "resultFormat": "time_series", @@ -3875,6 +3875,117 @@ ] ], "tags": [] + }, + { + "groupBy": [ + { + "params": [ + "$__interval" + ], + "type": "time" + }, + { + "params": [ + "null" + ], + "type": "fill" + } + ], + "orderByTime": "ASC", + "policy": "default", + "query": "SELECT sum(\"count\") AS \"insufficient_funds\" FROM \"$testnet\".\"autogen\".\"bank-process_transactions-error-insufficient_funds\" WHERE $timeFilter GROUP BY time(1s) FILL(0)\n", + "rawQuery": true, + "refId": "I", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + }, + { + "params": [], + "type": "mean" + } + ] + ], + "tags": [] + }, + { + "groupBy": [ + { + "params": [ + "$__interval" + ], + "type": "time" + }, + { + "params": [ + "null" + ], + "type": "fill" + } + ], + "orderByTime": "ASC", + "policy": "default", + "query": "SELECT sum(\"count\") AS \"invalid_account_for_fee\" FROM \"$testnet\".\"autogen\".\"bank-process_transactions-error-invalid_account_for_fee\" WHERE $timeFilter GROUP BY time(1s) FILL(0)\n", + "rawQuery": true, + "refId": "J", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + }, + { + "params": [], + "type": "mean" + } + ] + ], + "tags": [] + }, + { + "groupBy": [ + { + "params": [ + "$__interval" + ], + "type": "time" + }, + { + "params": [ + "null" + ], + "type": "fill" + } + ], + "orderByTime": "ASC", + "policy": "default", + "query": "SELECT sum(\"count\") AS \"invalid_account_index\" FROM \"$testnet\".\"autogen\".\"bank-process_transactions-error-invalid_account_index\" WHERE $timeFilter GROUP BY time(1s) FILL(0)\n", + "rawQuery": true, + "refId": "K", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + }, + { + "params": [], + "type": "mean" + } + ] + ], + "tags": [] } ], "thresholds": [], @@ -10161,4 +10272,4 @@ "title": "Testnet Monitor (edge)", "uid": "testnet-edge", "version": 2 -} \ No newline at end of file +} diff --git a/runtime/benches/bank.rs b/runtime/benches/bank.rs index a2c3b0a09..7412d3d8c 100644 --- a/runtime/benches/bank.rs +++ b/runtime/benches/bank.rs @@ -192,7 +192,10 @@ fn bench_bank_update_recent_blockhashes(bencher: &mut Bencher) { goto_end_of_slot(Arc::get_mut(&mut bank).unwrap()); } // Verify blockhash_queue is full (genesis hash has been kicked out) - assert!(!bank.check_hash_age(&genesis_hash, MAX_RECENT_BLOCKHASHES)); + assert_eq!( + Some(false), + bank.check_hash_age(&genesis_hash, MAX_RECENT_BLOCKHASHES) + ); bencher.iter(|| { bank.update_recent_blockhashes(); }); diff --git a/runtime/src/accounts.rs b/runtime/src/accounts.rs index 349d05742..b1c4008a0 100644 --- a/runtime/src/accounts.rs +++ b/runtime/src/accounts.rs @@ -11,7 +11,6 @@ use crate::rent_collector::RentCollector; use crate::system_instruction_processor::{get_system_account_kind, SystemAccountKind}; use log::*; use rayon::slice::ParallelSliceMut; -use solana_metrics::inc_new_counter_error; use solana_sdk::account::Account; use solana_sdk::bank_hash::BankHash; use solana_sdk::clock::Slot; @@ -435,18 +434,15 @@ impl Accounts { locks: &mut HashSet, writable_keys: Vec<&Pubkey>, readonly_keys: Vec<&Pubkey>, - error_counters: &mut ErrorCounters, ) -> Result<()> { for k in writable_keys.iter() { if locks.contains(k) || self.is_locked_readonly(k) { - error_counters.account_in_use += 1; debug!("CD Account in use: {:?}", k); return Err(TransactionError::AccountInUse); } } for k in readonly_keys.iter() { if locks.contains(k) { - error_counters.account_in_use += 1; debug!("CO Account in use: {:?}", k); return Err(TransactionError::AccountInUse); } @@ -508,36 +504,15 @@ impl Accounts { txs: &[Transaction], txs_iteration_order: Option<&[usize]>, ) -> Vec> { - let mut error_counters = ErrorCounters::default(); let keys: Vec<_> = OrderedIterator::new(txs, txs_iteration_order) - .map(|tx| { - let message = &tx.message(); - message.get_account_keys_by_lock_type() - }) + .map(|tx| tx.message().get_account_keys_by_lock_type()) .collect(); - - let rv = { - let mut account_locks = &mut self.account_locks.lock().unwrap(); - keys.into_iter() - .map(|(writable_keys, readonly_keys)| { - self.lock_account( - &mut account_locks, - writable_keys, - readonly_keys, - &mut error_counters, - ) - }) - .collect() - }; - if error_counters.account_in_use != 0 { - inc_new_counter_error!( - "bank-process_transactions-account_in_use", - error_counters.account_in_use, - 0, - 100 - ); - } - rv + let mut account_locks = &mut self.account_locks.lock().unwrap(); + keys.into_iter() + .map(|(writable_keys, readonly_keys)| { + self.lock_account(&mut account_locks, writable_keys, readonly_keys) + }) + .collect() } /// Once accounts are unlocked, new transactions that modify that state can enter the pipeline diff --git a/runtime/src/accounts_db.rs b/runtime/src/accounts_db.rs index d8ca52976..ada84c6c7 100644 --- a/runtime/src/accounts_db.rs +++ b/runtime/src/accounts_db.rs @@ -54,18 +54,18 @@ pub const DEFAULT_NUM_DIRS: u32 = 4; #[derive(Debug, Default)] pub struct ErrorCounters { - pub account_not_found: usize, + pub total: usize, pub account_in_use: usize, pub account_loaded_twice: usize, + pub account_not_found: usize, pub blockhash_not_found: usize, pub blockhash_too_old: usize, - pub reserve_blockhash: usize, - pub invalid_account_for_fee: usize, - pub insufficient_funds: usize, - pub invalid_account_index: usize, - pub duplicate_signature: usize, pub call_chain_too_deep: usize, - pub missing_signature_for_fee: usize, + pub duplicate_signature: usize, + pub instruction_error: usize, + pub insufficient_funds: usize, + pub invalid_account_for_fee: usize, + pub invalid_account_index: usize, } #[derive(Deserialize, Serialize, Default, Debug, PartialEq, Clone)] diff --git a/runtime/src/bank.rs b/runtime/src/bank.rs index 2634012e3..090f80d89 100644 --- a/runtime/src/bank.rs +++ b/runtime/src/bank.rs @@ -993,12 +993,16 @@ impl Bank { .map(|(tx, lock_res)| match lock_res { Ok(()) => { let message = tx.message(); - if hash_queue.check_hash_age(&message.recent_blockhash, max_age) { + let hash_age = hash_queue.check_hash_age(&message.recent_blockhash, max_age); + if hash_age == Some(true) { (Ok(()), Some(HashAgeKind::Extant)) } else if let Some((pubkey, acc)) = self.check_tx_durable_nonce(&tx) { (Ok(()), Some(HashAgeKind::DurableNonce(pubkey, acc))) + } else if hash_age == Some(false) { + error_counters.blockhash_too_old += 1; + (Err(TransactionError::BlockhashNotFound), None) } else { - error_counters.reserve_blockhash += 1; + error_counters.blockhash_not_found += 1; (Err(TransactionError::BlockhashNotFound), None) } } @@ -1043,7 +1047,7 @@ impl Bank { .collect() } - pub fn check_hash_age(&self, hash: &Hash, max_age: usize) -> bool { + pub fn check_hash_age(&self, hash: &Hash, max_age: usize) -> Option { self.blockhash_queue .read() .unwrap() @@ -1093,31 +1097,50 @@ impl Bank { balances } + #[allow(clippy::cognitive_complexity)] fn update_error_counters(error_counters: &ErrorCounters) { + if 0 != error_counters.total { + inc_new_counter_error!( + "bank-process_transactions-error_count", + error_counters.total + ); + } + if 0 != error_counters.account_not_found { + inc_new_counter_error!( + "bank-process_transactions-account_not_found", + error_counters.account_not_found + ); + } + if 0 != error_counters.account_in_use { + inc_new_counter_error!( + "bank-process_transactions-account_in_use", + error_counters.account_in_use + ); + } + if 0 != error_counters.account_loaded_twice { + inc_new_counter_error!( + "bank-process_transactions-account_loaded_twice", + error_counters.account_loaded_twice + ); + } if 0 != error_counters.blockhash_not_found { inc_new_counter_error!( "bank-process_transactions-error-blockhash_not_found", error_counters.blockhash_not_found ); } + if 0 != error_counters.blockhash_too_old { + inc_new_counter_error!( + "bank-process_transactions-error-blockhash_too_old", + error_counters.blockhash_too_old + ); + } if 0 != error_counters.invalid_account_index { inc_new_counter_error!( "bank-process_transactions-error-invalid_account_index", error_counters.invalid_account_index ); } - if 0 != error_counters.reserve_blockhash { - inc_new_counter_error!( - "bank-process_transactions-error-reserve_blockhash", - error_counters.reserve_blockhash - ); - } - if 0 != error_counters.duplicate_signature { - inc_new_counter_error!( - "bank-process_transactions-error-duplicate_signature", - error_counters.duplicate_signature - ); - } if 0 != error_counters.invalid_account_for_fee { inc_new_counter_error!( "bank-process_transactions-error-invalid_account_for_fee", @@ -1130,10 +1153,16 @@ impl Bank { error_counters.insufficient_funds ); } - if 0 != error_counters.account_loaded_twice { + if 0 != error_counters.instruction_error { inc_new_counter_error!( - "bank-process_transactions-account_loaded_twice", - error_counters.account_loaded_twice + "bank-process_transactions-error-instruction_error", + error_counters.instruction_error + ); + } + if 0 != error_counters.duplicate_signature { + inc_new_counter_error!( + "bank-process_transactions-error-duplicate_signature", + error_counters.duplicate_signature ); } } @@ -1160,7 +1189,10 @@ impl Bank { OrderedIterator::new(batch.lock_results(), batch.iteration_order()) .enumerate() .filter_map(|(index, res)| match res { - Err(TransactionError::AccountInUse) => Some(index), + Err(TransactionError::AccountInUse) => { + error_counters.account_in_use += 1; + Some(index) + } Ok(_) => None, Err(_) => None, }) @@ -1190,11 +1222,13 @@ impl Bank { (Err(e), hash_age_kind) => (Err(e.clone()), hash_age_kind.clone()), (Ok((accounts, loaders, _rents)), hash_age_kind) => { signature_count += u64::from(tx.message().header.num_required_signatures); - ( + let process_result = self.message_processor - .process_message(tx.message(), loaders, accounts), - hash_age_kind.clone(), - ) + .process_message(tx.message(), loaders, accounts); + if let Err(TransactionError::InstructionError(_, _)) = &process_result { + error_counters.instruction_error += 1; + } + (process_result, hash_age_kind.clone()) } }) .collect(); @@ -1208,26 +1242,24 @@ impl Bank { txs.len(), ); let mut tx_count: u64 = 0; - let mut err_count = 0; + let err_count = &mut error_counters.total; for ((r, _hash_age_kind), tx) in executed.iter().zip(txs.iter()) { if r.is_ok() { tx_count += 1; } else { - if err_count == 0 { + if *err_count == 0 { debug!("tx error: {:?} {:?}", r, tx); } - err_count += 1; + *err_count += 1; } } - if err_count > 0 { - debug!("{} errors of {} txs", err_count, err_count + tx_count); - inc_new_counter_error!( - "bank-process_transactions-account_not_found", - error_counters.account_not_found + if *err_count > 0 { + debug!( + "{} errors of {} txs", + *err_count, + *err_count as u64 + tx_count ); - inc_new_counter_error!("bank-process_transactions-error_count", err_count as usize); } - Self::update_error_counters(&error_counters); ( loaded_accounts, @@ -4591,7 +4623,7 @@ mod tests { assert_eq!(recent_blockhashes.len(), i); let most_recent_hash = recent_blockhashes.iter().nth(0).unwrap(); // Check order - assert!(bank.check_hash_age(most_recent_hash, 0)); + assert_eq!(Some(true), bank.check_hash_age(most_recent_hash, 0)); goto_end_of_slot(Arc::get_mut(&mut bank).unwrap()); bank = Arc::new(new_from_parent(&bank)); } diff --git a/runtime/src/blockhash_queue.rs b/runtime/src/blockhash_queue.rs index 09a9a1a3f..a0c386389 100644 --- a/runtime/src/blockhash_queue.rs +++ b/runtime/src/blockhash_queue.rs @@ -51,12 +51,11 @@ impl BlockhashQueue { /// Check if the age of the hash is within the max_age /// return false for any hashes with an age above max_age - pub fn check_hash_age(&self, hash: &Hash, max_age: usize) -> bool { - let hash_age = self.ages.get(hash); - match hash_age { - Some(age) => self.hash_height - age.hash_height <= max_age as u64, - _ => false, - } + /// return None for any hashes that were not found + pub fn check_hash_age(&self, hash: &Hash, max_age: usize) -> Option { + self.ages + .get(hash) + .map(|age| self.hash_height - age.hash_height <= max_age as u64) } /// check if hash is valid @@ -135,6 +134,7 @@ mod tests { assert!(hash_queue.check_hash(last_hash)); assert_eq!(hash_queue.hash_height(), 1); } + #[test] fn test_reject_old_last_hash() { let mut hash_queue = BlockhashQueue::new(100); @@ -145,7 +145,14 @@ mod tests { } // Assert we're no longer able to use the oldest hash. assert!(!hash_queue.check_hash(last_hash)); + assert_eq!(None, hash_queue.check_hash_age(&last_hash, 0)); + + // Assert we are not able to use the oldest remaining hash. + let last_valid_hash = hash(&serialize(&1).unwrap()); + assert!(hash_queue.check_hash(last_valid_hash)); + assert_eq!(Some(false), hash_queue.check_hash_age(&last_valid_hash, 0)); } + /// test that when max age is 0, that a valid last_hash still passes the age check #[test] fn test_queue_init_blockhash() { @@ -153,7 +160,7 @@ mod tests { let mut hash_queue = BlockhashQueue::new(100); hash_queue.register_hash(&last_hash, &FeeCalculator::default()); assert_eq!(last_hash, hash_queue.last_hash()); - assert!(hash_queue.check_hash_age(&last_hash, 0)); + assert_eq!(Some(true), hash_queue.check_hash_age(&last_hash, 0)); } #[test] @@ -169,7 +176,10 @@ mod tests { let recent_blockhashes = blockhash_queue.get_recent_blockhashes(); // Verify that the returned hashes are most recent for (_slot, hash) in recent_blockhashes { - assert!(blockhash_queue.check_hash_age(hash, MAX_RECENT_BLOCKHASHES)); + assert_eq!( + Some(true), + blockhash_queue.check_hash_age(hash, MAX_RECENT_BLOCKHASHES) + ); } } }