Consolidate tx error counters and update metrics dashboard (#7724)

automerge
Justin Starry 2020-01-17 15:26:50 +08:00 committed by Grimes
parent 57bf618627
commit 87598c7612
7 changed files with 223 additions and 92 deletions

View File

@@ -1397,7 +1397,7 @@ mod tests {
poh_recorder.tick();
}
poh_recorder.set_bank(&bank.clone());
assert!(!bank.check_hash_age(&genesis_hash, 1));
assert_eq!(Some(false), bank.check_hash_age(&genesis_hash, 1));
}
}

View File

@@ -3599,7 +3599,7 @@
"policy": "default",
"query": "SELECT sum(\"count\") AS \"total_errors\" FROM \"$testnet\".\"autogen\".\"bank-process_transactions-error_count\" WHERE $timeFilter GROUP BY time(1s) FILL(0)\n",
"rawQuery": true,
"refId": "B",
"refId": "A",
"resultFormat": "time_series",
"select": [
[
@@ -3634,9 +3634,9 @@
],
"orderByTime": "ASC",
"policy": "default",
"query": "SELECT sum(\"count\") AS \"blockhash_too_old\" FROM \"$testnet\".\"autogen\".\"bank-process_transactions-error-reserve_blockhash\" WHERE $timeFilter GROUP BY time(1s) FILL(0)\n",
"query": "SELECT sum(\"count\") AS \"account_in_use\" FROM \"$testnet\".\"autogen\".\"bank-process_transactions-account_in_use\" WHERE $timeFilter GROUP BY time(1s) FILL(0)\n\n",
"rawQuery": true,
"refId": "A",
"refId": "B",
"resultFormat": "time_series",
"select": [
[
@@ -3708,7 +3708,7 @@
],
"orderByTime": "ASC",
"policy": "default",
"query": "SELECT sum(\"count\") AS \"blockhash_not_found\" FROM \"$testnet\".\"autogen\".\"bank-process_transactions-error-blockhash_not_found\" WHERE $timeFilter GROUP BY time(1s) FILL(0)\n",
"query": "SELECT sum(\"count\") AS \"account_not_found\" FROM \"$testnet\".\"autogen\".\"bank-process_transactions-account_not_found\" WHERE $timeFilter GROUP BY time(1s) FILL(0)\n\n\n",
"rawQuery": true,
"refId": "D",
"resultFormat": "time_series",
@@ -3745,7 +3745,7 @@
],
"orderByTime": "ASC",
"policy": "default",
"query": "SELECT sum(\"count\") AS \"duplicate_signature\" FROM \"$testnet\".\"autogen\".\"bank-process_transactions-error-duplicate_signature\" WHERE $timeFilter GROUP BY time(1s) FILL(0)\n",
"query": "SELECT sum(\"count\") AS \"blockhash_not_found\" FROM \"$testnet\".\"autogen\".\"bank-process_transactions-error-blockhash_not_found\" WHERE $timeFilter GROUP BY time(1s) FILL(0)\n",
"rawQuery": true,
"refId": "E",
"resultFormat": "time_series",
@@ -3782,7 +3782,7 @@
],
"orderByTime": "ASC",
"policy": "default",
"query": "SELECT sum(\"count\") AS \"insufficient_funds\" FROM \"$testnet\".\"autogen\".\"bank-process_transactions-error-insufficient_funds\" WHERE $timeFilter GROUP BY time(1s) FILL(0)\n",
"query": "SELECT sum(\"count\") AS \"blockhash_too_old\" FROM \"$testnet\".\"autogen\".\"bank-process_transactions-error-blockhash_too_old\" WHERE $timeFilter GROUP BY time(1s) FILL(0)\n",
"rawQuery": true,
"refId": "F",
"resultFormat": "time_series",
@@ -3819,7 +3819,7 @@
],
"orderByTime": "ASC",
"policy": "default",
"query": "SELECT sum(\"count\") AS \"account_in_use\" FROM \"$testnet\".\"autogen\".\"bank-process_transactions-account_in_use\" WHERE $timeFilter GROUP BY time(1s) FILL(0)\n",
"query": "SELECT sum(\"count\") AS \"duplicate_signature\" FROM \"$testnet\".\"autogen\".\"bank-process_transactions-error-duplicate_signature\" WHERE $timeFilter GROUP BY time(1s) FILL(0)\n\n\n\n",
"rawQuery": true,
"refId": "G",
"resultFormat": "time_series",
@@ -3856,7 +3856,7 @@
],
"orderByTime": "ASC",
"policy": "default",
"query": "SELECT sum(\"count\") AS \"account_not_found\" FROM \"$testnet\".\"autogen\".\"bank-process_transactions-account_not_found\" WHERE $timeFilter GROUP BY time(1s) FILL(0)\n\n",
"query": "SELECT sum(\"count\") AS \"instruction_error\" FROM \"$testnet\".\"autogen\".\"bank-process_transactions-error-instruction_error\" WHERE $timeFilter GROUP BY time(1s) FILL(0)\n",
"rawQuery": true,
"refId": "H",
"resultFormat": "time_series",
@@ -3875,6 +3875,117 @@
]
],
"tags": []
},
{
"groupBy": [
{
"params": [
"$__interval"
],
"type": "time"
},
{
"params": [
"null"
],
"type": "fill"
}
],
"orderByTime": "ASC",
"policy": "default",
"query": "SELECT sum(\"count\") AS \"insufficient_funds\" FROM \"$testnet\".\"autogen\".\"bank-process_transactions-error-insufficient_funds\" WHERE $timeFilter GROUP BY time(1s) FILL(0)\n",
"rawQuery": true,
"refId": "I",
"resultFormat": "time_series",
"select": [
[
{
"params": [
"value"
],
"type": "field"
},
{
"params": [],
"type": "mean"
}
]
],
"tags": []
},
{
"groupBy": [
{
"params": [
"$__interval"
],
"type": "time"
},
{
"params": [
"null"
],
"type": "fill"
}
],
"orderByTime": "ASC",
"policy": "default",
"query": "SELECT sum(\"count\") AS \"invalid_account_for_fee\" FROM \"$testnet\".\"autogen\".\"bank-process_transactions-error-invalid_account_for_fee\" WHERE $timeFilter GROUP BY time(1s) FILL(0)\n",
"rawQuery": true,
"refId": "J",
"resultFormat": "time_series",
"select": [
[
{
"params": [
"value"
],
"type": "field"
},
{
"params": [],
"type": "mean"
}
]
],
"tags": []
},
{
"groupBy": [
{
"params": [
"$__interval"
],
"type": "time"
},
{
"params": [
"null"
],
"type": "fill"
}
],
"orderByTime": "ASC",
"policy": "default",
"query": "SELECT sum(\"count\") AS \"invalid_account_index\" FROM \"$testnet\".\"autogen\".\"bank-process_transactions-error-invalid_account_index\" WHERE $timeFilter GROUP BY time(1s) FILL(0)\n",
"rawQuery": true,
"refId": "K",
"resultFormat": "time_series",
"select": [
[
{
"params": [
"value"
],
"type": "field"
},
{
"params": [],
"type": "mean"
}
]
],
"tags": []
}
],
"thresholds": [],
@@ -10161,4 +10272,4 @@
"title": "Testnet Monitor (edge)",
"uid": "testnet-edge",
"version": 2
}
}
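
Each updated panel now reads the measurement written by the matching counter in bank.rs later in this commit; the panel alias and the counter name line up one-to-one (the account_in_use and account_not_found measurements keep their older, un-prefixed names). For example, the blockhash_too_old panel above charts the datapoint emitted by this call in update_error_counters:

inc_new_counter_error!(
    "bank-process_transactions-error-blockhash_too_old",
    error_counters.blockhash_too_old
);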

View File

@@ -192,7 +192,10 @@ fn bench_bank_update_recent_blockhashes(bencher: &mut Bencher) {
goto_end_of_slot(Arc::get_mut(&mut bank).unwrap());
}
// Verify blockhash_queue is full (genesis hash has been kicked out)
assert!(!bank.check_hash_age(&genesis_hash, MAX_RECENT_BLOCKHASHES));
assert_eq!(
Some(false),
bank.check_hash_age(&genesis_hash, MAX_RECENT_BLOCKHASHES)
);
bencher.iter(|| {
bank.update_recent_blockhashes();
});

View File

@@ -11,7 +11,6 @@ use crate::rent_collector::RentCollector;
use crate::system_instruction_processor::{get_system_account_kind, SystemAccountKind};
use log::*;
use rayon::slice::ParallelSliceMut;
use solana_metrics::inc_new_counter_error;
use solana_sdk::account::Account;
use solana_sdk::bank_hash::BankHash;
use solana_sdk::clock::Slot;
@@ -435,18 +434,15 @@ impl Accounts {
locks: &mut HashSet<Pubkey>,
writable_keys: Vec<&Pubkey>,
readonly_keys: Vec<&Pubkey>,
error_counters: &mut ErrorCounters,
) -> Result<()> {
for k in writable_keys.iter() {
if locks.contains(k) || self.is_locked_readonly(k) {
error_counters.account_in_use += 1;
debug!("CD Account in use: {:?}", k);
return Err(TransactionError::AccountInUse);
}
}
for k in readonly_keys.iter() {
if locks.contains(k) {
error_counters.account_in_use += 1;
debug!("CO Account in use: {:?}", k);
return Err(TransactionError::AccountInUse);
}
@@ -508,36 +504,15 @@ impl Accounts {
txs: &[Transaction],
txs_iteration_order: Option<&[usize]>,
) -> Vec<Result<()>> {
let mut error_counters = ErrorCounters::default();
let keys: Vec<_> = OrderedIterator::new(txs, txs_iteration_order)
.map(|tx| {
let message = &tx.message();
message.get_account_keys_by_lock_type()
})
.map(|tx| tx.message().get_account_keys_by_lock_type())
.collect();
let rv = {
let mut account_locks = &mut self.account_locks.lock().unwrap();
keys.into_iter()
.map(|(writable_keys, readonly_keys)| {
self.lock_account(
&mut account_locks,
writable_keys,
readonly_keys,
&mut error_counters,
)
})
.collect()
};
if error_counters.account_in_use != 0 {
inc_new_counter_error!(
"bank-process_transactions-account_in_use",
error_counters.account_in_use,
0,
100
);
}
rv
let mut account_locks = &mut self.account_locks.lock().unwrap();
keys.into_iter()
.map(|(writable_keys, readonly_keys)| {
self.lock_account(&mut account_locks, writable_keys, readonly_keys)
})
.collect()
}
/// Once accounts are unlocked, new transactions that modify that state can enter the pipeline
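
With the counter removed from lock_account, lock_accounts simply returns the per-transaction lock results; AccountInUse failures are now tallied on the Bank side when those results are inspected (see the bank.rs hunk below). A minimal sketch of that tallying, with the helper name and argument chosen purely for illustration:

use solana_sdk::transaction::TransactionError;

// Hypothetical helper: count lock results that failed with AccountInUse,
// mirroring the counting Bank now does over batch.lock_results().
fn count_account_in_use(lock_results: &[Result<(), TransactionError>]) -> usize {
    lock_results
        .iter()
        .filter(|res| match res {
            Err(TransactionError::AccountInUse) => true,
            _ => false,
        })
        .count()
}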

View File

@@ -54,18 +54,18 @@ pub const DEFAULT_NUM_DIRS: u32 = 4;
#[derive(Debug, Default)]
pub struct ErrorCounters {
pub account_not_found: usize,
pub total: usize,
pub account_in_use: usize,
pub account_loaded_twice: usize,
pub account_not_found: usize,
pub blockhash_not_found: usize,
pub blockhash_too_old: usize,
pub reserve_blockhash: usize,
pub invalid_account_for_fee: usize,
pub insufficient_funds: usize,
pub invalid_account_index: usize,
pub duplicate_signature: usize,
pub call_chain_too_deep: usize,
pub missing_signature_for_fee: usize,
pub duplicate_signature: usize,
pub instruction_error: usize,
pub insufficient_funds: usize,
pub invalid_account_for_fee: usize,
pub invalid_account_index: usize,
}
#[derive(Deserialize, Serialize, Default, Debug, PartialEq, Clone)]
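
ErrorCounters now carries one field per counter that update_error_counters (bank.rs, below) can flush, plus a total aggregate. A rough sketch of the accumulate-then-flush pattern this enables; the tally helper and the error-to-field mapping are illustrative only, not the exact logic in bank.rs:

use solana_sdk::transaction::TransactionError;

// Illustrative only: fold a batch of transaction results into the
// consolidated counters (assumes ErrorCounters, above, is in scope).
fn tally(results: &[Result<(), TransactionError>]) -> ErrorCounters {
    let mut counters = ErrorCounters::default();
    for res in results {
        if let Err(err) = res {
            counters.total += 1;
            match err {
                TransactionError::AccountInUse => counters.account_in_use += 1,
                TransactionError::AccountNotFound => counters.account_not_found += 1,
                TransactionError::BlockhashNotFound => counters.blockhash_not_found += 1,
                TransactionError::DuplicateSignature => counters.duplicate_signature += 1,
                TransactionError::InstructionError(_, _) => counters.instruction_error += 1,
                _ => {}
            }
        }
    }
    counters
}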

View File

@@ -993,12 +993,16 @@ impl Bank {
.map(|(tx, lock_res)| match lock_res {
Ok(()) => {
let message = tx.message();
if hash_queue.check_hash_age(&message.recent_blockhash, max_age) {
let hash_age = hash_queue.check_hash_age(&message.recent_blockhash, max_age);
if hash_age == Some(true) {
(Ok(()), Some(HashAgeKind::Extant))
} else if let Some((pubkey, acc)) = self.check_tx_durable_nonce(&tx) {
(Ok(()), Some(HashAgeKind::DurableNonce(pubkey, acc)))
} else if hash_age == Some(false) {
error_counters.blockhash_too_old += 1;
(Err(TransactionError::BlockhashNotFound), None)
} else {
error_counters.reserve_blockhash += 1;
error_counters.blockhash_not_found += 1;
(Err(TransactionError::BlockhashNotFound), None)
}
}
@@ -1043,7 +1047,7 @@ impl Bank {
.collect()
}
pub fn check_hash_age(&self, hash: &Hash, max_age: usize) -> bool {
pub fn check_hash_age(&self, hash: &Hash, max_age: usize) -> Option<bool> {
self.blockhash_queue
.read()
.unwrap()
@@ -1093,31 +1097,50 @@ impl Bank {
balances
}
#[allow(clippy::cognitive_complexity)]
fn update_error_counters(error_counters: &ErrorCounters) {
if 0 != error_counters.total {
inc_new_counter_error!(
"bank-process_transactions-error_count",
error_counters.total
);
}
if 0 != error_counters.account_not_found {
inc_new_counter_error!(
"bank-process_transactions-account_not_found",
error_counters.account_not_found
);
}
if 0 != error_counters.account_in_use {
inc_new_counter_error!(
"bank-process_transactions-account_in_use",
error_counters.account_in_use
);
}
if 0 != error_counters.account_loaded_twice {
inc_new_counter_error!(
"bank-process_transactions-account_loaded_twice",
error_counters.account_loaded_twice
);
}
if 0 != error_counters.blockhash_not_found {
inc_new_counter_error!(
"bank-process_transactions-error-blockhash_not_found",
error_counters.blockhash_not_found
);
}
if 0 != error_counters.blockhash_too_old {
inc_new_counter_error!(
"bank-process_transactions-error-blockhash_too_old",
error_counters.blockhash_too_old
);
}
if 0 != error_counters.invalid_account_index {
inc_new_counter_error!(
"bank-process_transactions-error-invalid_account_index",
error_counters.invalid_account_index
);
}
if 0 != error_counters.reserve_blockhash {
inc_new_counter_error!(
"bank-process_transactions-error-reserve_blockhash",
error_counters.reserve_blockhash
);
}
if 0 != error_counters.duplicate_signature {
inc_new_counter_error!(
"bank-process_transactions-error-duplicate_signature",
error_counters.duplicate_signature
);
}
if 0 != error_counters.invalid_account_for_fee {
inc_new_counter_error!(
"bank-process_transactions-error-invalid_account_for_fee",
@@ -1130,10 +1153,16 @@ impl Bank {
error_counters.insufficient_funds
);
}
if 0 != error_counters.account_loaded_twice {
if 0 != error_counters.instruction_error {
inc_new_counter_error!(
"bank-process_transactions-account_loaded_twice",
error_counters.account_loaded_twice
"bank-process_transactions-error-instruction_error",
error_counters.instruction_error
);
}
if 0 != error_counters.duplicate_signature {
inc_new_counter_error!(
"bank-process_transactions-error-duplicate_signature",
error_counters.duplicate_signature
);
}
}
@@ -1160,7 +1189,10 @@ impl Bank {
OrderedIterator::new(batch.lock_results(), batch.iteration_order())
.enumerate()
.filter_map(|(index, res)| match res {
Err(TransactionError::AccountInUse) => Some(index),
Err(TransactionError::AccountInUse) => {
error_counters.account_in_use += 1;
Some(index)
}
Ok(_) => None,
Err(_) => None,
})
@@ -1190,11 +1222,13 @@ impl Bank {
(Err(e), hash_age_kind) => (Err(e.clone()), hash_age_kind.clone()),
(Ok((accounts, loaders, _rents)), hash_age_kind) => {
signature_count += u64::from(tx.message().header.num_required_signatures);
(
let process_result =
self.message_processor
.process_message(tx.message(), loaders, accounts),
hash_age_kind.clone(),
)
.process_message(tx.message(), loaders, accounts);
if let Err(TransactionError::InstructionError(_, _)) = &process_result {
error_counters.instruction_error += 1;
}
(process_result, hash_age_kind.clone())
}
})
.collect();
@@ -1208,26 +1242,24 @@ impl Bank {
txs.len(),
);
let mut tx_count: u64 = 0;
let mut err_count = 0;
let err_count = &mut error_counters.total;
for ((r, _hash_age_kind), tx) in executed.iter().zip(txs.iter()) {
if r.is_ok() {
tx_count += 1;
} else {
if err_count == 0 {
if *err_count == 0 {
debug!("tx error: {:?} {:?}", r, tx);
}
err_count += 1;
*err_count += 1;
}
}
if err_count > 0 {
debug!("{} errors of {} txs", err_count, err_count + tx_count);
inc_new_counter_error!(
"bank-process_transactions-account_not_found",
error_counters.account_not_found
if *err_count > 0 {
debug!(
"{} errors of {} txs",
*err_count,
*err_count as u64 + tx_count
);
inc_new_counter_error!("bank-process_transactions-error_count", err_count as usize);
}
Self::update_error_counters(&error_counters);
(
loaded_accounts,
@@ -4591,7 +4623,7 @@ mod tests {
assert_eq!(recent_blockhashes.len(), i);
let most_recent_hash = recent_blockhashes.iter().nth(0).unwrap();
// Check order
assert!(bank.check_hash_age(most_recent_hash, 0));
assert_eq!(Some(true), bank.check_hash_age(most_recent_hash, 0));
goto_end_of_slot(Arc::get_mut(&mut bank).unwrap());
bank = Arc::new(new_from_parent(&bank));
}
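
The Option<bool> returned by check_hash_age gives check_transactions a three-way split, so the two failure modes bump different counters and feed different dashboard panels. A condensed sketch of that branch (the durable-nonce fallback shown in the hunk is omitted, and the classify helper is hypothetical):

// Illustrative only; assumes ErrorCounters is in scope.
fn classify(hash_age: Option<bool>, error_counters: &mut ErrorCounters) -> bool {
    match hash_age {
        // Blockhash is recent enough: the transaction proceeds (HashAgeKind::Extant).
        Some(true) => true,
        // Blockhash is known but expired.
        Some(false) => {
            error_counters.blockhash_too_old += 1;
            false
        }
        // Blockhash was never registered in the queue.
        None => {
            error_counters.blockhash_not_found += 1;
            false
        }
    }
}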

View File

@@ -51,12 +51,11 @@ impl BlockhashQueue {
/// Check if the age of the hash is within the max_age
/// return false for any hashes with an age above max_age
pub fn check_hash_age(&self, hash: &Hash, max_age: usize) -> bool {
let hash_age = self.ages.get(hash);
match hash_age {
Some(age) => self.hash_height - age.hash_height <= max_age as u64,
_ => false,
}
/// return None for any hashes that were not found
pub fn check_hash_age(&self, hash: &Hash, max_age: usize) -> Option<bool> {
self.ages
.get(hash)
.map(|age| self.hash_height - age.hash_height <= max_age as u64)
}
/// check if hash is valid
@@ -135,6 +134,7 @@ mod tests {
assert!(hash_queue.check_hash(last_hash));
assert_eq!(hash_queue.hash_height(), 1);
}
#[test]
fn test_reject_old_last_hash() {
let mut hash_queue = BlockhashQueue::new(100);
@@ -145,7 +145,14 @@ mod tests {
}
// Assert we're no longer able to use the oldest hash.
assert!(!hash_queue.check_hash(last_hash));
assert_eq!(None, hash_queue.check_hash_age(&last_hash, 0));
// Assert we are not able to use the oldest remaining hash.
let last_valid_hash = hash(&serialize(&1).unwrap());
assert!(hash_queue.check_hash(last_valid_hash));
assert_eq!(Some(false), hash_queue.check_hash_age(&last_valid_hash, 0));
}
/// test that when max age is 0, that a valid last_hash still passes the age check
#[test]
fn test_queue_init_blockhash() {
@@ -153,7 +160,7 @@ mod tests {
let mut hash_queue = BlockhashQueue::new(100);
hash_queue.register_hash(&last_hash, &FeeCalculator::default());
assert_eq!(last_hash, hash_queue.last_hash());
assert!(hash_queue.check_hash_age(&last_hash, 0));
assert_eq!(Some(true), hash_queue.check_hash_age(&last_hash, 0));
}
#[test]
@@ -169,7 +176,10 @@ mod tests {
let recent_blockhashes = blockhash_queue.get_recent_blockhashes();
// Verify that the returned hashes are most recent
for (_slot, hash) in recent_blockhashes {
assert!(blockhash_queue.check_hash_age(hash, MAX_RECENT_BLOCKHASHES));
assert_eq!(
Some(true),
blockhash_queue.check_hash_age(hash, MAX_RECENT_BLOCKHASHES)
);
}
}
}