Reject blocks for costs above the max block cost (#18994)
* added realtime cost checking logic to reject blocks that would exceed the max limit:
  - defines max limits in block_cost_limits.rs
  - right after each batch's execution, accumulates its cost and checks it against the limit, returning an error if the limit is exceeded
* update the ABI digest that changed due to adding an additional TransactionError
* to avoid counting stats multiple times, only accumulate execute-timings when a bank is completed
* gate it by a feature
* move cost const definitions into block_cost_limits.rs
* redefine the cost for signatures and account access; removed the signer part as it is not well defined for now
* check that per_program_timings of execute_timings is non-empty before sending
parent 9d8594a046
commit 414d904959
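At its core the change is an accumulate-and-check pattern applied right after each batch executes. A minimal standalone sketch of that pattern (illustrative names, not the commit's actual API; the real implementation is the BlockCostCapacityMeter introduced in blockstore_processor.rs below):

// Minimal sketch of the accumulate-and-check pattern; `CapacityMeter` and
// `after_batch` are illustrative names only.
struct CapacityMeter {
    capacity: u64,         // e.g. block_cost_max(), in compute units
    accumulated_cost: u64, // cost of batches executed so far in this block
}

impl CapacityMeter {
    // add a batch's cost, return the remaining capacity
    fn accumulate(&mut self, cost: u64) -> u64 {
        self.accumulated_cost += cost;
        self.capacity.saturating_sub(self.accumulated_cost)
    }
}

fn after_batch(meter: &mut CapacityMeter, batch_cost: u64) -> Result<(), &'static str> {
    if meter.accumulate(batch_cost) == 0 {
        // replay fails the block; the real code returns
        // TransactionError::WouldExceedMaxBlockCostLimit
        return Err("would exceed max block cost limit");
    }
    Ok(())
}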
@@ -9,30 +9,10 @@
 //!
 use crate::execute_cost_table::ExecuteCostTable;
 use log::*;
+use solana_ledger::block_cost_limits::*;
 use solana_sdk::{pubkey::Pubkey, sanitized_transaction::SanitizedTransaction};
 use std::collections::HashMap;

-// 07-27-2021, compute_unit to microsecond conversion ratio collected from mainnet-beta
-// differs between instructions. Some bpf instructions have a much higher CU/us ratio
-// (eg 7vxeyaXGLqcp66fFShqUdHxdacp4k4kwUpRSSeoZLCZ4 has an average ratio of 135), others
-// have a lower ratio (eg 9xQeWvG816bUx9EPjHmaT23yvVM2ZWbrrpZb9PusVFin has an average ratio of 14).
-// With this, I am guesstimating the flat_fee for sigver and account read/write
-// as follows. This can be adjusted when needed.
-const SIGVER_COST: u64 = 1;
-const NON_SIGNED_READONLY_ACCOUNT_ACCESS_COST: u64 = 1;
-const NON_SIGNED_WRITABLE_ACCOUNT_ACCESS_COST: u64 = 2;
-const SIGNED_READONLY_ACCOUNT_ACCESS_COST: u64 =
-    SIGVER_COST + NON_SIGNED_READONLY_ACCOUNT_ACCESS_COST;
-const SIGNED_WRITABLE_ACCOUNT_ACCESS_COST: u64 =
-    SIGVER_COST + NON_SIGNED_WRITABLE_ACCOUNT_ACCESS_COST;
-
-// 07-27-2021, the cost model limit is set to the "worst case scenario", which is the
-// max compute units it can execute. From mainnet-beta, the max CU of an instruction
-// is 3753, rounded up to 4_000. Say we allow a max of 50_000 instructions per writable
-// account, and 1_000_000 instructions per block. It comes to the following limits:
-pub const ACCOUNT_MAX_COST: u64 = 200_000_000;
-pub const BLOCK_MAX_COST: u64 = 4_000_000_000;

 const MAX_WRITABLE_ACCOUNTS: usize = 256;

 #[derive(Debug, Clone)]
@@ -88,7 +68,7 @@ pub struct CostModel {

 impl Default for CostModel {
     fn default() -> Self {
-        CostModel::new(ACCOUNT_MAX_COST, BLOCK_MAX_COST)
+        CostModel::new(account_cost_max(), block_cost_max())
     }
 }

@@ -142,21 +122,13 @@ impl CostModel {
         // calculate account access cost
         let message = transaction.message();
         message.account_keys.iter().enumerate().for_each(|(i, k)| {
-            let is_signer = message.is_signer(i);
             let is_writable = message.is_writable(i);

-            if is_signer && is_writable {
+            if is_writable {
                 self.transaction_cost.writable_accounts.push(*k);
-                self.transaction_cost.account_access_cost += SIGNED_WRITABLE_ACCOUNT_ACCESS_COST;
-            } else if is_signer && !is_writable {
-                self.transaction_cost.account_access_cost += SIGNED_READONLY_ACCOUNT_ACCESS_COST;
-            } else if !is_signer && is_writable {
-                self.transaction_cost.writable_accounts.push(*k);
-                self.transaction_cost.account_access_cost +=
-                    NON_SIGNED_WRITABLE_ACCOUNT_ACCESS_COST;
+                self.transaction_cost.account_access_cost += account_write_cost();
             } else {
-                self.transaction_cost.account_access_cost +=
-                    NON_SIGNED_READONLY_ACCOUNT_ACCESS_COST;
+                self.transaction_cost.account_access_cost += account_read_cost();
             }
         });
         debug!(
@@ -418,9 +390,8 @@ mod tests {
             .try_into()
             .unwrap();

-        let expected_account_cost = SIGNED_WRITABLE_ACCOUNT_ACCESS_COST
-            + NON_SIGNED_WRITABLE_ACCOUNT_ACCESS_COST
-            + NON_SIGNED_READONLY_ACCOUNT_ACCESS_COST;
+        let expected_account_cost =
+            account_write_cost() + account_write_cost() + account_read_cost();
         let expected_execution_cost = 8;

         let mut cost_model = CostModel::default();
@@ -475,9 +446,8 @@ mod tests {
         );

         let number_threads = 10;
-        let expected_account_cost = SIGNED_WRITABLE_ACCOUNT_ACCESS_COST
-            + NON_SIGNED_WRITABLE_ACCOUNT_ACCESS_COST * 2
-            + NON_SIGNED_READONLY_ACCOUNT_ACCESS_COST * 2;
+        let expected_account_cost =
+            account_write_cost() + account_write_cost() * 2 + account_read_cost() * 2;
         let cost1 = 100;
         let cost2 = 200;
         // execution cost can be either 2 * Default (before write) or cost1+cost2 (after write)

@@ -1931,7 +1931,6 @@ impl ReplayStage {
                 replay_vote_sender,
                 verify_recyclers,
             );
-            execute_timings.accumulate(&bank_progress.replay_stats.execute_timings);
             match replay_result {
                 Ok(replay_tx_count) => tx_count += replay_tx_count,
                 Err(err) => {
@@ -1957,6 +1956,12 @@ impl ReplayStage {
             }
             assert_eq!(*bank_slot, bank.slot());
             if bank.is_complete() {
+                execute_timings.accumulate(&bank_progress.replay_stats.execute_timings);
+                debug!("bank {} completed replay from blockstore, contributing to cost update with {:?}",
+                    bank.slot(),
+                    bank_progress.replay_stats.execute_timings
+                );
+
                 bank_progress.replay_stats.report_stats(
                     bank.slot(),
                     bank_progress.replay_progress.num_entries,
@@ -2031,9 +2036,11 @@ impl ReplayStage {
         }

         // send accumulated execute-timings to cost_update_service
-        cost_update_sender
-            .send(execute_timings)
-            .unwrap_or_else(|err| warn!("cost_update_sender failed: {:?}", err));
+        if !execute_timings.details.per_program_timings.is_empty() {
+            cost_update_sender
+                .send(execute_timings)
+                .unwrap_or_else(|err| warn!("cost_update_sender failed: {:?}", err));
+        }

         inc_new_counter_info!("replay_stage-replay_transactions", tx_count);
         did_complete_bank

@@ -0,0 +1,42 @@
+//! defines block cost related limits
+//!
+
+// see https://github.com/solana-labs/solana/issues/18944
+// and https://github.com/solana-labs/solana/pull/18994#issuecomment-896128992
+//
+pub const MAX_BLOCK_TIME_US: u64 = 400_000; // aiming at 400ms/block max time
+pub const AVG_INSTRUCTION_TIME_US: u64 = 1_000; // average instruction execution time
+pub const SYSTEM_PARALLELISM: u64 = 10;
+pub const MAX_INSTRUCTION_COST: u64 = 200_000;
+pub const MAX_NUMBER_BPF_INSTRUCTIONS_PER_ACCOUNT: u64 = 200;
+
+pub const fn max_instructions_per_block() -> u64 {
+    (MAX_BLOCK_TIME_US / AVG_INSTRUCTION_TIME_US) * SYSTEM_PARALLELISM
+}
+
+pub const fn block_cost_max() -> u64 {
+    MAX_INSTRUCTION_COST * max_instructions_per_block()
+}
+
+pub const fn account_cost_max() -> u64 {
+    MAX_INSTRUCTION_COST * max_instructions_per_block()
+}
+
+pub const fn compute_unit_to_us_ratio() -> u64 {
+    block_cost_max() / MAX_BLOCK_TIME_US
+}
+
+pub const fn signature_cost() -> u64 {
+    // a signature takes an average of 10us
+    compute_unit_to_us_ratio() * 10
+}
+
+pub const fn account_read_cost() -> u64 {
+    // reading an account averages 5us
+    compute_unit_to_us_ratio() * 5
+}
+
+pub const fn account_write_cost() -> u64 {
+    // writing an account averages 25us
+    compute_unit_to_us_ratio() * 25
+}
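For orientation, the arithmetic these const fns encode works out as follows. A quick sketch (not part of the commit) that simply evaluates the constants above:

// Worked evaluation of block_cost_limits' const fns, using the values above.
fn main() {
    let max_instructions_per_block = (400_000u64 / 1_000) * 10; // = 4_000
    let block_cost_max = 200_000u64 * max_instructions_per_block; // = 800_000_000 CU
    let cu_per_us = block_cost_max / 400_000; // = 2_000 CU per microsecond
    assert_eq!(max_instructions_per_block, 4_000);
    assert_eq!(block_cost_max, 800_000_000);
    assert_eq!(cu_per_us, 2_000);
    // flat costs derived from the average latencies in the comments:
    assert_eq!(cu_per_us * 10, 20_000); // signature_cost(): 10us
    assert_eq!(cu_per_us * 5, 10_000); // account_read_cost(): 5us
    assert_eq!(cu_per_us * 25, 50_000); // account_write_cost(): 25us
}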
@@ -1,6 +1,7 @@
 use crate::{
-    block_error::BlockError, blockstore::Blockstore, blockstore_db::BlockstoreError,
-    blockstore_meta::SlotMeta, leader_schedule_cache::LeaderScheduleCache,
+    block_cost_limits::*, block_error::BlockError, blockstore::Blockstore,
+    blockstore_db::BlockstoreError, blockstore_meta::SlotMeta,
+    leader_schedule_cache::LeaderScheduleCache,
 };
 use chrono_humanize::{Accuracy, HumanTime, Tense};
 use crossbeam_channel::Sender;
@@ -31,6 +32,7 @@ use solana_runtime::{
 };
 use solana_sdk::{
     clock::{Slot, MAX_PROCESSING_AGE},
+    feature_set,
     genesis_config::GenesisConfig,
     hash::Hash,
     pubkey::Pubkey,
@@ -48,11 +50,40 @@ use std::{
     convert::TryFrom,
     path::PathBuf,
     result,
-    sync::Arc,
+    sync::{Arc, RwLock},
     time::{Duration, Instant},
 };
 use thiserror::Error;

+// it tracks the block cost available capacity - number of compute-units allowed
+// by the max block cost limit
+#[derive(Debug)]
+pub struct BlockCostCapacityMeter {
+    pub capacity: u64,
+    pub accumulated_cost: u64,
+}
+
+impl Default for BlockCostCapacityMeter {
+    fn default() -> Self {
+        BlockCostCapacityMeter::new(block_cost_max())
+    }
+}
+
+impl BlockCostCapacityMeter {
+    pub fn new(capacity_limit: u64) -> Self {
+        Self {
+            capacity: capacity_limit,
+            accumulated_cost: 0_u64,
+        }
+    }
+
+    // return the remaining capacity
+    pub fn accumulate(&mut self, cost: u64) -> u64 {
+        self.accumulated_cost += cost;
+        self.capacity.saturating_sub(self.accumulated_cost)
+    }
+}
+
 pub type BlockstoreProcessorResult =
     result::Result<(BankForks, LeaderScheduleCache), BlockstoreProcessorError>;

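A hedged usage sketch of the meter above: one instance per block being replayed, shared across batch executions behind Arc<RwLock<...>> the way confirm_slot and execute_batch thread it through below. `demo` and the 50_000 figure are illustrative, not from the commit:

use std::sync::{Arc, RwLock};

fn demo(meter: Arc<RwLock<BlockCostCapacityMeter>>) {
    // suppose a batch just executed for 50_000 compute units
    let remaining = meter.write().unwrap().accumulate(50_000);
    if remaining == 0 {
        // at this point execute_batch returns
        // Err(TransactionError::WouldExceedMaxBlockCostLimit)
    }
}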
@@ -100,12 +131,25 @@ fn get_first_error(
     first_err
 }

+fn aggregate_total_execution_units(execute_timings: &ExecuteTimings) -> u64 {
+    let mut execute_cost_units: u64 = 0;
+    for (program_id, timing) in &execute_timings.details.per_program_timings {
+        if timing.count < 1 {
+            continue;
+        }
+        execute_cost_units += timing.accumulated_units / timing.count as u64;
+        trace!("aggregated execution cost of {:?} {:?}", program_id, timing);
+    }
+    execute_cost_units
+}
+
 fn execute_batch(
     batch: &TransactionBatch,
     bank: &Arc<Bank>,
     transaction_status_sender: Option<&TransactionStatusSender>,
     replay_vote_sender: Option<&ReplayVoteSender>,
     timings: &mut ExecuteTimings,
+    cost_capacity_meter: Arc<RwLock<BlockCostCapacityMeter>>,
 ) -> Result<()> {
     let record_token_balances = transaction_status_sender.is_some();
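Note that aggregate_total_execution_units returns, for each program seen so far, its average compute units per invocation (accumulated_units / count), summed over all programs. With made-up numbers: program A with accumulated_units = 900 over count = 3 contributes 900 / 3 = 300, program B with 400 over 2 contributes 200, so the function returns 500 compute units.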
@@ -117,6 +161,8 @@ fn execute_batch(
         vec![]
     };

+    let pre_process_units: u64 = aggregate_total_execution_units(timings);
+
     let (tx_results, balances, inner_instructions, transaction_logs) =
         batch.bank().load_execute_and_commit_transactions(
             batch,
@@ -127,6 +173,29 @@ fn execute_batch(
         timings,
     );

+    if bank
+        .feature_set
+        .is_active(&feature_set::gate_large_block::id())
+    {
+        let execution_cost_units = aggregate_total_execution_units(timings) - pre_process_units;
+        let remaining_block_cost_cap = cost_capacity_meter
+            .write()
+            .unwrap()
+            .accumulate(execution_cost_units);
+
+        debug!(
+            "bank {} executed a batch, number of transactions {}, total execute cu {}, remaining block cost cap {}",
+            bank.slot(),
+            batch.sanitized_transactions().len(),
+            execution_cost_units,
+            remaining_block_cost_cap,
+        );
+
+        if remaining_block_cost_cap == 0_u64 {
+            return Err(TransactionError::WouldExceedMaxBlockCostLimit);
+        }
+    }
+
     bank_utils::find_and_send_votes(
         batch.sanitized_transactions(),
         &tx_results,
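One detail worth noting in the hunk above: `timings` accumulates across every batch of the block, so the batch's own cost is measured as the difference between two snapshots of the aggregate. Schematically (a sketch, not the commit's code verbatim):

// inside execute_batch, assuming `timings: &mut ExecuteTimings`
let pre = aggregate_total_execution_units(timings); // snapshot before execution
// ... batch.bank().load_execute_and_commit_transactions(..., timings) runs here ...
let this_batch_units = aggregate_total_execution_units(timings) - pre; // this batch only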
@@ -174,6 +243,7 @@ fn execute_batches(
     transaction_status_sender: Option<&TransactionStatusSender>,
     replay_vote_sender: Option<&ReplayVoteSender>,
     timings: &mut ExecuteTimings,
+    cost_capacity_meter: Arc<RwLock<BlockCostCapacityMeter>>,
 ) -> Result<()> {
     inc_new_counter_debug!("bank-par_execute_entries-count", batches.len());
     let (results, new_timings): (Vec<Result<()>>, Vec<ExecuteTimings>) =
@@ -189,6 +259,7 @@ fn execute_batches(
                 transaction_status_sender,
                 replay_vote_sender,
                 &mut timings,
+                cost_capacity_meter.clone(),
             );
             if let Some(entry_callback) = entry_callback {
                 entry_callback(bank);
@@ -233,6 +304,7 @@ pub fn process_entries(
         transaction_status_sender,
         replay_vote_sender,
         &mut timings,
+        Arc::new(RwLock::new(BlockCostCapacityMeter::default())),
     );

     debug!("process_entries: {:?}", timings);
@@ -248,6 +320,7 @@ fn process_entries_with_callback(
     transaction_status_sender: Option<&TransactionStatusSender>,
     replay_vote_sender: Option<&ReplayVoteSender>,
     timings: &mut ExecuteTimings,
+    cost_capacity_meter: Arc<RwLock<BlockCostCapacityMeter>>,
 ) -> Result<()> {
     // accumulator for entries that can be processed in parallel
     let mut batches = vec![];
@@ -269,6 +342,7 @@ fn process_entries_with_callback(
                     transaction_status_sender,
                     replay_vote_sender,
                     timings,
+                    cost_capacity_meter.clone(),
                 )?;
                 batches.clear();
                 for hash in &tick_hashes {
@@ -320,6 +394,7 @@ fn process_entries_with_callback(
                     transaction_status_sender,
                     replay_vote_sender,
                     timings,
+                    cost_capacity_meter.clone(),
                 )?;
                 batches.clear();
             }
@@ -334,6 +409,7 @@ fn process_entries_with_callback(
         transaction_status_sender,
         replay_vote_sender,
         timings,
+        cost_capacity_meter,
     )?;
     for hash in tick_hashes {
         bank.register_tick(hash);
@@ -820,6 +896,7 @@ pub fn confirm_slot(

     let mut replay_elapsed = Measure::start("replay_elapsed");
     let mut execute_timings = ExecuteTimings::default();
+    let cost_capacity_meter = Arc::new(RwLock::new(BlockCostCapacityMeter::default()));
     // Note: This will shuffle entries' transactions in-place.
     let process_result = process_entries_with_callback(
         bank,
@@ -829,6 +906,7 @@ pub fn confirm_slot(
         transaction_status_sender,
         replay_vote_sender,
         &mut execute_timings,
+        cost_capacity_meter,
     )
     .map_err(BlockstoreProcessorError::from);
     replay_elapsed.stop();

@@ -10,6 +10,7 @@ pub mod block_error;
 #[macro_use]
 pub mod blockstore;
 pub mod ancestor_iterator;
+pub mod block_cost_limits;
 pub mod blockstore_db;
 pub mod blockstore_meta;
 pub mod blockstore_processor;

@@ -185,7 +185,7 @@ impl ExecuteTimings {
 }

 type BankStatusCache = StatusCache<Result<()>>;
-#[frozen_abi(digest = "HhY4tMP5KZU9fw9VLpMMUikfvNVCLksocZBUKjt8ZjYH")]
+#[frozen_abi(digest = "9iDANtGXnSv6WK4vc2rvtrhVMHidKeBM9nQxm34nC79C")]
 pub type BankSlotDelta = SlotDelta<Result<()>>;
 type TransactionAccountRefCells = Vec<(Pubkey, Rc<RefCell<AccountSharedData>>)>;
 type TransactionLoaderRefCells = Vec<Vec<(Pubkey, Rc<RefCell<AccountSharedData>>)>>;

@@ -187,6 +187,10 @@ pub mod stake_merge_with_unmatched_credits_observed {
     solana_sdk::declare_id!("meRgp4ArRPhD3KtCY9c5yAf2med7mBLsjKTPeVUHqBL");
 }

+pub mod gate_large_block {
+    solana_sdk::declare_id!("2ry7ygxiYURULZCrypHhveanvP5tzZ4toRwVp89oCNSj");
+}
+
 lazy_static! {
     /// Map of feature identifiers to user-visible description
     pub static ref FEATURE_NAMES: HashMap<Pubkey, &'static str> = [
@@ -228,6 +232,7 @@ lazy_static! {
         (merge_nonce_error_into_system_error::id(), "merge NonceError into SystemError"),
         (disable_fees_sysvar::id(), "disable fees sysvar"),
         (stake_merge_with_unmatched_credits_observed::id(), "allow merging active stakes with unmatched credits_observed #18985"),
+        (gate_large_block::id(), "validator checks block cost against max limit in realtime, reject if exceeds."),
         /*************** ADD NEW FEATURES HERE ***************/
     ]
     .iter()

@@ -99,6 +99,11 @@ pub enum TransactionError {
     /// Transaction processing left an account with an outstanding borrowed reference
     #[error("Transaction processing left an account with an outstanding borrowed reference")]
     AccountBorrowOutstanding,
+
+    #[error(
+        "Transaction could not fit into current block without exceeding the Max Block Cost Limit"
+    )]
+    WouldExceedMaxBlockCostLimit,
 }

 pub type Result<T> = result::Result<T, TransactionError>;

@@ -41,6 +41,7 @@ enum TransactionErrorType {
   SANITIZE_FAILURE = 14;
   CLUSTER_MAINTENANCE = 15;
   ACCOUNT_BORROW_OUTSTANDING_TX = 16;
+  WOULD_EXCEED_MAX_BLOCK_COST_LIMIT = 17;
 }

 message InstructionError {

@@ -606,6 +606,9 @@ impl From<TransactionError> for tx_by_addr::TransactionError {
             TransactionError::AccountBorrowOutstanding => {
                 tx_by_addr::TransactionErrorType::AccountBorrowOutstandingTx
             }
+            TransactionError::WouldExceedMaxBlockCostLimit => {
+                tx_by_addr::TransactionErrorType::WouldExceedMaxBlockCostLimit
+            }
         } as i32,
         instruction_error: match transaction_error {
             TransactionError::InstructionError(index, ref instruction_error) => {