From a9509f56b7897b08bcdd16d3a056257a7d396681 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Alexander=20Mei=C3=9Fner?= Date: Thu, 9 Nov 2023 13:10:59 +0100 Subject: [PATCH] Feature - Epoch boundary recompilation phase (#33477) * Adds LoadedPrograms::upcoming_environments. * Moves LoadedPrograms::prune_feature_set_transition() into LoadedPrograms::prune(). * Adds parameter recompile to Bank::load_program(). * Sets latest_root_slot/epoch and environments in Bank::finish_init(). * Removes FEATURES_AFFECTING_RBPF list. * Adjusts test_feature_activation_loaded_programs_recompilation_phase(). --- ledger-tool/src/program.rs | 2 +- program-runtime/src/loaded_programs.rs | 54 +++++---- runtime/src/bank.rs | 160 +++++++++++++++++-------- runtime/src/bank/metrics.rs | 2 + runtime/src/bank/tests.rs | 24 +--- 5 files changed, 151 insertions(+), 91 deletions(-) diff --git a/ledger-tool/src/program.rs b/ledger-tool/src/program.rs index 7420a1f7a..c1a65170a 100644 --- a/ledger-tool/src/program.rs +++ b/ledger-tool/src/program.rs @@ -552,7 +552,7 @@ pub fn program(ledger_path: &Path, matches: &ArgMatches<'_>) { .clone(), ); for key in cached_account_keys { - loaded_programs.replenish(key, bank.load_program(&key, false)); + loaded_programs.replenish(key, bank.load_program(&key, false, None)); debug!("Loaded program {}", key); } invoke_context.programs_loaded_for_tx_batch = &loaded_programs; diff --git a/program-runtime/src/loaded_programs.rs b/program-runtime/src/loaded_programs.rs index 84902f31b..ac16578ac 100644 --- a/program-runtime/src/loaded_programs.rs +++ b/program-runtime/src/loaded_programs.rs @@ -459,6 +459,14 @@ pub struct LoadedPrograms { pub latest_root_epoch: Epoch, /// Environments of the current epoch pub environments: ProgramRuntimeEnvironments, + /// Anticipated replacement for `environments` at the next epoch + /// + /// This is `None` during most of an epoch, and only `Some` around the boundaries (at the end and beginning of an epoch). + /// More precisely, it starts with the recompilation phase a few hundred slots before the epoch boundary, + /// and it ends with the first rerooting after the epoch boundary. + pub upcoming_environments: Option, + /// List of loaded programs which should be recompiled before the next epoch (but don't have to). + pub programs_to_recompile: Vec<(Pubkey, Arc)>, pub stats: Stats, pub fork_graph: Option>>, } @@ -481,6 +489,8 @@ impl Default for LoadedPrograms { latest_root_slot: 0, latest_root_epoch: 0, environments: ProgramRuntimeEnvironments::default(), + upcoming_environments: None, + programs_to_recompile: Vec::default(), stats: Stats::default(), fork_graph: None, } @@ -567,7 +577,12 @@ impl LoadedPrograms { } /// Returns the current environments depending on the given epoch - pub fn get_environments_for_epoch(&self, _epoch: Epoch) -> &ProgramRuntimeEnvironments { + pub fn get_environments_for_epoch(&self, epoch: Epoch) -> &ProgramRuntimeEnvironments { + if epoch != self.latest_root_epoch { + if let Some(upcoming_environments) = self.upcoming_environments.as_ref() { + return upcoming_environments; + } + } &self.environments } @@ -630,22 +645,6 @@ impl LoadedPrograms { entry } - /// On the epoch boundary this removes all programs of the outdated feature set - pub fn prune_feature_set_transition(&mut self) { - for second_level in self.entries.values_mut() { - second_level.retain(|entry| { - if Self::matches_environment(entry, &self.environments) { - return true; - } - self.stats - .prunes_environment - .fetch_add(1, Ordering::Relaxed); - false - }); - } - self.remove_programs_with_no_entries(); - } - pub fn prune_by_deployment_slot(&mut self, slot: Slot) { self.entries.retain(|_key, second_level| { *second_level = second_level @@ -668,6 +667,15 @@ impl LoadedPrograms { error!("Failed to lock fork graph for reading."); return; }; + let mut recompilation_phase_ends = false; + if self.latest_root_epoch != new_root_epoch { + self.latest_root_epoch = new_root_epoch; + if let Some(upcoming_environments) = self.upcoming_environments.take() { + recompilation_phase_ends = true; + self.environments = upcoming_environments; + self.programs_to_recompile.clear(); + } + } for second_level in self.entries.values_mut() { // Remove entries un/re/deployed on orphan forks let mut first_ancestor_found = false; @@ -697,6 +705,15 @@ impl LoadedPrograms { return false; } } + // Remove outdated environment of previous feature set + if recompilation_phase_ends + && !Self::matches_environment(entry, &self.environments) + { + self.stats + .prunes_environment + .fetch_add(1, Ordering::Relaxed); + return false; + } true }) .cloned() @@ -706,9 +723,6 @@ impl LoadedPrograms { self.remove_programs_with_no_entries(); debug_assert!(self.latest_root_slot <= new_root_slot); self.latest_root_slot = new_root_slot; - if self.latest_root_epoch < new_root_epoch { - self.latest_root_epoch = new_root_epoch; - } } fn matches_environment( diff --git a/runtime/src/bank.rs b/runtime/src/bank.rs index 9f3636e65..2b8cbe349 100644 --- a/runtime/src/bank.rs +++ b/runtime/src/bank.rs @@ -103,6 +103,7 @@ use { }, solana_bpf_loader_program::syscalls::create_program_runtime_environment_v1, solana_cost_model::cost_tracker::CostTracker, + solana_loader_v4_program::create_program_runtime_environment_v2, solana_measure::{measure, measure::Measure, measure_us}, solana_perf::perf_libs, solana_program_runtime::{ @@ -1442,11 +1443,10 @@ impl Bank { }); // Following code may touch AccountsDb, requiring proper ancestors - let parent_epoch = parent.epoch(); let (_, update_epoch_time_us) = measure_us!({ - if parent_epoch < new.epoch() { + if parent.epoch() < new.epoch() { new.process_new_epoch( - parent_epoch, + parent.epoch(), parent.slot(), parent.block_height(), reward_calc_tracer, @@ -1461,11 +1461,71 @@ impl Bank { } }); + let (_, recompilation_time_us) = measure_us!({ + // Recompile loaded programs one at a time before the next epoch hits + let (_epoch, slot_index) = new.get_epoch_and_slot_index(new.slot()); + let slots_in_epoch = new.get_slots_in_epoch(new.epoch()); + let slots_in_recompilation_phase = + (solana_program_runtime::loaded_programs::MAX_LOADED_ENTRY_COUNT as u64) + .min(slots_in_epoch) + .checked_div(2) + .unwrap(); + let mut loaded_programs_cache = new.loaded_programs_cache.write().unwrap(); + if loaded_programs_cache.upcoming_environments.is_some() { + if let Some((key, program_to_recompile)) = + loaded_programs_cache.programs_to_recompile.pop() + { + drop(loaded_programs_cache); + let recompiled = new.load_program(&key, false, Some(program_to_recompile)); + let mut loaded_programs_cache = new.loaded_programs_cache.write().unwrap(); + loaded_programs_cache.replenish(key, recompiled); + } + } else if new.epoch() != loaded_programs_cache.latest_root_epoch + || slot_index.saturating_add(slots_in_recompilation_phase) >= slots_in_epoch + { + // Anticipate the upcoming program runtime environment for the next epoch, + // so we can try to recompile loaded programs before the feature transition hits. + drop(loaded_programs_cache); + let (feature_set, _new_feature_activations) = new.compute_active_feature_set(true); + let mut loaded_programs_cache = new.loaded_programs_cache.write().unwrap(); + let program_runtime_environment_v1 = create_program_runtime_environment_v1( + &feature_set, + &new.runtime_config.compute_budget.unwrap_or_default(), + false, /* deployment */ + false, /* debugging_features */ + ) + .unwrap(); + let program_runtime_environment_v2 = create_program_runtime_environment_v2( + &new.runtime_config.compute_budget.unwrap_or_default(), + false, /* debugging_features */ + ); + let mut upcoming_environments = loaded_programs_cache.environments.clone(); + let changed_program_runtime_v1 = + *upcoming_environments.program_runtime_v1 != program_runtime_environment_v1; + let changed_program_runtime_v2 = + *upcoming_environments.program_runtime_v2 != program_runtime_environment_v2; + if changed_program_runtime_v1 { + upcoming_environments.program_runtime_v1 = + Arc::new(program_runtime_environment_v1); + } + if changed_program_runtime_v2 { + upcoming_environments.program_runtime_v2 = + Arc::new(program_runtime_environment_v2); + } + loaded_programs_cache.upcoming_environments = Some(upcoming_environments); + loaded_programs_cache.programs_to_recompile = loaded_programs_cache + .get_entries_sorted_by_tx_usage( + changed_program_runtime_v1, + changed_program_runtime_v2, + ); + } + }); + // Update sysvars before processing transactions let (_, update_sysvars_time_us) = measure_us!({ new.update_slot_hashes(); - new.update_stake_history(Some(parent_epoch)); - new.update_clock(Some(parent_epoch)); + new.update_stake_history(Some(parent.epoch())); + new.update_clock(Some(parent.epoch())); new.update_fees(); new.update_last_restart_slot() }); @@ -1493,6 +1553,7 @@ impl Bank { feature_set_time_us, ancestors_time_us, update_epoch_time_us, + recompilation_time_us, update_sysvars_time_us, fill_sysvar_cache_time_us, }, @@ -4642,16 +4703,25 @@ impl Bank { ProgramAccountLoadResult::InvalidAccountData } - pub fn load_program(&self, pubkey: &Pubkey, reload: bool) -> Arc { + pub fn load_program( + &self, + pubkey: &Pubkey, + reload: bool, + recompile: Option>, + ) -> Arc { let loaded_programs_cache = self.loaded_programs_cache.read().unwrap(); - let environments = loaded_programs_cache.get_environments_for_epoch(self.epoch); - + let effective_epoch = if recompile.is_some() { + loaded_programs_cache.latest_root_epoch.saturating_add(1) + } else { + self.epoch + }; + let environments = loaded_programs_cache.get_environments_for_epoch(effective_epoch); let mut load_program_metrics = LoadProgramMetrics { program_id: pubkey.to_string(), ..LoadProgramMetrics::default() }; - let loaded_program = match self.load_program_accounts(pubkey) { + let mut loaded_program = match self.load_program_accounts(pubkey) { ProgramAccountLoadResult::AccountNotFound => Ok(LoadedProgram::new_tombstone( self.slot, LoadedProgramType::Closed, @@ -4758,6 +4828,16 @@ impl Bank { let mut timings = ExecuteDetailsTimings::default(); load_program_metrics.submit_datapoint(&mut timings); + if let Some(recompile) = recompile { + loaded_program.effective_slot = loaded_program.effective_slot.max( + self.epoch_schedule() + .get_first_slot_in_epoch(effective_epoch), + ); + loaded_program.tx_usage_counter = + AtomicU64::new(recompile.tx_usage_counter.load(Ordering::Relaxed)); + loaded_program.ix_usage_counter = + AtomicU64::new(recompile.ix_usage_counter.load(Ordering::Relaxed)); + } Arc::new(loaded_program) } @@ -5004,7 +5084,7 @@ impl Bank { let missing_programs: Vec<(Pubkey, Arc)> = missing .iter() .map(|(key, count)| { - let program = self.load_program(key, false); + let program = self.load_program(key, false, None); program.tx_usage_counter.store(*count, Ordering::Relaxed); (*key, program) }) @@ -5014,7 +5094,7 @@ impl Bank { let unloaded_programs: Vec<(Pubkey, Arc)> = unloaded .iter() .map(|(key, count)| { - let program = self.load_program(key, true); + let program = self.load_program(key, true, None); program.tx_usage_counter.store(*count, Ordering::Relaxed); (*key, program) }) @@ -6559,6 +6639,24 @@ impl Bank { } } + let mut loaded_programs_cache = self.loaded_programs_cache.write().unwrap(); + loaded_programs_cache.latest_root_slot = self.slot(); + loaded_programs_cache.latest_root_epoch = self.epoch(); + loaded_programs_cache.environments.program_runtime_v1 = Arc::new( + create_program_runtime_environment_v1( + &self.feature_set, + &self.runtime_config.compute_budget.unwrap_or_default(), + false, /* deployment */ + false, /* debugging_features */ + ) + .unwrap(), + ); + loaded_programs_cache.environments.program_runtime_v2 = + Arc::new(create_program_runtime_environment_v2( + &self.runtime_config.compute_budget.unwrap_or_default(), + false, /* debugging_features */ + )); + if self .feature_set .is_active(&feature_set::cap_accounts_data_len::id()) @@ -7924,46 +8022,6 @@ impl Bank { only_apply_transitions_for_new_features: bool, new_feature_activations: &HashSet, ) { - const FEATURES_AFFECTING_RBPF: &[Pubkey] = &[ - feature_set::error_on_syscall_bpf_function_hash_collisions::id(), - feature_set::reject_callx_r10::id(), - feature_set::switch_to_new_elf_parser::id(), - feature_set::bpf_account_data_direct_mapping::id(), - feature_set::enable_alt_bn128_syscall::id(), - feature_set::enable_alt_bn128_compression_syscall::id(), - feature_set::enable_big_mod_exp_syscall::id(), - feature_set::blake3_syscall_enabled::id(), - feature_set::curve25519_syscall_enabled::id(), - feature_set::disable_fees_sysvar::id(), - feature_set::enable_partitioned_epoch_reward::id(), - feature_set::disable_deploy_of_alloc_free_syscall::id(), - feature_set::last_restart_slot_sysvar::id(), - feature_set::remaining_compute_units_syscall_enabled::id(), - ]; - if !only_apply_transitions_for_new_features - || FEATURES_AFFECTING_RBPF - .iter() - .any(|key| new_feature_activations.contains(key)) - { - let program_runtime_environment_v1 = create_program_runtime_environment_v1( - &self.feature_set, - &self.runtime_config.compute_budget.unwrap_or_default(), - false, /* deployment */ - false, /* debugging_features */ - ) - .unwrap(); - let mut loaded_programs_cache = self.loaded_programs_cache.write().unwrap(); - loaded_programs_cache.environments.program_runtime_v1 = - Arc::new(program_runtime_environment_v1); - let program_runtime_environment_v2 = - solana_loader_v4_program::create_program_runtime_environment_v2( - &self.runtime_config.compute_budget.unwrap_or_default(), - false, /* debugging_features */ - ); - loaded_programs_cache.environments.program_runtime_v2 = - Arc::new(program_runtime_environment_v2); - loaded_programs_cache.prune_feature_set_transition(); - } for builtin in BUILTINS.iter() { if let Some(feature_id) = builtin.feature_id { let should_apply_action_for_feature_transition = diff --git a/runtime/src/bank/metrics.rs b/runtime/src/bank/metrics.rs index 1fa33b2e7..ccf8c4837 100644 --- a/runtime/src/bank/metrics.rs +++ b/runtime/src/bank/metrics.rs @@ -39,6 +39,7 @@ pub(crate) struct NewBankTimings { pub(crate) feature_set_time_us: u64, pub(crate) ancestors_time_us: u64, pub(crate) update_epoch_time_us: u64, + pub(crate) recompilation_time_us: u64, pub(crate) update_sysvars_time_us: u64, pub(crate) fill_sysvar_cache_time_us: u64, } @@ -144,6 +145,7 @@ pub(crate) fn report_new_bank_metrics( ("feature_set_us", timings.feature_set_time_us, i64), ("ancestors_us", timings.ancestors_time_us, i64), ("update_epoch_us", timings.update_epoch_time_us, i64), + ("recompilation_time_us", timings.recompilation_time_us, i64), ("update_sysvars_us", timings.update_sysvars_time_us, i64), ( "fill_sysvar_cache_us", diff --git a/runtime/src/bank/tests.rs b/runtime/src/bank/tests.rs index 1f4ed9d8b..cddac40fe 100644 --- a/runtime/src/bank/tests.rs +++ b/runtime/src/bank/tests.rs @@ -7,9 +7,7 @@ use { *, }, crate::{ - accounts_background_service::{ - AbsRequestSender, PrunedBanksRequestHandler, SendDroppedBankCallback, - }, + accounts_background_service::{PrunedBanksRequestHandler, SendDroppedBankCallback}, bank_client::BankClient, bank_forks::BankForks, epoch_rewards_hasher::hash_rewards_into_partitions, @@ -6990,7 +6988,7 @@ fn test_bank_load_program() { programdata_account.set_rent_epoch(1); bank.store_account_and_update_capitalization(&key1, &program_account); bank.store_account_and_update_capitalization(&programdata_key, &programdata_account); - let program = bank.load_program(&key1, false); + let program = bank.load_program(&key1, false, None); assert_matches!(program.program, LoadedProgramType::LegacyV1(_)); assert_eq!( program.account_size, @@ -7145,7 +7143,7 @@ fn test_bpf_loader_upgradeable_deploy_with_max_len() { assert_eq!(*elf.get(i).unwrap(), *byte); } - let loaded_program = bank.load_program(&program_keypair.pubkey(), false); + let loaded_program = bank.load_program(&program_keypair.pubkey(), false, None); // Invoke deployed program mock_process_instruction( @@ -11903,7 +11901,7 @@ fn test_is_in_slot_hashes_history() { } #[test] -fn test_runtime_feature_enable_with_program_cache() { +fn test_feature_activation_loaded_programs_recompilation_phase() { solana_logger::setup(); // Bank Setup @@ -11969,20 +11967,8 @@ fn test_runtime_feature_enable_with_program_cache() { &feature::create_account(&Feature { activated_at: None }, feature_account_balance), ); - // Reroot to call LoadedPrograms::prune() and end the current recompilation phase goto_end_of_slot(bank.clone()); - bank_forks - .write() - .unwrap() - .insert(Arc::into_inner(bank).unwrap()); - let bank = bank_forks.read().unwrap().working_bank(); - bank_forks.read().unwrap().prune_program_cache(bank.slot); - bank_forks - .write() - .unwrap() - .set_root(bank.slot, &AbsRequestSender::default(), None); - - // Advance to next epoch, which starts the next recompilation phase + // Advance to next epoch, which starts the recompilation phase let bank = new_from_parent_next_epoch(bank, 1); // Execute after feature is enabled to check it was filtered out and reverified.