validator restart

This commit is contained in:
Sathish Ambley 2019-06-02 14:05:32 -07:00 committed by Michael Vines
parent bd633d2b81
commit dc5c6e7cf8
4 changed files with 72 additions and 18 deletions

View File

@ -3,6 +3,7 @@
use bincode::{deserialize_from, serialize_into};
use solana_metrics::inc_new_counter_info;
use solana_runtime::bank::{Bank, BankRc, StatusCacheRc};
use solana_sdk::genesis_block::GenesisBlock;
use solana_sdk::timing;
use std::collections::{HashMap, HashSet};
use std::fs;
@ -224,6 +225,7 @@ impl BankForks {
}
fn load_snapshots(
genesis_block: &GenesisBlock,
names: &[u64],
bank_maps: &mut Vec<(u64, u64, Bank)>,
status_cache_rc: &StatusCacheRc,
@ -232,9 +234,11 @@ impl BankForks {
let path = BankForks::get_snapshot_path(snapshot_path);
let mut bank_rc: Option<(BankRc, u64)> = None;
println!("names: {:?}", names);
for bank_slot in names.iter().rev() {
let bank_path = format!("{}", bank_slot);
let bank_file_path = path.join(bank_path.clone());
println!("Load from {:?}", bank_file_path);
info!("Load from {:?}", bank_file_path);
let file = File::open(bank_file_path);
if file.is_err() {
@ -274,6 +278,9 @@ impl BankForks {
warn!("Load snapshot rc failed for {}", bank_slot);
}
}
let bank0 = Bank::new(&genesis_block);
bank0.freeze();
bank_maps.push((0, 0, bank0));
bank_rc
}
@ -310,7 +317,7 @@ impl BankForks {
(banks, slots, last_slot)
}
pub fn load_from_snapshot(snapshot_path: &Option<String>) -> Result<Self, Error> {
pub fn load_from_snapshot(genesis_block: &GenesisBlock, snapshot_path: &Option<String>) -> Result<Self, Error> {
let path = BankForks::get_snapshot_path(snapshot_path);
let paths = fs::read_dir(path)?;
let mut names = paths
@ -323,10 +330,13 @@ impl BankForks {
})
.collect::<Vec<u64>>();
println!("names before: {:?}", names);
// names.retain(|&x| x != 0);
println!("names after : {:?}", names);
names.sort();
let mut bank_maps = vec![];
let status_cache_rc = StatusCacheRc::default();
let rc = BankForks::load_snapshots(&names, &mut bank_maps, &status_cache_rc, snapshot_path);
let rc = BankForks::load_snapshots(&genesis_block, &names, &mut bank_maps, &status_cache_rc, snapshot_path);
if bank_maps.is_empty() || rc.is_none() {
BankForks::remove_snapshot(0, snapshot_path);
return Err(Error::new(ErrorKind::Other, "no snapshots loaded"));
@ -476,12 +486,14 @@ mod tests {
}
}
fn restore_from_snapshot(bank_forks: BankForks, last_slot: u64) {
let new = BankForks::load_from_snapshot(&bank_forks.snapshot_path).unwrap();
fn restore_from_snapshot(genesis_block: &GenesisBlock, bank_forks: BankForks, last_slot: u64) {
let new = BankForks::load_from_snapshot(&genesis_block, &bank_forks.snapshot_path).unwrap();
for (slot, _) in new.banks.iter() {
let bank = bank_forks.banks.get(slot).unwrap().clone();
let new_bank = new.banks.get(slot).unwrap();
bank.compare_bank(&new_bank);
if *slot > 0 {
let bank = bank_forks.banks.get(slot).unwrap().clone();
let new_bank = new.banks.get(slot).unwrap();
bank.compare_bank(&new_bank);
}
}
assert_eq!(new.working_bank().slot(), last_slot);
for (slot, _) in new.banks.iter() {
@ -519,9 +531,10 @@ mod tests {
bank.freeze();
let slot = bank.slot();
bank_forks.insert(bank);
println!("add snapshot {}", slot);
bank_forks.add_snapshot(slot, 0).unwrap();
}
restore_from_snapshot(bank_forks, index);
restore_from_snapshot(&genesis_block, bank_forks, index);
}
}
}

View File

@ -298,7 +298,7 @@ fn get_bank_forks(
snapshot_path: Option<String>,
) -> (BankForks, Vec<BankForksInfo>, LeaderScheduleCache) {
if snapshot_path.is_some() {
let bank_forks = BankForks::load_from_snapshot(&snapshot_path);
let bank_forks = BankForks::load_from_snapshot(&genesis_block, &snapshot_path);
match bank_forks {
Ok(v) => {
let bank = &v.working_bank();

View File

@ -316,7 +316,11 @@ elif [[ $node_type = bootstrap_leader ]]; then
ledger_config_dir="$SOLANA_CONFIG_DIR"/bootstrap-leader-ledger
accounts_config_dir="$SOLANA_CONFIG_DIR"/bootstrap-leader-accounts
storage_keypair_path=$SOLANA_CONFIG_DIR/bootstrap-leader-storage-keypair.json
# configured_flag is removed together with the ledger, accounts and snapshot
# directories when a new genesis block is detected, so it must stay defined.
configured_flag=$SOLANA_CONFIG_DIR/bootstrap-leader.configured
# snapshot_config_dir is printed in the startup banner and passed to the
# validator via --snapshot-path.
snapshot_config_dir="$SOLANA_CONFIG_DIR"/bootstrap-leader-snapshots
default_arg --rpc-port 8899
if ((airdrops_enabled)); then
@ -338,7 +342,11 @@ elif [[ $node_type = validator ]]; then
storage_keypair_path=$SOLANA_CONFIG_DIR/validator-storage-keypair$label.json
ledger_config_dir=$SOLANA_CONFIG_DIR/validator-ledger$label
accounts_config_dir=$SOLANA_CONFIG_DIR/validator-accounts$label
# configured_flag is removed together with the ledger, accounts and snapshot
# directories when a new genesis block is detected, so it must stay defined.
configured_flag=$SOLANA_CONFIG_DIR/validator$label.configured
# snapshot_config_dir is printed in the startup banner and passed to the
# validator via --snapshot-path.
snapshot_config_dir="$SOLANA_CONFIG_DIR"/validator-snapshots$label
mkdir -p "$SOLANA_CONFIG_DIR"
[[ -r "$identity_keypair_path" ]] || $solana_keygen new -o "$identity_keypair_path"
@ -370,6 +378,7 @@ vote pubkey: $vote_pubkey
storage pubkey: $storage_pubkey
ledger: $ledger_config_dir
accounts: $accounts_config_dir
snapshots: $snapshot_config_dir
========================================================================
EOF
@ -379,6 +388,7 @@ EOF
default_arg --storage-keypair "$storage_keypair_path"
default_arg --ledger "$ledger_config_dir"
default_arg --accounts "$accounts_config_dir"
default_arg --snapshot-path "$snapshot_config_dir"
if [[ -n $SOLANA_CUDA ]]; then
program=$solana_validator_cuda
@ -403,7 +413,12 @@ while true; do
if [[ $node_type = bootstrap_leader ]]; then
ledger_not_setup "$SOLANA_RSYNC_CONFIG_DIR/ledger does not exist"
fi
$rsync -vPr "${rsync_entrypoint_url:?}"/config/ledger "$SOLANA_RSYNC_CONFIG_DIR"
(
set -x
$rsync -qvPr "${rsync_entrypoint_url:?}"/config/ledger "$SOLANA_RSYNC_CONFIG_DIR"
$rsync -vqPr "${rsync_entrypoint_url:?}"/config/snapshots "$SOLANA_RSYNC_CONFIG_DIR"
$rsync -vqPr "${rsync_entrypoint_url:?}"/config/accounts "$SOLANA_RSYNC_CONFIG_DIR"
) || true
fi
if new_gensis_block; then
@ -411,14 +426,23 @@ while true; do
# keypair for the node and start all over again
(
set -x
rm -rf "$ledger_config_dir" "$accounts_config_dir" "$configured_flag"
rm -rf "$ledger_config_dir" "$accounts_config_dir" "$snapshot_config_dir" "$configured_flag"
)
fi
if [[ ! -d "$ledger_config_dir" ]]; then
cp -a "$SOLANA_RSYNC_CONFIG_DIR"/ledger/ "$ledger_config_dir"
$solana_ledger_tool --ledger "$ledger_config_dir" verify
fi
(
set -x
if [[ -d "$SOLANA_RSYNC_CONFIG_DIR"/snapshots ]]; then
if [[ ! -d $snapshot_config_dir ]]; then
cp -a "$SOLANA_RSYNC_CONFIG_DIR"/snapshots/ "$snapshot_config_dir"
cp -a "$SOLANA_RSYNC_CONFIG_DIR"/accounts/ "$accounts_config_dir"
fi
fi
if [[ ! -d "$ledger_config_dir" ]]; then
cp -a "$SOLANA_RSYNC_CONFIG_DIR"/ledger/ "$ledger_config_dir"
$solana_ledger_tool --ledger "$ledger_config_dir" verify
fi
)
trap '[[ -n $pid ]] && kill "$pid" >/dev/null 2>&1 && wait "$pid"' INT TERM ERR
@ -450,9 +474,25 @@ while true; do
fi
if [[ $node_type = bootstrap_leader ]]; then
wait "$pid" || true
echo "############## $node_type exited, restarting ##############"
sleep 1
secs_to_next_sync_poll=30
while true; do
if ! kill -0 "$pid"; then
wait "$pid"
exit 0
fi
sleep 1
((secs_to_next_sync_poll--)) && continue
(
if [[ -d $snapshot_config_dir ]]; then
$rsync -qrt --delete-after "$snapshot_config_dir"/ "$SOLANA_RSYNC_CONFIG_DIR"/snapshots
$rsync -qrt --delete-after "$accounts_config_dir"/ "$SOLANA_RSYNC_CONFIG_DIR"/accounts
# $rsync -qrt --delete-after "$ledger_config_dir"/ "$SOLANA_RSYNC_CONFIG_DIR"/ledger
fi
) || true
secs_to_next_sync_poll=30
done
else
secs_to_next_genesis_poll=1
while true; do

View File

@ -190,6 +190,7 @@ impl Accounts {
Some(program) => program,
None => {
error_counters.account_not_found += 1;
info!("ancestors {:?}, accouts index {:?}, id {:?}", ancestors, accounts_index, program_id);
return Err(TransactionError::ProgramAccountNotFound);
}
};