excludes epoch-slots from nodes with unknown or different shred version (#17899)

Inspecting TDS gossip table shows that crds values of nodes with
different shred-versions are creeping in. Their epoch-slots are
accumulated in ClusterSlots causing bogus slots very far from current
root which are not purged and so cause ClusterSlots keep consuming more
memory:
https://github.com/solana-labs/solana/issues/17789
https://github.com/solana-labs/solana/issues/14366#issuecomment-769896036
https://github.com/solana-labs/solana/issues/14366#issuecomment-832754654

This commit updates ClusterInfo::get_epoch_slots, and discards entries
from nodes with unknown or different shred-version.

Follow up commits will patch gossip not to waste bandwidth and memory
over crds values of nodes with different shred-version.
This commit is contained in:
behzad nouri 2021-06-13 14:08:08 +00:00 committed by GitHub
parent 361c1bdd57
commit 985280ec0b
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
1 changed files with 46 additions and 0 deletions

View File

@ -1170,10 +1170,19 @@ impl ClusterInfo {
.map(map)
}
/// Returns epoch-slots inserted since the given cursor.
/// Excludes entries from nodes with unkown or different shred version.
pub fn get_epoch_slots(&self, cursor: &mut Cursor) -> Vec<EpochSlots> {
let self_shred_version = self.my_shred_version();
let gossip = self.gossip.read().unwrap();
let entries = gossip.crds.get_epoch_slots(cursor);
entries
.filter(
|entry| match gossip.crds.get_contact_info(entry.value.pubkey()) {
Some(node) => node.shred_version == self_shred_version,
None => false,
},
)
.map(|entry| match &entry.value.data {
CrdsData::EpochSlots(_, slots) => slots.clone(),
_ => panic!("this should not happen!"),
@ -3839,6 +3848,43 @@ mod tests {
let slots = cluster_info.get_epoch_slots(&mut cursor);
assert!(slots.is_empty());
// Test with different shred versions.
let mut rng = rand::thread_rng();
let node_pubkey = Pubkey::new_unique();
let mut node = ContactInfo::new_rand(&mut rng, Some(node_pubkey));
node.shred_version = 42;
let epoch_slots = EpochSlots::new_rand(&mut rng, Some(node_pubkey));
let entries = vec![
CrdsValue::new_unsigned(CrdsData::ContactInfo(node)),
CrdsValue::new_unsigned(CrdsData::EpochSlots(0, epoch_slots)),
];
{
let mut gossip = cluster_info.gossip.write().unwrap();
for entry in entries {
assert!(gossip.crds.insert(entry, /*now=*/ 0).is_ok());
}
}
// Should exclude other node's epoch-slot because of different
// shred-version.
let slots = cluster_info.get_epoch_slots(&mut Cursor::default());
assert_eq!(slots.len(), 1);
assert_eq!(slots[0].from, cluster_info.id);
// Match shred versions.
{
let mut node = cluster_info.my_contact_info.write().unwrap();
node.shred_version = 42;
}
cluster_info.push_self(
&HashMap::default(), // stakes
None, // gossip validators
);
cluster_info.flush_push_queue();
// Should now include both epoch slots.
let slots = cluster_info.get_epoch_slots(&mut Cursor::default());
assert_eq!(slots.len(), 2);
assert_eq!(slots[0].from, cluster_info.id);
assert_eq!(slots[1].from, node_pubkey);
}
#[test]