From 811d384bd45f391b082e1ff73230c6da74d55fcb Mon Sep 17 00:00:00 2001 From: Kris Nuttycombe Date: Wed, 15 May 2024 14:36:27 -0600 Subject: [PATCH 1/7] shardtree: Improve tracing --- shardtree/src/lib.rs | 33 +++++++++++++++++++++++--------- shardtree/src/prunable.rs | 40 +++++++++++++++++++++++++-------------- shardtree/src/tree.rs | 21 ++++++++++---------- 3 files changed, 61 insertions(+), 33 deletions(-) diff --git a/shardtree/src/lib.rs b/shardtree/src/lib.rs index 1f4510a..6f1e213 100644 --- a/shardtree/src/lib.rs +++ b/shardtree/src/lib.rs @@ -27,7 +27,7 @@ use either::Either; use incrementalmerkletree::frontier::Frontier; use std::collections::{BTreeMap, BTreeSet}; use std::sync::Arc; -use tracing::trace; +use tracing::{debug, trace}; use incrementalmerkletree::{ frontier::NonEmptyFrontier, Address, Hashable, Level, MerklePath, Position, Retention, @@ -271,6 +271,7 @@ impl< /// /// This method may be used to add a checkpoint for the empty tree; note that /// [`Retention::Marked`] is invalid for the empty tree. + #[tracing::instrument(skip(self, frontier, leaf_retention))] pub fn insert_frontier( &mut self, frontier: Frontier, @@ -301,6 +302,7 @@ impl< /// Add the leaf and ommers of the provided frontier as nodes within the subtree corresponding /// to the frontier's position, and update the cap to include the ommer nodes at levels greater /// than or equal to the shard height. + #[tracing::instrument(skip(self, frontier, leaf_retention))] pub fn insert_frontier_nodes( &mut self, frontier: NonEmptyFrontier, @@ -308,15 +310,24 @@ impl< ) -> Result<(), ShardTreeError> { let leaf_position = frontier.position(); let subtree_root_addr = Address::above_position(Self::subtree_level(), leaf_position); - trace!("Subtree containing nodes: {:?}", subtree_root_addr); - let (updated_subtree, supertree) = self + let current_shard = self .store .get_shard(subtree_root_addr) .map_err(ShardTreeError::Storage)? - .unwrap_or_else(|| LocatedTree::empty(subtree_root_addr)) - .insert_frontier_nodes(frontier, &leaf_retention)?; + .unwrap_or_else(|| LocatedTree::empty(subtree_root_addr)); + trace!( + max_position = ?current_shard.max_position(), + subtree = ?current_shard, + "Current shard"); + let (updated_subtree, supertree) = + current_shard.insert_frontier_nodes(frontier, &leaf_retention)?; + trace!( + max_position = ?updated_subtree.max_position(), + subtree = ?updated_subtree, + "Replacement shard" + ); self.store .put_shard(updated_subtree) .map_err(ShardTreeError::Storage)?; @@ -346,6 +357,7 @@ impl< /// Insert a tree by decomposing it into its `SHARD_HEIGHT` or smaller parts (if necessary) /// and inserting those at their appropriate locations. + #[tracing::instrument(skip(self, tree, checkpoints))] pub fn insert_tree( &mut self, tree: LocatedPrunableTree, @@ -359,6 +371,7 @@ impl< // for some inputs, and given that it is always correct to not insert an empty // subtree into `self`, we maintain the invariant by skipping empty subtrees. if subtree.root().is_empty() { + debug!("Subtree with root {:?} is empty.", subtree.root_addr); continue; } @@ -368,14 +381,15 @@ impl< let root_addr = Self::subtree_addr(subtree.root_addr.position_range_start()); let contains_marked = subtree.root.contains_marked(); - let (new_subtree, mut incomplete) = self + let current_shard = self .store .get_shard(root_addr) .map_err(ShardTreeError::Storage)? - .unwrap_or_else(|| LocatedTree::empty(root_addr)) - .insert_subtree(subtree, contains_marked)?; + .unwrap_or_else(|| LocatedTree::empty(root_addr)); + let (replacement_shard, mut incomplete) = + current_shard.insert_subtree(subtree, contains_marked)?; self.store - .put_shard(new_subtree) + .put_shard(replacement_shard) .map_err(ShardTreeError::Storage)?; all_incomplete.append(&mut incomplete); } @@ -478,6 +492,7 @@ impl< Ok(true) } + #[tracing::instrument(skip(self))] fn prune_excess_checkpoints(&mut self) -> Result<(), ShardTreeError> { let checkpoint_count = self .store diff --git a/shardtree/src/prunable.rs b/shardtree/src/prunable.rs index 673a801..ebe27cf 100644 --- a/shardtree/src/prunable.rs +++ b/shardtree/src/prunable.rs @@ -7,7 +7,7 @@ use bitflags::bitflags; use incrementalmerkletree::{ frontier::NonEmptyFrontier, Address, Hashable, Level, Position, Retention, }; -use tracing::trace; +use tracing::{trace, warn}; use crate::error::{InsertionError, QueryError}; use crate::{LocatedTree, Node, Tree}; @@ -206,6 +206,7 @@ impl PrunableTree { /// The merge operation is checked to be strictly additive and returns an error if merging /// would cause information loss or if a conflict between root hashes occurs at a node. The /// returned error contains the address of the node where such a conflict occurred. + #[tracing::instrument()] pub fn merge_checked(self, root_addr: Address, other: Self) -> Result { #[allow(clippy::type_complexity)] fn go( @@ -279,12 +280,11 @@ impl PrunableTree { } } - trace!(this = ?self, other = ?other, "Merging subtrees"); go(root_addr, self, other) } /// Unite two nodes by either constructing a new parent node, or, if both nodes are ephemeral - /// leaves or Nil, constructing a replacement root by hashing leaf values together (or a + /// leaves or Nil, constructing a replacement parent by hashing leaf values together (or a /// replacement `Nil` value). /// /// `level` must be the level of the two nodes that are being joined. @@ -579,9 +579,10 @@ impl LocatedPrunableTree { }; trace!( - "Node at {:?} contains subtree at {:?}", - root_addr, - subtree.root_addr(), + root_addr = ?root_addr, + max_position = ?LocatedTree::max_position_internal(root_addr, into), + to_insert = ?subtree.root_addr(), + "Subtree insert" ); match into { Tree(Node::Nil) => Ok(replacement(None, subtree)), @@ -656,17 +657,28 @@ impl LocatedPrunableTree { } } + let max_position = self.max_position(); + trace!( + max_position = ?max_position, + tree = ?self, + subtree_complete = subtree.root.is_complete(), + to_insert = ?subtree, + "Current shard" + ); let LocatedTree { root_addr, root } = self; if root_addr.contains(&subtree.root_addr) { let complete = subtree.root.is_complete(); go(*root_addr, root, subtree, complete, contains_marked).map(|(root, incomplete)| { - ( - LocatedTree { - root_addr: *root_addr, - root, - }, - incomplete, - ) + let new_tree = LocatedTree { + root_addr: *root_addr, + root, + }; + trace!( + max_position = ?new_tree.max_position(), + "Replacement shard" + ); + assert!(new_tree.max_position() >= max_position); + (new_tree, incomplete) }) } else { Err(InsertionError::NotContained(subtree.root_addr)) @@ -835,7 +847,7 @@ impl LocatedPrunableTree { let p = to_clear.partition_point(|(p, _)| p < &l_addr.position_range_end()); trace!( - "In {:?}, partitioned: {:?} {:?}", + "Tree::unite at {:?}, partitioned: {:?} {:?}", root_addr, &to_clear[0..p], &to_clear[p..], diff --git a/shardtree/src/tree.rs b/shardtree/src/tree.rs index 5a87eae..54ad57e 100644 --- a/shardtree/src/tree.rs +++ b/shardtree/src/tree.rs @@ -202,18 +202,19 @@ impl LocatedTree { /// Note that no actual leaf value may exist at this position, as it may have previously been /// pruned. pub fn max_position(&self) -> Option { - fn go(addr: Address, root: &Tree) -> Option { - match &root.0 { - Node::Nil => None, - Node::Leaf { .. } => Some(addr.position_range_end() - 1), - Node::Parent { left, right, .. } => { - let (l_addr, r_addr) = addr.children().unwrap(); - go(r_addr, right.as_ref()).or_else(|| go(l_addr, left.as_ref())) - } + Self::max_position_internal(self.root_addr, &self.root) + } + + pub(crate) fn max_position_internal(addr: Address, root: &Tree) -> Option { + match &root.0 { + Node::Nil => None, + Node::Leaf { .. } => Some(addr.position_range_end() - 1), + Node::Parent { left, right, .. } => { + let (l_addr, r_addr) = addr.children().unwrap(); + Self::max_position_internal(r_addr, right.as_ref()) + .or_else(|| Self::max_position_internal(l_addr, left.as_ref())) } } - - go(self.root_addr, &self.root) } /// Returns the value at the specified position, if any. From 57b6e8999fe467a7a63145e6f9a5c20509aeef20 Mon Sep 17 00:00:00 2001 From: Kris Nuttycombe Date: Wed, 15 May 2024 14:36:27 -0600 Subject: [PATCH 2/7] shardtree: `Nil` nodes cannot replace any other sort of node in the tree. --- shardtree/src/prunable.rs | 195 +++++++++++++++++++------------------- 1 file changed, 100 insertions(+), 95 deletions(-) diff --git a/shardtree/src/prunable.rs b/shardtree/src/prunable.rs index ebe27cf..1054c50 100644 --- a/shardtree/src/prunable.rs +++ b/shardtree/src/prunable.rs @@ -547,111 +547,116 @@ impl LocatedPrunableTree { is_complete: bool, contains_marked: bool, ) -> Result<(PrunableTree, Vec), InsertionError> { - // In the case that we are replacing a node entirely, we need to extend the - // subtree up to the level of the node being replaced, adding Nil siblings - // and recording the presence of those incomplete nodes when necessary - let replacement = |ann: Option>, mut node: LocatedPrunableTree| { - // construct the replacement node bottom-up - let mut incomplete = vec![]; - while node.root_addr.level() < root_addr.level() { - incomplete.push(IncompleteAt { - address: node.root_addr.sibling(), - required_for_witness: contains_marked, - }); - node = LocatedTree { - root_addr: node.root_addr.parent(), - root: if node.root_addr.is_right_child() { - Tree(Node::Parent { - ann: None, - left: Arc::new(Tree(Node::Nil)), - right: Arc::new(node.root), - }) - } else { - Tree(Node::Parent { - ann: None, - left: Arc::new(node.root), - right: Arc::new(Tree(Node::Nil)), - }) - }, - }; - } - (node.root.reannotate_root(ann), incomplete) - }; - trace!( root_addr = ?root_addr, max_position = ?LocatedTree::max_position_internal(root_addr, into), to_insert = ?subtree.root_addr(), "Subtree insert" ); - match into { - Tree(Node::Nil) => Ok(replacement(None, subtree)), - Tree(Node::Leaf { value: (value, _) }) => { - if root_addr == subtree.root_addr { - if is_complete { - // It is safe to replace the existing root unannotated, because we - // can always recompute the root from a complete subtree. - Ok((subtree.root, vec![])) - } else if subtree.root.node_value().iter().all(|v| v == &value) { + // An empty tree cannot replace any other type of tree. + if subtree.root().is_nil() { + Ok((into.clone(), vec![])) + } else { + // In the case that we are replacing a node entirely, we need to extend the + // subtree up to the level of the node being replaced, adding Nil siblings + // and recording the presence of those incomplete nodes when necessary + let replacement = |ann: Option>, mut node: LocatedPrunableTree| { + // construct the replacement node bottom-up + let mut incomplete = vec![]; + while node.root_addr.level() < root_addr.level() { + incomplete.push(IncompleteAt { + address: node.root_addr.sibling(), + required_for_witness: contains_marked, + }); + node = LocatedTree { + root_addr: node.root_addr.parent(), + root: if node.root_addr.is_right_child() { + Tree(Node::Parent { + ann: None, + left: Arc::new(Tree(Node::Nil)), + right: Arc::new(node.root), + }) + } else { + Tree(Node::Parent { + ann: None, + left: Arc::new(node.root), + right: Arc::new(Tree(Node::Nil)), + }) + }, + }; + } + (node.root.reannotate_root(ann), incomplete) + }; + + match into { + Tree(Node::Nil) => Ok(replacement(None, subtree)), + Tree(Node::Leaf { value: (value, _) }) => { + if root_addr == subtree.root_addr { + if is_complete { + // It is safe to replace the existing root unannotated, because we + // can always recompute the root from a complete subtree. + Ok((subtree.root, vec![])) + } else if subtree.root.node_value().iter().all(|v| v == &value) { + Ok(( + // at this point we statically know the root to be a parent + subtree.root.reannotate_root(Some(Arc::new(value.clone()))), + vec![], + )) + } else { + warn!( + cur_root = ?value, + new_root = ?subtree.root.node_value(), + "Insertion conflict", + ); + Err(InsertionError::Conflict(root_addr)) + } + } else { + Ok(replacement(Some(Arc::new(value.clone())), subtree)) + } + } + parent if root_addr == subtree.root_addr => { + // Merge the existing subtree with the subtree being inserted. + // A merge operation can't introduce any new incomplete roots. + parent + .clone() + .merge_checked(root_addr, subtree.root) + .map_err(InsertionError::Conflict) + .map(|tree| (tree, vec![])) + } + Tree(Node::Parent { ann, left, right }) => { + // In this case, we have an existing parent but we need to dig down farther + // before we can insert the subtree that we're carrying for insertion. + let (l_addr, r_addr) = root_addr.children().unwrap(); + if l_addr.contains(&subtree.root_addr) { + let (new_left, incomplete) = + go(l_addr, left.as_ref(), subtree, is_complete, contains_marked)?; Ok(( - // at this point we statically know the root to be a parent - subtree.root.reannotate_root(Some(Arc::new(value.clone()))), - vec![], + Tree::unite( + root_addr.level() - 1, + ann.clone(), + new_left, + right.as_ref().clone(), + ), + incomplete, )) } else { - trace!( - cur_root = ?value, - new_root = ?subtree.root.node_value(), - "Insertion conflict", - ); - Err(InsertionError::Conflict(root_addr)) + let (new_right, incomplete) = go( + r_addr, + right.as_ref(), + subtree, + is_complete, + contains_marked, + )?; + Ok(( + Tree::unite( + root_addr.level() - 1, + ann.clone(), + left.as_ref().clone(), + new_right, + ), + incomplete, + )) } - } else { - Ok(replacement(Some(Arc::new(value.clone())), subtree)) - } - } - parent if root_addr == subtree.root_addr => { - // Merge the existing subtree with the subtree being inserted. - // A merge operation can't introduce any new incomplete roots. - parent - .clone() - .merge_checked(root_addr, subtree.root) - .map_err(InsertionError::Conflict) - .map(|tree| (tree, vec![])) - } - Tree(Node::Parent { ann, left, right }) => { - // In this case, we have an existing parent but we need to dig down farther - // before we can insert the subtree that we're carrying for insertion. - let (l_addr, r_addr) = root_addr.children().unwrap(); - if l_addr.contains(&subtree.root_addr) { - let (new_left, incomplete) = - go(l_addr, left.as_ref(), subtree, is_complete, contains_marked)?; - Ok(( - Tree::unite( - root_addr.level() - 1, - ann.clone(), - new_left, - right.as_ref().clone(), - ), - incomplete, - )) - } else { - let (new_right, incomplete) = go( - r_addr, - right.as_ref(), - subtree, - is_complete, - contains_marked, - )?; - Ok(( - Tree::unite( - root_addr.level() - 1, - ann.clone(), - left.as_ref().clone(), - new_right, - ), - incomplete, - )) } } } From e55ff2d7f2a9b135567d7647b91e9e2e9817ff08 Mon Sep 17 00:00:00 2001 From: Kris Nuttycombe Date: Mon, 20 May 2024 11:14:40 -0600 Subject: [PATCH 3/7] incrementalmerkletree: add `Reference` retention type. This new retention type is intended to be used when inserting frontiers that should not automatically be pruned. Also, improve documentation for the `Retention` type. --- incrementalmerkletree/CHANGELOG.md | 9 ++++ incrementalmerkletree/src/lib.rs | 49 ++++++++++++++++--- incrementalmerkletree/src/testing.rs | 42 ++++++++++------ .../src/testing/complete_tree.rs | 7 +-- shardtree/src/legacy.rs | 6 ++- shardtree/src/lib.rs | 20 ++++---- shardtree/src/prunable.rs | 24 ++++++--- shardtree/src/store/caching.rs | 28 +++++------ shardtree/src/testing.rs | 21 +++++--- 9 files changed, 143 insertions(+), 63 deletions(-) diff --git a/incrementalmerkletree/CHANGELOG.md b/incrementalmerkletree/CHANGELOG.md index 81e5fd3..efa8906 100644 --- a/incrementalmerkletree/CHANGELOG.md +++ b/incrementalmerkletree/CHANGELOG.md @@ -7,6 +7,15 @@ and this project adheres to Rust's notion of ## Unreleased +### Added +- `incrementalmerkletree::Marking` + +### Changed +- `incrementalmerkletree::Retention` + - Has added variant `Retention::Reference` + - `Retention::Checkpoint::is_marked` has been replaced by `Retention::Checkpoint::marking` + to permit checkpoints with `Reference` retention to be represented. + ## [0.5.1] - 2024-03-25 ### Added diff --git a/incrementalmerkletree/src/lib.rs b/incrementalmerkletree/src/lib.rs index 2391dba..a300c67 100644 --- a/incrementalmerkletree/src/lib.rs +++ b/incrementalmerkletree/src/lib.rs @@ -62,35 +62,72 @@ pub mod witness; #[cfg_attr(docsrs, doc(cfg(feature = "test-dependencies")))] pub mod testing; +/// An enumeration of the additional marking states that can be applied +/// to leaves with [`Retention::Checkpoint`] retention. +#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord)] +pub enum Marking { + /// A checkpoint with `Marked` marking will have `Marked` retention after `Checkpoint` + /// retention is removed. + Marked, + /// A checkpoint with `Reference` marking will have `Reference` retention after `Checkpoint` + /// retention is removed. + Reference, + /// A checkpoint with `None` marking will have `Ephemeral` retention after `Checkpoint` + /// retention is removed. + None, +} + /// A type for metadata that is used to determine when and how a leaf can be pruned from a tree. #[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord)] pub enum Retention { + /// A leaf with `Ephemeral` retention will be pruned whenever its sibling is also a leaf with + /// `Ephemeral` retention. Ephemeral, - Checkpoint { id: C, is_marked: bool }, + /// A leaf with `Checkpoint` retention will be retained in the tree during pruning. If + /// `Checkpoint` retention is removed from the leaf, then the retention for the leaf will + /// become either `Ephemeral`, `Marked`, or `Reference` depending upon the value of the + /// `marking` field. + Checkpoint { id: C, marking: Marking }, + /// A leaf with `Marked` retention will be retained in the tree during pruning. `Marked` + /// retention may only be explicitly removed. Marked, + /// A leaf with `Reference` retention will be retained in the tree during pruning. `Reference` + /// retention is removed whenever the associated leaf is overwritten with a tree node having + /// `Ephemeral`, `Checkpoint`, or `Marked` retention. + Reference, } impl Retention { + /// Returns whether the associated node has [`Retention::Checkpoint`] retention. pub fn is_checkpoint(&self) -> bool { matches!(self, Retention::Checkpoint { .. }) } + /// Returns whether the associated node has [`Retention::Marked`] retention + /// or [`Retention::Checkpoint`] retention with [`Marking::Marked`] marking. pub fn is_marked(&self) -> bool { match self { - Retention::Ephemeral => false, - Retention::Checkpoint { is_marked, .. } => *is_marked, - Retention::Marked => true, + Retention::Marked + | Retention::Checkpoint { + marking: Marking::Marked, + .. + } => true, + _ => false, } } + /// Applies the provided function to the checkpoint identifier, if any, and returns a new + /// `Retention` value having the same structure with the resulting value used as the checkpoint + /// identifier. pub fn map<'a, D, F: Fn(&'a C) -> D>(&'a self, f: F) -> Retention { match self { Retention::Ephemeral => Retention::Ephemeral, - Retention::Checkpoint { id, is_marked } => Retention::Checkpoint { + Retention::Checkpoint { id, marking } => Retention::Checkpoint { id: f(id), - is_marked: *is_marked, + marking: *marking, }, Retention::Marked => Retention::Marked, + Retention::Reference => Retention::Reference, } } } diff --git a/incrementalmerkletree/src/testing.rs b/incrementalmerkletree/src/testing.rs index f1232e8..0a724d2 100644 --- a/incrementalmerkletree/src/testing.rs +++ b/incrementalmerkletree/src/testing.rs @@ -5,7 +5,7 @@ use core::marker::PhantomData; use proptest::prelude::*; use std::collections::BTreeSet; -use crate::{Hashable, Level, Position, Retention}; +use crate::{Hashable, Level, Marking, Position, Retention}; pub mod complete_tree; @@ -208,10 +208,22 @@ impl Operation { } } +/// Returns a strategy for creating a uniformly-distributed [`Marking`] +/// value. +pub fn arb_marking() -> impl Strategy { + prop_oneof![ + Just(Marking::Marked), + Just(Marking::Reference), + Just(Marking::None) + ] +} + +/// Returns a strategy for creating a uniformly-distributed [`Retention`] +/// value. pub fn arb_retention() -> impl Strategy> { prop_oneof![ Just(Retention::Ephemeral), - any::().prop_map(|is_marked| Retention::Checkpoint { id: (), is_marked }), + arb_marking().prop_map(|marking| Retention::Checkpoint { id: (), marking }), Just(Retention::Marked), ] } @@ -557,7 +569,7 @@ pub fn check_root_hashes, F: F 0, Retention::Checkpoint { id: 1, - is_marked: true, + marking: Marking::Marked, }, ); for _ in 0..3 { @@ -735,7 +747,7 @@ pub fn check_witnesses, F: Fn( 0, Checkpoint { id: 1, - is_marked: true, + marking: Marking::Marked, }, ); assert!(tree.rewind()); @@ -769,7 +781,7 @@ pub fn check_witnesses, F: Fn( 6, Checkpoint { id: 1, - is_marked: true, + marking: Marking::Marked, }, ); tree.assert_append(7, Ephemeral); @@ -838,7 +850,7 @@ pub fn check_witnesses, F: Fn( H::from_u64(3), Checkpoint { id: C::from_u64(1), - is_marked: true, + marking: Marking::Marked, }, ), Append(H::from_u64(4), Marked), @@ -847,21 +859,21 @@ pub fn check_witnesses, F: Fn( H::from_u64(5), Checkpoint { id: C::from_u64(3), - is_marked: false, + marking: Marking::None, }, ), Append( H::from_u64(6), Checkpoint { id: C::from_u64(4), - is_marked: false, + marking: Marking::None, }, ), Append( H::from_u64(7), Checkpoint { id: C::from_u64(5), - is_marked: false, + marking: Marking::None, }, ), Witness(3u64.into(), 5), @@ -890,7 +902,7 @@ pub fn check_witnesses, F: Fn( H::from_u64(0), Checkpoint { id: C::from_u64(1), - is_marked: true, + marking: Marking::Marked, }, ), Append(H::from_u64(0), Ephemeral), @@ -900,7 +912,7 @@ pub fn check_witnesses, F: Fn( H::from_u64(0), Checkpoint { id: C::from_u64(2), - is_marked: false, + marking: Marking::None, }, ), Append(H::from_u64(0), Ephemeral), @@ -939,7 +951,7 @@ pub fn check_witnesses, F: Fn( H::from_u64(0), Checkpoint { id: C::from_u64(4), - is_marked: false, + marking: Marking::None, }, ), Rewind, @@ -958,14 +970,14 @@ pub fn check_witnesses, F: Fn( H::from_u64(0), Checkpoint { id: C::from_u64(1), - is_marked: true, + marking: Marking::Marked, }, ), Append( H::from_u64(0), Checkpoint { id: C::from_u64(4), - is_marked: false, + marking: Marking::None, }, ), Witness(Position(2), 2), @@ -1034,7 +1046,7 @@ pub fn check_remove_mark, F: Fn(usize) -> "a", Retention::Checkpoint { id: C::from_u64(1), - is_marked: true, + marking: Marking::Marked, }, ), witness(1, 1), diff --git a/incrementalmerkletree/src/testing/complete_tree.rs b/incrementalmerkletree/src/testing/complete_tree.rs index e6984ae..79c189d 100644 --- a/incrementalmerkletree/src/testing/complete_tree.rs +++ b/incrementalmerkletree/src/testing/complete_tree.rs @@ -2,6 +2,7 @@ use std::cmp::min; use std::collections::{BTreeMap, BTreeSet}; +use crate::Marking; use crate::{testing::Tree, Hashable, Level, Position, Retention}; const MAX_COMPLETE_SIZE_ERROR: &str = "Positions of a `CompleteTree` must fit into the platform word size, because larger complete trees are not representable."; @@ -104,11 +105,11 @@ impl CompleteTr append(&mut self.leaves, value, DEPTH)?; self.mark(); } - Retention::Checkpoint { id, is_marked } => { + Retention::Checkpoint { id, marking } => { let latest_checkpoint = self.checkpoints.keys().rev().next(); if Some(&id) > latest_checkpoint { append(&mut self.leaves, value, DEPTH)?; - if is_marked { + if marking == Marking::Marked { self.mark(); } self.checkpoint(id, self.current_position()); @@ -119,7 +120,7 @@ impl CompleteTr }); } } - Retention::Ephemeral => { + Retention::Ephemeral | Retention::Reference => { append(&mut self.leaves, value, DEPTH)?; } } diff --git a/shardtree/src/legacy.rs b/shardtree/src/legacy.rs index 3957061..19e6818 100644 --- a/shardtree/src/legacy.rs +++ b/shardtree/src/legacy.rs @@ -1,6 +1,8 @@ use std::fmt; -use incrementalmerkletree::{witness::IncrementalWitness, Address, Hashable, Level, Retention}; +use incrementalmerkletree::{ + witness::IncrementalWitness, Address, Hashable, Level, Marking, Retention, +}; use crate::{ store::ShardStore, InsertionError, LocatedPrunableTree, LocatedTree, PrunableTree, @@ -198,7 +200,7 @@ impl LocatedPrunableTree { c.ommers_iter().cloned(), &Retention::Checkpoint { id: checkpoint_id, - is_marked: false, + marking: Marking::None, }, self.root_addr.level(), ) diff --git a/shardtree/src/lib.rs b/shardtree/src/lib.rs index 6f1e213..39be92e 100644 --- a/shardtree/src/lib.rs +++ b/shardtree/src/lib.rs @@ -25,6 +25,7 @@ use core::fmt::Debug; use either::Either; use incrementalmerkletree::frontier::Frontier; +use incrementalmerkletree::Marking; use std::collections::{BTreeMap, BTreeSet}; use std::sync::Arc; use tracing::{debug, trace}; @@ -281,18 +282,19 @@ impl< self.insert_frontier_nodes(nonempty_frontier, leaf_retention) } else { match leaf_retention { - Retention::Ephemeral => Ok(()), + Retention::Ephemeral | Retention::Reference => Ok(()), Retention::Checkpoint { id, - is_marked: false, + marking: Marking::None | Marking::Reference, } => self .store .add_checkpoint(id, Checkpoint::tree_empty()) .map_err(ShardTreeError::Storage), - Retention::Checkpoint { - is_marked: true, .. - } - | Retention::Marked => Err(ShardTreeError::Insert( + Retention::Marked + | Retention::Checkpoint { + marking: Marking::Marked, + .. + } => Err(ShardTreeError::Insert( InsertionError::MarkedRetentionInvalid, )), } @@ -344,7 +346,7 @@ impl< .map_err(ShardTreeError::Storage)?; } - if let Retention::Checkpoint { id, is_marked: _ } = leaf_retention { + if let Retention::Checkpoint { id, .. } = leaf_retention { trace!("Adding checkpoint {:?} at {:?}", id, leaf_position); self.store .add_checkpoint(id, Checkpoint::at_position(leaf_position)) @@ -1310,7 +1312,7 @@ mod tests { check_witness_consistency, check_witnesses, complete_tree::CompleteTree, CombinedTree, SipHashable, }, - Address, Hashable, Level, Position, Retention, + Address, Hashable, Level, Marking, Position, Retention, }; use crate::{ @@ -1411,7 +1413,7 @@ mod tests { 'd'.to_string(), Retention::Checkpoint { id: 11, - is_marked: false + marking: Marking::None }, ), Ok(()), diff --git a/shardtree/src/prunable.rs b/shardtree/src/prunable.rs index 1054c50..b50e84f 100644 --- a/shardtree/src/prunable.rs +++ b/shardtree/src/prunable.rs @@ -4,6 +4,7 @@ use std::collections::{BTreeMap, BTreeSet}; use std::sync::Arc; use bitflags::bitflags; +use incrementalmerkletree::Marking; use incrementalmerkletree::{ frontier::NonEmptyFrontier, Address, Hashable, Level, Position, Retention, }; @@ -30,6 +31,12 @@ bitflags! { /// A leaf with `MARKED` retention can be pruned only as a consequence of an explicit deletion /// action. const MARKED = 0b00000010; + + /// A leaf with `REFERENCE` retention will not be considered prunable until the `REFERENCE` + /// flag is removed from the leaf. The `REFERENCE` flag will be removed at any point that + /// the leaf is overwritten without `REFERENCE` retention, and `REFERENCE` retention cannot + /// be added to an existing leaf. + const REFERENCE = 0b00000100; } } @@ -47,14 +54,17 @@ impl<'a, C> From<&'a Retention> for RetentionFlags { fn from(retention: &'a Retention) -> Self { match retention { Retention::Ephemeral => RetentionFlags::EPHEMERAL, - Retention::Checkpoint { is_marked, .. } => { - if *is_marked { - RetentionFlags::CHECKPOINT | RetentionFlags::MARKED - } else { - RetentionFlags::CHECKPOINT - } - } + Retention::Checkpoint { + marking: Marking::Marked, + .. + } => RetentionFlags::CHECKPOINT | RetentionFlags::MARKED, + Retention::Checkpoint { + marking: Marking::Reference, + .. + } => RetentionFlags::CHECKPOINT | RetentionFlags::REFERENCE, + Retention::Checkpoint { .. } => RetentionFlags::CHECKPOINT, Retention::Marked => RetentionFlags::MARKED, + Retention::Reference => RetentionFlags::REFERENCE, } } } diff --git a/shardtree/src/store/caching.rs b/shardtree/src/store/caching.rs index ab898fe..104106d 100644 --- a/shardtree/src/store/caching.rs +++ b/shardtree/src/store/caching.rs @@ -219,7 +219,7 @@ mod tests { append_str, check_operations, unmark, witness, CombinedTree, Operation, TestHashable, Tree, }, - Hashable, Position, Retention, + Hashable, Marking, Position, Retention, }; use super::CachingShardStore; @@ -300,7 +300,7 @@ mod tests { String::from_u64(0), Checkpoint { id: 1, - is_marked: true, + marking: Marking::Marked, }, )); for _ in 0..3 { @@ -542,7 +542,7 @@ mod tests { String::from_u64(0), Checkpoint { id: 1, - is_marked: true, + marking: Marking::Marked, }, )); assert!(tree.rewind()); @@ -584,7 +584,7 @@ mod tests { String::from_u64(6), Checkpoint { id: 1, - is_marked: true, + marking: Marking::Marked, }, )); assert!(tree.append(String::from_u64(7), Ephemeral)); @@ -671,7 +671,7 @@ mod tests { String::from_u64(3), Checkpoint { id: 1, - is_marked: true, + marking: Marking::Marked, }, ), Append(String::from_u64(4), Marked), @@ -680,21 +680,21 @@ mod tests { String::from_u64(5), Checkpoint { id: 3, - is_marked: false, + marking: Marking::None, }, ), Append( String::from_u64(6), Checkpoint { id: 4, - is_marked: false, + marking: Marking::None, }, ), Append( String::from_u64(7), Checkpoint { id: 5, - is_marked: false, + marking: Marking::None, }, ), Witness(3u64.into(), 5), @@ -732,7 +732,7 @@ mod tests { String::from_u64(0), Checkpoint { id: 1, - is_marked: true, + marking: Marking::Marked, }, ), Append(String::from_u64(0), Ephemeral), @@ -742,7 +742,7 @@ mod tests { String::from_u64(0), Checkpoint { id: 2, - is_marked: false, + marking: Marking::None, }, ), Append(String::from_u64(0), Ephemeral), @@ -790,7 +790,7 @@ mod tests { String::from_u64(0), Checkpoint { id: 4, - is_marked: false, + marking: Marking::None, }, ), Rewind, @@ -818,14 +818,14 @@ mod tests { String::from_u64(0), Checkpoint { id: 1, - is_marked: true, + marking: Marking::Marked, }, ), Append( String::from_u64(0), Checkpoint { id: 4, - is_marked: false, + marking: Marking::None, }, ), Witness(Position::from(2), 2), @@ -962,7 +962,7 @@ mod tests { "a", Retention::Checkpoint { id: 1, - is_marked: true, + marking: Marking::Marked, }, ), witness(1, 1), diff --git a/shardtree/src/testing.rs b/shardtree/src/testing.rs index 4771a66..1cd3046 100644 --- a/shardtree/src/testing.rs +++ b/shardtree/src/testing.rs @@ -86,7 +86,14 @@ where leaf, match (is_checkpoint, is_marked) { (false, false) => Retention::Ephemeral, - (true, is_marked) => Retention::Checkpoint { id, is_marked }, + (true, is_marked) => Retention::Checkpoint { + id, + marking: if is_marked { + Marking::Marked + } else { + Marking::None + }, + }, (false, true) => Retention::Marked, }, ) @@ -121,10 +128,10 @@ where .map(|(id, (leaf, retention))| { let pos = Position::try_from(id).unwrap(); match retention { - Retention::Ephemeral => (), - Retention::Checkpoint { is_marked, .. } => { + Retention::Ephemeral | Retention::Reference => (), + Retention::Checkpoint { marking, .. } => { checkpoint_positions.push(pos); - if is_marked { + if marking == Marking::Marked { marked_positions.push(pos); } } @@ -234,7 +241,7 @@ pub fn check_shardtree_insertion< tree.batch_insert( Position::from(1), vec![ - ("b".to_string(), Retention::Checkpoint { id: 1, is_marked: false }), + ("b".to_string(), Retention::Checkpoint { id: 1, marking: Marking::None }), ("c".to_string(), Retention::Ephemeral), ("d".to_string(), Retention::Marked), ].into_iter() @@ -285,7 +292,7 @@ pub fn check_shardtree_insertion< Position::from(10), vec![ ("k".to_string(), Retention::Ephemeral), - ("l".to_string(), Retention::Checkpoint { id: 2, is_marked: false }), + ("l".to_string(), Retention::Checkpoint { id: 2, marking: Marking::None }), ("m".to_string(), Retention::Ephemeral), ].into_iter() ), @@ -386,7 +393,7 @@ pub fn check_witness_with_pruned_subtrees< 'c' => Retention::Marked, 'h' => Retention::Checkpoint { id: 3, - is_marked: false, + marking: Marking::None, }, _ => Retention::Ephemeral, }, From ffc087424d384071351e981845e1413054d2ecc9 Mon Sep 17 00:00:00 2001 From: Kris Nuttycombe Date: Tue, 21 May 2024 17:24:04 -0600 Subject: [PATCH 4/7] shardtree: Discard `REFERENCE` retention in leaf overwrites. Also, disallow value-conflicted overwrites at the leaf level. --- shardtree/CHANGELOG.md | 4 +++ shardtree/src/prunable.rs | 63 +++++++++++++++++++++++++++---------- shardtree/src/tree.rs | 65 +++++++++++++++++++++++++++++++++++++++ 3 files changed, 115 insertions(+), 17 deletions(-) diff --git a/shardtree/CHANGELOG.md b/shardtree/CHANGELOG.md index ec396f4..7dfcda1 100644 --- a/shardtree/CHANGELOG.md +++ b/shardtree/CHANGELOG.md @@ -7,6 +7,10 @@ and this project adheres to Rust's notion of ## Unreleased +### Added +- `shardtree::tree::Tree::{is_leaf, map, try_map}` +- `shardtree::tree::LocatedTree::{map, try_map}` + ## [0.3.1] - 2024-04-03 ### Fixed diff --git a/shardtree/src/prunable.rs b/shardtree/src/prunable.rs index b50e84f..62690be 100644 --- a/shardtree/src/prunable.rs +++ b/shardtree/src/prunable.rs @@ -302,10 +302,11 @@ impl PrunableTree { match (left, right) { (Tree(Node::Nil), Tree(Node::Nil)) => Tree(Node::Nil), (Tree(Node::Leaf { value: lv }), Tree(Node::Leaf { value: rv })) - // we can prune right-hand leaves that are not marked; if a leaf - // is a checkpoint then that information will be propagated to - // the replacement leaf - if lv.1 == RetentionFlags::EPHEMERAL && (rv.1 & RetentionFlags::MARKED) == RetentionFlags::EPHEMERAL => + // we can prune right-hand leaves that are not marked or reference leaves; if a + // leaf is a checkpoint then that information will be propagated to the replacement + // leaf + if lv.1 == RetentionFlags::EPHEMERAL && + (rv.1 & (RetentionFlags::MARKED | RetentionFlags::REFERENCE)) == RetentionFlags::EPHEMERAL => { Tree( Node::Leaf { @@ -600,12 +601,45 @@ impl LocatedPrunableTree { match into { Tree(Node::Nil) => Ok(replacement(None, subtree)), - Tree(Node::Leaf { value: (value, _) }) => { + Tree(Node::Leaf { + value: (value, retention), + }) => { if root_addr == subtree.root_addr { + // The current leaf is at the location we wish to transplant the root + // of the subtree being inserted, so we either replace the leaf + // entirely with the subtree, or reannotate the root so as to avoid + // discarding the existing leaf value. + if is_complete { - // It is safe to replace the existing root unannotated, because we - // can always recompute the root from a complete subtree. - Ok((subtree.root, vec![])) + Ok(( + if subtree.root.is_leaf() { + // When replacing a leaf with a leaf, `REFERENCE` retention + // will be discarded unless both leaves have `REFERENCE` + // retention. + subtree + .root + .try_map::<(H, RetentionFlags), InsertionError, _>( + &|(v0, ret0)| { + if v0 == value { + let retention_result: RetentionFlags = + ((*retention | *ret0) + - RetentionFlags::REFERENCE) + | (RetentionFlags::REFERENCE + & *retention + & *ret0); + Ok((value.clone(), retention_result)) + } else { + Err(InsertionError::Conflict(root_addr)) + } + }, + )? + } else { + // It is safe to replace the existing root unannotated, because we + // can always recompute the root from a complete subtree. + subtree.root + }, + vec![], + )) } else if subtree.root.node_value().iter().all(|v| v == &value) { Ok(( // at this point we statically know the root to be a parent @@ -932,7 +966,7 @@ mod tests { use super::{LocatedPrunableTree, PrunableTree, RetentionFlags}; use crate::{ - error::QueryError, + error::{InsertionError, QueryError}, tree::{ tests::{leaf, nil, parent}, LocatedTree, @@ -1101,21 +1135,16 @@ mod tests { root: parent(leaf(("a".to_string(), RetentionFlags::MARKED)), nil()), }; + let conflict_addr = Address::from_parts(1.into(), 2); assert_eq!( t.insert_subtree( LocatedTree { - root_addr: Address::from_parts(1.into(), 2), + root_addr: conflict_addr, root: leaf(("b".to_string(), RetentionFlags::EPHEMERAL)), }, false, ), - Ok(( - LocatedTree { - root_addr: Address::from_parts(2.into(), 1), - root: parent(leaf(("b".to_string(), RetentionFlags::EPHEMERAL)), nil()), - }, - vec![], - )), + Err(InsertionError::Conflict(conflict_addr)), ); } diff --git a/shardtree/src/tree.rs b/shardtree/src/tree.rs index 54ad57e..179ef01 100644 --- a/shardtree/src/tree.rs +++ b/shardtree/src/tree.rs @@ -117,6 +117,11 @@ impl Tree { Tree(self.0.reannotate(ann)) } + /// Returns `true` this is a [`Node::Leaf`], `false` otherwise. + pub fn is_leaf(&self) -> bool { + matches!(&self.0, Node::Leaf { .. }) + } + /// Returns `true` if no [`Node::Nil`] nodes are present in the tree, `false` otherwise. pub fn is_complete(&self) -> bool { match &self.0 { @@ -152,6 +157,41 @@ impl Tree { Node::Nil => vec![root_addr], } } + + /// Applies the provided function to each leaf of the tree and returns + /// a new tree having the same structure as the original. + pub fn map B>(&self, f: &F) -> Tree + where + A: Clone, + { + match &self.0 { + Node::Parent { ann, left, right } => Tree(Node::Parent { + ann: ann.clone(), + left: Arc::new(left.map(f)), + right: Arc::new(right.map(f)), + }), + Node::Leaf { value } => Tree(Node::Leaf { value: f(value) }), + Node::Nil => Tree(Node::Nil), + } + } + + /// Applies the provided function to each leaf of the tree and returns + /// a new tree having the same structure as the original, or an error + /// if any transformation of the leaf fails. + pub fn try_map Result>(&self, f: &F) -> Result, E> + where + A: Clone, + { + Ok(Tree(match &self.0 { + Node::Parent { ann, left, right } => Node::Parent { + ann: ann.clone(), + left: Arc::new(left.try_map(f)?), + right: Arc::new(right.try_map(f)?), + }, + Node::Leaf { value } => Node::Leaf { value: f(value)? }, + Node::Nil => Node::Nil, + })) + } } /// A binary Merkle tree with its root at the given address. @@ -240,6 +280,31 @@ impl LocatedTree { None } } + + /// Applies the provided function to each leaf of the tree and returns + /// a new tree having the same structure as the original. + pub fn map B>(&self, f: &F) -> LocatedTree + where + A: Clone, + { + LocatedTree { + root_addr: self.root_addr, + root: self.root.map(f), + } + } + + /// Applies the provided function to each leaf of the tree and returns + /// a new tree having the same structure as the original, or an error + /// if any transformation of the leaf fails. + pub fn try_map Result>(&self, f: &F) -> Result, E> + where + A: Clone, + { + Ok(LocatedTree { + root_addr: self.root_addr, + root: self.root.try_map(f)?, + }) + } } impl LocatedTree { From 7e48886fd326481f80dbfb0860ad46db05f4bd05 Mon Sep 17 00:00:00 2001 From: Kris Nuttycombe Date: Wed, 22 May 2024 13:48:35 -0600 Subject: [PATCH 5/7] shardtree: Add `Pruned` node type In circumstances where insertion into a subtree results in pruning, and then a subsequent insertion within the same range contains leaves that must be retained, it is necessary to be able to distinguish the maximum position among notes that have been observed but later pruned. This fixes a bug wherein an insertion into an already-pruned tree could cause the maximum position reported for the subtree to regress. --- incrementalmerkletree/src/lib.rs | 14 +-- shardtree/CHANGELOG.md | 16 +++- shardtree/src/lib.rs | 148 ++++++++++++++++++++++++++----- shardtree/src/prunable.rs | 112 +++++++++++++---------- shardtree/src/tree.rs | 45 +++++----- 5 files changed, 236 insertions(+), 99 deletions(-) diff --git a/incrementalmerkletree/src/lib.rs b/incrementalmerkletree/src/lib.rs index a300c67..5f00ee0 100644 --- a/incrementalmerkletree/src/lib.rs +++ b/incrementalmerkletree/src/lib.rs @@ -106,14 +106,14 @@ impl Retention { /// Returns whether the associated node has [`Retention::Marked`] retention /// or [`Retention::Checkpoint`] retention with [`Marking::Marked`] marking. pub fn is_marked(&self) -> bool { - match self { + matches!( + self, Retention::Marked - | Retention::Checkpoint { - marking: Marking::Marked, - .. - } => true, - _ => false, - } + | Retention::Checkpoint { + marking: Marking::Marked, + .. + } + ) } /// Applies the provided function to the checkpoint identifier, if any, and returns a new diff --git a/shardtree/CHANGELOG.md b/shardtree/CHANGELOG.md index 7dfcda1..5175e78 100644 --- a/shardtree/CHANGELOG.md +++ b/shardtree/CHANGELOG.md @@ -8,8 +8,22 @@ and this project adheres to Rust's notion of ## Unreleased ### Added -- `shardtree::tree::Tree::{is_leaf, map, try_map}` +- `shardtree::tree::Tree::{is_leaf, map, try_map, empty_pruned}` - `shardtree::tree::LocatedTree::{map, try_map}` +- `shardtree::prunable::PrunableTree::{has_computable_root}` + +### Changed +- `shardtree::tree::Node` has additional variant `Node::Pruned`. + +### Removed +- `shardtree::tree::Tree::is_complete` as it is no longer well-defined in the + presence of `Pruned` nodes. Use `PrunableTree::has_computable_root` to + determine whether it is possible to compute the root of a tree. + +### Fixed +- Fixes an error that could occur if an inserted `Frontier` node was + interpreted as a node that had actually had its value observed as though it + had been inserted using the ordinary tree insertion methods. ## [0.3.1] - 2024-04-03 diff --git a/shardtree/src/lib.rs b/shardtree/src/lib.rs index 39be92e..7fe6cf9 100644 --- a/shardtree/src/lib.rs +++ b/shardtree/src/lib.rs @@ -234,16 +234,17 @@ impl< let (append_result, position, checkpoint_id) = if let Some(subtree) = self.store.last_shard().map_err(ShardTreeError::Storage)? { - if subtree.root.is_complete() { - let addr = subtree.root_addr; - - if addr.index() < Self::max_subtree_index() { - LocatedTree::empty(addr.next_at_level()).append(value, retention)? - } else { - return Err(InsertionError::TreeFull.into()); + match subtree.max_position() { + // If the subtree is full, then construct a successor tree. + Some(pos) if pos == subtree.root_addr.max_position() => { + let addr = subtree.root_addr; + if subtree.root_addr.index() < Self::max_subtree_index() { + LocatedTree::empty(addr.next_at_level()).append(value, retention)? + } else { + return Err(InsertionError::TreeFull.into()); + } } - } else { - subtree.append(value, retention)? + _ => subtree.append(value, retention)?, } } else { let root_addr = Address::from_parts(Self::subtree_level(), 0); @@ -412,8 +413,8 @@ impl< root_addr: Address, root: &PrunableTree, ) -> Option<(PrunableTree, Position)> { - match root { - Tree(Node::Parent { ann, left, right }) => { + match &root.0 { + Node::Parent { ann, left, right } => { let (l_addr, r_addr) = root_addr.children().unwrap(); go(r_addr, right).map_or_else( || { @@ -442,13 +443,13 @@ impl< }, ) } - Tree(Node::Leaf { value: (h, r) }) => Some(( + Node::Leaf { value: (h, r) } => Some(( Tree(Node::Leaf { value: (h.clone(), *r | RetentionFlags::CHECKPOINT), }), root_addr.max_position(), )), - Tree(Node::Nil) => None, + Node::Nil | Node::Pruned => None, } } @@ -576,12 +577,19 @@ impl< // Prune each affected subtree for (subtree_addr, positions) in clear_positions.into_iter() { - let cleared = self + let to_clear = self .store .get_shard(subtree_addr) - .map_err(ShardTreeError::Storage)? - .map(|subtree| subtree.clear_flags(positions)); - if let Some(cleared) = cleared { + .map_err(ShardTreeError::Storage)?; + + if let Some(to_clear) = to_clear { + let pre_clearing_max_position = to_clear.max_position(); + let cleared = to_clear.clear_flags(positions); + + // Clearing flags should not modify the max position of leaves represented + // in the shard. + assert!(cleared.max_position() == pre_clearing_max_position); + self.store .put_shard(cleared) .map_err(ShardTreeError::Storage)?; @@ -757,8 +765,8 @@ impl< // roots. truncate_at: Position, ) -> Result<(H, Option>), ShardTreeError> { - match &cap.root { - Tree(Node::Parent { ann, left, right }) => { + match &cap.root.0 { + Node::Parent { ann, left, right } => { match ann { Some(cached_root) if target_addr.contains(&cap.root_addr) => { Ok((cached_root.as_ref().clone(), None)) @@ -832,7 +840,7 @@ impl< } } } - Tree(Node::Leaf { value }) => { + Node::Leaf { value } => { if truncate_at >= cap.root_addr.position_range_end() && target_addr.contains(&cap.root_addr) { @@ -857,7 +865,7 @@ impl< )) } } - Tree(Node::Nil) => { + Node::Nil | Node::Pruned => { if cap.root_addr == target_addr || cap.root_addr.level() == ShardTree::::subtree_level() { @@ -867,7 +875,7 @@ impl< Ok(( root.clone(), if truncate_at >= cap.root_addr.position_range_end() { - // return the compute root as a new leaf to be cached if it contains no + // return the computed root as a new leaf to be cached if it contains no // empty hashes due to truncation Some(Tree::leaf((root, RetentionFlags::EPHEMERAL))) } else { @@ -1301,21 +1309,24 @@ impl< #[cfg(test)] mod tests { + use std::convert::Infallible; + use assert_matches::assert_matches; use proptest::prelude::*; use incrementalmerkletree::{ - frontier::NonEmptyFrontier, + frontier::{Frontier, NonEmptyFrontier}, testing::{ arb_operation, check_append, check_checkpoint_rewind, check_operations, check_remove_mark, check_rewind_remove_mark, check_root_hashes, check_witness_consistency, check_witnesses, complete_tree::CompleteTree, CombinedTree, SipHashable, }, - Address, Hashable, Level, Marking, Position, Retention, + Address, Hashable, Level, Marking, MerklePath, Position, Retention, }; use crate::{ + error::{QueryError, ShardTreeError}, store::memory::MemoryShardStore, testing::{ arb_char_str, arb_shardtree, check_shard_sizes, check_shardtree_insertion, @@ -1420,6 +1431,95 @@ mod tests { ); } + #[test] + fn avoid_pruning_reference() { + fn test_with_marking( + frontier_marking: Marking, + ) -> Result, ShardTreeError> { + let mut tree = ShardTree::, 6, 3>::new( + MemoryShardStore::empty(), + 5, + ); + + let frontier_end = Position::from((1 << 3) - 3); + let mut f0 = Frontier::::empty(); + for c in 'a'..='f' { + f0.append(c.to_string()); + } + + let frontier = Frontier::from_parts( + frontier_end, + "f".to_owned(), + vec!["e".to_owned(), "abcd".to_owned()], + ) + .unwrap(); + + // Insert a frontier two leaves from the end of the first shard, checkpointed, + // with the specified marking. + tree.insert_frontier( + frontier, + Retention::Checkpoint { + id: 1, + marking: frontier_marking, + }, + )?; + + // Insert a few leaves beginning at the subsequent position, so as to cross the shard + // boundary. + tree.batch_insert( + frontier_end + 1, + ('g'..='j') + .into_iter() + .map(|c| (c.to_string(), Retention::Ephemeral)), + )?; + + // Trigger pruning by adding 5 more checkpoints + for i in 2..7 { + tree.checkpoint(i).unwrap(); + } + + // Insert nodes that require the pruned nodes for witnessing + tree.batch_insert( + frontier_end - 1, + ('e'..='f') + .into_iter() + .map(|c| (c.to_string(), Retention::Marked)), + )?; + + // Compute the witness + tree.witness_at_checkpoint_id(frontier_end, &6) + } + + // If we insert the frontier with Marking::None, the frontier nodes are treated + // as ephemeral nodes and are pruned, leaving an incomplete tree. + assert_matches!( + test_with_marking(Marking::None), + Err(ShardTreeError::Query(QueryError::TreeIncomplete(_))) + ); + + // If we insert the frontier with Marking::Reference, the frontier nodes will + // not be pruned on completion of the subtree, and thus we'll be able to compute + // the witness. + let expected_witness = MerklePath::from_parts( + [ + "e", + "gh", + "abcd", + "ij______", + "________________", + "________________________________", + ] + .iter() + .map(|s| s.to_string()) + .collect(), + Position::from(5), + ) + .unwrap(); + + let witness = test_with_marking(Marking::Reference).unwrap(); + assert_eq!(witness, expected_witness); + } + // Combined tree tests #[allow(clippy::type_complexity)] fn new_combined_tree( diff --git a/shardtree/src/prunable.rs b/shardtree/src/prunable.rs index 62690be..c7a5acb 100644 --- a/shardtree/src/prunable.rs +++ b/shardtree/src/prunable.rs @@ -100,12 +100,25 @@ impl PrunableTree { .map_or(false, |(_, retention)| retention.is_marked()) } + /// Returns `true` if it is possible to compute or retrieve the Merkle root of this + /// tree. + pub fn has_computable_root(&self) -> bool { + match &self.0 { + Node::Parent { ann, left, right } => { + ann.is_some() + || (left.as_ref().has_computable_root() && right.as_ref().has_computable_root()) + } + Node::Leaf { .. } => true, + Node::Nil | Node::Pruned => false, + } + } + /// Determines whether a tree has any [`Retention::Marked`] nodes. pub fn contains_marked(&self) -> bool { match &self.0 { Node::Parent { left, right, .. } => left.contains_marked() || right.contains_marked(), Node::Leaf { value: (_, r) } => r.is_marked(), - Node::Nil => false, + Node::Nil | Node::Pruned => false, } } @@ -122,8 +135,8 @@ impl PrunableTree { // so no need to inspect the tree Ok(H::empty_root(root_addr.level())) } else { - match self { - Tree(Node::Parent { ann, left, right }) => ann + match &self.0 { + Node::Parent { ann, left, right } => ann .as_ref() .filter(|_| truncate_at >= root_addr.position_range_end()) .map_or_else( @@ -145,7 +158,7 @@ impl PrunableTree { Ok(rc.as_ref().clone()) }, ), - Tree(Node::Leaf { value }) => { + Node::Leaf { value } => { if truncate_at >= root_addr.position_range_end() { // no truncation of this leaf is necessary, just use it Ok(value.0.clone()) @@ -157,7 +170,7 @@ impl PrunableTree { Err(vec![root_addr]) } } - Tree(Node::Nil) => Err(vec![root_addr]), + Node::Nil | Node::Pruned => Err(vec![root_addr]), } } } @@ -192,7 +205,7 @@ impl PrunableTree { } result } - Node::Nil => BTreeSet::new(), + Node::Nil | Node::Pruned => BTreeSet::new(), } } @@ -229,6 +242,7 @@ impl PrunableTree { let no_default_fill = addr.position_range_end(); match (t0, t1) { (Tree(Node::Nil), other) | (other, Tree(Node::Nil)) => Ok(other), + (Tree(Node::Pruned), other) | (other, Tree(Node::Pruned)) => Ok(other), (Tree(Node::Leaf { value: vl }), Tree(Node::Leaf { value: vr })) => { if vl.0 == vr.0 { // Merge the flags together. @@ -301,6 +315,8 @@ impl PrunableTree { pub(crate) fn unite(level: Level, ann: Option>, left: Self, right: Self) -> Self { match (left, right) { (Tree(Node::Nil), Tree(Node::Nil)) => Tree(Node::Nil), + (Tree(Node::Nil | Node::Pruned), Tree(Node::Pruned)) => Tree(Node::Pruned), + (Tree(Node::Pruned), Tree(Node::Nil)) => Tree(Node::Pruned), (Tree(Node::Leaf { value: lv }), Tree(Node::Leaf { value: rv })) // we can prune right-hand leaves that are not marked or reference leaves; if a // leaf is a checkpoint then that information will be propagated to the replacement @@ -518,7 +534,7 @@ impl LocatedPrunableTree { None } } - Node::Nil => None, + Node::Nil | Node::Pruned => None, } } @@ -555,7 +571,6 @@ impl LocatedPrunableTree { root_addr: Address, into: &PrunableTree, subtree: LocatedPrunableTree, - is_complete: bool, contains_marked: bool, ) -> Result<(PrunableTree, Vec), InsertionError> { trace!( @@ -571,7 +586,9 @@ impl LocatedPrunableTree { // In the case that we are replacing a node entirely, we need to extend the // subtree up to the level of the node being replaced, adding Nil siblings // and recording the presence of those incomplete nodes when necessary - let replacement = |ann: Option>, mut node: LocatedPrunableTree| { + let replacement = |ann: Option>, + mut node: LocatedPrunableTree, + pruned: bool| { // construct the replacement node bottom-up let mut incomplete = vec![]; while node.root_addr.level() < root_addr.level() { @@ -579,19 +596,21 @@ impl LocatedPrunableTree { address: node.root_addr.sibling(), required_for_witness: contains_marked, }); + let empty = Arc::new(Tree(if pruned { Node::Pruned } else { Node::Nil })); + let full = Arc::new(node.root); node = LocatedTree { root_addr: node.root_addr.parent(), root: if node.root_addr.is_right_child() { Tree(Node::Parent { ann: None, - left: Arc::new(Tree(Node::Nil)), - right: Arc::new(node.root), + left: empty, + right: full, }) } else { Tree(Node::Parent { ann: None, - left: Arc::new(node.root), - right: Arc::new(Tree(Node::Nil)), + left: full, + right: empty, }) }, }; @@ -600,7 +619,8 @@ impl LocatedPrunableTree { }; match into { - Tree(Node::Nil) => Ok(replacement(None, subtree)), + Tree(Node::Nil) => Ok(replacement(None, subtree, false)), + Tree(Node::Pruned) => Ok(replacement(None, subtree, true)), Tree(Node::Leaf { value: (value, retention), }) => { @@ -610,7 +630,7 @@ impl LocatedPrunableTree { // entirely with the subtree, or reannotate the root so as to avoid // discarding the existing leaf value. - if is_complete { + if subtree.root.has_computable_root() { Ok(( if subtree.root.is_leaf() { // When replacing a leaf with a leaf, `REFERENCE` retention @@ -635,7 +655,7 @@ impl LocatedPrunableTree { )? } else { // It is safe to replace the existing root unannotated, because we - // can always recompute the root from a complete subtree. + // can always recompute the root from the subtree. subtree.root }, vec![], @@ -655,7 +675,22 @@ impl LocatedPrunableTree { Err(InsertionError::Conflict(root_addr)) } } else { - Ok(replacement(Some(Arc::new(value.clone())), subtree)) + Ok(replacement( + Some(Arc::new(value.clone())), + subtree, + // The subtree being inserted may have its root at some level lower + // than the next level down. The siblings of nodes that will be + // generated while descending to the subtree root level will be + // `Nil` nodes (indicating that the value of these nodes have never + // been observed) if the leaf being replaced has `REFERENCE` + // retention. Any other leaf without `REFERENCE` retention will + // have been produced by pruning of previously observed node + // values, so in those cases we use `Pruned` nodes for the absent + // siblings. This allows us to retain the distinction between what + // parts of the tree have been directly observed and what parts + // have not. + !retention.contains(RetentionFlags::REFERENCE), + )) } } parent if root_addr == subtree.root_addr => { @@ -673,7 +708,7 @@ impl LocatedPrunableTree { let (l_addr, r_addr) = root_addr.children().unwrap(); if l_addr.contains(&subtree.root_addr) { let (new_left, incomplete) = - go(l_addr, left.as_ref(), subtree, is_complete, contains_marked)?; + go(l_addr, left.as_ref(), subtree, contains_marked)?; Ok(( Tree::unite( root_addr.level() - 1, @@ -684,13 +719,8 @@ impl LocatedPrunableTree { incomplete, )) } else { - let (new_right, incomplete) = go( - r_addr, - right.as_ref(), - subtree, - is_complete, - contains_marked, - )?; + let (new_right, incomplete) = + go(r_addr, right.as_ref(), subtree, contains_marked)?; Ok(( Tree::unite( root_addr.level() - 1, @@ -710,22 +740,16 @@ impl LocatedPrunableTree { trace!( max_position = ?max_position, tree = ?self, - subtree_complete = subtree.root.is_complete(), to_insert = ?subtree, "Current shard" ); let LocatedTree { root_addr, root } = self; if root_addr.contains(&subtree.root_addr) { - let complete = subtree.root.is_complete(); - go(*root_addr, root, subtree, complete, contains_marked).map(|(root, incomplete)| { + go(*root_addr, root, subtree, contains_marked).map(|(root, incomplete)| { let new_tree = LocatedTree { root_addr: *root_addr, root, }; - trace!( - max_position = ?new_tree.max_position(), - "Replacement shard" - ); assert!(new_tree.max_position() >= max_position); (new_tree, incomplete) }) @@ -786,9 +810,7 @@ impl LocatedPrunableTree { split_at: Level, ) -> (Self, Option) { let mut addr = Address::from(position); - let mut subtree = Tree(Node::Leaf { - value: (leaf, leaf_retention.into()), - }); + let mut subtree = Tree::leaf((leaf, leaf_retention.into())); while addr.level() < split_at { if addr.is_left_child() { @@ -890,8 +912,8 @@ impl LocatedPrunableTree { // nothing to do, so we just return the root root.clone() } else { - match root { - Tree(Node::Parent { ann, left, right }) => { + match &root.0 { + Node::Parent { ann, left, right } => { let (l_addr, r_addr) = root_addr.children().unwrap(); let p = to_clear.partition_point(|(p, _)| p < &l_addr.position_range_end()); @@ -908,7 +930,7 @@ impl LocatedPrunableTree { go(&to_clear[p..], r_addr, right), ) } - Tree(Node::Leaf { value: (h, r) }) => { + Node::Leaf { value: (h, r) } => { trace!("In {:?}, clearing {:?}", root_addr, to_clear); // When we reach a leaf, we should be down to just a single position // which should correspond to the last level-0 child of the address's @@ -916,18 +938,16 @@ impl LocatedPrunableTree { // a partially-pruned branch, and if it's a marked node then it will // be a level-0 leaf. match to_clear { - [(pos, flags)] => { - assert_eq!(*pos, root_addr.max_position()); - Tree(Node::Leaf { - value: (h.clone(), *r & !*flags), - }) - } + [(_, flags)] => Tree(Node::Leaf { + value: (h.clone(), *r & !*flags), + }), _ => { panic!("Tree state inconsistent with checkpoints."); } } } - Tree(Node::Nil) => Tree(Node::Nil), + Node::Nil => Tree(Node::Nil), + Node::Pruned => Tree(Node::Pruned), } } } @@ -1129,7 +1149,7 @@ mod tests { } #[test] - fn located_insert_subtree_leaf_overwrites() { + fn located_insert_subtree_prevents_leaf_overwrite_conflict() { let t: LocatedPrunableTree = LocatedTree { root_addr: Address::from_parts(2.into(), 1), root: parent(leaf(("a".to_string(), RetentionFlags::MARKED)), nil()), diff --git a/shardtree/src/tree.rs b/shardtree/src/tree.rs index 179ef01..d0f8913 100644 --- a/shardtree/src/tree.rs +++ b/shardtree/src/tree.rs @@ -19,6 +19,9 @@ pub enum Node { Leaf { value: V }, /// The empty tree; a subtree or leaf for which no information is available. Nil, + /// An empty node in the tree created as a consequence of partial reinserion of data into a + /// subtree after the subtree was previously pruned. + Pruned, } impl Node { @@ -35,7 +38,8 @@ impl Node { match self { Node::Parent { .. } => None, Node::Leaf { value } => Some(value), - Node::Nil { .. } => None, + Node::Nil => None, + Node::Pruned => None, } } @@ -45,6 +49,7 @@ impl Node { Node::Parent { ann, .. } => Some(ann), Node::Leaf { .. } => None, Node::Nil => None, + Node::Pruned => None, } } @@ -71,6 +76,7 @@ impl<'a, C: Clone, A: Clone, V: Clone> Node { value: (*value).clone(), }, Node::Nil => Node::Nil, + Node::Pruned => Node::Pruned, } } } @@ -92,6 +98,11 @@ impl Tree { Tree(Node::Nil) } + /// Constructs the empty tree consisting of a single pruned node. + pub fn empty_pruned() -> Self { + Tree(Node::Pruned) + } + /// Constructs a tree containing a single leaf. pub fn leaf(value: V) -> Self { Tree(Node::Leaf { value }) @@ -122,18 +133,8 @@ impl Tree { matches!(&self.0, Node::Leaf { .. }) } - /// Returns `true` if no [`Node::Nil`] nodes are present in the tree, `false` otherwise. - pub fn is_complete(&self) -> bool { - match &self.0 { - Node::Parent { left, right, .. } => { - left.as_ref().is_complete() && right.as_ref().is_complete() - } - Node::Leaf { .. } => true, - Node::Nil { .. } => false, - } - } - - /// Returns a vector of the addresses of [`Node::Nil`] subtree roots within this tree. + /// Returns a vector of the addresses of [`Node::Nil`] and [`Node::Pruned`] subtree roots + /// within this tree. /// /// The given address must correspond to the root of this tree, or this method will /// yield incorrect results or may panic. @@ -154,7 +155,7 @@ impl Tree { left_incomplete } Node::Leaf { .. } => vec![], - Node::Nil => vec![root_addr], + Node::Nil | Node::Pruned => vec![root_addr], } } @@ -164,15 +165,16 @@ impl Tree { where A: Clone, { - match &self.0 { - Node::Parent { ann, left, right } => Tree(Node::Parent { + Tree(match &self.0 { + Node::Parent { ann, left, right } => Node::Parent { ann: ann.clone(), left: Arc::new(left.map(f)), right: Arc::new(right.map(f)), - }), - Node::Leaf { value } => Tree(Node::Leaf { value: f(value) }), - Node::Nil => Tree(Node::Nil), - } + }, + Node::Leaf { value } => Node::Leaf { value: f(value) }, + Node::Nil => Node::Nil, + Node::Pruned => Node::Pruned, + }) } /// Applies the provided function to each leaf of the tree and returns @@ -190,6 +192,7 @@ impl Tree { }, Node::Leaf { value } => Node::Leaf { value: f(value)? }, Node::Nil => Node::Nil, + Node::Pruned => Node::Pruned, })) } } @@ -248,7 +251,7 @@ impl LocatedTree { pub(crate) fn max_position_internal(addr: Address, root: &Tree) -> Option { match &root.0 { Node::Nil => None, - Node::Leaf { .. } => Some(addr.position_range_end() - 1), + Node::Leaf { .. } | Node::Pruned => Some(addr.position_range_end() - 1), Node::Parent { left, right, .. } => { let (l_addr, r_addr) = addr.children().unwrap(); Self::max_position_internal(r_addr, right.as_ref()) From d40e178f8d0469b0cf0fc4d027a23c5e7581961f Mon Sep 17 00:00:00 2001 From: Kris Nuttycombe Date: Tue, 28 May 2024 19:11:09 -0600 Subject: [PATCH 6/7] shardtree: Do not unify pruned nodes with empty nodes. --- shardtree/src/prunable.rs | 2 -- 1 file changed, 2 deletions(-) diff --git a/shardtree/src/prunable.rs b/shardtree/src/prunable.rs index c7a5acb..5085e31 100644 --- a/shardtree/src/prunable.rs +++ b/shardtree/src/prunable.rs @@ -315,8 +315,6 @@ impl PrunableTree { pub(crate) fn unite(level: Level, ann: Option>, left: Self, right: Self) -> Self { match (left, right) { (Tree(Node::Nil), Tree(Node::Nil)) => Tree(Node::Nil), - (Tree(Node::Nil | Node::Pruned), Tree(Node::Pruned)) => Tree(Node::Pruned), - (Tree(Node::Pruned), Tree(Node::Nil)) => Tree(Node::Pruned), (Tree(Node::Leaf { value: lv }), Tree(Node::Leaf { value: rv })) // we can prune right-hand leaves that are not marked or reference leaves; if a // leaf is a checkpoint then that information will be propagated to the replacement From 93512976ddfeb1e1513191c1239f9c386593a02d Mon Sep 17 00:00:00 2001 From: Kris Nuttycombe Date: Thu, 30 May 2024 08:04:23 -0600 Subject: [PATCH 7/7] incrementalmerkletree: Documentation fix for `Checkpoint` retention. Co-authored-by: str4d --- incrementalmerkletree/src/lib.rs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/incrementalmerkletree/src/lib.rs b/incrementalmerkletree/src/lib.rs index 5f00ee0..ea4d009 100644 --- a/incrementalmerkletree/src/lib.rs +++ b/incrementalmerkletree/src/lib.rs @@ -83,7 +83,8 @@ pub enum Retention { /// A leaf with `Ephemeral` retention will be pruned whenever its sibling is also a leaf with /// `Ephemeral` retention. Ephemeral, - /// A leaf with `Checkpoint` retention will be retained in the tree during pruning. If + /// A leaf with `Checkpoint` retention will have its position retained in the tree + /// during pruning, but its value may be pruned (by merging with its sibling). If /// `Checkpoint` retention is removed from the leaf, then the retention for the leaf will /// become either `Ephemeral`, `Marked`, or `Reference` depending upon the value of the /// `marking` field.