diff --git a/incrementalmerkletree/src/lib.rs b/incrementalmerkletree/src/lib.rs index a300c67..5f00ee0 100644 --- a/incrementalmerkletree/src/lib.rs +++ b/incrementalmerkletree/src/lib.rs @@ -106,14 +106,14 @@ impl Retention { /// Returns whether the associated node has [`Retention::Marked`] retention /// or [`Retention::Checkpoint`] retention with [`Marking::Marked`] marking. pub fn is_marked(&self) -> bool { - match self { + matches!( + self, Retention::Marked - | Retention::Checkpoint { - marking: Marking::Marked, - .. - } => true, - _ => false, - } + | Retention::Checkpoint { + marking: Marking::Marked, + .. + } + ) } /// Applies the provided function to the checkpoint identifier, if any, and returns a new diff --git a/shardtree/CHANGELOG.md b/shardtree/CHANGELOG.md index 7dfcda1..5175e78 100644 --- a/shardtree/CHANGELOG.md +++ b/shardtree/CHANGELOG.md @@ -8,8 +8,22 @@ and this project adheres to Rust's notion of ## Unreleased ### Added -- `shardtree::tree::Tree::{is_leaf, map, try_map}` +- `shardtree::tree::Tree::{is_leaf, map, try_map, empty_pruned}` - `shardtree::tree::LocatedTree::{map, try_map}` +- `shardtree::prunable::PrunableTree::{has_computable_root}` + +### Changed +- `shardtree::tree::Node` has additional variant `Node::Pruned`. + +### Removed +- `shardtree::tree::Tree::is_complete` as it is no longer well-defined in the + presence of `Pruned` nodes. Use `PrunableTree::has_computable_root` to + determine whether it is possible to compute the root of a tree. + +### Fixed +- Fixes an error that could occur if an inserted `Frontier` node was + interpreted as a node that had actually had its value observed as though it + had been inserted using the ordinary tree insertion methods. ## [0.3.1] - 2024-04-03 diff --git a/shardtree/src/lib.rs b/shardtree/src/lib.rs index 39be92e..7fe6cf9 100644 --- a/shardtree/src/lib.rs +++ b/shardtree/src/lib.rs @@ -234,16 +234,17 @@ impl< let (append_result, position, checkpoint_id) = if let Some(subtree) = self.store.last_shard().map_err(ShardTreeError::Storage)? { - if subtree.root.is_complete() { - let addr = subtree.root_addr; - - if addr.index() < Self::max_subtree_index() { - LocatedTree::empty(addr.next_at_level()).append(value, retention)? - } else { - return Err(InsertionError::TreeFull.into()); + match subtree.max_position() { + // If the subtree is full, then construct a successor tree. + Some(pos) if pos == subtree.root_addr.max_position() => { + let addr = subtree.root_addr; + if subtree.root_addr.index() < Self::max_subtree_index() { + LocatedTree::empty(addr.next_at_level()).append(value, retention)? + } else { + return Err(InsertionError::TreeFull.into()); + } } - } else { - subtree.append(value, retention)? + _ => subtree.append(value, retention)?, } } else { let root_addr = Address::from_parts(Self::subtree_level(), 0); @@ -412,8 +413,8 @@ impl< root_addr: Address, root: &PrunableTree, ) -> Option<(PrunableTree, Position)> { - match root { - Tree(Node::Parent { ann, left, right }) => { + match &root.0 { + Node::Parent { ann, left, right } => { let (l_addr, r_addr) = root_addr.children().unwrap(); go(r_addr, right).map_or_else( || { @@ -442,13 +443,13 @@ impl< }, ) } - Tree(Node::Leaf { value: (h, r) }) => Some(( + Node::Leaf { value: (h, r) } => Some(( Tree(Node::Leaf { value: (h.clone(), *r | RetentionFlags::CHECKPOINT), }), root_addr.max_position(), )), - Tree(Node::Nil) => None, + Node::Nil | Node::Pruned => None, } } @@ -576,12 +577,19 @@ impl< // Prune each affected subtree for (subtree_addr, positions) in clear_positions.into_iter() { - let cleared = self + let to_clear = self .store .get_shard(subtree_addr) - .map_err(ShardTreeError::Storage)? - .map(|subtree| subtree.clear_flags(positions)); - if let Some(cleared) = cleared { + .map_err(ShardTreeError::Storage)?; + + if let Some(to_clear) = to_clear { + let pre_clearing_max_position = to_clear.max_position(); + let cleared = to_clear.clear_flags(positions); + + // Clearing flags should not modify the max position of leaves represented + // in the shard. + assert!(cleared.max_position() == pre_clearing_max_position); + self.store .put_shard(cleared) .map_err(ShardTreeError::Storage)?; @@ -757,8 +765,8 @@ impl< // roots. truncate_at: Position, ) -> Result<(H, Option>), ShardTreeError> { - match &cap.root { - Tree(Node::Parent { ann, left, right }) => { + match &cap.root.0 { + Node::Parent { ann, left, right } => { match ann { Some(cached_root) if target_addr.contains(&cap.root_addr) => { Ok((cached_root.as_ref().clone(), None)) @@ -832,7 +840,7 @@ impl< } } } - Tree(Node::Leaf { value }) => { + Node::Leaf { value } => { if truncate_at >= cap.root_addr.position_range_end() && target_addr.contains(&cap.root_addr) { @@ -857,7 +865,7 @@ impl< )) } } - Tree(Node::Nil) => { + Node::Nil | Node::Pruned => { if cap.root_addr == target_addr || cap.root_addr.level() == ShardTree::::subtree_level() { @@ -867,7 +875,7 @@ impl< Ok(( root.clone(), if truncate_at >= cap.root_addr.position_range_end() { - // return the compute root as a new leaf to be cached if it contains no + // return the computed root as a new leaf to be cached if it contains no // empty hashes due to truncation Some(Tree::leaf((root, RetentionFlags::EPHEMERAL))) } else { @@ -1301,21 +1309,24 @@ impl< #[cfg(test)] mod tests { + use std::convert::Infallible; + use assert_matches::assert_matches; use proptest::prelude::*; use incrementalmerkletree::{ - frontier::NonEmptyFrontier, + frontier::{Frontier, NonEmptyFrontier}, testing::{ arb_operation, check_append, check_checkpoint_rewind, check_operations, check_remove_mark, check_rewind_remove_mark, check_root_hashes, check_witness_consistency, check_witnesses, complete_tree::CompleteTree, CombinedTree, SipHashable, }, - Address, Hashable, Level, Marking, Position, Retention, + Address, Hashable, Level, Marking, MerklePath, Position, Retention, }; use crate::{ + error::{QueryError, ShardTreeError}, store::memory::MemoryShardStore, testing::{ arb_char_str, arb_shardtree, check_shard_sizes, check_shardtree_insertion, @@ -1420,6 +1431,95 @@ mod tests { ); } + #[test] + fn avoid_pruning_reference() { + fn test_with_marking( + frontier_marking: Marking, + ) -> Result, ShardTreeError> { + let mut tree = ShardTree::, 6, 3>::new( + MemoryShardStore::empty(), + 5, + ); + + let frontier_end = Position::from((1 << 3) - 3); + let mut f0 = Frontier::::empty(); + for c in 'a'..='f' { + f0.append(c.to_string()); + } + + let frontier = Frontier::from_parts( + frontier_end, + "f".to_owned(), + vec!["e".to_owned(), "abcd".to_owned()], + ) + .unwrap(); + + // Insert a frontier two leaves from the end of the first shard, checkpointed, + // with the specified marking. + tree.insert_frontier( + frontier, + Retention::Checkpoint { + id: 1, + marking: frontier_marking, + }, + )?; + + // Insert a few leaves beginning at the subsequent position, so as to cross the shard + // boundary. + tree.batch_insert( + frontier_end + 1, + ('g'..='j') + .into_iter() + .map(|c| (c.to_string(), Retention::Ephemeral)), + )?; + + // Trigger pruning by adding 5 more checkpoints + for i in 2..7 { + tree.checkpoint(i).unwrap(); + } + + // Insert nodes that require the pruned nodes for witnessing + tree.batch_insert( + frontier_end - 1, + ('e'..='f') + .into_iter() + .map(|c| (c.to_string(), Retention::Marked)), + )?; + + // Compute the witness + tree.witness_at_checkpoint_id(frontier_end, &6) + } + + // If we insert the frontier with Marking::None, the frontier nodes are treated + // as ephemeral nodes and are pruned, leaving an incomplete tree. + assert_matches!( + test_with_marking(Marking::None), + Err(ShardTreeError::Query(QueryError::TreeIncomplete(_))) + ); + + // If we insert the frontier with Marking::Reference, the frontier nodes will + // not be pruned on completion of the subtree, and thus we'll be able to compute + // the witness. + let expected_witness = MerklePath::from_parts( + [ + "e", + "gh", + "abcd", + "ij______", + "________________", + "________________________________", + ] + .iter() + .map(|s| s.to_string()) + .collect(), + Position::from(5), + ) + .unwrap(); + + let witness = test_with_marking(Marking::Reference).unwrap(); + assert_eq!(witness, expected_witness); + } + // Combined tree tests #[allow(clippy::type_complexity)] fn new_combined_tree( diff --git a/shardtree/src/prunable.rs b/shardtree/src/prunable.rs index 62690be..c7a5acb 100644 --- a/shardtree/src/prunable.rs +++ b/shardtree/src/prunable.rs @@ -100,12 +100,25 @@ impl PrunableTree { .map_or(false, |(_, retention)| retention.is_marked()) } + /// Returns `true` if it is possible to compute or retrieve the Merkle root of this + /// tree. + pub fn has_computable_root(&self) -> bool { + match &self.0 { + Node::Parent { ann, left, right } => { + ann.is_some() + || (left.as_ref().has_computable_root() && right.as_ref().has_computable_root()) + } + Node::Leaf { .. } => true, + Node::Nil | Node::Pruned => false, + } + } + /// Determines whether a tree has any [`Retention::Marked`] nodes. pub fn contains_marked(&self) -> bool { match &self.0 { Node::Parent { left, right, .. } => left.contains_marked() || right.contains_marked(), Node::Leaf { value: (_, r) } => r.is_marked(), - Node::Nil => false, + Node::Nil | Node::Pruned => false, } } @@ -122,8 +135,8 @@ impl PrunableTree { // so no need to inspect the tree Ok(H::empty_root(root_addr.level())) } else { - match self { - Tree(Node::Parent { ann, left, right }) => ann + match &self.0 { + Node::Parent { ann, left, right } => ann .as_ref() .filter(|_| truncate_at >= root_addr.position_range_end()) .map_or_else( @@ -145,7 +158,7 @@ impl PrunableTree { Ok(rc.as_ref().clone()) }, ), - Tree(Node::Leaf { value }) => { + Node::Leaf { value } => { if truncate_at >= root_addr.position_range_end() { // no truncation of this leaf is necessary, just use it Ok(value.0.clone()) @@ -157,7 +170,7 @@ impl PrunableTree { Err(vec![root_addr]) } } - Tree(Node::Nil) => Err(vec![root_addr]), + Node::Nil | Node::Pruned => Err(vec![root_addr]), } } } @@ -192,7 +205,7 @@ impl PrunableTree { } result } - Node::Nil => BTreeSet::new(), + Node::Nil | Node::Pruned => BTreeSet::new(), } } @@ -229,6 +242,7 @@ impl PrunableTree { let no_default_fill = addr.position_range_end(); match (t0, t1) { (Tree(Node::Nil), other) | (other, Tree(Node::Nil)) => Ok(other), + (Tree(Node::Pruned), other) | (other, Tree(Node::Pruned)) => Ok(other), (Tree(Node::Leaf { value: vl }), Tree(Node::Leaf { value: vr })) => { if vl.0 == vr.0 { // Merge the flags together. @@ -301,6 +315,8 @@ impl PrunableTree { pub(crate) fn unite(level: Level, ann: Option>, left: Self, right: Self) -> Self { match (left, right) { (Tree(Node::Nil), Tree(Node::Nil)) => Tree(Node::Nil), + (Tree(Node::Nil | Node::Pruned), Tree(Node::Pruned)) => Tree(Node::Pruned), + (Tree(Node::Pruned), Tree(Node::Nil)) => Tree(Node::Pruned), (Tree(Node::Leaf { value: lv }), Tree(Node::Leaf { value: rv })) // we can prune right-hand leaves that are not marked or reference leaves; if a // leaf is a checkpoint then that information will be propagated to the replacement @@ -518,7 +534,7 @@ impl LocatedPrunableTree { None } } - Node::Nil => None, + Node::Nil | Node::Pruned => None, } } @@ -555,7 +571,6 @@ impl LocatedPrunableTree { root_addr: Address, into: &PrunableTree, subtree: LocatedPrunableTree, - is_complete: bool, contains_marked: bool, ) -> Result<(PrunableTree, Vec), InsertionError> { trace!( @@ -571,7 +586,9 @@ impl LocatedPrunableTree { // In the case that we are replacing a node entirely, we need to extend the // subtree up to the level of the node being replaced, adding Nil siblings // and recording the presence of those incomplete nodes when necessary - let replacement = |ann: Option>, mut node: LocatedPrunableTree| { + let replacement = |ann: Option>, + mut node: LocatedPrunableTree, + pruned: bool| { // construct the replacement node bottom-up let mut incomplete = vec![]; while node.root_addr.level() < root_addr.level() { @@ -579,19 +596,21 @@ impl LocatedPrunableTree { address: node.root_addr.sibling(), required_for_witness: contains_marked, }); + let empty = Arc::new(Tree(if pruned { Node::Pruned } else { Node::Nil })); + let full = Arc::new(node.root); node = LocatedTree { root_addr: node.root_addr.parent(), root: if node.root_addr.is_right_child() { Tree(Node::Parent { ann: None, - left: Arc::new(Tree(Node::Nil)), - right: Arc::new(node.root), + left: empty, + right: full, }) } else { Tree(Node::Parent { ann: None, - left: Arc::new(node.root), - right: Arc::new(Tree(Node::Nil)), + left: full, + right: empty, }) }, }; @@ -600,7 +619,8 @@ impl LocatedPrunableTree { }; match into { - Tree(Node::Nil) => Ok(replacement(None, subtree)), + Tree(Node::Nil) => Ok(replacement(None, subtree, false)), + Tree(Node::Pruned) => Ok(replacement(None, subtree, true)), Tree(Node::Leaf { value: (value, retention), }) => { @@ -610,7 +630,7 @@ impl LocatedPrunableTree { // entirely with the subtree, or reannotate the root so as to avoid // discarding the existing leaf value. - if is_complete { + if subtree.root.has_computable_root() { Ok(( if subtree.root.is_leaf() { // When replacing a leaf with a leaf, `REFERENCE` retention @@ -635,7 +655,7 @@ impl LocatedPrunableTree { )? } else { // It is safe to replace the existing root unannotated, because we - // can always recompute the root from a complete subtree. + // can always recompute the root from the subtree. subtree.root }, vec![], @@ -655,7 +675,22 @@ impl LocatedPrunableTree { Err(InsertionError::Conflict(root_addr)) } } else { - Ok(replacement(Some(Arc::new(value.clone())), subtree)) + Ok(replacement( + Some(Arc::new(value.clone())), + subtree, + // The subtree being inserted may have its root at some level lower + // than the next level down. The siblings of nodes that will be + // generated while descending to the subtree root level will be + // `Nil` nodes (indicating that the value of these nodes have never + // been observed) if the leaf being replaced has `REFERENCE` + // retention. Any other leaf without `REFERENCE` retention will + // have been produced by pruning of previously observed node + // values, so in those cases we use `Pruned` nodes for the absent + // siblings. This allows us to retain the distinction between what + // parts of the tree have been directly observed and what parts + // have not. + !retention.contains(RetentionFlags::REFERENCE), + )) } } parent if root_addr == subtree.root_addr => { @@ -673,7 +708,7 @@ impl LocatedPrunableTree { let (l_addr, r_addr) = root_addr.children().unwrap(); if l_addr.contains(&subtree.root_addr) { let (new_left, incomplete) = - go(l_addr, left.as_ref(), subtree, is_complete, contains_marked)?; + go(l_addr, left.as_ref(), subtree, contains_marked)?; Ok(( Tree::unite( root_addr.level() - 1, @@ -684,13 +719,8 @@ impl LocatedPrunableTree { incomplete, )) } else { - let (new_right, incomplete) = go( - r_addr, - right.as_ref(), - subtree, - is_complete, - contains_marked, - )?; + let (new_right, incomplete) = + go(r_addr, right.as_ref(), subtree, contains_marked)?; Ok(( Tree::unite( root_addr.level() - 1, @@ -710,22 +740,16 @@ impl LocatedPrunableTree { trace!( max_position = ?max_position, tree = ?self, - subtree_complete = subtree.root.is_complete(), to_insert = ?subtree, "Current shard" ); let LocatedTree { root_addr, root } = self; if root_addr.contains(&subtree.root_addr) { - let complete = subtree.root.is_complete(); - go(*root_addr, root, subtree, complete, contains_marked).map(|(root, incomplete)| { + go(*root_addr, root, subtree, contains_marked).map(|(root, incomplete)| { let new_tree = LocatedTree { root_addr: *root_addr, root, }; - trace!( - max_position = ?new_tree.max_position(), - "Replacement shard" - ); assert!(new_tree.max_position() >= max_position); (new_tree, incomplete) }) @@ -786,9 +810,7 @@ impl LocatedPrunableTree { split_at: Level, ) -> (Self, Option) { let mut addr = Address::from(position); - let mut subtree = Tree(Node::Leaf { - value: (leaf, leaf_retention.into()), - }); + let mut subtree = Tree::leaf((leaf, leaf_retention.into())); while addr.level() < split_at { if addr.is_left_child() { @@ -890,8 +912,8 @@ impl LocatedPrunableTree { // nothing to do, so we just return the root root.clone() } else { - match root { - Tree(Node::Parent { ann, left, right }) => { + match &root.0 { + Node::Parent { ann, left, right } => { let (l_addr, r_addr) = root_addr.children().unwrap(); let p = to_clear.partition_point(|(p, _)| p < &l_addr.position_range_end()); @@ -908,7 +930,7 @@ impl LocatedPrunableTree { go(&to_clear[p..], r_addr, right), ) } - Tree(Node::Leaf { value: (h, r) }) => { + Node::Leaf { value: (h, r) } => { trace!("In {:?}, clearing {:?}", root_addr, to_clear); // When we reach a leaf, we should be down to just a single position // which should correspond to the last level-0 child of the address's @@ -916,18 +938,16 @@ impl LocatedPrunableTree { // a partially-pruned branch, and if it's a marked node then it will // be a level-0 leaf. match to_clear { - [(pos, flags)] => { - assert_eq!(*pos, root_addr.max_position()); - Tree(Node::Leaf { - value: (h.clone(), *r & !*flags), - }) - } + [(_, flags)] => Tree(Node::Leaf { + value: (h.clone(), *r & !*flags), + }), _ => { panic!("Tree state inconsistent with checkpoints."); } } } - Tree(Node::Nil) => Tree(Node::Nil), + Node::Nil => Tree(Node::Nil), + Node::Pruned => Tree(Node::Pruned), } } } @@ -1129,7 +1149,7 @@ mod tests { } #[test] - fn located_insert_subtree_leaf_overwrites() { + fn located_insert_subtree_prevents_leaf_overwrite_conflict() { let t: LocatedPrunableTree = LocatedTree { root_addr: Address::from_parts(2.into(), 1), root: parent(leaf(("a".to_string(), RetentionFlags::MARKED)), nil()), diff --git a/shardtree/src/tree.rs b/shardtree/src/tree.rs index 179ef01..d0f8913 100644 --- a/shardtree/src/tree.rs +++ b/shardtree/src/tree.rs @@ -19,6 +19,9 @@ pub enum Node { Leaf { value: V }, /// The empty tree; a subtree or leaf for which no information is available. Nil, + /// An empty node in the tree created as a consequence of partial reinserion of data into a + /// subtree after the subtree was previously pruned. + Pruned, } impl Node { @@ -35,7 +38,8 @@ impl Node { match self { Node::Parent { .. } => None, Node::Leaf { value } => Some(value), - Node::Nil { .. } => None, + Node::Nil => None, + Node::Pruned => None, } } @@ -45,6 +49,7 @@ impl Node { Node::Parent { ann, .. } => Some(ann), Node::Leaf { .. } => None, Node::Nil => None, + Node::Pruned => None, } } @@ -71,6 +76,7 @@ impl<'a, C: Clone, A: Clone, V: Clone> Node { value: (*value).clone(), }, Node::Nil => Node::Nil, + Node::Pruned => Node::Pruned, } } } @@ -92,6 +98,11 @@ impl Tree { Tree(Node::Nil) } + /// Constructs the empty tree consisting of a single pruned node. + pub fn empty_pruned() -> Self { + Tree(Node::Pruned) + } + /// Constructs a tree containing a single leaf. pub fn leaf(value: V) -> Self { Tree(Node::Leaf { value }) @@ -122,18 +133,8 @@ impl Tree { matches!(&self.0, Node::Leaf { .. }) } - /// Returns `true` if no [`Node::Nil`] nodes are present in the tree, `false` otherwise. - pub fn is_complete(&self) -> bool { - match &self.0 { - Node::Parent { left, right, .. } => { - left.as_ref().is_complete() && right.as_ref().is_complete() - } - Node::Leaf { .. } => true, - Node::Nil { .. } => false, - } - } - - /// Returns a vector of the addresses of [`Node::Nil`] subtree roots within this tree. + /// Returns a vector of the addresses of [`Node::Nil`] and [`Node::Pruned`] subtree roots + /// within this tree. /// /// The given address must correspond to the root of this tree, or this method will /// yield incorrect results or may panic. @@ -154,7 +155,7 @@ impl Tree { left_incomplete } Node::Leaf { .. } => vec![], - Node::Nil => vec![root_addr], + Node::Nil | Node::Pruned => vec![root_addr], } } @@ -164,15 +165,16 @@ impl Tree { where A: Clone, { - match &self.0 { - Node::Parent { ann, left, right } => Tree(Node::Parent { + Tree(match &self.0 { + Node::Parent { ann, left, right } => Node::Parent { ann: ann.clone(), left: Arc::new(left.map(f)), right: Arc::new(right.map(f)), - }), - Node::Leaf { value } => Tree(Node::Leaf { value: f(value) }), - Node::Nil => Tree(Node::Nil), - } + }, + Node::Leaf { value } => Node::Leaf { value: f(value) }, + Node::Nil => Node::Nil, + Node::Pruned => Node::Pruned, + }) } /// Applies the provided function to each leaf of the tree and returns @@ -190,6 +192,7 @@ impl Tree { }, Node::Leaf { value } => Node::Leaf { value: f(value)? }, Node::Nil => Node::Nil, + Node::Pruned => Node::Pruned, })) } } @@ -248,7 +251,7 @@ impl LocatedTree { pub(crate) fn max_position_internal(addr: Address, root: &Tree) -> Option { match &root.0 { Node::Nil => None, - Node::Leaf { .. } => Some(addr.position_range_end() - 1), + Node::Leaf { .. } | Node::Pruned => Some(addr.position_range_end() - 1), Node::Parent { left, right, .. } => { let (l_addr, r_addr) = addr.children().unwrap(); Self::max_position_internal(r_addr, right.as_ref())