From ebe3efa135ae934089a497241f3be1e42b226328 Mon Sep 17 00:00:00 2001 From: Kris Nuttycombe Date: Fri, 13 Jan 2023 08:40:57 -0700 Subject: [PATCH] Add ShardTree types & implement append operation. --- incrementalmerkletree/src/testing.rs | 32 +- .../src/testing/complete_tree.rs | 9 +- shardtree/src/lib.rs | 598 ++++++++++++++++-- 3 files changed, 567 insertions(+), 72 deletions(-) diff --git a/incrementalmerkletree/src/testing.rs b/incrementalmerkletree/src/testing.rs index c015cf8..8435157 100644 --- a/incrementalmerkletree/src/testing.rs +++ b/incrementalmerkletree/src/testing.rs @@ -500,9 +500,37 @@ pub fn check_root_hashes, F: Fn(usize) -> T>(new_tree: F) assert_eq!(t.root(0).unwrap(), "aaaa____________"); } -pub fn check_witnesses + std::fmt::Debug, F: Fn(usize) -> T>(new_tree: F) { +/// This test expects a depth-4 tree and verifies that the tree reports itself as full after 2^4 +/// appends. +pub fn check_append + std::fmt::Debug, F: Fn(usize) -> T>(new_tree: F) { + use Retention::*; + let mut tree = new_tree(100); - tree.append("a".to_string(), Retention::Marked); + assert_eq!(tree.depth(), 4); + + // 16 appends should succeed + for i in 0..16 { + assert!(tree.append(i.to_string(), Ephemeral)); + assert_eq!(tree.current_position(), Some(Position::from(i))); + } + + // 17th append should fail + assert!(!tree.append("16".to_string(), Ephemeral)); + + // The following checks a condition on state restoration in the case that an append fails. + // We want to ensure that a failed append does not cause a loss of information. 
+ let ops = (0..17) + .map(|i| Append(i.to_string(), Ephemeral)) + .collect::>(); + let tree = new_tree(100); + check_operations(tree, &ops).unwrap(); +} + +pub fn check_witnesses + std::fmt::Debug, F: Fn(usize) -> T>(new_tree: F) { + use Retention::*; + + let mut tree = new_tree(100); + tree.append("a".to_string(), Marked); assert_eq!( tree.witness(Position::from(0), 0), Some(vec![ diff --git a/incrementalmerkletree/src/testing/complete_tree.rs b/incrementalmerkletree/src/testing/complete_tree.rs index 073da85..53a2989 100644 --- a/incrementalmerkletree/src/testing/complete_tree.rs +++ b/incrementalmerkletree/src/testing/complete_tree.rs @@ -321,8 +321,8 @@ mod tests { use super::CompleteTree; use crate::{ testing::{ - check_checkpoint_rewind, check_rewind_remove_mark, check_root_hashes, check_witnesses, - compute_root_from_witness, SipHashable, Tree, + check_append, check_checkpoint_rewind, check_rewind_remove_mark, check_root_hashes, + check_witnesses, compute_root_from_witness, SipHashable, Tree, }, Hashable, Level, Position, Retention, }; @@ -367,6 +367,11 @@ mod tests { assert_eq!(tree.root(0).unwrap(), expected); } + #[test] + fn append() { + check_append(|max_checkpoints| CompleteTree::::new(max_checkpoints, 0)); + } + #[test] fn root_hashes() { check_root_hashes(|max_checkpoints| { diff --git a/shardtree/src/lib.rs b/shardtree/src/lib.rs index c83eb34..f4811bf 100644 --- a/shardtree/src/lib.rs +++ b/shardtree/src/lib.rs @@ -1,4 +1,6 @@ +use core::convert::Infallible; use core::fmt::Debug; +use core::marker::PhantomData; use core::ops::{BitAnd, BitOr, Deref, Not, Range}; use either::Either; use std::collections::{BTreeMap, BTreeSet}; @@ -1436,6 +1438,383 @@ impl LocatedPrunableTree { } } +/// An enumeration of possible checkpoint locations. +#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord)] +pub enum TreeState { + /// Checkpoints of the empty tree. 
+ Empty, + /// Checkpoint at a (possibly pruned) leaf state corresponding to the + /// wrapped leaf position. + AtPosition(Position), +} + +#[derive(Clone, Debug)] +pub struct Checkpoint { + tree_state: TreeState, + marks_removed: BTreeSet, +} + +impl Checkpoint { + pub fn tree_empty() -> Self { + Checkpoint { + tree_state: TreeState::Empty, + marks_removed: BTreeSet::new(), + } + } + + pub fn at_position(position: Position) -> Self { + Checkpoint { + tree_state: TreeState::AtPosition(position), + marks_removed: BTreeSet::new(), + } + } + + pub fn is_tree_empty(&self) -> bool { + matches!(self.tree_state, TreeState::Empty) + } + + pub fn position(&self) -> Option { + match self.tree_state { + TreeState::Empty => None, + TreeState::AtPosition(pos) => Some(pos), + } + } +} + +/// A capability for storage of fragment subtrees of the `ShardTree` type. +/// +/// All fragment subtrees must have roots at level `SHARD_HEIGHT - 1` +pub trait ShardStore { + type Error; + + /// Returns the subtree at the given root address, if any such subtree exists. + fn get_shard(&self, shard_root: Address) -> Option<&LocatedPrunableTree>; + + /// Returns the subtree containing the maximum inserted leaf position. + fn last_shard(&self) -> Option<&LocatedPrunableTree>; + + /// Inserts or replaces the subtree having the same root address as the provided tree. + /// + /// Implementations of this method MUST enforce the constraint that the root address + /// of the provided subtree has level `SHARD_HEIGHT - 1`. + fn put_shard(&mut self, subtree: LocatedPrunableTree) -> Result<(), Self::Error>; + + /// Returns the vector of addresses corresponding to the roots of subtrees stored in this + /// store. + fn get_shard_roots(&self) -> Vec
; + + /// Removes subtrees from the underlying store having root addresses at indices greater + /// than or equal to that of the specified address. + /// + /// Implementations of this method MUST enforce the constraint that the root address + /// provided has level `SHARD_HEIGHT - 1`. + fn truncate(&mut self, from: Address) -> Result<(), Self::Error>; +} + +impl ShardStore for Vec> { + type Error = Infallible; + + fn get_shard(&self, shard_root: Address) -> Option<&LocatedPrunableTree> { + self.get(shard_root.index()) + } + + fn last_shard(&self) -> Option<&LocatedPrunableTree> { + self.last() + } + + fn put_shard(&mut self, subtree: LocatedPrunableTree) -> Result<(), Self::Error> { + let subtree_addr = subtree.root_addr; + for subtree_idx in self.last().map_or(0, |s| s.root_addr.index() + 1)..=subtree_addr.index() + { + self.push(LocatedTree { + root_addr: Address::from_parts(subtree_addr.level(), subtree_idx), + root: Tree(Node::Nil), + }) + } + self[subtree_addr.index()] = subtree; + Ok(()) + } + + fn get_shard_roots(&self) -> Vec
{ + self.iter().map(|s| s.root_addr).collect() + } + + fn truncate(&mut self, from: Address) -> Result<(), Self::Error> { + self.truncate(from.index()); + Ok(()) + } +} + +/// A left-dense, sparse binary Merkle tree of the specified depth, represented as a vector of +/// subtrees (shards) of the given maximum height. +/// +/// This tree maintains a collection of "checkpoints" which represent positions, usually near the +/// front of the tree, that are maintained such that it's possible to truncate nodes to the right +/// of the specified position. +#[derive(Debug)] +pub struct ShardTree, const DEPTH: u8, const SHARD_HEIGHT: u8> { + /// The vector of tree shards. + store: S, + /// The maximum number of checkpoints to retain before pruning. + max_checkpoints: usize, + /// A map from checkpoint identifier to the checkpoint recorded under that identifier. + checkpoints: BTreeMap, + // /// A tree that is used to cache the known roots of subtrees in the "cap" of nodes between + // /// `SHARD_HEIGHT` and `DEPTH` that are otherwise not directly represented in the tree. This + // /// cache is automatically updated when computing roots and witnesses. Leaf nodes are empty + // /// because the annotation slot is consistently used to store the subtree hashes at each node. + // cap_cache: Tree>, ()> + _hash_type: PhantomData, +} + +impl< + H: Hashable + Clone + PartialEq, + C: Clone + Ord + core::fmt::Debug, + S: ShardStore, + const DEPTH: u8, + const SHARD_HEIGHT: u8, + > ShardTree +{ + /// Creates a new empty tree. + pub fn new(store: S, max_checkpoints: usize, initial_checkpoint_id: C) -> Self { + Self { + store, + max_checkpoints, + checkpoints: BTreeMap::from([(initial_checkpoint_id, Checkpoint::tree_empty())]), + //cap_cache: Tree(None, ()) + _hash_type: PhantomData, + } + } + + /// Returns the root address of the tree. 
+ pub fn root_addr() -> Address { + Address::from_parts(Level::from(DEPTH), 0) + } + + /// Returns the fixed level of subtree roots within the vector of subtrees used as this tree's + /// representation. + pub fn subtree_level() -> Level { + Level::from(SHARD_HEIGHT - 1) + } + + /// Returns the map from checkpoint identifier to checkpoint for every retained checkpoint. + pub fn checkpoints(&self) -> &BTreeMap { + &self.checkpoints + } + + /// Returns the leaf value at the specified position, if it is a marked leaf. + pub fn get_marked_leaf(&self, position: Position) -> Option<&H> { + self.store + .get_shard(Address::above_position(Self::subtree_level(), position)) + .and_then(|t| t.value_at_position(position)) + .and_then(|(v, r)| if r.is_marked() { Some(v) } else { None }) + } + + /// Returns the positions of marked leaves in the tree. + pub fn marked_positions(&self) -> BTreeSet { + let mut result = BTreeSet::new(); + for subtree_addr in &self.store.get_shard_roots() { + if let Some(subtree) = self.store.get_shard(*subtree_addr) { + result.append(&mut subtree.marked_positions()); + } + } + result + } + + /// Inserts a new root into the tree at the given address. + /// + /// This will pad from the left until the tree's subtrees vector contains enough trees to reach + /// the specified address, which must be at the [`Self::subtree_level`] level. If a subtree + /// already exists at this address, its root will be annotated with the specified hash value. + /// + /// This will return an error if the specified hash conflicts with any existing annotation. 
+ pub fn put_root(&mut self, addr: Address, value: H) -> Result<(), InsertionError> { + let updated_subtree = match self.store.get_shard(addr) { + Some(s) if !s.root.is_nil() => s.root.node_value().map_or_else( + || { + Ok(Some( + s.clone().reannotate_root(Some(Rc::new(value.clone()))), + )) + }, + |v| { + if v == &value { + // the existing root is already correctly annotated, so no need to + // do anything + Ok(None) + } else { + // the provided value conflicts with the existing root value + Err(InsertionError::Conflict(addr)) + } + }, + ), + _ => { + // there is no existing subtree root, so construct a new one. + Ok(Some(LocatedTree { + root_addr: addr, + root: Tree(Node::Leaf { + value: (value, EPHEMERAL), + }), + })) + } + }?; + + if let Some(s) = updated_subtree { + self.store.put_shard(s).map_err(InsertionError::Storage)?; + } + + Ok(()) + } + + /// Append a single value at the first available position, or fail if the tree is full. + /// + /// Prefer to use [`Self::batch_insert`] when appending multiple values, as these operations + /// require fewer traversals of the tree than are necessary when performing multiple sequential + /// calls to [`Self::append`]. + pub fn append( + &mut self, + value: H, + retention: Retention, + ) -> Result<(), InsertionError> { + if let Retention::Checkpoint { id, .. } = &retention { + if self.checkpoints.keys().last() >= Some(id) { + return Err(InsertionError::CheckpointOutOfOrder); + } + } + + let (append_result, position, checkpoint_id) = + if let Some(subtree) = self.store.last_shard() { + if subtree.root.reduce(&is_complete) { + let addr = subtree.root_addr; + + if addr.index() + 1 >= 0x1 << (DEPTH - (SHARD_HEIGHT - 1)) { + return Err(InsertionError::OutOfRange(addr.position_range())); + } else { + LocatedTree::empty(addr.next_at_level()).append(value, retention)? + } + } else { + subtree.append(value, retention)? + } + } else { + let root_addr = Address::from_parts(Self::subtree_level(), 0); + LocatedTree::empty(root_addr).append(value, retention)? 
+ }; + + self.store + .put_shard(append_result) + .map_err(InsertionError::Storage)?; + if let Some(c) = checkpoint_id { + self.checkpoints + .insert(c, Checkpoint::at_position(position)); + } + + self.prune_excess_checkpoints() + .map_err(InsertionError::Storage)?; + + Ok(()) + } + + fn prune_excess_checkpoints(&mut self) -> Result<(), S::Error> { + if self.checkpoints.len() > self.max_checkpoints { + // Batch removals by subtree & create a list of the checkpoint identifiers that + // will be removed from the checkpoints map. + let mut checkpoints_to_delete = vec![]; + let mut clear_positions: BTreeMap> = + BTreeMap::new(); + for (cid, checkpoint) in self + .checkpoints + .iter() + .take(self.checkpoints.len() - self.max_checkpoints) + { + checkpoints_to_delete.push(cid.clone()); + + // clear the checkpoint leaf + if let TreeState::AtPosition(pos) = checkpoint.tree_state { + let subtree_addr = Address::above_position(Self::subtree_level(), pos); + clear_positions + .entry(subtree_addr) + .and_modify(|to_clear| { + to_clear + .entry(pos) + .and_modify(|flags| *flags = *flags | CHECKPOINT) + .or_insert(CHECKPOINT); + }) + .or_insert_with(|| BTreeMap::from([(pos, CHECKPOINT)])); + } + + // clear the leaves that have been marked for removal + for unmark_pos in checkpoint.marks_removed.iter() { + let subtree_addr = Address::above_position(Self::subtree_level(), *unmark_pos); + clear_positions + .entry(subtree_addr) + .and_modify(|to_clear| { + to_clear + .entry(*unmark_pos) + .and_modify(|flags| *flags = *flags | MARKED) + .or_insert(MARKED); + }) + .or_insert_with(|| BTreeMap::from([(*unmark_pos, MARKED)])); + } + } + + // Prune each affected subtree + for (subtree_addr, positions) in clear_positions.into_iter() { + let cleared = self + .store + .get_shard(subtree_addr) + .map(|subtree| subtree.clear_flags(positions)); + if let Some(cleared) = cleared { + self.store.put_shard(cleared)?; + } + } + + // Now that the leaves have been pruned, actually remove the 
checkpoints + for c in checkpoints_to_delete { + self.checkpoints.remove(&c); + } + } + + Ok(()) + } + + /// Returns the identifier and data of the checkpoint at the given depth, where depth 1 is the + /// checkpoint with the greatest identifier. Returns `None` if `checkpoint_depth == 0` or if + /// insufficient checkpoints exist to seek back to the requested depth. + pub fn checkpoint_at_depth(&self, checkpoint_depth: usize) -> Option<(&C, &Checkpoint)> { + if checkpoint_depth == 0 { + None + } else { + self.checkpoints.iter().rev().nth(checkpoint_depth - 1) + } + } + + /// Returns the position of the rightmost leaf inserted as of the given checkpoint. + /// + /// Returns the maximum leaf position if `checkpoint_depth == 0` (or `Ok(None)` in this + /// case if the tree is empty) or an error if the checkpointed position cannot be restored + /// because it has been pruned. Note that no actual level-0 leaf may exist at this position. + pub fn max_leaf_position( + &self, + checkpoint_depth: usize, + ) -> Result, QueryError> { + if checkpoint_depth == 0 { + // TODO: This relies on the invariant that the last shard in the subtrees vector is + // never created without a leaf then being added to it. However, this may be a + // difficult invariant to maintain when adding empty roots, so perhaps we need a + // better way of tracking the actual max position of the tree; we might want to + // just store it directly. + Ok(self.store.last_shard().and_then(|t| t.max_position())) + } else { + match self.checkpoint_at_depth(checkpoint_depth) { + Some((_, c)) => Ok(c.position()), + None => { + // There is no checkpoint at the specified depth, so we report it as pruned. + Err(QueryError::CheckpointPruned) + } + } + } + } +} + // We need an applicative functor for Result for this function so that we can correctly // accumulate errors, but we don't have one so we just write a special- cased version here. 
fn accumulate_result_with( @@ -1499,10 +1878,14 @@ pub mod testing { #[cfg(test)] mod tests { use crate::{ - LocatedPrunableTree, LocatedTree, Node, PrunableTree, QueryError, Tree, EPHEMERAL, MARKED, + LocatedPrunableTree, LocatedTree, Node, PrunableTree, QueryError, ShardStore, ShardTree, + Tree, EPHEMERAL, MARKED, }; use core::convert::Infallible; - use incrementalmerkletree::{Address, Level, Position, Retention}; + use incrementalmerkletree::{ + testing::{self, check_append, complete_tree::CompleteTree, CombinedTree}, + Address, Hashable, Level, Position, Retention, + }; use std::collections::BTreeSet; use std::rc::Rc; @@ -1582,6 +1965,81 @@ mod tests { ); } + #[test] + fn located_prunable_tree_insert_subtree() { + let t: LocatedPrunableTree = LocatedTree { + root_addr: Address::from_parts(3.into(), 1), + root: parent( + leaf(("abcd".to_string(), EPHEMERAL)), + parent(nil(), leaf(("gh".to_string(), EPHEMERAL))), + ), + }; + + assert_eq!( + t.insert_subtree::( + LocatedTree { + root_addr: Address::from_parts(1.into(), 6), + root: parent(leaf(("e".to_string(), MARKED)), nil()) + }, + true + ), + Ok(( + LocatedTree { + root_addr: Address::from_parts(3.into(), 1), + root: parent( + leaf(("abcd".to_string(), EPHEMERAL)), + parent( + parent(leaf(("e".to_string(), MARKED)), nil()), + leaf(("gh".to_string(), EPHEMERAL)) + ) + ) + }, + vec![] + )) + ); + } + + #[test] + fn located_prunable_tree_witness() { + let t: LocatedPrunableTree = LocatedTree { + root_addr: Address::from_parts(3.into(), 0), + root: parent( + leaf(("abcd".to_string(), EPHEMERAL)), + parent( + parent( + leaf(("e".to_string(), MARKED)), + leaf(("f".to_string(), EPHEMERAL)), + ), + leaf(("gh".to_string(), EPHEMERAL)), + ), + ), + }; + + assert_eq!( + t.witness(4.into(), 8.into()), + Ok(vec!["f", "gh", "abcd"] + .into_iter() + .map(|s| s.to_string()) + .collect()) + ); + assert_eq!( + t.witness(4.into(), 6.into()), + Ok(vec!["f", "__", "abcd"] + .into_iter() + .map(|s| s.to_string()) + .collect()) + ); 
+ assert_eq!( + t.witness(4.into(), 7.into()), + Err(QueryError::TreeIncomplete(vec![Address::from_parts( + 1.into(), + 3 + )])) + ); + } + + type VecShardStore = Vec>; + #[test] fn tree_marked_positions() { let t: PrunableTree = parent( @@ -1739,76 +2197,80 @@ mod tests { assert_eq!(complete.subtree.right_filled_root(), Ok("abcd".to_string())); } - #[test] - fn located_prunable_tree_insert_subtree() { - let t: LocatedPrunableTree = LocatedTree { - root_addr: Address::from_parts(3.into(), 1), - root: parent( - leaf(("abcd".to_string(), EPHEMERAL)), - parent(nil(), leaf(("gh".to_string(), EPHEMERAL))), - ), - }; + impl< + H: Hashable + Ord + Clone, + C: Clone + Ord + core::fmt::Debug, + S: ShardStore, + const DEPTH: u8, + const SHARD_HEIGHT: u8, + > testing::Tree for ShardTree + { + fn depth(&self) -> u8 { + DEPTH + } - assert_eq!( - t.insert_subtree::( - LocatedTree { - root_addr: Address::from_parts(1.into(), 6), - root: parent(leaf(("e".to_string(), MARKED)), nil()) - }, - true - ), - Ok(( - LocatedTree { - root_addr: Address::from_parts(3.into(), 1), - root: parent( - leaf(("abcd".to_string(), EPHEMERAL)), - parent( - parent(leaf(("e".to_string(), MARKED)), nil()), - leaf(("gh".to_string(), EPHEMERAL)) - ) - ) - }, - vec![] - )) - ); + fn append(&mut self, value: H, retention: Retention) -> bool { + ShardTree::append(self, value, retention).is_ok() + } + + fn current_position(&self) -> Option { + ShardTree::max_leaf_position(self, 0).ok().flatten() + } + + fn get_marked_leaf(&self, _position: Position) -> Option<&H> { + todo!() + } + + fn marked_positions(&self) -> BTreeSet { + todo!() + } + + fn root(&self, _checkpoint_depth: usize) -> Option { + todo!() + } + + fn witness(&self, _position: Position, _checkpoint_depth: usize) -> Option> { + todo!() + } + + fn remove_mark(&mut self, _position: Position) -> bool { + todo!() + } + + fn checkpoint(&mut self, _checkpoint_id: C) -> bool { + todo!() + } + + fn rewind(&mut self) -> bool { + todo!() + } } #[test] - fn 
located_prunable_tree_witness() { - let t: LocatedPrunableTree = LocatedTree { - root_addr: Address::from_parts(3.into(), 0), - root: parent( - leaf(("abcd".to_string(), EPHEMERAL)), - parent( - parent( - leaf(("e".to_string(), MARKED)), - leaf(("f".to_string(), EPHEMERAL)), - ), - leaf(("gh".to_string(), EPHEMERAL)), - ), - ), - }; + fn append() { + check_append(|m| { + ShardTree::, 4, 3>::new(vec![], m, 0) + }); + } - assert_eq!( - t.witness(4.into(), 8.into()), - Ok(vec!["f", "gh", "abcd"] - .into_iter() - .map(|s| s.to_string()) - .collect()) - ); - assert_eq!( - t.witness(4.into(), 6.into()), - Ok(vec!["f", "__", "abcd"] - .into_iter() - .map(|s| s.to_string()) - .collect()) - ); - assert_eq!( - t.witness(4.into(), 7.into()), - Err(QueryError::TreeIncomplete(vec![Address::from_parts( - 1.into(), - 3 - )])) - ); + // Combined tree tests + #[allow(clippy::type_complexity)] + fn new_combined_tree( + max_checkpoints: usize, + ) -> CombinedTree< + H, + usize, + CompleteTree, + ShardTree, 4, 3>, + > { + CombinedTree::new( + CompleteTree::new(max_checkpoints, 0), + ShardTree::new(vec![], max_checkpoints, 0), + ) + } + + #[test] + fn combined_append() { + check_append(new_combined_tree); } }