Remove initial checkpoints.

The choice to use an initial checkpoint as an anchor for removals in
`unmark` operations produced an awkward situation where it was necessary
to choose a checkpoint identifier just to construct an empty tree. This
removes that decision in favor of eagerly removing data if there is no
checkpoint to tie the removal to.
This commit is contained in:
Kris Nuttycombe 2023-05-19 08:14:55 -06:00
parent a26844a451
commit 4efe39eaa6
4 changed files with 138 additions and 187 deletions

View File

@ -470,13 +470,13 @@ impl<H, C, const DEPTH: u8> BridgeTree<H, C, DEPTH> {
///
/// Panics if `max_checkpoints < 1` because mark/rewind logic depends upon the presence
/// of checkpoints to function.
pub fn new(max_checkpoints: usize, initial_checkpoint_id: C) -> Self {
pub fn new(max_checkpoints: usize) -> Self {
assert!(max_checkpoints >= 1);
Self {
prior_bridges: vec![],
current_bridge: None,
saved: BTreeMap::new(),
checkpoints: VecDeque::from(vec![Checkpoint::at_length(0, initial_checkpoint_id)]),
checkpoints: VecDeque::new(),
max_checkpoints,
}
}
@ -535,12 +535,8 @@ impl<H, C, const DEPTH: u8> BridgeTree<H, C, DEPTH> {
impl<H: Hashable + Clone + Ord, C: Clone + Ord, const DEPTH: u8> BridgeTree<H, C, DEPTH> {
/// Construct a new BridgeTree that will start recording changes from the state of
/// the specified frontier.
pub fn from_frontier(
max_checkpoints: usize,
frontier: NonEmptyFrontier<H>,
checkpoint_id: C,
) -> Self {
let mut bridge = Self {
pub fn from_frontier(max_checkpoints: usize, frontier: NonEmptyFrontier<H>) -> Self {
Self {
prior_bridges: vec![],
current_bridge: Some(MerkleBridge::from_parts(
None,
@ -551,9 +547,7 @@ impl<H: Hashable + Clone + Ord, C: Clone + Ord, const DEPTH: u8> BridgeTree<H, C
saved: BTreeMap::new(),
checkpoints: VecDeque::new(),
max_checkpoints,
};
bridge.checkpoint(checkpoint_id);
bridge
}
}
/// Construct a new BridgeTree from its constituent parts, checking for internal
@ -717,11 +711,9 @@ impl<H: Hashable + Clone + Ord, C: Clone + Ord, const DEPTH: u8> BridgeTree<H, C
// mark the position as having been marked in the current checkpoint
if let std::collections::btree_map::Entry::Vacant(e) = self.saved.entry(pos) {
let c = self
.checkpoints
.back_mut()
.expect("Checkpoints deque must never be empty");
c.marked.insert(pos);
if let Some(c) = self.checkpoints.back_mut() {
c.marked.insert(pos);
}
e.insert(self.prior_bridges.len() - 1);
}
@ -749,11 +741,11 @@ impl<H: Hashable + Clone + Ord, C: Clone + Ord, const DEPTH: u8> BridgeTree<H, C
/// false if we were already not maintaining a mark at this position.
pub fn remove_mark(&mut self, position: Position) -> bool {
if self.saved.contains_key(&position) {
let c = self
.checkpoints
.back_mut()
.expect("Checkpoints deque must never be empty.");
c.forgotten.insert(position);
if let Some(c) = self.checkpoints.back_mut() {
c.forgotten.insert(position);
} else {
self.saved.remove(&position);
}
true
} else {
false
@ -805,12 +797,7 @@ impl<H: Hashable + Clone + Ord, C: Clone + Ord, const DEPTH: u8> BridgeTree<H, C
/// at that tree state have been removed using `rewind`. This function
/// return false and leave the tree unmodified if no checkpoints exist.
pub fn rewind(&mut self) -> bool {
if self.checkpoints.len() > 1 {
let c = self
.checkpoints
.pop_back()
.expect("Checkpoints deque is known to be non-empty.");
if let Some(c) = self.checkpoints.pop_back() {
// Remove marks for positions that were marked during the lifetime of this checkpoint.
for pos in c.marked {
self.saved.remove(&pos);
@ -1063,7 +1050,7 @@ mod tests {
#[test]
fn tree_depth() {
let mut tree = BridgeTree::<String, usize, 3>::new(100, 0);
let mut tree = BridgeTree::<String, usize, 3>::new(100);
for c in 'a'..'i' {
assert!(tree.append(c.to_string()))
}
@ -1095,7 +1082,7 @@ mod tests {
{
let pos_gen = (0..max_count).prop_map(|p| Position::try_from(p).unwrap());
proptest::collection::vec(arb_operation(item_gen, pos_gen), 0..max_count).prop_map(|ops| {
let mut tree: BridgeTree<G::Value, usize, 8> = BridgeTree::new(10, 0);
let mut tree: BridgeTree<G::Value, usize, 8> = BridgeTree::new(10);
for (i, op) in ops.into_iter().enumerate() {
apply_operation(&mut tree, op.map_checkpoint_id(|_| i));
}
@ -1130,33 +1117,31 @@ mod tests {
#[test]
fn root_hashes() {
check_root_hashes(|max_checkpoints| {
BridgeTree::<String, usize, 4>::new(max_checkpoints, 0)
});
check_root_hashes(BridgeTree::<String, usize, 4>::new);
}
#[test]
fn witness() {
check_witnesses(|max_checkpoints| BridgeTree::<String, usize, 4>::new(max_checkpoints, 0));
check_witnesses(BridgeTree::<String, usize, 4>::new);
}
#[test]
fn checkpoint_rewind() {
check_checkpoint_rewind(|max_checkpoints| {
BridgeTree::<String, usize, 4>::new(max_checkpoints, 0)
BridgeTree::<String, usize, 4>::new(max_checkpoints)
});
}
#[test]
fn rewind_remove_mark() {
check_rewind_remove_mark(|max_checkpoints| {
BridgeTree::<String, usize, 4>::new(max_checkpoints, 0)
BridgeTree::<String, usize, 4>::new(max_checkpoints)
});
}
#[test]
fn garbage_collect() {
let mut tree: BridgeTree<String, usize, 7> = BridgeTree::new(1000, 0);
let mut tree: BridgeTree<String, usize, 7> = BridgeTree::new(1000);
let empty_root = tree.root(0);
tree.append("a".to_string());
for i in 0..100 {
@ -1167,7 +1152,7 @@ mod tests {
tree.rewind();
assert!(tree.root(0) != empty_root);
let mut t = BridgeTree::<String, usize, 7>::new(10, 0);
let mut t = BridgeTree::<String, usize, 7>::new(10);
let mut to_unmark = vec![];
let mut has_witness = vec![];
for i in 0u64..100 {
@ -1213,8 +1198,8 @@ mod tests {
max_checkpoints: usize,
) -> CombinedTree<H, usize, CompleteTree<H, usize, 4>, BridgeTree<H, usize, 4>> {
CombinedTree::new(
CompleteTree::<H, usize, 4>::new(max_checkpoints, 0),
BridgeTree::<H, usize, 4>::new(max_checkpoints, 0),
CompleteTree::<H, usize, 4>::new(max_checkpoints),
BridgeTree::<H, usize, 4>::new(max_checkpoints),
)
}

View File

@ -71,11 +71,11 @@ pub struct CompleteTree<H, C: Ord, const DEPTH: u8> {
impl<H: Hashable, C: Clone + Ord + core::fmt::Debug, const DEPTH: u8> CompleteTree<H, C, DEPTH> {
/// Creates a new, empty binary tree
pub fn new(max_checkpoints: usize, initial_checkpoint_id: C) -> Self {
pub fn new(max_checkpoints: usize) -> Self {
Self {
leaves: vec![],
marks: BTreeSet::new(),
checkpoints: BTreeMap::from([(initial_checkpoint_id, Checkpoint::at_length(0))]),
checkpoints: BTreeMap::new(),
max_checkpoints,
}
}
@ -140,22 +140,18 @@ impl<H: Hashable, C: Clone + Ord + core::fmt::Debug, const DEPTH: u8> CompleteTr
/// Marks the current tree state leaf as a value that we're interested in
/// marking. Returns the current position if the tree is non-empty.
fn mark(&mut self) -> Option<Position> {
match self.current_position() {
Some(pos) => {
if !self.marks.contains(&pos) {
self.marks.insert(pos);
self.checkpoints
.iter_mut()
.rev()
.next()
.unwrap()
.1
.marked
.insert(pos);
if let Some(pos) = self.current_position() {
if !self.marks.contains(&pos) {
self.marks.insert(pos);
if let Some(checkpoint) = self.checkpoints.values_mut().rev().next() {
checkpoint.marked.insert(pos);
}
Some(pos)
}
None => None,
Some(pos)
} else {
None
}
}
@ -282,14 +278,11 @@ impl<H: Hashable + PartialEq + Clone, C: Ord + Clone + core::fmt::Debug, const D
fn remove_mark(&mut self, position: Position) -> bool {
if self.marks.contains(&position) {
self.checkpoints
.iter_mut()
.rev()
.next()
.unwrap()
.1
.forgotten
.insert(position);
if let Some(c) = self.checkpoints.values_mut().rev().next() {
c.forgotten.insert(position);
} else {
self.marks.remove(&position);
}
true
} else {
false
@ -297,7 +290,7 @@ impl<H: Hashable + PartialEq + Clone, C: Ord + Clone + core::fmt::Debug, const D
}
fn checkpoint(&mut self, id: C) -> bool {
if Some(&id) > self.checkpoints.iter().rev().next().map(|(id, _)| id) {
if Some(&id) > self.checkpoints.keys().rev().next() {
Self::checkpoint(self, id, self.current_position());
true
} else {
@ -306,8 +299,7 @@ impl<H: Hashable + PartialEq + Clone, C: Ord + Clone + core::fmt::Debug, const D
}
fn rewind(&mut self) -> bool {
if self.checkpoints.len() > 1 {
let (id, c) = self.checkpoints.iter().rev().next().unwrap();
if let Some((id, c)) = self.checkpoints.iter().rev().next() {
self.leaves.truncate(c.leaves_len);
for pos in c.marked.iter() {
self.marks.remove(pos);
@ -342,7 +334,7 @@ mod tests {
expected = SipHashable::combine(lvl.into(), &expected, &expected);
}
let tree = CompleteTree::<SipHashable, (), DEPTH>::new(100, ());
let tree = CompleteTree::<SipHashable, (), DEPTH>::new(100);
assert_eq!(tree.root(0).unwrap(), expected);
}
@ -351,7 +343,7 @@ mod tests {
const DEPTH: u8 = 3;
let values = (0..(1 << DEPTH)).map(SipHashable);
let mut tree = CompleteTree::<SipHashable, (), DEPTH>::new(100, ());
let mut tree = CompleteTree::<SipHashable, (), DEPTH>::new(100);
for value in values {
assert!(tree.append(value, Retention::Ephemeral).is_ok());
}
@ -376,21 +368,17 @@ mod tests {
#[test]
fn append() {
check_append(|max_checkpoints| CompleteTree::<String, usize, 4>::new(max_checkpoints, 0));
check_append(CompleteTree::<String, usize, 4>::new);
}
#[test]
fn root_hashes() {
check_root_hashes(|max_checkpoints| {
CompleteTree::<String, usize, 4>::new(max_checkpoints, 0)
});
check_root_hashes(CompleteTree::<String, usize, 4>::new);
}
#[test]
fn witnesses() {
check_witnesses(|max_checkpoints| {
CompleteTree::<String, usize, 4>::new(max_checkpoints, 0)
});
check_witnesses(CompleteTree::<String, usize, 4>::new);
}
#[test]
@ -400,7 +388,7 @@ mod tests {
const DEPTH: u8 = 3;
let values = (0..(1 << DEPTH)).map(SipHashable);
let mut tree = CompleteTree::<SipHashable, (), DEPTH>::new(100, ());
let mut tree = CompleteTree::<SipHashable, (), DEPTH>::new(100);
for value in values {
assert!(Tree::append(&mut tree, value, Retention::Marked));
}
@ -435,14 +423,14 @@ mod tests {
#[test]
fn checkpoint_rewind() {
check_checkpoint_rewind(|max_checkpoints| {
CompleteTree::<String, usize, 4>::new(max_checkpoints, 0)
CompleteTree::<String, usize, 4>::new(max_checkpoints)
});
}
#[test]
fn rewind_remove_mark() {
check_rewind_remove_mark(|max_checkpoints| {
CompleteTree::<String, usize, 4>::new(max_checkpoints, 0)
CompleteTree::<String, usize, 4>::new(max_checkpoints)
});
}
}

View File

@ -16,3 +16,7 @@ cc cf1a33ef6df58bbf7cc199b8b1879c3a078e7784aa3edc0aba9ca03772bea5f2 # shrinks to
cc 544e027d994eaf7f97b1c8d9ee7b35522a64a610b1430d56d74ec947018b759d # shrinks to ops = [Append("a", Marked), Append("a", Ephemeral), Append("a", Ephemeral), Append("a", Ephemeral), Append("a", Ephemeral), Append("a", Ephemeral), Append("a", Ephemeral), Checkpoint(()), Append("a", Marked), Checkpoint(()), Checkpoint(()), Append("a", Checkpoint { id: (), is_marked: false }), Rewind, Rewind, Witness(Position(7), 2)]
cc 55d00b68a0f0a02f83ab53f18a29d16d0233153b69a01414a1622104e0eead31 # shrinks to ops = [Append("a", Marked), Append("a", Checkpoint { id: (), is_marked: false }), Append("a", Marked), Checkpoint(()), Checkpoint(()), Checkpoint(()), Append("a", Checkpoint { id: (), is_marked: false }), Append("a", Checkpoint { id: (), is_marked: false }), Witness(Position(0), 7)]
cc 9dd966ff1ab66965c5b84153ae13f684258560cdd5e84c7deb24f724cb12aba7 # shrinks to ops = [Append("a", Marked), Append("a", Ephemeral), Append("a", Checkpoint { id: (), is_marked: true }), Checkpoint(()), Append("a", Checkpoint { id: (), is_marked: false }), Rewind, Rewind, Append("a", Checkpoint { id: (), is_marked: false }), Append("a", Checkpoint { id: (), is_marked: false }), Checkpoint(()), Witness(Position(2), 4)]
cc d53a73021238de143764ee1d48b944abb93bd4bc54f35d16e514261220d3eb78 # shrinks to ops = [Append(SipHashable(0), Marked), Unmark(Position(0))]
cc d9460b8acbc5b4d112cae5d9e2296fcd793999b2b2e1d5405722f2bd8d176c31 # shrinks to ops = [Append("a", Checkpoint { id: (), is_marked: true }), Rewind, Append("a", Ephemeral), Rewind, Unmark(Position(0))]
cc 644c7763bc7bdc65bd9e6eb156b3b1a9b0632571a571c462bd44f3e04a389ca0 # shrinks to ops = [Append("a", Ephemeral), Append("a", Checkpoint { id: (), is_marked: true }), Append("a", Ephemeral), Append("a", Ephemeral), Unmark(Position(1)), Witness(Position(1), 0)]
cc 12790169d3df4280dd155d9cdfa76719318b8ec97a80bd562b7cb182d4f9bc79 # shrinks to ops = [CurrentPosition, CurrentPosition, Append(SipHashable(0), Ephemeral), Append(SipHashable(0), Marked), Append(SipHashable(0), Ephemeral), Checkpoint(()), Checkpoint(()), Checkpoint(()), Unmark(Position(1)), Checkpoint(()), Witness(Position(1), 0)]

View File

@ -1761,21 +1761,12 @@ impl<
where
S::Error: From<InsertionError> + From<QueryError>,
{
/// Creates a new empty tree and establishes a checkpoint for the empty tree at the given
/// checkpoint identifier.
pub fn empty(
store: S,
max_checkpoints: usize,
initial_checkpoint_id: C,
) -> Result<Self, S::Error> {
let mut result = Self {
/// Creates a new empty tree.
pub fn empty(store: S, max_checkpoints: usize) -> Self {
Self {
store,
max_checkpoints,
};
result
.store
.add_checkpoint(initial_checkpoint_id, Checkpoint::tree_empty())?;
Ok(result)
}
}
/// Constructs a wrapper around the provided shard store without initialization.
@ -1797,6 +1788,11 @@ where
Level::from(SHARD_HEIGHT - 1)
}
/// Returns the root address of the subtree that contains the specified position.
pub fn subtree_addr(pos: Position) -> Address {
Address::above_position(Self::subtree_level(), pos)
}
/// Returns the leaf value at the specified position, if it is a marked leaf.
pub fn get_marked_leaf(&self, position: Position) -> Result<Option<H>, S::Error> {
Ok(self
@ -2031,19 +2027,13 @@ where
// Update the rightmost subtree to add the `CHECKPOINT` flag to the right-most leaf (which
// need not be a level-0 leaf; it's fine to rewind to a pruned state).
if let Some(subtree) = self.store.last_shard()? {
if let Some((replacement, checkpoint_position)) = go(subtree.root_addr, &subtree.root) {
if self
.store
.put_shard(LocatedTree {
root_addr: subtree.root_addr,
root: replacement,
})
.is_err()
{
return Ok(false);
}
if let Some((replacement, pos)) = go(subtree.root_addr, &subtree.root) {
self.store.put_shard(LocatedTree {
root_addr: subtree.root_addr,
root: replacement,
})?;
self.store
.add_checkpoint(checkpoint_id, Checkpoint::at_position(checkpoint_position))?;
.add_checkpoint(checkpoint_id, Checkpoint::at_position(pos))?;
// early return once we've updated the tree state
self.prune_excess_checkpoints()?;
@ -2132,36 +2122,32 @@ where
) -> Result<bool, S::Error> {
Ok(if checkpoint_depth == 0 {
true
} else if self.store.checkpoint_count()? > 1 {
if let Some((checkpoint_id, c)) =
self.store.get_checkpoint_at_depth(checkpoint_depth)?
{
match c.tree_state {
TreeState::Empty => {
self.store
.truncate(Address::from_parts(Self::subtree_level(), 0))?;
} else if let Some((checkpoint_id, c)) =
self.store.get_checkpoint_at_depth(checkpoint_depth)?
{
match c.tree_state {
TreeState::Empty => {
self.store
.truncate(Address::from_parts(Self::subtree_level(), 0))?;
self.store.truncate_checkpoints(&checkpoint_id)?;
true
}
TreeState::AtPosition(position) => {
let subtree_addr = Address::above_position(Self::subtree_level(), position);
let replacement = self
.store
.get_shard(subtree_addr)?
.and_then(|s| s.truncate_to_position(position));
if let Some(truncated) = replacement {
self.store.truncate(subtree_addr)?;
self.store.put_shard(truncated)?;
self.store.truncate_checkpoints(&checkpoint_id)?;
true
}
TreeState::AtPosition(position) => {
let subtree_addr = Address::above_position(Self::subtree_level(), position);
let replacement = self
.store
.get_shard(subtree_addr)?
.and_then(|s| s.truncate_to_position(position));
if let Some(truncated) = replacement {
self.store.truncate(subtree_addr)?;
self.store.put_shard(truncated)?;
self.store.truncate_checkpoints(&checkpoint_id)?;
true
} else {
false
}
} else {
false
}
}
} else {
false
}
} else {
false
@ -2361,31 +2347,43 @@ where
pub fn remove_mark(
&mut self,
position: Position,
as_of_checkpoint: &C,
) -> Result<bool, S::Error> {
as_of_checkpoint: Option<&C>,
) -> Result<bool, S::Error>
where
C: std::fmt::Debug,
{
#[allow(clippy::blocks_in_if_conditions)]
if self.get_marked_leaf(position)?.is_some() {
if self
.store
.update_checkpoint_with(as_of_checkpoint, |checkpoint| {
checkpoint.marks_removed.insert(position);
Ok(())
})?
{
return Ok(true);
}
if let Some(cid) = self.store.min_checkpoint_id()? {
if self.store.update_checkpoint_with(&cid, |checkpoint| {
checkpoint.marks_removed.insert(position);
Ok(())
})? {
return Ok(true);
match as_of_checkpoint {
Some(cid) if Some(cid) >= self.store.min_checkpoint_id()?.as_ref() => {
self.store.update_checkpoint_with(cid, |checkpoint| {
checkpoint.marks_removed.insert(position);
Ok(())
})
}
_ => {
// if no checkpoint was provided, or if the checkpoint is too far in the past,
// remove the mark directly.
let cleared =
self.store
.get_shard(Self::subtree_addr(position))?
.map(|subtree| {
subtree.clear_flags(BTreeMap::from([(
position,
RetentionFlags::MARKED,
)]))
});
if let Some(cleared) = cleared {
self.store.put_shard(cleared)?;
Ok(true)
} else {
Ok(false)
}
}
}
} else {
Ok(false)
}
Ok(false)
}
}
@ -2786,7 +2784,7 @@ mod tests {
#[test]
fn shardtree_insertion() {
let mut tree: ShardTree<MemoryShardStore<String, usize>, 4, 3> =
ShardTree::empty(MemoryShardStore::empty(), 100, 0).unwrap();
ShardTree::empty(MemoryShardStore::empty(), 100);
assert_matches!(
tree.batch_insert(
Position::from(1),
@ -2915,11 +2913,12 @@ mod tests {
}
fn remove_mark(&mut self, position: Position) -> bool {
if let Ok(Some(c)) = self.store.max_checkpoint_id() {
ShardTree::remove_mark(self, position, &c).unwrap()
} else {
false
}
ShardTree::remove_mark(
self,
position,
self.store.max_checkpoint_id().unwrap().as_ref(),
)
.unwrap()
}
fn checkpoint(&mut self, checkpoint_id: C) -> bool {
@ -2934,60 +2933,35 @@ mod tests {
#[test]
fn append() {
check_append(|m| {
ShardTree::<MemoryShardStore<String, usize>, 4, 3>::empty(
MemoryShardStore::empty(),
m,
0,
)
.unwrap()
ShardTree::<MemoryShardStore<String, usize>, 4, 3>::empty(MemoryShardStore::empty(), m)
});
}
#[test]
fn root_hashes() {
check_root_hashes(|m| {
ShardTree::<MemoryShardStore<String, usize>, 4, 3>::empty(
MemoryShardStore::empty(),
m,
0,
)
.unwrap()
ShardTree::<MemoryShardStore<String, usize>, 4, 3>::empty(MemoryShardStore::empty(), m)
});
}
#[test]
fn witnesses() {
check_witnesses(|m| {
ShardTree::<MemoryShardStore<String, usize>, 4, 3>::empty(
MemoryShardStore::empty(),
m,
0,
)
.unwrap()
ShardTree::<MemoryShardStore<String, usize>, 4, 3>::empty(MemoryShardStore::empty(), m)
});
}
#[test]
fn checkpoint_rewind() {
check_checkpoint_rewind(|m| {
ShardTree::<MemoryShardStore<String, usize>, 4, 3>::empty(
MemoryShardStore::empty(),
m,
0,
)
.unwrap()
ShardTree::<MemoryShardStore<String, usize>, 4, 3>::empty(MemoryShardStore::empty(), m)
});
}
#[test]
fn rewind_remove_mark() {
check_rewind_remove_mark(|m| {
ShardTree::<MemoryShardStore<String, usize>, 4, 3>::empty(
MemoryShardStore::empty(),
m,
0,
)
.unwrap()
ShardTree::<MemoryShardStore<String, usize>, 4, 3>::empty(MemoryShardStore::empty(), m)
});
}
@ -3002,8 +2976,8 @@ mod tests {
ShardTree<MemoryShardStore<H, usize>, 4, 3>,
> {
CombinedTree::new(
CompleteTree::new(max_checkpoints, 0),
ShardTree::empty(MemoryShardStore::empty(), max_checkpoints, 0).unwrap(),
CompleteTree::new(max_checkpoints),
ShardTree::empty(MemoryShardStore::empty(), max_checkpoints),
)
}