From 0ae9b499cc6b9f54116dc1bc58bf4040a5cd2600 Mon Sep 17 00:00:00 2001 From: Kris Nuttycombe Date: Wed, 14 Dec 2022 15:24:59 -0700 Subject: [PATCH 01/16] Introduce the `shardtree` crate: a sparse Merkle tree type. --- Cargo.toml | 1 + shardtree/Cargo.toml | 12 +++ shardtree/LICENSE-APACHE | 202 +++++++++++++++++++++++++++++++++++++++ shardtree/LICENSE-MIT | 21 ++++ shardtree/src/lib.rs | 1 + 5 files changed, 237 insertions(+) create mode 100644 shardtree/Cargo.toml create mode 100644 shardtree/LICENSE-APACHE create mode 100644 shardtree/LICENSE-MIT create mode 100644 shardtree/src/lib.rs diff --git a/Cargo.toml b/Cargo.toml index fd76a54..98ebc88 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -2,4 +2,5 @@ members = [ "incrementalmerkletree", "bridgetree", + "shardtree", ] diff --git a/shardtree/Cargo.toml b/shardtree/Cargo.toml new file mode 100644 index 0000000..53e36a0 --- /dev/null +++ b/shardtree/Cargo.toml @@ -0,0 +1,12 @@ +[package] +name = "shardtree" +version = "0.0.0" +authors = [ + "Kris Nuttycombe ", +] +edition = "2018" +license = "MIT OR Apache-2.0" +description = "A space-efficient Merkle tree with witnessing of marked leaves, checkpointing & state restoration." +homepage = "https://github.com/zcash/incrementalmerkletree" +repository = "https://github.com/zcash/incrementalmerkletree" +categories = ["algorithms", "data-structures"] diff --git a/shardtree/LICENSE-APACHE b/shardtree/LICENSE-APACHE new file mode 100644 index 0000000..1e5006d --- /dev/null +++ b/shardtree/LICENSE-APACHE @@ -0,0 +1,202 @@ + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + +TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + +1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. 
+ + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. 
+ + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + +2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + +3. Grant of Patent License. 
Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + +4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the 
following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + +5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + +6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + +7. Disclaimer of Warranty. 
Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + +8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + +9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. 
+ +END OF TERMS AND CONDITIONS + +APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + +Copyright [yyyy] [name of copyright owner] + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. + diff --git a/shardtree/LICENSE-MIT b/shardtree/LICENSE-MIT new file mode 100644 index 0000000..94ac1a7 --- /dev/null +++ b/shardtree/LICENSE-MIT @@ -0,0 +1,21 @@ +The MIT License (MIT) + +Copyright (c) 2015-2021 The Electric Coin Company + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. 
+ +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. diff --git a/shardtree/src/lib.rs b/shardtree/src/lib.rs new file mode 100644 index 0000000..8b13789 --- /dev/null +++ b/shardtree/src/lib.rs @@ -0,0 +1 @@ + From 8864a84d197eb051890827358842887dec218fc6 Mon Sep 17 00:00:00 2001 From: Kris Nuttycombe Date: Fri, 13 Jan 2023 08:40:57 -0700 Subject: [PATCH 02/16] Introduce a simple binary tree type. --- .github/workflows/ci.yml | 2 +- incrementalmerkletree/src/testing.rs | 24 +-- shardtree/Cargo.toml | 24 +++ shardtree/benches/shardtree.rs | 86 ++++++++++ shardtree/src/lib.rs | 241 +++++++++++++++++++++++++++ 5 files changed, 365 insertions(+), 12 deletions(-) create mode 100644 shardtree/benches/shardtree.rs diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index f7b4fa1..bf4f97e 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -23,7 +23,7 @@ jobs: - uses: actions/checkout@v3 # Build benchmarks to prevent bitrot - name: Build benchmarks - run: cargo build --workspace --benches + run: cargo build --workspace --benches --all-features doc-links: name: Intra-doc links diff --git a/incrementalmerkletree/src/testing.rs b/incrementalmerkletree/src/testing.rs index f511f51..c015cf8 100644 --- a/incrementalmerkletree/src/testing.rs +++ b/incrementalmerkletree/src/testing.rs @@ -65,17 +65,16 @@ pub trait Tree { /// Creates a new checkpoint for the current tree state. /// - /// It is valid to have multiple checkpoints for the same tree state, and - /// each `rewind` call will remove a single checkpoint. 
Returns `false` - /// if the checkpoint identifier provided is less than or equal to the - /// maximum checkpoint identifier observed. + /// It is valid to have multiple checkpoints for the same tree state, and each `rewind` call + /// will remove a single checkpoint. Returns `false` if the checkpoint identifier provided is + /// less than or equal to the maximum checkpoint identifier observed. fn checkpoint(&mut self, id: C) -> bool; - /// Rewinds the tree state to the previous checkpoint, and then removes - /// that checkpoint record. If there are multiple checkpoints at a given - /// tree state, the tree state will not be altered until all checkpoints - /// at that tree state have been removed using `rewind`. This function - /// return false and leave the tree unmodified if no checkpoints exist. + /// Rewinds the tree state to the previous checkpoint, and then removes that checkpoint record. + /// + /// If there are multiple checkpoints at a given tree state, the tree state will not be altered + /// until all checkpoints at that tree state have been removed using `rewind`. This function + /// will return false and leave the tree unmodified if no checkpoints exist. 
fn rewind(&mut self) -> bool; } @@ -288,7 +287,10 @@ pub fn check_operations>( tree_checkpoints.push(tree_size); } } else { - prop_assert_eq!(tree_size, 1 << tree.depth()); + prop_assert_eq!( + tree_size, + tree.current_position().map_or(0, |p| usize::from(p) + 1) + ); } } CurrentPosition => { @@ -375,7 +377,7 @@ pub fn compute_root_from_witness(value: H, position: Position, path // Types and utilities for cross-verification property tests // -#[derive(Clone)] +#[derive(Clone, Debug)] pub struct CombinedTree, E: Tree> { inefficient: I, efficient: E, diff --git a/shardtree/Cargo.toml b/shardtree/Cargo.toml index 53e36a0..e3f1c63 100644 --- a/shardtree/Cargo.toml +++ b/shardtree/Cargo.toml @@ -10,3 +10,27 @@ description = "A space-efficient Merkle tree with witnessing of marked leaves, c homepage = "https://github.com/zcash/incrementalmerkletree" repository = "https://github.com/zcash/incrementalmerkletree" categories = ["algorithms", "data-structures"] + +[dependencies] +either = "1.8" +incrementalmerkletree = { version = "0.3", path = "../incrementalmerkletree" } +proptest = { version = "1.0.0", optional = true } + +[dev-dependencies] +assert_matches = "1.5" +criterion = "0.3" +incrementalmerkletree = { version = "0.3", path = "../incrementalmerkletree", features = ["test-dependencies"] } +proptest = "1.0.0" + +[features] +test-dependencies = ["proptest"] + +[target.'cfg(unix)'.dev-dependencies] +pprof = { version = "0.9", features = ["criterion", "flamegraph"] } # MSRV 1.56 +inferno = ">=0.11, <0.11.5" # MSRV 1.59 + +[[bench]] +name = "shardtree" +harness = false +required-features = ["test-dependencies"] + diff --git a/shardtree/benches/shardtree.rs b/shardtree/benches/shardtree.rs new file mode 100644 index 0000000..ffba295 --- /dev/null +++ b/shardtree/benches/shardtree.rs @@ -0,0 +1,86 @@ +use criterion::{criterion_group, criterion_main, Criterion}; +use proptest::prelude::*; +use proptest::strategy::ValueTree; +use proptest::test_runner::TestRunner; + +use 
incrementalmerkletree::Address; +use shardtree::{testing::arb_tree, Node}; + +#[cfg(unix)] +use pprof::criterion::{Output, PProfProfiler}; + +// An algebra for computing the incomplete roots of a tree (the addresses at which nodes are +// `Nil`). This is used for benchmarking to determine the viability of "attribute grammars" for +// when you want to use `reduce` to compute a value that requires information to be passed top-down +// through the tree. +type RootFn = Box Vec
>; +pub fn incomplete_roots(node: Node) -> RootFn { + Box::new(move |addr| match &node { + Node::Parent { left, right, .. } => { + let (left_addr, right_addr) = addr + .children() + .expect("A parent node cannot appear at level 0"); + let mut left_result = left(left_addr); + let mut right_result = right(right_addr); + left_result.append(&mut right_result); + left_result + } + Node::Leaf { .. } => vec![], + Node::Nil { .. } => vec![addr], + }) +} + +pub fn bench_shardtree(c: &mut Criterion) { + { + //let mut group = c.benchmark_group("shardtree-incomplete"); + + let mut runner = TestRunner::deterministic(); + let input = arb_tree(Just(()), any::(), 16, 4096) + .new_tree(&mut runner) + .unwrap() + .current(); + println!( + "Benchmarking with {} leaves.", + input.reduce( + &(|node| match node { + Node::Parent { left, right } => left + right, + Node::Leaf { .. } => 1, + Node::Nil => 0, + }) + ) + ); + + let input_root = Address::from_parts( + input + .reduce( + &(|node| match node { + Node::Parent { left, right } => std::cmp::max(left, right) + 1, + Node::Leaf { .. } => 0, + Node::Nil => 0, + }), + ) + .into(), + 0, + ); + + c.bench_function("direct_recursion", |b| { + b.iter(|| input.incomplete(input_root)) + }); + + c.bench_function("reduce", |b| { + b.iter(|| input.reduce(&incomplete_roots)(input_root)) + }); + } +} + +#[cfg(unix)] +criterion_group! { + name = benches; + config = Criterion::default().with_profiler(PProfProfiler::new(100, Output::Flamegraph(None))); + targets = bench_shardtree +} + +#[cfg(not(unix))] +criterion_group!(benches, bench_shardtree); + +criterion_main!(benches); diff --git a/shardtree/src/lib.rs b/shardtree/src/lib.rs index 8b13789..4986033 100644 --- a/shardtree/src/lib.rs +++ b/shardtree/src/lib.rs @@ -1 +1,242 @@ +use core::fmt::Debug; +use core::ops::Deref; +use either::Either; +use std::rc::Rc; +use incrementalmerkletree::Address; + +/// A "pattern functor" for a single layer of a binary tree. 
+#[derive(Clone, Debug, PartialEq, Eq)] +pub enum Node { + /// A parent node in the tree, annotated with a value of type `A` and with left and right + /// children of type `C`. + Parent { ann: A, left: C, right: C }, + /// A node of the tree that contains a value (usually a hash, sometimes with additional + /// metadata) and that has no children. + /// + /// Note that leaf nodes may appear at any position in the tree; i.e. they may contain computed + /// subtree root values and not just level-0 leaves. + Leaf { value: V }, + /// The empty tree; a subtree or leaf for which no information is available. + Nil, +} + +impl Node { + /// Returns whether or not this is the `Nil` tree. + /// + /// This is useful for cases where the compiler can automatically dereference an `Rc`, where + /// one would otherwise need additional ceremony to make an equality check. + pub fn is_nil(&self) -> bool { + matches!(self, Node::Nil) + } + + /// Returns the contained leaf value, if this is a leaf node. + pub fn leaf_value(&self) -> Option<&V> { + match self { + Node::Parent { .. } => None, + Node::Leaf { value } => Some(value), + Node::Nil { .. } => None, + } + } + + pub fn annotation(&self) -> Option<&A> { + match self { + Node::Parent { ann, .. } => Some(ann), + Node::Leaf { .. } => None, + Node::Nil => None, + } + } + + /// Replaces the annotation on this node, if it is a `Node::Parent`; otherwise + /// returns this node unaltered. + pub fn reannotate(self, ann: A) -> Self { + match self { + Node::Parent { left, right, .. } => Node::Parent { ann, left, right }, + other => other, + } + } +} + +/// An F-algebra for use with [`Tree::reduce`] for determining whether a tree has any `Nil` nodes. +/// +/// Returns `true` if no [`Node::Nil`] nodes are present in the tree. +pub fn is_complete(node: Node) -> bool { + match node { + Node::Parent { left, right, .. } => left && right, + Node::Leaf { .. } => true, + Node::Nil { .. 
} => false, + } +} + +/// An immutable binary tree with each of its nodes tagged with an annotation value. +#[derive(Clone, Debug, PartialEq, Eq)] +pub struct Tree(Node>, A, V>); + +impl Deref for Tree { + type Target = Node>, A, V>; + fn deref(&self) -> &Self::Target { + &self.0 + } +} + +impl Tree { + /// Replaces the annotation at the root of the tree, if the root is a `Node::Parent`; otherwise + /// returns this tree unaltered. + pub fn reannotate_root(self, ann: A) -> Tree { + Tree(self.0.reannotate(ann)) + } + + /// Returns a vector of the addresses of [`Node::Nil`] subtree roots within this tree. + /// + /// The given address must correspond to the root of this tree, or this method will + /// yield incorrect results or may panic. + pub fn incomplete(&self, root_addr: Address) -> Vec
{ + match &self.0 { + Node::Parent { left, right, .. } => { + // We should never construct parent nodes where both children are Nil. + // While we could handle that here, if we encountered that case it would + // be indicative of a programming error elsewhere and so we assert instead. + assert!(!(left.0.is_nil() && right.0.is_nil())); + let (left_root, right_root) = root_addr + .children() + .expect("A parent node cannot appear at level 0"); + + let mut left_incomplete = left.incomplete(left_root); + let mut right_incomplete = right.incomplete(right_root); + left_incomplete.append(&mut right_incomplete); + left_incomplete + } + Node::Leaf { .. } => vec![], + Node::Nil => vec![root_addr], + } + } +} + +impl Tree { + /// Folds over the tree from leaf to root with the given function. + /// + /// See [`is_complete`] for an example of a function that can be used with this method. + /// This operation will visit every node of the tree. See [`try_reduce`] for a variant + /// that can perform a depth-first, left-to-right traversal with the option to + /// short-circuit. + pub fn reduce) -> B>(&self, alg: &F) -> B { + match &self.0 { + Node::Parent { ann, left, right } => { + let left_result = left.reduce(alg); + let right_result = right.reduce(alg); + alg(Node::Parent { + ann: ann.clone(), + left: left_result, + right: right_result, + }) + } + Node::Leaf { value } => alg(Node::Leaf { + value: value.clone(), + }), + Node::Nil => alg(Node::Nil), + } + } + + /// Folds over the tree from leaf to root with the given function. + /// + /// This performs a left-to-right, depth-first traversal that halts on the first + /// [`Either::Left`] result, or builds an [`Either::Right`] from the results computed at every + /// node. 
+ pub fn try_reduce) -> Either>(&self, alg: &F) -> Either { + match &self.0 { + Node::Parent { ann, left, right } => left.try_reduce(alg).right_and_then(|l_value| { + right.try_reduce(alg).right_and_then(move |r_value| { + alg(Node::Parent { + ann: ann.clone(), + left: l_value, + right: r_value, + }) + }) + }), + Node::Leaf { value } => alg(Node::Leaf { + value: value.clone(), + }), + Node::Nil => alg(Node::Nil), + } + } +} + +#[cfg(any(bench, test, feature = "test-dependencies"))] +pub mod testing { + use super::*; + use incrementalmerkletree::Hashable; + use proptest::prelude::*; + + pub fn arb_tree( + arb_annotation: A, + arb_leaf: V, + depth: u32, + size: u32, + ) -> impl Strategy> + where + A::Value: Clone + 'static, + V::Value: Hashable + Clone + 'static, + { + let leaf = prop_oneof![ + Just(Tree(Node::Nil)), + arb_leaf.prop_map(|value| Tree(Node::Leaf { value })) + ]; + + leaf.prop_recursive(depth, size, 2, move |inner| { + (arb_annotation.clone(), inner.clone(), inner).prop_map(|(ann, left, right)| { + Tree(if left.is_nil() && right.is_nil() { + Node::Nil + } else { + Node::Parent { + ann, + left: Rc::new(left), + right: Rc::new(right), + } + }) + }) + }) + } +} + +#[cfg(test)] +mod tests { + use crate::{Node, Tree}; + use incrementalmerkletree::{Address, Level}; + use std::rc::Rc; + + #[test] + fn tree_incomplete() { + let t = Tree(Node::Parent { + ann: (), + left: Rc::new(Tree(Node::Nil)), + right: Rc::new(Tree(Node::Leaf { value: "a" })), + }); + assert_eq!( + t.incomplete(Address::from_parts(Level::from(1), 0)), + vec![Address::from_parts(Level::from(0), 0)] + ); + + let t0 = Tree(Node::Parent { + ann: (), + left: Rc::new(Tree(Node::Leaf { value: "b" })), + right: Rc::new(t.clone()), + }); + assert_eq!( + t0.incomplete(Address::from_parts(Level::from(2), 1)), + vec![Address::from_parts(Level::from(0), 6)] + ); + + let t1 = Tree(Node::Parent { + ann: (), + left: Rc::new(Tree(Node::Nil)), + right: Rc::new(t), + }); + assert_eq!( + 
+ t1.incomplete(Address::from_parts(Level::from(2), 1)), + vec![ + Address::from_parts(Level::from(1), 2), + Address::from_parts(Level::from(0), 6) + ] + ); + } +} From 8644372c4e6605c68ee180587637190edd97bdc9 Mon Sep 17 00:00:00 2001 From: Kris Nuttycombe Date: Fri, 13 Jan 2023 08:40:57 -0700 Subject: [PATCH 03/16] Add types and methods to support tree pruning. Each leaf of the tree is annotated with retention metadata, and ephemeral leaves can be aggressively pruned when performing insertions into the tree. --- shardtree/src/lib.rs | 551 ++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 547 insertions(+), 4 deletions(-) diff --git a/shardtree/src/lib.rs b/shardtree/src/lib.rs index 4986033..7b898b6 100644 --- a/shardtree/src/lib.rs +++ b/shardtree/src/lib.rs @@ -1,9 +1,94 @@ use core::fmt::Debug; -use core::ops::Deref; +use core::ops::{BitAnd, BitOr, Deref, Not}; use either::Either; +use std::collections::BTreeSet; use std::rc::Rc; -use incrementalmerkletree::Address; +use incrementalmerkletree::{Address, Hashable, Level, Position, Retention}; + +/// A type for flags that determine when and how leaves can be pruned from a tree. +#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord)] +pub struct RetentionFlags(u8); + +impl BitOr for RetentionFlags { + type Output = Self; + + fn bitor(self, rhs: Self) -> Self { + RetentionFlags(self.0 | rhs.0) + } +} + +impl BitAnd for RetentionFlags { + type Output = Self; + + fn bitand(self, rhs: Self) -> Self { + RetentionFlags(self.0 & rhs.0) + } +} + +/// A leaf with `EPHEMERAL` retention can be pruned as soon as we are certain that it is not part +/// of the witness for a leaf with `CHECKPOINT` or `MARKED` retention. +pub static EPHEMERAL: RetentionFlags = RetentionFlags(0b00000000); + +/// A leaf with `CHECKPOINT` retention can be pruned when there are more than `max_checkpoints` +/// additional checkpoint leaves, if it is not also a marked leaf. 
+pub static CHECKPOINT: RetentionFlags = RetentionFlags(0b00000001); + +/// A leaf with `MARKED` retention can be pruned only as a consequence of an explicit deletion +/// action. +pub static MARKED: RetentionFlags = RetentionFlags(0b00000010); + +impl RetentionFlags { + pub fn is_checkpoint(&self) -> bool { + (*self & CHECKPOINT) == CHECKPOINT + } + + pub fn is_marked(&self) -> bool { + (*self & MARKED) == MARKED + } +} + +impl<'a, C> From<&'a Retention> for RetentionFlags { + fn from(retention: &'a Retention) -> Self { + match retention { + Retention::Ephemeral => EPHEMERAL, + Retention::Checkpoint { is_marked, .. } => { + if *is_marked { + CHECKPOINT | MARKED + } else { + CHECKPOINT + } + } + Retention::Marked => MARKED, + } + } +} + +impl From> for RetentionFlags { + fn from(retention: Retention) -> Self { + RetentionFlags::from(&retention) + } +} + +/// A mask that may be used to unset one or more retention flags. +#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord)] +pub struct RetentionMask(u8); + +impl Not for RetentionFlags { + type Output = RetentionMask; + + fn not(self) -> Self::Output { + RetentionMask(!self.0) + } +} + +impl BitAnd for RetentionFlags { + type Output = Self; + + fn bitand(self, rhs: RetentionMask) -> Self { + RetentionFlags(self.0 & rhs.0) + } +} /// A "pattern functor" for a single layer of a binary tree. #[derive(Clone, Debug, PartialEq, Eq)] @@ -68,6 +153,25 @@ pub fn is_complete(node: Node) -> bool { } } +/// An F-algebra for use with [`Tree::try_reduce`] for determining whether a tree has any `MARKED` nodes. +/// +/// `Tree::try_reduce` is preferred for this operation because it allows us to short-circuit as +/// soon as we find a marked node. Returns [`Either::Left(())`] if a marked node exists, +/// [`Either::Right(())`] otherwise. +pub fn contains_marked(node: Node<(), A, (V, RetentionFlags)>) -> Either<(), ()> { + match node { + Node::Parent { .. 
} => Either::Right(()), + Node::Leaf { value: (_, r) } => { + if r.is_marked() { + Either::Left(()) + } else { + Either::Right(()) + } + } + Node::Nil { .. } => Either::Right(()), + } +} + /// An immutable binary tree with each of its nodes tagged with an annotation value. #[derive(Clone, Debug, PartialEq, Eq)] pub struct Tree(Node>, A, V>); @@ -161,11 +265,282 @@ impl Tree { } } +type PrunableTree = Tree>, (H, RetentionFlags)>; + +impl PrunableTree { + /// Returns the value if this is a leaf. + pub fn leaf_value(&self) -> Option<&H> { + self.0.leaf_value().map(|(h, _)| h) + } + + /// Returns the cached root value with which the tree has been annotated for this node if it is + /// available, otherwise return the value if this is a leaf. + pub fn node_value(&self) -> Option<&H> { + self.0.annotation().map_or_else( + || self.leaf_value(), + |rc_opt| rc_opt.as_ref().map(|rc| rc.as_ref()), + ) + } + + /// Returns whether or not this tree is a leaf with `Marked` retention. + pub fn is_marked_leaf(&self) -> bool { + self.0 + .leaf_value() + .map_or(false, |(_, retention)| retention.is_marked()) + } + + /// Returns the Merkle root of this tree, given the address of the root node, or + /// a vector of the addresses of `Nil` nodes that inhibited the computation of + /// such a root. + /// + /// ### Parameters: + /// * `truncate_at` An inclusive lower bound on positions in the tree beyond which all leaf + /// values will be treated as `Nil`. 
+ pub fn root_hash(&self, root_addr: Address, truncate_at: Position) -> Result> { + if truncate_at <= root_addr.position_range_start() { + // we are in the part of the tree where we're generating empty roots, + // so no need to inspect the tree + Ok(H::empty_root(root_addr.level())) + } else { + match self { + Tree(Node::Parent { ann, left, right }) => ann + .as_ref() + .filter(|_| truncate_at >= root_addr.position_range_end()) + .map_or_else( + || { + // Compute the roots of the left and right children and hash them + // together. 
+ let (l_addr, r_addr) = root_addr.children().unwrap(); + accumulate_result_with( + left.root_hash(l_addr, truncate_at), + right.root_hash(r_addr, truncate_at), + |left_root, right_root| { + H::combine(l_addr.level(), &left_root, &right_root) + }, + ) + }, + |rc| { + // Since we have an annotation on the root, and we are not truncating + // within this subtree, we can just use the cached value. + Ok(rc.as_ref().clone()) + }, + ), + Tree(Node::Leaf { value }) => { + if truncate_at >= root_addr.position_range_end() { + // no truncation of this leaf is necessary, just use it + Ok(value.0.clone()) + } else { + // we have a leaf value that is a subtree root created by hashing together + // the roots of child subtrees, but truncation would require that that leaf + // value be "split" into its constituent parts, which we can't do so we + // return an error + Err(vec![root_addr]) + } + } + Tree(Node::Nil) => Err(vec![root_addr]), + } + } + } + + /// Returns a vector of the positions of [`Node::Leaf`] values in the tree having [`MARKED`] + /// retention. + /// + /// Computing the set of marked positions requires a full traversal of the tree, and so should + /// be considered to be a somewhat expensive operation. + pub fn marked_positions(&self, root_addr: Address) -> BTreeSet { + match &self.0 { + Node::Parent { left, right, .. } => { + // We should never construct parent nodes where both children are Nil. + // While we could handle that here, if we encountered that case it would + // be indicative of a programming error elsewhere and so we assert instead. 
+ assert!(!(left.0.is_nil() && right.0.is_nil())); + let (left_root, right_root) = root_addr + .children() + .expect("A parent node cannot appear at level 0"); + + let mut left_incomplete = left.marked_positions(left_root); + let mut right_incomplete = right.marked_positions(right_root); + left_incomplete.append(&mut right_incomplete); + left_incomplete + } + Node::Leaf { + value: (_, retention), + } => { + let mut result = BTreeSet::new(); + if root_addr.level() == 0.into() && retention.is_marked() { + result.insert(Position::from(root_addr.index())); + } + result + } + Node::Nil => BTreeSet::new(), + } + } + + /// Prunes the tree by hashing together ephemeral sibling nodes. + /// + /// `level` must be the level of the root of the node being pruned. + pub fn prune(self, level: Level) -> Self { + match self { + Tree(Node::Parent { ann, left, right }) => Tree::unite( + level, + ann, + left.as_ref().clone().prune(level - 1), + right.as_ref().clone().prune(level - 1), + ), + other => other, + } + } + + /// Merge two subtrees having the same root address. + /// + /// The merge operation is checked to be strictly additive and returns an error if merging + /// would cause information loss or if a conflict between root hashes occurs at a node. The + /// returned error contains the address of the node where such a conflict occurred. + pub fn merge_checked(self, root_addr: Address, other: Self) -> Result { + #[allow(clippy::type_complexity)] + fn go( + addr: Address, + t0: PrunableTree, + t1: PrunableTree, + ) -> Result, Address> { + // Require that any roots the we compute will not be default-filled by picking + // a starting valid fill point that is outside the range of leaf positions. 
+ let no_default_fill = addr.position_range_end(); + match (t0, t1) { + (Tree(Node::Nil), other) => Ok(other), + (other, Tree(Node::Nil)) => Ok(other), + (Tree(Node::Leaf { value: vl }), Tree(Node::Leaf { value: vr })) => { + if vl == vr { + Ok(Tree(Node::Leaf { value: vl })) + } else { + Err(addr) + } + } + (Tree(Node::Leaf { value }), parent) => { + // `parent` is statically known to be a `Node::Parent` + if parent + .root_hash(addr, no_default_fill) + .iter() + .all(|r| r == &value.0) + { + Ok(parent.reannotate_root(Some(Rc::new(value.0)))) + } else { + Err(addr) + } + } + (parent, Tree(Node::Leaf { value })) => { + // `parent` is statically known to be a `Node::Parent` + if parent + .root_hash(addr, no_default_fill) + .iter() + .all(|r| r == &value.0) + { + Ok(parent.reannotate_root(Some(Rc::new(value.0)))) + } else { + Err(addr) + } + } + (lparent, rparent) => { + let lroot = lparent.root_hash(addr, no_default_fill).ok(); + let rroot = rparent.root_hash(addr, no_default_fill).ok(); + // If both parents share the same root hash (or if one of them is absent), + // they can be merged + if lroot.zip(rroot).iter().all(|(l, r)| l == r) { + // using `if let` here to bind variables; we need to borrow the trees for + // root hash calculation but binding the children of the parent node + // interferes with binding a reference to the parent. 
+ if let ( + Tree(Node::Parent { + ann: lann, + left: ll, + right: lr, + }), + Tree(Node::Parent { + ann: rann, + left: rl, + right: rr, + }), + ) = (lparent, rparent) + { + let (l_addr, r_addr) = addr.children().unwrap(); + Ok(Tree::unite( + addr.level() - 1, + lann.or(rann), + go(l_addr, ll.as_ref().clone(), rl.as_ref().clone())?, + go(r_addr, lr.as_ref().clone(), rr.as_ref().clone())?, + )) + } else { + unreachable!() + } + } else { + Err(addr) + } + } + } + } + + go(root_addr, self, other) + } + + /// Unite two nodes by either constructing a new parent node, or, if both nodes are ephemeral + /// leaves or Nil, constructing a replacement root by hashing leaf values together (or a + /// replacement `Nil` value). + /// + /// `level` must be the level of the two nodes that are being joined. + fn unite(level: Level, ann: Option>, left: Self, right: Self) -> Self { + match (left, right) { + (Tree(Node::Nil), Tree(Node::Nil)) => Tree(Node::Nil), + (Tree(Node::Leaf { value: lv }), Tree(Node::Leaf { value: rv })) + // we can prune right-hand leaves that are not marked; if a leaf + // is a checkpoint then that information will be propagated to + // the replacement leaf + if lv.1 == EPHEMERAL && (rv.1 & MARKED) == EPHEMERAL => + { + Tree( + Node::Leaf { + value: (H::combine(level, &lv.0, &rv.0), rv.1), + }, + ) + } + (left, right) => Tree( + Node::Parent { + ann, + left: Rc::new(left), + right: Rc::new(right), + }, + ), + } + } +} + +// We need an applicative functor for Result for this function so that we can correctly +// accumulate errors, but we don't have one so we just write a special-cased version here.
+fn accumulate_result_with( + left: Result>, + right: Result>, + combine_success: impl FnOnce(A, B) -> C, +) -> Result> { + match (left, right) { + (Ok(a), Ok(b)) => Ok(combine_success(a, b)), + (Err(mut xs), Err(mut ys)) => { + xs.append(&mut ys); + Err(xs) + } + (Ok(_), Err(xs)) => Err(xs), + (Err(xs), Ok(_)) => Err(xs), + } +} + #[cfg(any(bench, test, feature = "test-dependencies"))] pub mod testing { use super::*; use incrementalmerkletree::Hashable; use proptest::prelude::*; + use proptest::sample::select; + + pub fn arb_retention_flags() -> impl Strategy { + select(vec![EPHEMERAL, CHECKPOINT, MARKED, MARKED | CHECKPOINT]) + } pub fn arb_tree( arb_annotation: A, @@ -200,8 +575,9 @@ pub mod testing { #[cfg(test)] mod tests { - use crate::{Node, Tree}; - use incrementalmerkletree::{Address, Level}; + use crate::{Node, PrunableTree, Tree, EPHEMERAL, MARKED}; + use incrementalmerkletree::{Address, Level, Position}; + use std::collections::BTreeSet; use std::rc::Rc; #[test] @@ -239,4 +615,171 @@ mod tests { ] ); } + + #[test] + fn tree_root() { + let t: PrunableTree = Tree(Node::Parent { + ann: None, + left: Rc::new(Tree(Node::Leaf { + value: ("a".to_string(), EPHEMERAL), + })), + right: Rc::new(Tree(Node::Leaf { + value: ("b".to_string(), EPHEMERAL), + })), + }); + assert_eq!( + t.root_hash(Address::from_parts(Level::from(1), 0), Position::from(2)), + Ok("ab".to_string()) + ); + + let t0: PrunableTree = Tree(Node::Parent { + ann: None, + left: Rc::new(Tree(Node::Nil)), + right: Rc::new(t.clone()), + }); + assert_eq!( + t0.root_hash(Address::from_parts(Level::from(2), 0), Position::from(4)), + Err(vec![Address::from_parts(Level::from(1), 0)]) + ); + + // Check root computation with truncation + let t1: PrunableTree = Tree(Node::Parent { + ann: None, + left: Rc::new(t), + right: Rc::new(Tree(Node::Nil)), + }); + assert_eq!( + t1.root_hash(Address::from_parts(Level::from(2), 0), Position::from(2)), + Ok("ab__".to_string()) + ); + assert_eq!( + 
t1.root_hash(Address::from_parts(Level::from(2), 0), Position::from(3)), + Err(vec![Address::from_parts(Level::from(1), 1)]) + ); + } + + #[test] + fn tree_marked_positions() { + let t: PrunableTree = Tree(Node::Parent { + ann: None, + left: Rc::new(Tree(Node::Leaf { + value: ("a".to_string(), EPHEMERAL), + })), + right: Rc::new(Tree(Node::Leaf { + value: ("b".to_string(), MARKED), + })), + }); + assert_eq!( + t.marked_positions(Address::from_parts(Level::from(1), 0)), + BTreeSet::from([Position::from(1)]) + ); + + let t0: PrunableTree = Tree(Node::Parent { + ann: None, + left: Rc::new(t.clone()), + right: Rc::new(t), + }); + assert_eq!( + t0.marked_positions(Address::from_parts(Level::from(2), 1)), + BTreeSet::from([Position::from(5), Position::from(7)]) + ); + } + + #[test] + fn tree_prune() { + let t: PrunableTree = Tree(Node::Parent { + ann: None, + left: Rc::new(Tree(Node::Leaf { + value: ("a".to_string(), EPHEMERAL), + })), + right: Rc::new(Tree(Node::Leaf { + value: ("b".to_string(), EPHEMERAL), + })), + }); + + assert_eq!( + t.clone().prune(Level::from(1)), + Tree(Node::Leaf { + value: ("ab".to_string(), EPHEMERAL) + }) + ); + + let t0: PrunableTree = Tree(Node::Parent { + ann: None, + left: Rc::new(Tree(Node::Leaf { + value: ("c".to_string(), MARKED), + })), + right: Rc::new(t), + }); + assert_eq!( + t0.prune(Level::from(2)), + Tree(Node::Parent { + ann: None, + left: Rc::new(Tree(Node::Leaf { + value: ("c".to_string(), MARKED), + },)), + right: Rc::new(Tree(Node::Leaf { + value: ("ab".to_string(), EPHEMERAL) + })) + },) + ); + } + + #[test] + fn tree_merge_checked() { + let t0: PrunableTree = Tree(Node::Parent { + ann: None, + left: Rc::new(Tree(Node::Leaf { + value: ("a".to_string(), EPHEMERAL), + })), + right: Rc::new(Tree(Node::Nil)), + }); + + let t1: PrunableTree = Tree(Node::Parent { + ann: None, + left: Rc::new(Tree(Node::Nil)), + right: Rc::new(Tree(Node::Leaf { + value: ("b".to_string(), EPHEMERAL), + })), + }); + + assert_eq!( + t0.clone() + 
.merge_checked(Address::from_parts(1.into(), 0), t1.clone()), + Ok(Tree(Node::Leaf { + value: ("ab".to_string(), EPHEMERAL) + })) + ); + + let t2: PrunableTree = Tree(Node::Parent { + ann: None, + left: Rc::new(Tree(Node::Leaf { + value: ("c".to_string(), EPHEMERAL), + })), + right: Rc::new(Tree(Node::Nil)), + }); + assert_eq!( + t0.clone() + .merge_checked(Address::from_parts(1.into(), 0), t2.clone()), + Err(Address::from_parts(0.into(), 0)) + ); + + let t3: PrunableTree = Tree(Node::Parent { + ann: None, + left: Rc::new(t0), + right: Rc::new(t2), + }); + let t4: PrunableTree = Tree(Node::Parent { + ann: None, + left: Rc::new(t1.clone()), + right: Rc::new(t1), + }); + + assert_eq!( + t3.merge_checked(Address::from_parts(2.into(), 0), t4), + Ok(Tree(Node::Leaf { + value: ("abcb".to_string(), EPHEMERAL) + })) + ); + } } From 34f6bd7ce58dc60346e5736cca33ffafe4c6cb82 Mon Sep 17 00:00:00 2001 From: Kris Nuttycombe Date: Fri, 13 Jan 2023 08:40:57 -0700 Subject: [PATCH 04/16] Add a `LocatedTree` type that pairs tree roots with address information. --- shardtree/src/lib.rs | 384 ++++++++++++++++++++++++++++++------------- 1 file changed, 270 insertions(+), 114 deletions(-) diff --git a/shardtree/src/lib.rs b/shardtree/src/lib.rs index 7b898b6..df5222d 100644 --- a/shardtree/src/lib.rs +++ b/shardtree/src/lib.rs @@ -513,6 +513,181 @@ impl PrunableTree { } } +/// A binary Merkle tree with its root at the given address. +#[derive(Clone, Debug, PartialEq, Eq)] +pub struct LocatedTree { + root_addr: Address, + root: Tree, +} + +impl LocatedTree { + /// Returns the root address of this tree. + pub fn root_addr(&self) -> Address { + self.root_addr + } + + /// Returns a reference to the root of the tree. + pub fn root(&self) -> &Tree { + &self.root + } + + /// Returns a new [`LocatedTree`] with the provided value replacing the annotation of its root + /// node, if that root node is a `Node::Parent`. Otherwise . 
+ pub fn reannotate_root(self, value: A) -> Self { + LocatedTree { + root_addr: self.root_addr, + root: self.root.reannotate_root(value), + } + } + + /// Returns the set of incomplete subtree roots contained within this tree, ordered by + /// increasing position. + pub fn incomplete(&self) -> Vec
{ + self.root.incomplete(self.root_addr) + } + + /// Returns the maximum position at which a non-Nil leaf has been observed in the tree. + /// + /// Note that no actual leaf value may exist at this position, as it may have previously been + /// pruned. + pub fn max_position(&self) -> Option { + fn go(addr: Address, root: &Tree) -> Option { + match &root.0 { + Node::Nil => None, + Node::Leaf { .. } => Some(addr.position_range_end() - 1), + Node::Parent { left, right, .. } => { + let (l_addr, r_addr) = addr.children().unwrap(); + go(r_addr, right.as_ref()).or_else(|| go(l_addr, left.as_ref())) + } + } + } + + go(self.root_addr, &self.root) + } + + /// Returns the value at the specified position, if any. + pub fn value_at_position(&self, position: Position) -> Option<&V> { + fn go(pos: Position, addr: Address, root: &Tree) -> Option<&V> { + match &root.0 { + Node::Parent { left, right, .. } => { + let (l_addr, r_addr) = addr.children().unwrap(); + if l_addr.position_range().contains(&pos) { + go(pos, l_addr, left) + } else { + go(pos, r_addr, right) + } + } + Node::Leaf { value } if addr.level() == Level::from(0) => Some(value), + _ => None, + } + } + + if self.root_addr.position_range().contains(&position) { + go(position, self.root_addr, &self.root) + } else { + None + } + } +} + +impl LocatedTree { + /// Constructs a new empty tree with its root at the provided address. + pub fn empty(root_addr: Address) -> Self { + Self { + root_addr, + root: Tree(Node::Nil), + } + } + + /// Constructs a new tree consisting of a single leaf with the provided value, and the + /// specified root address. + pub fn with_root_value(root_addr: Address, value: V) -> Self { + Self { + root_addr, + root: Tree(Node::Leaf { value }), + } + } + + /// Traverses this tree to find the child node at the specified address and returns it. 
+ /// + /// Returns `None` if the specified address is not a descendant of this tree's root address, or + /// if the tree is terminated by a [`Node::Nil`] or leaf node before the specified address can + /// be reached. + pub fn subtree(&self, addr: Address) -> Option { + fn go( + root_addr: Address, + root: &Tree, + addr: Address, + ) -> Option> { + if root_addr == addr { + Some(LocatedTree { + root_addr, + root: root.clone(), + }) + } else { + match &root.0 { + Node::Parent { left, right, .. } => { + let (l_addr, r_addr) = root_addr.children().unwrap(); + if l_addr.contains(&addr) { + go(l_addr, left.as_ref(), addr) + } else { + go(r_addr, right.as_ref(), addr) + } + } + _ => None, + } + } + } + + if self.root_addr.contains(&addr) { + go(self.root_addr, &self.root, addr) + } else { + None + } + } + + /// Decomposes this tree into the vector of its subtrees having height `level + 1`. + /// + /// If the root address of this tree is at or below the specified level in the tree, + /// the entire tree is returned as the sole element of the result vector. + pub fn decompose_to_level(self, level: Level) -> Vec { + fn go( + level: Level, + root_addr: Address, + root: Tree, + ) -> Vec> { + if root_addr.level() == level { + vec![LocatedTree { root_addr, root }] + } else { + match root.0 { + Node::Parent { left, right, .. 
} => { + let (l_addr, r_addr) = root_addr.children().unwrap(); + let mut l_decomposed = go( + level, + l_addr, + Rc::try_unwrap(left).unwrap_or_else(|rc| (*rc).clone()), + ); + let mut r_decomposed = go( + level, + r_addr, + Rc::try_unwrap(right).unwrap_or_else(|rc| (*rc).clone()), + ); + l_decomposed.append(&mut r_decomposed); + l_decomposed + } + _ => vec![], + } + } + } + + if level >= self.root_addr.level() { + vec![self] + } else { + go(level, self.root_addr, self.root) + } + } +} + // We need an applicative functor for Result for this function so that we can correctly // accumulate errors, but we don't have one so we just write a special- cased version here. fn accumulate_result_with( @@ -575,38 +750,48 @@ pub mod testing { #[cfg(test)] mod tests { - use crate::{Node, PrunableTree, Tree, EPHEMERAL, MARKED}; + use crate::{LocatedTree, Node, PrunableTree, Tree, EPHEMERAL, MARKED}; use incrementalmerkletree::{Address, Level, Position}; use std::collections::BTreeSet; use std::rc::Rc; + fn nil() -> Tree { + Tree(Node::Nil) + } + + fn str_leaf(c: &str) -> Tree { + Tree(Node::Leaf { + value: c.to_string(), + }) + } + + fn leaf(value: B) -> Tree { + Tree(Node::Leaf { value }) + } + + fn parent(left: Tree, right: Tree) -> Tree { + Tree(Node::Parent { + ann: A::default(), + left: Rc::new(left), + right: Rc::new(right), + }) + } + #[test] fn tree_incomplete() { - let t = Tree(Node::Parent { - ann: (), - left: Rc::new(Tree(Node::Nil)), - right: Rc::new(Tree(Node::Leaf { value: "a" })), - }); + let t: Tree<(), String> = parent(nil(), str_leaf("a")); assert_eq!( t.incomplete(Address::from_parts(Level::from(1), 0)), vec![Address::from_parts(Level::from(0), 0)] ); - let t0 = Tree(Node::Parent { - ann: (), - left: Rc::new(Tree(Node::Leaf { value: "b" })), - right: Rc::new(t.clone()), - }); + let t0 = parent(str_leaf("b"), t.clone()); assert_eq!( t0.incomplete(Address::from_parts(Level::from(2), 1)), vec![Address::from_parts(Level::from(0), 6)] ); - let t1 = Tree(Node::Parent 
{ - ann: (), - left: Rc::new(Tree(Node::Nil)), - right: Rc::new(t), - }); + let t1 = parent(nil(), t); assert_eq!( t1.incomplete(Address::from_parts(Level::from(2), 1)), vec![ @@ -618,36 +803,24 @@ mod tests { #[test] fn tree_root() { - let t: PrunableTree = Tree(Node::Parent { - ann: None, - left: Rc::new(Tree(Node::Leaf { - value: ("a".to_string(), EPHEMERAL), - })), - right: Rc::new(Tree(Node::Leaf { - value: ("b".to_string(), EPHEMERAL), - })), - }); + let t: PrunableTree = parent( + leaf(("a".to_string(), EPHEMERAL)), + leaf(("b".to_string(), EPHEMERAL)), + ); + assert_eq!( t.root_hash(Address::from_parts(Level::from(1), 0), Position::from(2)), Ok("ab".to_string()) ); - let t0: PrunableTree = Tree(Node::Parent { - ann: None, - left: Rc::new(Tree(Node::Nil)), - right: Rc::new(t.clone()), - }); + let t0 = parent(nil(), t.clone()); assert_eq!( t0.root_hash(Address::from_parts(Level::from(2), 0), Position::from(4)), Err(vec![Address::from_parts(Level::from(1), 0)]) ); // Check root computation with truncation - let t1: PrunableTree = Tree(Node::Parent { - ann: None, - left: Rc::new(t), - right: Rc::new(Tree(Node::Nil)), - }); + let t1 = parent(t, nil()); assert_eq!( t1.root_hash(Address::from_parts(Level::from(2), 0), Position::from(2)), Ok("ab__".to_string()) @@ -660,25 +833,16 @@ mod tests { #[test] fn tree_marked_positions() { - let t: PrunableTree = Tree(Node::Parent { - ann: None, - left: Rc::new(Tree(Node::Leaf { - value: ("a".to_string(), EPHEMERAL), - })), - right: Rc::new(Tree(Node::Leaf { - value: ("b".to_string(), MARKED), - })), - }); + let t: PrunableTree = parent( + leaf(("a".to_string(), EPHEMERAL)), + leaf(("b".to_string(), MARKED)), + ); assert_eq!( t.marked_positions(Address::from_parts(Level::from(1), 0)), BTreeSet::from([Position::from(1)]) ); - let t0: PrunableTree = Tree(Node::Parent { - ann: None, - left: Rc::new(t.clone()), - right: Rc::new(t), - }); + let t0 = parent(t.clone(), t); assert_eq!( 
t0.marked_positions(Address::from_parts(Level::from(2), 1)), BTreeSet::from([Position::from(5), Position::from(7)]) @@ -687,99 +851,91 @@ mod tests { #[test] fn tree_prune() { - let t: PrunableTree = Tree(Node::Parent { - ann: None, - left: Rc::new(Tree(Node::Leaf { - value: ("a".to_string(), EPHEMERAL), - })), - right: Rc::new(Tree(Node::Leaf { - value: ("b".to_string(), EPHEMERAL), - })), - }); + let t: PrunableTree = parent( + leaf(("a".to_string(), EPHEMERAL)), + leaf(("b".to_string(), EPHEMERAL)), + ); assert_eq!( t.clone().prune(Level::from(1)), - Tree(Node::Leaf { - value: ("ab".to_string(), EPHEMERAL) - }) + leaf(("ab".to_string(), EPHEMERAL)) ); - let t0: PrunableTree = Tree(Node::Parent { - ann: None, - left: Rc::new(Tree(Node::Leaf { - value: ("c".to_string(), MARKED), - })), - right: Rc::new(t), - }); + let t0 = parent(leaf(("c".to_string(), MARKED)), t); assert_eq!( t0.prune(Level::from(2)), - Tree(Node::Parent { - ann: None, - left: Rc::new(Tree(Node::Leaf { - value: ("c".to_string(), MARKED), - },)), - right: Rc::new(Tree(Node::Leaf { - value: ("ab".to_string(), EPHEMERAL) - })) - },) + parent( + leaf(("c".to_string(), MARKED)), + leaf(("ab".to_string(), EPHEMERAL)) + ) ); } #[test] fn tree_merge_checked() { - let t0: PrunableTree = Tree(Node::Parent { - ann: None, - left: Rc::new(Tree(Node::Leaf { - value: ("a".to_string(), EPHEMERAL), - })), - right: Rc::new(Tree(Node::Nil)), - }); + let t0: PrunableTree = parent(leaf(("a".to_string(), EPHEMERAL)), nil()); - let t1: PrunableTree = Tree(Node::Parent { - ann: None, - left: Rc::new(Tree(Node::Nil)), - right: Rc::new(Tree(Node::Leaf { - value: ("b".to_string(), EPHEMERAL), - })), - }); + let t1: PrunableTree = parent(nil(), leaf(("b".to_string(), EPHEMERAL))); assert_eq!( t0.clone() .merge_checked(Address::from_parts(1.into(), 0), t1.clone()), - Ok(Tree(Node::Leaf { - value: ("ab".to_string(), EPHEMERAL) - })) + Ok(leaf(("ab".to_string(), EPHEMERAL))) ); - let t2: PrunableTree = Tree(Node::Parent { - 
ann: None, - left: Rc::new(Tree(Node::Leaf { - value: ("c".to_string(), EPHEMERAL), - })), - right: Rc::new(Tree(Node::Nil)), - }); + let t2: PrunableTree = parent(leaf(("c".to_string(), EPHEMERAL)), nil()); assert_eq!( t0.clone() .merge_checked(Address::from_parts(1.into(), 0), t2.clone()), Err(Address::from_parts(0.into(), 0)) ); - let t3: PrunableTree = Tree(Node::Parent { - ann: None, - left: Rc::new(t0), - right: Rc::new(t2), - }); - let t4: PrunableTree = Tree(Node::Parent { - ann: None, - left: Rc::new(t1.clone()), - right: Rc::new(t1), - }); + let t3: PrunableTree = parent(t0, t2); + let t4: PrunableTree = parent(t1.clone(), t1); assert_eq!( t3.merge_checked(Address::from_parts(2.into(), 0), t4), - Ok(Tree(Node::Leaf { - value: ("abcb".to_string(), EPHEMERAL) - })) + Ok(leaf(("abcb".to_string(), EPHEMERAL))) + ); + } + + #[test] + fn located_tree() { + let l = parent(str_leaf("a"), str_leaf("b")); + let r = parent(str_leaf("c"), str_leaf("d")); + + let t: LocatedTree<(), String> = LocatedTree { + root_addr: Address::from_parts(2.into(), 1), + root: parent(l.clone(), r.clone()), + }; + + assert_eq!(t.max_position(), Some(7.into())); + assert_eq!(t.value_at_position(5.into()), Some(&"b".to_string())); + assert_eq!(t.value_at_position(8.into()), None); + assert_eq!(t.subtree(Address::from_parts(0.into(), 1)), None); + assert_eq!(t.subtree(Address::from_parts(3.into(), 0)), None); + + let subtree_addr = Address::from_parts(1.into(), 3); + assert_eq!( + t.subtree(subtree_addr), + Some(LocatedTree { + root_addr: subtree_addr, + root: r.clone() + }) + ); + + assert_eq!( + t.decompose_to_level(1.into()), + vec![ + LocatedTree { + root_addr: Address::from_parts(1.into(), 2), + root: l, + }, + LocatedTree { + root_addr: Address::from_parts(1.into(), 3), + root: r, + } + ] ); } } From dc5a3ed0e7775e36f7a46a4dadaf19e10b322376 Mon Sep 17 00:00:00 2001 From: Kris Nuttycombe Date: Fri, 13 Jan 2023 08:40:57 -0700 Subject: [PATCH 05/16] Add types & operations for individual 
shards. This adds the `LocatedPrunableTree` type, which provides the complete set of operations for individual shards within a larger tree. --- shardtree/src/lib.rs | 881 ++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 877 insertions(+), 4 deletions(-) diff --git a/shardtree/src/lib.rs b/shardtree/src/lib.rs index df5222d..c83eb34 100644 --- a/shardtree/src/lib.rs +++ b/shardtree/src/lib.rs @@ -1,7 +1,7 @@ use core::fmt::Debug; -use core::ops::{BitAnd, BitOr, Deref, Not}; +use core::ops::{BitAnd, BitOr, Deref, Not, Range}; use either::Either; -use std::collections::BTreeSet; +use std::collections::{BTreeMap, BTreeSet}; use std::rc::Rc; use incrementalmerkletree::{Address, Hashable, Level, Position, Retention}; @@ -688,6 +688,754 @@ impl LocatedTree { } } +type LocatedPrunableTree = LocatedTree>, (H, RetentionFlags)>; + +/// A data structure describing the nature of a [`Node::Nil`] node in the tree that was introduced +/// as the consequence of an insertion. +#[derive(Clone, Copy, Debug, PartialEq, Eq)] +pub struct IncompleteAt { + /// The address of the empty node. + pub address: Address, + /// A flag identifying whether or not the missing node is required in order to construct a + /// witness for a node with [`MARKED`] retention. + pub required_for_witness: bool, +} + +/// A type for the result of a batch insertion operation. +/// +/// This result type contains the newly constructed tree, the addresses any new incomplete internal +/// nodes within that tree that were introduced as a consequence of that insertion, and the +/// remainder of the iterator that provided the inserted values. +#[derive(Debug)] +pub struct BatchInsertionResult)>> { + /// The updated tree after all insertions have been performed. + pub subtree: LocatedPrunableTree, + /// A flag identifying whether the constructed subtree contains a marked node. 
+ pub contains_marked: bool, + /// The vector of addresses of [`Node::Nil`] nodes that were inserted into the tree as part of + /// the insertion operation, for nodes that are required in order to construct a witness for + /// each inserted leaf with [`MARKED`] retention. + pub incomplete: Vec, + /// The maximum position at which a leaf was inserted. + pub max_insert_position: Option, + /// The positions of all leaves with [`CHECKPOINT`] retention that were inserted. + pub checkpoints: BTreeMap, + /// The unconsumed remainder of the iterator from which leaves were inserted, if the tree + /// was completely filled before the iterator was fully consumed. + pub remainder: I, +} + +/// An error prevented the insertion of values into the subtree. +#[derive(Clone, Debug, PartialEq, Eq)] +pub enum InsertionError { + /// The caller attempted to insert a subtree into a tree that does not contain + /// the subtree's root address. + NotContained, + /// The start of the range of positions provided for insertion is not included + /// in the range of positions within this subtree. + OutOfRange(Range), + /// An existing root hash conflicts with the root hash of a node being inserted. + Conflict(Address), + /// An out-of-order checkpoint was detected + /// + /// Checkpoint identifiers must be in nondecreasing order relative to tree positions. + CheckpointOutOfOrder, + /// An append operation has exceeded the capacity of the tree. + TreeFull, + /// An error was produced by the underlying [`ShardStore`] + Storage(S), +} + +/// Errors that may be returned in the process of querying a [`ShardTree`] +#[derive(Clone, Debug, PartialEq, Eq)] +pub enum QueryError { + /// The caller attempted to query the value at an address within a tree that does not contain + /// that address. + NotContained(Address), + /// A leaf required by a given checkpoint has been pruned, or is otherwise not accessible in + /// the tree. 
+ CheckpointPruned, + /// It is not possible to compute a root for one or more subtrees because they contain + /// [`Node::Nil`] values at positions that cannot be replaced with default hashes. + TreeIncomplete(Vec
), +} + +/// Operations on [`LocatedTree`]s that are annotated with Merkle hashes. +impl LocatedPrunableTree { + /// Computes the root hash of this tree, truncated to the given position. + /// + /// If the tree contains any [`Node::Nil`] nodes corresponding to positions less than + /// `truncate_at`, this will return an error containing the addresses of those nodes within the + /// tree. + pub fn root_hash(&self, truncate_at: Position) -> Result> { + self.root.root_hash(self.root_addr, truncate_at) + } + + /// Compute the root hash of this subtree, filling empty nodes along the rightmost path of the + /// subtree with the empty root value for the given level. + /// + /// This should only be used for computing roots when it is known that no successor trees + /// exist. + /// + /// If the tree contains any [`Node::Nil`] nodes that are to the left of filled nodes in the + /// tree, this will return an error containing the addresses of those nodes. + pub fn right_filled_root(&self) -> Result> { + self.root_hash( + self.max_position() + .map_or_else(|| self.root_addr.position_range_start(), |pos| pos + 1), + ) + } + + /// Returns the positions of marked leaves in the tree. + pub fn marked_positions(&self) -> BTreeSet { + fn go( + root_addr: Address, + root: &PrunableTree, + acc: &mut BTreeSet, + ) { + match &root.0 { + Node::Parent { left, right, .. } => { + let (l_addr, r_addr) = root_addr.children().unwrap(); + go(l_addr, left.as_ref(), acc); + go(r_addr, right.as_ref(), acc); + } + Node::Leaf { value } => { + if value.1.is_marked() && root_addr.level() == 0.into() { + acc.insert(Position::from(root_addr.index())); + } + } + _ => {} + } + } + + let mut result = BTreeSet::new(); + go(self.root_addr, &self.root, &mut result); + result + } + + /// Compute the witness for the leaf at the specified position. 
+ /// + /// This tree will be truncated to the `truncate_at` position, and then + /// empty roots corresponding to later positions will be filled by [`H::empty_root`]. + /// + /// Returns either the witness for the leaf at the specified position, or an error that + /// describes the causes of failure. + pub fn witness(&self, position: Position, truncate_at: Position) -> Result, QueryError> { + // traverse down to the desired leaf position, and then construct + // the authentication path on the way back up. + fn go( + root: &PrunableTree, + root_addr: Address, + position: Position, + truncate_at: Position, + ) -> Result, Vec
> { + match &root.0 { + Node::Parent { left, right, .. } => { + let (l_addr, r_addr) = root_addr.children().unwrap(); + if root_addr.level() > 1.into() { + let r_start = r_addr.position_range_start(); + if position < r_start { + accumulate_result_with( + go(left.as_ref(), l_addr, position, truncate_at), + right.as_ref().root_hash(r_addr, truncate_at), + |mut witness, sibling_root| { + witness.push(sibling_root); + witness + }, + ) + } else { + // if the position we're witnessing is down the right-hand branch then + // we always set the truncation bound outside the range of leaves on the + // left, because we don't allow any empty nodes to the left + accumulate_result_with( + left.as_ref().root_hash(l_addr, r_start), + go(right.as_ref(), r_addr, position, truncate_at), + |sibling_root, mut witness| { + witness.push(sibling_root); + witness + }, + ) + } + } else { + // we handle the level 0 leaves here by adding the sibling of our desired + // leaf to the witness + if position.is_odd() { + if right.is_marked_leaf() { + left.leaf_value() + .map(|v| vec![v.clone()]) + .ok_or_else(|| vec![l_addr]) + } else { + Err(vec![l_addr]) + } + } else if left.is_marked_leaf() { + // If the truncation bound excludes the right-hand leaf (that is, + // `truncate_at <= position + 1`), the sibling is treated as the empty + // leaf; otherwise the actual right-hand leaf value is required. + if truncate_at <= position + 1 { + Ok(vec![H::empty_leaf()]) + } else { + right + .leaf_value() + .map_or_else(|| Err(vec![r_addr]), |v| Ok(vec![v.clone()])) + } + } else { + Err(vec![r_addr]) + } + } + } + _ => { + // if we encounter a nil or leaf node, we were unable to descend + // to the leaf at the desired position. 
+ Err(vec![root_addr]) + } + } + } + + if self.root_addr.position_range().contains(&position) { + go(&self.root, self.root_addr, position, truncate_at) + .map_err(QueryError::TreeIncomplete) + } else { + Err(QueryError::NotContained(self.root_addr)) + } + } + + /// Prunes this tree by replacing all nodes that are right-hand children along the path + /// to the specified position with [`Node::Nil`]. + /// + /// The leaf at the specified position is retained. + pub fn truncate_to_position(&self, position: Position) -> Option { + fn go( + position: Position, + root_addr: Address, + root: &PrunableTree, + ) -> Option> { + match &root.0 { + Node::Parent { ann, left, right } => { + let (l_child, r_child) = root_addr.children().unwrap(); + if position < r_child.position_range_start() { + // we are truncating within the range of the left node, so recurse + // to the left to truncate the left child and then reconstruct the + // node with `Nil` as the right sibling + go(position, l_child, left.as_ref()).map(|left| { + Tree::unite(l_child.level(), ann.clone(), left, Tree(Node::Nil)) + }) + } else { + // we are truncating within the range of the right node, so recurse + // to the right to truncate the right child and then reconstruct the + // node with the left sibling unchanged + go(position, r_child, right.as_ref()).map(|right| { + Tree::unite(r_child.level(), ann.clone(), left.as_ref().clone(), right) + }) + } + } + Node::Leaf { .. } => { + if root_addr.max_position() <= position { + Some(root.clone()) + } else { + None + } + } + Node::Nil => None, + } + } + + if self.root_addr.position_range().contains(&position) { + go(position, self.root_addr, &self.root).map(|root| LocatedTree { + root_addr: self.root_addr, + root, + }) + } else { + None + } + } + + /// Inserts a descendant subtree into this subtree, creating empty sibling nodes as necessary + /// to fill out the tree. 
+ /// + /// In the case that a leaf node would be replaced by an incomplete subtree, the resulting + /// parent node will be annotated with the existing leaf value. + /// + /// Returns the updated tree, along with the addresses of any [`Node::Nil`] nodes that were + /// inserted in the process of creating the parent nodes down to the insertion point, or an + /// error if the specified subtree's root address is not in the range of valid descendants of + /// the root node of this tree or if the insertion would result in a conflict between computed + /// root hashes of complete subtrees. + pub fn insert_subtree( + &self, + subtree: Self, + contains_marked: bool, + ) -> Result<(Self, Vec), InsertionError> { + // A function to recursively dig into the tree, creating a path downward and introducing + // empty nodes as necessary until we can insert the provided subtree. + #[allow(clippy::type_complexity)] + fn go( + root_addr: Address, + into: &PrunableTree, + subtree: LocatedPrunableTree, + is_complete: bool, + contains_marked: bool, + ) -> Result<(PrunableTree, Vec), InsertionError> { + // In the case that we are replacing a node entirely, we need to extend the + // subtree up to the level of the node being replaced, adding Nil siblings + // and recording the presence of those incomplete nodes when necessary + let replacement = |ann: Option>, mut node: LocatedPrunableTree| { + // construct the replacement node bottom-up + let mut incomplete = vec![]; + while node.root_addr.level() < root_addr.level() { + incomplete.push(IncompleteAt { + address: node.root_addr.sibling(), + required_for_witness: contains_marked, + }); + node = LocatedTree { + root_addr: node.root_addr.parent(), + root: if node.root_addr.is_right_child() { + Tree(Node::Parent { + ann: None, + left: Rc::new(Tree(Node::Nil)), + right: Rc::new(node.root), + }) + } else { + Tree(Node::Parent { + ann: None, + left: Rc::new(node.root), + right: Rc::new(Tree(Node::Nil)), + }) + }, + }; + } + 
(node.root.reannotate_root(ann), incomplete) + }; + + match into { + Tree(Node::Nil) => Ok(replacement(None, subtree)), + Tree(Node::Leaf { value: (value, _) }) => { + if root_addr == subtree.root_addr { + if is_complete { + // It is safe to replace the existing root unannotated, because we + // can always recompute the root from a complete subtree. + Ok((subtree.root, vec![])) + } else if subtree + .root + .0 + .annotation() + .and_then(|ann| ann.as_ref()) + .iter() + .all(|v| v.as_ref() == value) + { + Ok(( + // at this point we statically know the root to be a parent + subtree.root.reannotate_root(Some(Rc::new(value.clone()))), + vec![], + )) + } else { + Err(InsertionError::Conflict(root_addr)) + } + } else { + Ok(replacement(Some(Rc::new(value.clone())), subtree)) + } + } + parent if root_addr == subtree.root_addr => { + // Merge the existing subtree with the subtree being inserted. + // A merge operation can't introduce any new incomplete roots. + parent + .clone() + .merge_checked(root_addr, subtree.root) + .map_err(InsertionError::Conflict) + .map(|tree| (tree, vec![])) + } + Tree(Node::Parent { ann, left, right }) => { + // In this case, we have an existing parent but we need to dig down farther + // before we can insert the subtree that we're carrying for insertion. 
+ let (l_addr, r_addr) = root_addr.children().unwrap(); + if l_addr.contains(&subtree.root_addr) { + let (new_left, incomplete) = + go(l_addr, left.as_ref(), subtree, is_complete, contains_marked)?; + Ok(( + Tree::unite( + root_addr.level() - 1, + ann.clone(), + new_left, + right.as_ref().clone(), + ), + incomplete, + )) + } else { + let (new_right, incomplete) = go( + r_addr, + right.as_ref(), + subtree, + is_complete, + contains_marked, + )?; + Ok(( + Tree::unite( + root_addr.level() - 1, + ann.clone(), + left.as_ref().clone(), + new_right, + ), + incomplete, + )) + } + } + } + } + + let LocatedTree { root_addr, root } = self; + if root_addr.contains(&subtree.root_addr) { + let complete = subtree.root.reduce(&is_complete); + go(*root_addr, root, subtree, complete, contains_marked).map(|(root, incomplete)| { + ( + LocatedTree { + root_addr: *root_addr, + root, + }, + incomplete, + ) + }) + } else { + Err(InsertionError::NotContained) + } + } + + /// Append a single value at the first available position in the tree. + /// + /// Prefer to use [`Self::batch_append`] or [`Self::batch_insert`] when appending multiple + /// values, as these operations require fewer traversals of the tree than are necessary when + /// performing multiple sequential calls to [`Self::append`]. + pub fn append( + &self, + value: H, + retention: Retention, + ) -> Result<(Self, Position, Option), InsertionError> { + let checkpoint_id = if let Retention::Checkpoint { id, .. } = &retention { + Some(id.clone()) + } else { + None + }; + + self.batch_append(Some((value, retention)).into_iter()) + // We know that the max insert position will have been incremented by one. 
+ .and_then(|r| { + let mut r = r.expect("We know the iterator to have been nonempty."); + if r.remainder.next().is_some() { + Err(InsertionError::TreeFull) + } else { + Ok((r.subtree, r.max_insert_position.unwrap(), checkpoint_id)) + } + }) + } + + /// Append a values from an iterator, beginning at the first available position in the tree. + /// + /// Returns an error if the tree is full. If the position at the end of the iterator is outside + /// of the subtree's range, the unconsumed part of the iterator will be returned as part of + /// the result. + pub fn batch_append)>, E>( + &self, + values: I, + ) -> Result>, InsertionError> { + let append_position = self + .max_position() + .map(|p| p + 1) + .unwrap_or_else(|| self.root_addr.position_range_start()); + self.batch_insert(append_position, values) + } + + /// Builds a [`LocatedPrunableTree`] from an iterator of level-0 leaves. + /// + /// This may be used in conjunction with [`ShardTree::insert_tree`] to support + /// partially-parallelizable tree construction. Multiple subtrees may be constructed in + /// parallel from iterators over (preferably, though not necessarily) disjoint leaf ranges, and + /// [`ShardTree::insert_tree`] may be used to insert those subtrees into the `ShardTree` in + /// arbitrary order. + /// + /// * `position_range` - The range of leaf positions at which values will be inserted. This + /// range is also used to place an upper bound on the number of items that will be consumed + /// from the `values` iterator. + /// * `prune_below` - Nodes with [`EPHEMERAL`] retention that are not required to be retained + /// in order to construct a witness for a marked node or to make it possible to rewind to a + /// checkpointed node may be pruned so long as their address is at less than the specified + /// level. + /// * `values` The iterator of `(H, Retention)` pairs from which to construct the tree. 
+ pub fn from_iter)>>( + position_range: Range, + prune_below: Level, + mut values: I, + ) -> Option> { + // Unite two subtrees by either adding a parent node, or a leaf containing the Merkle root + // of such a parent if both nodes are ephemeral leaves. + // + // `unite` is only called when both root addrs have the same parent. `batch_insert` never + // constructs Nil nodes, so we don't create any incomplete root information here. + fn unite( + lroot: LocatedPrunableTree, + rroot: LocatedPrunableTree, + prune_below: Level, + ) -> LocatedTree>, (H, RetentionFlags)> { + LocatedTree { + root_addr: lroot.root_addr.parent(), + root: if lroot.root_addr.level() < prune_below { + Tree::unite(lroot.root_addr.level(), None, lroot.root, rroot.root) + } else { + Tree(Node::Parent { + ann: None, + left: Rc::new(lroot.root), + right: Rc::new(rroot.root), + }) + }, + } + } + + // Builds a single tree from the provided stack of subtrees, which must be non-overlapping + // and in position order. Returns the resulting tree, a flag indicating whether the + // resulting tree contains a `MARKED` node, and the vector of [`IncompleteAt`] values for + // [`Node::Nil`] nodes that were introduced in the process of constructing the tree. + fn build_minimal_tree( + mut xs: Vec<(LocatedPrunableTree, bool)>, + prune_below: Level, + ) -> Option<(LocatedPrunableTree, bool, Vec)> { + // First, consume the stack from the right, building up a single tree + // until we can't combine any more. 
+ if let Some((mut cur, mut contains_marked)) = xs.pop() { + let mut incomplete = vec![]; + while let Some((top, top_marked)) = xs.pop() { + while cur.root_addr.level() < top.root_addr.level() { + let sibling_addr = cur.root_addr.sibling(); + incomplete.push(IncompleteAt { + address: sibling_addr, + required_for_witness: top_marked, + }); + cur = unite( + cur, + LocatedTree { + root_addr: sibling_addr, + root: Tree(Node::Nil), + }, + prune_below, + ); + } + + if cur.root_addr.level() == top.root_addr.level() { + contains_marked = contains_marked || top_marked; + if cur.root_addr.is_right_child() { + // We have a left child and a right child, so unite them. + cur = unite(top, cur, prune_below); + } else { + // This is a left child, so we build it up one more level and then + // we've merged as much as we can from the right and need to work from + // the left + xs.push((top, top_marked)); + let sibling_addr = cur.root_addr.sibling(); + incomplete.push(IncompleteAt { + address: sibling_addr, + required_for_witness: top_marked, + }); + cur = unite( + cur, + LocatedTree { + root_addr: sibling_addr, + root: Tree(Node::Nil), + }, + prune_below, + ); + break; + } + } else { + // top.root_addr.level < cur.root_addr.level, so we've merged as much as we + // can from the right and now need to work from the left. + xs.push((top, top_marked)); + break; + } + } + + // push our accumulated max-height right hand node back on to the stack. 
+ xs.push((cur, contains_marked)); + + // From the stack of subtrees, construct a single sparse tree that can be + // inserted/merged into the existing tree + let res_tree = xs.into_iter().fold( + None, + |acc: Option>, (next_tree, next_marked)| { + if let Some(mut prev_tree) = acc { + // add nil branches to build up the left tree until we can merge it + // with the right + while prev_tree.root_addr.level() < next_tree.root_addr.level() { + let sibling_addr = prev_tree.root_addr.sibling(); + contains_marked = contains_marked || next_marked; + incomplete.push(IncompleteAt { + address: sibling_addr, + required_for_witness: next_marked, + }); + prev_tree = unite( + LocatedTree { + root_addr: sibling_addr, + root: Tree(Node::Nil), + }, + prev_tree, + prune_below, + ); + } + + // at this point, prev_tree.level == next_tree.level + Some(unite(prev_tree, next_tree, prune_below)) + } else { + Some(next_tree) + } + }, + ); + + res_tree.map(|t| (t, contains_marked, incomplete)) + } else { + None + } + } + + // A stack of complete subtrees to be inserted as descendants into the subtree labeled + // with the addresses at which they will be inserted, along with their root hashes. + let mut fragments: Vec<(Self, bool)> = vec![]; + let mut position = position_range.start; + let mut checkpoints: BTreeMap = BTreeMap::new(); + while position < position_range.end { + if let Some((value, retention)) = values.next() { + if let Retention::Checkpoint { id, .. 
} = &retention { + checkpoints.insert(id.clone(), position); + } + + let rflags = RetentionFlags::from(retention); + let mut subtree = LocatedTree { + root_addr: Address::from(position), + root: Tree(Node::Leaf { + value: (value.clone(), rflags), + }), + }; + + if position.is_odd() { + // At odd positions, we are completing a subtree and so we unite fragments + // up the stack until we get the largest possible subtree + while let Some((potential_sibling, marked)) = fragments.pop() { + if potential_sibling.root_addr.parent() == subtree.root_addr.parent() { + subtree = unite(potential_sibling, subtree, prune_below); + } else { + // this is not a sibling node, so we push it back on to the stack + // and are done + fragments.push((potential_sibling, marked)); + break; + } + } + } + + fragments.push((subtree, rflags.is_marked())); + position += 1; + } else { + break; + } + } + + build_minimal_tree(fragments, prune_below).map( + |(to_insert, contains_marked, incomplete)| BatchInsertionResult { + subtree: to_insert, + contains_marked, + incomplete, + max_insert_position: Some(position - 1), + checkpoints, + remainder: values, + }, + ) + } + + /// Put a range of values into the subtree by consuming the given iterator, starting at the + /// specified position. + /// + /// The start position must exist within the position range of this subtree. If the position at + /// the end of the iterator is outside of the subtree's range, the unconsumed part of the + /// iterator will be returned as part of the result. + /// + /// Returns `Ok(None)` if the provided iterator is empty, `Ok(Some)` if + /// values were successfully inserted, or an error if the start position provided is outside + /// of this tree's position range or if a conflict with an existing subtree root is detected. 
+ pub fn batch_insert)>, E>( + &self, + start: Position, + values: I, + ) -> Result>, InsertionError> { + let subtree_range = self.root_addr.position_range(); + let contains_start = subtree_range.contains(&start); + if contains_start { + let position_range = Range { + start, + end: subtree_range.end, + }; + Self::from_iter(position_range, self.root_addr.level(), values) + .map(|mut res| { + let (subtree, mut incomplete) = self + .clone() + .insert_subtree(res.subtree, res.contains_marked)?; + res.subtree = subtree; + res.incomplete.append(&mut incomplete); + Ok(res) + }) + .transpose() + } else { + Err(InsertionError::OutOfRange(subtree_range)) + } + } + + /// Clears the specified retention flags at all positions specified, pruning any branches + /// that no longer need to be retained. + pub fn clear_flags(&self, to_clear: BTreeMap) -> Self { + fn go( + to_clear: &[(Position, RetentionFlags)], + root_addr: Address, + root: &PrunableTree, + ) -> PrunableTree { + if to_clear.is_empty() { + // nothing to do, so we just return the root + root.clone() + } else { + match root { + Tree(Node::Parent { ann, left, right }) => { + let (l_addr, r_addr) = root_addr.children().unwrap(); + + let p = to_clear.partition_point(|(p, _)| p < &l_addr.position_range_end()); + Tree::unite( + l_addr.level(), + ann.clone(), + go(&to_clear[0..p], l_addr, left), + go(&to_clear[p..], r_addr, right), + ) + } + Tree(Node::Leaf { value: (h, r) }) => { + // When we reach a leaf, we should be down to just a single position + // which should correspond to the last level-0 child of the address's + // subtree range; if it's a checkpoint this will always be the case for + // a partially-pruned branch, and if it's a marked node then it will + // be a level-0 leaf. 
+ match to_clear { + [(pos, flags)] => { + assert_eq!(*pos, root_addr.max_position()); + Tree(Node::Leaf { + value: (h.clone(), *r & !*flags), + }) + } + _ => { + panic!("Tree state inconsistent with checkpoints."); + } + } + } + Tree(Node::Nil) => Tree(Node::Nil), + } + } + } + + let to_clear = to_clear.into_iter().collect::>(); + Self { + root_addr: self.root_addr, + root: go(&to_clear, self.root_addr, &self.root), + } + } +} + // We need an applicative functor for Result for this function so that we can correctly // accumulate errors, but we don't have one so we just write a special- cased version here. fn accumulate_result_with( @@ -750,8 +1498,11 @@ pub mod testing { #[cfg(test)] mod tests { - use crate::{LocatedTree, Node, PrunableTree, Tree, EPHEMERAL, MARKED}; - use incrementalmerkletree::{Address, Level, Position}; + use crate::{ + LocatedPrunableTree, LocatedTree, Node, PrunableTree, QueryError, Tree, EPHEMERAL, MARKED, + }; + use core::convert::Infallible; + use incrementalmerkletree::{Address, Level, Position, Retention}; use std::collections::BTreeSet; use std::rc::Rc; @@ -938,4 +1689,126 @@ mod tests { ] ); } + + #[test] + fn located_prunable_tree_insert() { + let tree = LocatedPrunableTree::empty(Address::from_parts(Level::from(2), 0)); + let (base, _, _) = tree + .append::<(), Infallible>("a".to_string(), Retention::Ephemeral) + .unwrap(); + assert_eq!(base.right_filled_root(), Ok("a___".to_string())); + + // Perform an in-order insertion. + let (in_order, pos, _) = base + .append::<(), Infallible>("b".to_string(), Retention::Ephemeral) + .unwrap(); + assert_eq!(pos, 1.into()); + assert_eq!(in_order.right_filled_root(), Ok("ab__".to_string())); + + // On the same tree, perform an out-of-order insertion. 
+ let out_of_order = base + .batch_insert::<(), _, Infallible>( + Position::from(3), + vec![("d".to_string(), Retention::Ephemeral)].into_iter(), + ) + .unwrap() + .unwrap(); + assert_eq!( + out_of_order.subtree, + LocatedPrunableTree { + root_addr: Address::from_parts(2.into(), 0), + root: parent( + parent(leaf(("a".to_string(), EPHEMERAL)), nil()), + parent(nil(), leaf(("d".to_string(), EPHEMERAL))) + ) + } + ); + + let complete = out_of_order + .subtree + .batch_insert::<(), _, Infallible>( + Position::from(1), + vec![ + ("b".to_string(), Retention::Ephemeral), + ("c".to_string(), Retention::Ephemeral), + ] + .into_iter(), + ) + .unwrap() + .unwrap(); + assert_eq!(complete.subtree.right_filled_root(), Ok("abcd".to_string())); + } + + #[test] + fn located_prunable_tree_insert_subtree() { + let t: LocatedPrunableTree = LocatedTree { + root_addr: Address::from_parts(3.into(), 1), + root: parent( + leaf(("abcd".to_string(), EPHEMERAL)), + parent(nil(), leaf(("gh".to_string(), EPHEMERAL))), + ), + }; + + assert_eq!( + t.insert_subtree::( + LocatedTree { + root_addr: Address::from_parts(1.into(), 6), + root: parent(leaf(("e".to_string(), MARKED)), nil()) + }, + true + ), + Ok(( + LocatedTree { + root_addr: Address::from_parts(3.into(), 1), + root: parent( + leaf(("abcd".to_string(), EPHEMERAL)), + parent( + parent(leaf(("e".to_string(), MARKED)), nil()), + leaf(("gh".to_string(), EPHEMERAL)) + ) + ) + }, + vec![] + )) + ); + } + + #[test] + fn located_prunable_tree_witness() { + let t: LocatedPrunableTree = LocatedTree { + root_addr: Address::from_parts(3.into(), 0), + root: parent( + leaf(("abcd".to_string(), EPHEMERAL)), + parent( + parent( + leaf(("e".to_string(), MARKED)), + leaf(("f".to_string(), EPHEMERAL)), + ), + leaf(("gh".to_string(), EPHEMERAL)), + ), + ), + }; + + assert_eq!( + t.witness(4.into(), 8.into()), + Ok(vec!["f", "gh", "abcd"] + .into_iter() + .map(|s| s.to_string()) + .collect()) + ); + assert_eq!( + t.witness(4.into(), 6.into()), + Ok(vec!["f", 
"__", "abcd"] + .into_iter() + .map(|s| s.to_string()) + .collect()) + ); + assert_eq!( + t.witness(4.into(), 7.into()), + Err(QueryError::TreeIncomplete(vec![Address::from_parts( + 1.into(), + 3 + )])) + ); + } } From ebe3efa135ae934089a497241f3be1e42b226328 Mon Sep 17 00:00:00 2001 From: Kris Nuttycombe Date: Fri, 13 Jan 2023 08:40:57 -0700 Subject: [PATCH 06/16] Add ShardTree types & implement append operation. --- incrementalmerkletree/src/testing.rs | 32 +- .../src/testing/complete_tree.rs | 9 +- shardtree/src/lib.rs | 598 ++++++++++++++++-- 3 files changed, 567 insertions(+), 72 deletions(-) diff --git a/incrementalmerkletree/src/testing.rs b/incrementalmerkletree/src/testing.rs index c015cf8..8435157 100644 --- a/incrementalmerkletree/src/testing.rs +++ b/incrementalmerkletree/src/testing.rs @@ -500,9 +500,37 @@ pub fn check_root_hashes, F: Fn(usize) -> T>(new_tree: F) assert_eq!(t.root(0).unwrap(), "aaaa____________"); } -pub fn check_witnesses + std::fmt::Debug, F: Fn(usize) -> T>(new_tree: F) { +/// This test expects a depth-4 tree and verifies that the tree reports itself as full after 2^4 +/// appends. +pub fn check_append + std::fmt::Debug, F: Fn(usize) -> T>(new_tree: F) { + use Retention::*; + let mut tree = new_tree(100); - tree.append("a".to_string(), Retention::Marked); + assert_eq!(tree.depth(), 4); + + // 16 appends should succeed + for i in 0..16 { + assert!(tree.append(i.to_string(), Ephemeral)); + assert_eq!(tree.current_position(), Some(Position::from(i))); + } + + // 17th append should fail + assert!(!tree.append("16".to_string(), Ephemeral)); + + // The following checks a condition on state restoration in the case that an append fails. + // We want to ensure that a failed append does not cause a loss of information. 
+ let ops = (0..17) + .map(|i| Append(i.to_string(), Ephemeral)) + .collect::>(); + let tree = new_tree(100); + check_operations(tree, &ops).unwrap(); +} + +pub fn check_witnesses + std::fmt::Debug, F: Fn(usize) -> T>(new_tree: F) { + use Retention::*; + + let mut tree = new_tree(100); + tree.append("a".to_string(), Marked); assert_eq!( tree.witness(Position::from(0), 0), Some(vec![ diff --git a/incrementalmerkletree/src/testing/complete_tree.rs b/incrementalmerkletree/src/testing/complete_tree.rs index 073da85..53a2989 100644 --- a/incrementalmerkletree/src/testing/complete_tree.rs +++ b/incrementalmerkletree/src/testing/complete_tree.rs @@ -321,8 +321,8 @@ mod tests { use super::CompleteTree; use crate::{ testing::{ - check_checkpoint_rewind, check_rewind_remove_mark, check_root_hashes, check_witnesses, - compute_root_from_witness, SipHashable, Tree, + check_append, check_checkpoint_rewind, check_rewind_remove_mark, check_root_hashes, + check_witnesses, compute_root_from_witness, SipHashable, Tree, }, Hashable, Level, Position, Retention, }; @@ -367,6 +367,11 @@ mod tests { assert_eq!(tree.root(0).unwrap(), expected); } + #[test] + fn append() { + check_append(|max_checkpoints| CompleteTree::::new(max_checkpoints, 0)); + } + #[test] fn root_hashes() { check_root_hashes(|max_checkpoints| { diff --git a/shardtree/src/lib.rs b/shardtree/src/lib.rs index c83eb34..f4811bf 100644 --- a/shardtree/src/lib.rs +++ b/shardtree/src/lib.rs @@ -1,4 +1,6 @@ +use core::convert::Infallible; use core::fmt::Debug; +use core::marker::PhantomData; use core::ops::{BitAnd, BitOr, Deref, Not, Range}; use either::Either; use std::collections::{BTreeMap, BTreeSet}; @@ -1436,6 +1438,383 @@ impl LocatedPrunableTree { } } +/// An enumeration of possible checkpoint locations. +#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord)] +pub enum TreeState { + /// Checkpoints of the empty tree. 
+ Empty, + /// Checkpoint at a (possibly pruned) leaf state corresponding to the + /// wrapped leaf position. + AtPosition(Position), +} + +#[derive(Clone, Debug)] +pub struct Checkpoint { + tree_state: TreeState, + marks_removed: BTreeSet, +} + +impl Checkpoint { + pub fn tree_empty() -> Self { + Checkpoint { + tree_state: TreeState::Empty, + marks_removed: BTreeSet::new(), + } + } + + pub fn at_position(position: Position) -> Self { + Checkpoint { + tree_state: TreeState::AtPosition(position), + marks_removed: BTreeSet::new(), + } + } + + pub fn is_tree_empty(&self) -> bool { + matches!(self.tree_state, TreeState::Empty) + } + + pub fn position(&self) -> Option { + match self.tree_state { + TreeState::Empty => None, + TreeState::AtPosition(pos) => Some(pos), + } + } +} + +/// A capability for storage of fragment subtrees of the `ShardTree` type. +/// +/// All fragment subtrees must have roots at level `SHARD_HEIGHT - 1` +pub trait ShardStore { + type Error; + + /// Returns the subtree at the given root address, if any such subtree exists. + fn get_shard(&self, shard_root: Address) -> Option<&LocatedPrunableTree>; + + /// Returns the subtree containing the maximum inserted leaf position. + fn last_shard(&self) -> Option<&LocatedPrunableTree>; + + /// Inserts or replaces the subtree having the same root address as the provided tree. + /// + /// Implementations of this method MUST enforce the constraint that the root address + /// of the provided subtree has level `SHARD_HEIGHT - 1`. + fn put_shard(&mut self, subtree: LocatedPrunableTree) -> Result<(), Self::Error>; + + /// Returns the vector of addresses corresponding to the roots of subtrees stored in this + /// store. + fn get_shard_roots(&self) -> Vec
; + + /// Removes subtrees from the underlying store having root addresses at indices greater + /// than or equal to that of the specified address. + /// + /// Implementations of this method MUST enforce the constraint that the root address + /// provided has level `SHARD_HEIGHT - 1`. + fn truncate(&mut self, from: Address) -> Result<(), Self::Error>; +} + +impl ShardStore for Vec> { + type Error = Infallible; + + fn get_shard(&self, shard_root: Address) -> Option<&LocatedPrunableTree> { + self.get(shard_root.index()) + } + + fn last_shard(&self) -> Option<&LocatedPrunableTree> { + self.last() + } + + fn put_shard(&mut self, subtree: LocatedPrunableTree) -> Result<(), Self::Error> { + let subtree_addr = subtree.root_addr; + for subtree_idx in self.last().map_or(0, |s| s.root_addr.index() + 1)..=subtree_addr.index() + { + self.push(LocatedTree { + root_addr: Address::from_parts(subtree_addr.level(), subtree_idx), + root: Tree(Node::Nil), + }) + } + self[subtree_addr.index()] = subtree; + Ok(()) + } + + fn get_shard_roots(&self) -> Vec
{ + self.iter().map(|s| s.root_addr).collect() + } + + fn truncate(&mut self, from: Address) -> Result<(), Self::Error> { + self.truncate(from.index()); + Ok(()) + } +} + +/// A left-dense, sparse binary Merkle tree of the specified depth, represented as a vector of +/// subtrees (shards) of the given maximum height. +/// +/// This tree maintains a collection of "checkpoints" which represent positions, usually near the +/// front of the tree, that are maintained such that it's possible to truncate nodes to the right +/// of the specified position. +#[derive(Debug)] +pub struct ShardTree, const DEPTH: u8, const SHARD_HEIGHT: u8> { + /// The vector of tree shards. + store: S, + /// The maximum number of checkpoints to retain before pruning. + max_checkpoints: usize, + /// A map from position to the count of checkpoints at this position. + checkpoints: BTreeMap, + // /// A tree that is used to cache the known roots of subtrees in the "cap" of nodes between + // /// `SHARD_HEIGHT` and `DEPTH` that are otherwise not directly represented in the tree. This + // /// cache is automatically updated when computing roots and witnesses. Leaf nodes are empty + // /// because the annotation slot is consistently used to store the subtree hashes at each node. + // cap_cache: Tree>, ()> + _hash_type: PhantomData, +} + +impl< + H: Hashable + Clone + PartialEq, + C: Clone + Ord + core::fmt::Debug, + S: ShardStore, + const DEPTH: u8, + const SHARD_HEIGHT: u8, + > ShardTree +{ + /// Creates a new empty tree. + pub fn new(store: S, max_checkpoints: usize, initial_checkpoint_id: C) -> Self { + Self { + store, + max_checkpoints, + checkpoints: BTreeMap::from([(initial_checkpoint_id, Checkpoint::tree_empty())]), + //cap_cache: Tree(None, ()) + _hash_type: PhantomData, + } + } + + /// Returns the root address of the tree. 
+ pub fn root_addr() -> Address { + Address::from_parts(Level::from(DEPTH), 0) + } + + /// Returns the fixed level of subtree roots within the vector of subtrees used as this tree's + /// representation. + pub fn subtree_level() -> Level { + Level::from(SHARD_HEIGHT - 1) + } + + /// Returns the position and checkpoint count for each checkpointed position in the tree. + pub fn checkpoints(&self) -> &BTreeMap { + &self.checkpoints + } + + /// Returns the leaf value at the specified position, if it is a marked leaf. + pub fn get_marked_leaf(&self, position: Position) -> Option<&H> { + self.store + .get_shard(Address::above_position(Self::subtree_level(), position)) + .and_then(|t| t.value_at_position(position)) + .and_then(|(v, r)| if r.is_marked() { Some(v) } else { None }) + } + + /// Returns the positions of marked leaves in the tree. + pub fn marked_positions(&self) -> BTreeSet { + let mut result = BTreeSet::new(); + for subtree_addr in &self.store.get_shard_roots() { + if let Some(subtree) = self.store.get_shard(*subtree_addr) { + result.append(&mut subtree.marked_positions()); + } + } + result + } + + /// Inserts a new root into the tree at the given address. + /// + /// This will pad from the left until the tree's subtrees vector contains enough trees to reach + /// the specified address, which must be at the [`Self::subtree_level`] level. If a subtree + /// already exists at this address, its root will be annotated with the specified hash value. + /// + /// This will return an error if the specified hash conflicts with any existing annotation. 
+ pub fn put_root(&mut self, addr: Address, value: H) -> Result<(), InsertionError> { + let updated_subtree = match self.store.get_shard(addr) { + Some(s) if !s.root.is_nil() => s.root.node_value().map_or_else( + || { + Ok(Some( + s.clone().reannotate_root(Some(Rc::new(value.clone()))), + )) + }, + |v| { + if v == &value { + // the existing root is already correctly annotated, so no need to + // do anything + Ok(None) + } else { + // the provided value conflicts with the existing root value + Err(InsertionError::Conflict(addr)) + } + }, + ), + _ => { + // there is no existing subtree root, so construct a new one. + Ok(Some(LocatedTree { + root_addr: addr, + root: Tree(Node::Leaf { + value: (value, EPHEMERAL), + }), + })) + } + }?; + + if let Some(s) = updated_subtree { + self.store.put_shard(s).map_err(InsertionError::Storage)?; + } + + Ok(()) + } + + /// Append a single value at the first available position in the tree. + /// + /// Prefer to use [`Self::batch_insert`] when appending multiple values, as these operations + /// require fewer traversals of the tree than are necessary when performing multiple sequential + /// calls to [`Self::append`]. + pub fn append( + &mut self, + value: H, + retention: Retention, + ) -> Result<(), InsertionError> { + if let Retention::Checkpoint { id, .. } = &retention { + if self.checkpoints.keys().last() >= Some(id) { + return Err(InsertionError::CheckpointOutOfOrder); + } + } + + let (append_result, position, checkpoint_id) = + if let Some(subtree) = self.store.last_shard() { + if subtree.root.reduce(&is_complete) { + let addr = subtree.root_addr; + + if addr.index() + 1 >= 0x1 << (SHARD_HEIGHT - 1) { + return Err(InsertionError::OutOfRange(addr.position_range())); + } else { + LocatedTree::empty(addr.next_at_level()).append(value, retention)? + } + } else { + subtree.append(value, retention)? + } + } else { + let root_addr = Address::from_parts(Self::subtree_level(), 0); + LocatedTree::empty(root_addr).append(value, retention)? 
+ }; + + self.store + .put_shard(append_result) + .map_err(InsertionError::Storage)?; + if let Some(c) = checkpoint_id { + self.checkpoints + .insert(c, Checkpoint::at_position(position)); + } + + self.prune_excess_checkpoints() + .map_err(InsertionError::Storage)?; + + Ok(()) + } + + fn prune_excess_checkpoints(&mut self) -> Result<(), S::Error> { + if self.checkpoints.len() > self.max_checkpoints { + // Batch removals by subtree & create a list of the checkpoint identifiers that + // will be removed from the checkpoints map. + let mut checkpoints_to_delete = vec![]; + let mut clear_positions: BTreeMap> = + BTreeMap::new(); + for (cid, checkpoint) in self + .checkpoints + .iter() + .take(self.checkpoints.len() - self.max_checkpoints) + { + checkpoints_to_delete.push(cid.clone()); + + // clear the checkpoint leaf + if let TreeState::AtPosition(pos) = checkpoint.tree_state { + let subtree_addr = Address::above_position(Self::subtree_level(), pos); + clear_positions + .entry(subtree_addr) + .and_modify(|to_clear| { + to_clear + .entry(pos) + .and_modify(|flags| *flags = *flags | CHECKPOINT) + .or_insert(CHECKPOINT); + }) + .or_insert_with(|| BTreeMap::from([(pos, CHECKPOINT)])); + } + + // clear the leaves that have been marked for removal + for unmark_pos in checkpoint.marks_removed.iter() { + let subtree_addr = Address::above_position(Self::subtree_level(), *unmark_pos); + clear_positions + .entry(subtree_addr) + .and_modify(|to_clear| { + to_clear + .entry(*unmark_pos) + .and_modify(|flags| *flags = *flags | MARKED) + .or_insert(MARKED); + }) + .or_insert_with(|| BTreeMap::from([(*unmark_pos, MARKED)])); + } + } + + // Prune each affected subtree + for (subtree_addr, positions) in clear_positions.into_iter() { + let cleared = self + .store + .get_shard(subtree_addr) + .map(|subtree| subtree.clear_flags(positions)); + if let Some(cleared) = cleared { + self.store.put_shard(cleared)?; + } + } + + // Now that the leaves have been pruned, actually remove the 
checkpoints + for c in checkpoints_to_delete { + self.checkpoints.remove(&c); + } + } + + Ok(()) + } + + /// Returns the position of the checkpoint, if any, along with the number of subsequent + /// checkpoints at the same position. Returns `None` if `checkpoint_depth == 0` or if + /// insufficient checkpoints exist to seek back to the requested depth. + pub fn checkpoint_at_depth(&self, checkpoint_depth: usize) -> Option<(&C, &Checkpoint)> { + if checkpoint_depth == 0 { + None + } else { + self.checkpoints.iter().rev().nth(checkpoint_depth - 1) + } + } + + /// Returns the position of the rightmost leaf inserted as of the given checkpoint. + /// + /// Returns the maximum leaf position if `checkpoint_depth == 0` (or `Ok(None)` in this + /// case if the tree is empty) or an error if the checkpointed position cannot be restored + /// because it has been pruned. Note that no actual level-0 leaf may exist at this position. + pub fn max_leaf_position( + &self, + checkpoint_depth: usize, + ) -> Result, QueryError> { + if checkpoint_depth == 0 { + // TODO: This relies on the invariant that the last shard in the subtrees vector is + // never created without a leaf then being added to it. However, this may be a + // difficult invariant to maintain when adding empty roots, so perhaps we need a + // better way of tracking the actual max position of the tree; we might want to + // just store it directly. + Ok(self.store.last_shard().and_then(|t| t.max_position())) + } else { + match self.checkpoint_at_depth(checkpoint_depth) { + Some((_, c)) => Ok(c.position()), + None => { + // There is no checkpoint at the specified depth, so we report it as pruned. + Err(QueryError::CheckpointPruned) + } + } + } + } +} + // We need an applicative functor for Result for this function so that we can correctly // accumulate errors, but we don't have one so we just write a special- cased version here. 
fn accumulate_result_with( @@ -1499,10 +1878,14 @@ pub mod testing { #[cfg(test)] mod tests { use crate::{ - LocatedPrunableTree, LocatedTree, Node, PrunableTree, QueryError, Tree, EPHEMERAL, MARKED, + LocatedPrunableTree, LocatedTree, Node, PrunableTree, QueryError, ShardStore, ShardTree, + Tree, EPHEMERAL, MARKED, }; use core::convert::Infallible; - use incrementalmerkletree::{Address, Level, Position, Retention}; + use incrementalmerkletree::{ + testing::{self, check_append, complete_tree::CompleteTree, CombinedTree}, + Address, Hashable, Level, Position, Retention, + }; use std::collections::BTreeSet; use std::rc::Rc; @@ -1582,6 +1965,81 @@ mod tests { ); } + #[test] + fn located_prunable_tree_insert_subtree() { + let t: LocatedPrunableTree = LocatedTree { + root_addr: Address::from_parts(3.into(), 1), + root: parent( + leaf(("abcd".to_string(), EPHEMERAL)), + parent(nil(), leaf(("gh".to_string(), EPHEMERAL))), + ), + }; + + assert_eq!( + t.insert_subtree::( + LocatedTree { + root_addr: Address::from_parts(1.into(), 6), + root: parent(leaf(("e".to_string(), MARKED)), nil()) + }, + true + ), + Ok(( + LocatedTree { + root_addr: Address::from_parts(3.into(), 1), + root: parent( + leaf(("abcd".to_string(), EPHEMERAL)), + parent( + parent(leaf(("e".to_string(), MARKED)), nil()), + leaf(("gh".to_string(), EPHEMERAL)) + ) + ) + }, + vec![] + )) + ); + } + + #[test] + fn located_prunable_tree_witness() { + let t: LocatedPrunableTree = LocatedTree { + root_addr: Address::from_parts(3.into(), 0), + root: parent( + leaf(("abcd".to_string(), EPHEMERAL)), + parent( + parent( + leaf(("e".to_string(), MARKED)), + leaf(("f".to_string(), EPHEMERAL)), + ), + leaf(("gh".to_string(), EPHEMERAL)), + ), + ), + }; + + assert_eq!( + t.witness(4.into(), 8.into()), + Ok(vec!["f", "gh", "abcd"] + .into_iter() + .map(|s| s.to_string()) + .collect()) + ); + assert_eq!( + t.witness(4.into(), 6.into()), + Ok(vec!["f", "__", "abcd"] + .into_iter() + .map(|s| s.to_string()) + .collect()) + ); 
+ assert_eq!( + t.witness(4.into(), 7.into()), + Err(QueryError::TreeIncomplete(vec![Address::from_parts( + 1.into(), + 3 + )])) + ); + } + + type VecShardStore = Vec>; + #[test] fn tree_marked_positions() { let t: PrunableTree = parent( @@ -1739,76 +2197,80 @@ mod tests { assert_eq!(complete.subtree.right_filled_root(), Ok("abcd".to_string())); } - #[test] - fn located_prunable_tree_insert_subtree() { - let t: LocatedPrunableTree = LocatedTree { - root_addr: Address::from_parts(3.into(), 1), - root: parent( - leaf(("abcd".to_string(), EPHEMERAL)), - parent(nil(), leaf(("gh".to_string(), EPHEMERAL))), - ), - }; + impl< + H: Hashable + Ord + Clone, + C: Clone + Ord + core::fmt::Debug, + S: ShardStore, + const DEPTH: u8, + const SHARD_HEIGHT: u8, + > testing::Tree for ShardTree + { + fn depth(&self) -> u8 { + DEPTH + } - assert_eq!( - t.insert_subtree::( - LocatedTree { - root_addr: Address::from_parts(1.into(), 6), - root: parent(leaf(("e".to_string(), MARKED)), nil()) - }, - true - ), - Ok(( - LocatedTree { - root_addr: Address::from_parts(3.into(), 1), - root: parent( - leaf(("abcd".to_string(), EPHEMERAL)), - parent( - parent(leaf(("e".to_string(), MARKED)), nil()), - leaf(("gh".to_string(), EPHEMERAL)) - ) - ) - }, - vec![] - )) - ); + fn append(&mut self, value: H, retention: Retention) -> bool { + ShardTree::append(self, value, retention).is_ok() + } + + fn current_position(&self) -> Option { + ShardTree::max_leaf_position(self, 0).ok().flatten() + } + + fn get_marked_leaf(&self, _position: Position) -> Option<&H> { + todo!() + } + + fn marked_positions(&self) -> BTreeSet { + todo!() + } + + fn root(&self, _checkpoint_depth: usize) -> Option { + todo!() + } + + fn witness(&self, _position: Position, _checkpoint_depth: usize) -> Option> { + todo!() + } + + fn remove_mark(&mut self, _position: Position) -> bool { + todo!() + } + + fn checkpoint(&mut self, _checkpoint_id: C) -> bool { + todo!() + } + + fn rewind(&mut self) -> bool { + todo!() + } } #[test] - fn 
located_prunable_tree_witness() { - let t: LocatedPrunableTree = LocatedTree { - root_addr: Address::from_parts(3.into(), 0), - root: parent( - leaf(("abcd".to_string(), EPHEMERAL)), - parent( - parent( - leaf(("e".to_string(), MARKED)), - leaf(("f".to_string(), EPHEMERAL)), - ), - leaf(("gh".to_string(), EPHEMERAL)), - ), - ), - }; + fn append() { + check_append(|m| { + ShardTree::, 4, 3>::new(vec![], m, 0) + }); + } - assert_eq!( - t.witness(4.into(), 8.into()), - Ok(vec!["f", "gh", "abcd"] - .into_iter() - .map(|s| s.to_string()) - .collect()) - ); - assert_eq!( - t.witness(4.into(), 6.into()), - Ok(vec!["f", "__", "abcd"] - .into_iter() - .map(|s| s.to_string()) - .collect()) - ); - assert_eq!( - t.witness(4.into(), 7.into()), - Err(QueryError::TreeIncomplete(vec![Address::from_parts( - 1.into(), - 3 - )])) - ); + // Combined tree tests + #[allow(clippy::type_complexity)] + fn new_combined_tree( + max_checkpoints: usize, + ) -> CombinedTree< + H, + usize, + CompleteTree, + ShardTree, 4, 3>, + > { + CombinedTree::new( + CompleteTree::new(max_checkpoints, 0), + ShardTree::new(vec![], max_checkpoints, 0), + ) + } + + #[test] + fn combined_append() { + check_append(new_combined_tree); } } From e209f3bf200d347c8aeb065f428c0a57a51983b1 Mon Sep 17 00:00:00 2001 From: Kris Nuttycombe Date: Fri, 13 Jan 2023 08:40:57 -0700 Subject: [PATCH 07/16] Add `shardtree` checkpointing & root computation. --- shardtree/src/lib.rs | 237 +++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 229 insertions(+), 8 deletions(-) diff --git a/shardtree/src/lib.rs b/shardtree/src/lib.rs index f4811bf..ab7e73f 100644 --- a/shardtree/src/lib.rs +++ b/shardtree/src/lib.rs @@ -1713,6 +1713,98 @@ impl< Ok(()) } + /// Adds a checkpoint at the rightmost leaf state of the tree. 
+ pub fn checkpoint(&mut self, checkpoint_id: C) -> bool { + fn go( + root_addr: Address, + root: &PrunableTree, + ) -> Option<(PrunableTree, Position)> { + match root { + Tree(Node::Parent { ann, left, right }) => { + let (l_addr, r_addr) = root_addr.children().unwrap(); + go(r_addr, right).map_or_else( + || { + go(l_addr, left).map(|(new_left, pos)| { + ( + Tree::unite( + l_addr.level(), + ann.clone(), + new_left, + right.as_ref().clone(), + ), + pos, + ) + }) + }, + |(new_right, pos)| { + Some(( + Tree::unite( + l_addr.level(), + ann.clone(), + left.as_ref().clone(), + new_right, + ), + pos, + )) + }, + ) + } + Tree(Node::Leaf { value: (h, r) }) => Some(( + Tree(Node::Leaf { + value: (h.clone(), *r | CHECKPOINT), + }), + root_addr.max_position(), + )), + Tree(Node::Nil) => None, + } + } + + // checkpoint identifiers at the tip must be in increasing order + if self.checkpoints.keys().last() >= Some(&checkpoint_id) { + return false; + } + + // Search backward from the end of the subtrees iter to find a non-empty subtree. + // When we find one, update the subtree to add the `CHECKPOINT` flag to the + // right-most leaf (which need not be a level-0 leaf; it's fine to rewind to a + // pruned state). 
+ for subtree_addr in self.store.get_shard_roots().iter().rev() { + let subtree = self.store.get_shard(*subtree_addr).expect( + "The store should not return root addresses for subtrees it cannot provide.", + ); + if let Some((replacement, checkpoint_position)) = go(*subtree_addr, &subtree.root) { + if self + .store + .put_shard(LocatedTree { + root_addr: *subtree_addr, + root: replacement, + }) + .is_err() + { + return false; + } + self.checkpoints + .insert(checkpoint_id, Checkpoint::at_position(checkpoint_position)); + + // early return once we've updated the tree state + return self + .prune_excess_checkpoints() + .map_err(InsertionError::Storage) + .is_ok(); + } + } + + self.checkpoints + .insert(checkpoint_id, Checkpoint::tree_empty()); + + // TODO: it should not be necessary to do this on every checkpoint, + // but currently that's how the reference tree behaves so we're maintaining + // those semantics for test compatibility. + self.prune_excess_checkpoints() + .map_err(InsertionError::Storage) + .is_ok() + } + fn prune_excess_checkpoints(&mut self) -> Result<(), S::Error> { if self.checkpoints.len() > self.max_checkpoints { // Batch removals by subtree & create a list of the checkpoint identifiers that @@ -1776,6 +1868,115 @@ impl< Ok(()) } + /// Computes the root of any subtree of this tree rooted at the given address, with the overall + /// tree truncated to the specified position. + /// + /// The specified address is not required to be at any particular level, though it cannot + /// exceed the level corresponding to the maximum depth of the tree. Nodes to the right of the + /// given position, and parents of such nodes, will be replaced by the empty root for the + /// associated level. + /// + /// Use [`Self::root_at_checkpoint`] to obtain the root of the overall tree. 
+ pub fn root(&self, address: Address, truncate_at: Position) -> Result { + match address.context(Self::subtree_level()) { + Either::Left(subtree_addr) => { + // The requested root address is fully contained within one of the subtrees. + if truncate_at <= address.position_range_start() { + Ok(H::empty_root(address.level())) + } else { + // get the child of the subtree with its root at `address` + self.store + .get_shard(subtree_addr) + .ok_or_else(|| vec![subtree_addr]) + .and_then(|subtree| { + subtree.subtree(address).map_or_else( + || Err(vec![address]), + |child| child.root_hash(truncate_at), + ) + }) + .map_err(QueryError::TreeIncomplete) + } + } + Either::Right(subtree_range) => { + // The requested root requires hashing together the roots of several subtrees. + let mut root_stack = vec![]; + let mut incomplete = vec![]; + + for subtree_idx in subtree_range { + let subtree_addr = Address::from_parts(Self::subtree_level(), subtree_idx); + if truncate_at <= subtree_addr.position_range_start() { + break; + } + + let subtree_root = self + .store + .get_shard(subtree_addr) + .ok_or_else(|| vec![subtree_addr]) + .and_then(|s| s.root_hash(truncate_at)); + + match subtree_root { + Ok(mut cur_hash) => { + if subtree_addr.index() % 2 == 0 { + root_stack.push((subtree_addr, cur_hash)) + } else { + let mut cur_addr = subtree_addr; + while let Some((addr, hash)) = root_stack.pop() { + if addr.parent() == cur_addr.parent() { + cur_hash = H::combine(cur_addr.level(), &hash, &cur_hash); + cur_addr = cur_addr.parent(); + } else { + root_stack.push((addr, hash)); + break; + } + } + root_stack.push((cur_addr, cur_hash)); + } + } + Err(mut new_incomplete) => { + // Accumulate incomplete root information and continue, so that we can + // return the complete set of incomplete results. 
+ incomplete.append(&mut new_incomplete); + } + } + } + + if !incomplete.is_empty() { + return Err(QueryError::TreeIncomplete(incomplete)); + } + + // Now hash with empty roots to obtain the root at maximum height + if let Some((mut cur_addr, mut cur_hash)) = root_stack.pop() { + while let Some((addr, hash)) = root_stack.pop() { + while addr.level() > cur_addr.level() { + cur_hash = H::combine( + cur_addr.level(), + &cur_hash, + &H::empty_root(cur_addr.level()), + ); + cur_addr = cur_addr.parent(); + } + cur_hash = H::combine(cur_addr.level(), &hash, &cur_hash); + cur_addr = cur_addr.parent(); + } + + while cur_addr.level() < address.level() { + cur_hash = H::combine( + cur_addr.level(), + &cur_hash, + &H::empty_root(cur_addr.level()), + ); + cur_addr = cur_addr.parent(); + } + + Ok(cur_hash) + } else { + // if the stack is empty, we just return the default root at max height + Ok(H::empty_root(address.level())) + } + } + } + } + /// Returns the position of the checkpoint, if any, along with the number of subsequent /// checkpoints at the same position. Returns `None` if `checkpoint_depth == 0` or if /// insufficient checkpoints exist to seek back to the requested depth. @@ -1813,6 +2014,18 @@ impl< } } } + + /// Computes the root of the tree as of the checkpointed position at the specified depth. + /// + /// Returns the root as of the most recently appended leaf if `checkpoint_depth == 0`. Note + /// that if the most recently appended leaf is also a checkpoint, this will return the same + /// result as `checkpoint_depth == 1`. 
+ pub fn root_at_checkpoint(&self, checkpoint_depth: usize) -> Result { + self.max_leaf_position(checkpoint_depth)?.map_or_else( + || Ok(H::empty_root(Self::root_addr().level())), + |pos| self.root(Self::root_addr(), pos + 1), + ) + } } // We need an applicative functor for Result for this function so that we can correctly @@ -1883,7 +2096,9 @@ mod tests { }; use core::convert::Infallible; use incrementalmerkletree::{ - testing::{self, check_append, complete_tree::CompleteTree, CombinedTree}, + testing::{ + self, check_append, check_root_hashes, complete_tree::CompleteTree, CombinedTree, + }, Address, Hashable, Level, Position, Retention, }; use std::collections::BTreeSet; @@ -2217,16 +2432,16 @@ mod tests { ShardTree::max_leaf_position(self, 0).ok().flatten() } - fn get_marked_leaf(&self, _position: Position) -> Option<&H> { - todo!() + fn get_marked_leaf(&self, position: Position) -> Option<&H> { + ShardTree::get_marked_leaf(self, position) } fn marked_positions(&self) -> BTreeSet { - todo!() + ShardTree::marked_positions(self) } - fn root(&self, _checkpoint_depth: usize) -> Option { - todo!() + fn root(&self, checkpoint_depth: usize) -> Option { + ShardTree::root_at_checkpoint(self, checkpoint_depth).ok() } fn witness(&self, _position: Position, _checkpoint_depth: usize) -> Option> { @@ -2237,8 +2452,8 @@ mod tests { todo!() } - fn checkpoint(&mut self, _checkpoint_id: C) -> bool { - todo!() + fn checkpoint(&mut self, checkpoint_id: C) -> bool { + ShardTree::checkpoint(self, checkpoint_id) } fn rewind(&mut self) -> bool { @@ -2253,6 +2468,12 @@ mod tests { }); } + #[test] + fn root_hashes() { + check_root_hashes(|m| { + ShardTree::, 4, 3>::new(vec![], m, 0) + }); + } // Combined tree tests #[allow(clippy::type_complexity)] fn new_combined_tree( From a7bb8bb749214fafd8b87e3eaf9cfd1b6a07a4fa Mon Sep 17 00:00:00 2001 From: Kris Nuttycombe Date: Fri, 13 Jan 2023 08:40:57 -0700 Subject: [PATCH 08/16] Add `shardtree` batch insertion. 
--- shardtree/src/lib.rs | 83 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 83 insertions(+) diff --git a/shardtree/src/lib.rs b/shardtree/src/lib.rs index ab7e73f..6840151 100644 --- a/shardtree/src/lib.rs +++ b/shardtree/src/lib.rs @@ -1713,6 +1713,89 @@ impl< Ok(()) } + /// Put a range of values into the subtree to fill leaves starting from the given position. + /// + /// This operation will pad the tree until it contains enough subtrees to reach the starting + /// position. It will fully consume the provided iterator, constructing successive subtrees + /// until no more values are available. It aggressively prunes the tree as it goes, retaining + /// only nodes that either have [`MARKED`] retention, are required to construct a witness for + /// such marked nodes, or that must be retained in order to make it possible to truncate the + /// tree to any position with [`CHECKPOINT`] retention. + /// + /// This operation returns the final position at which a leaf was inserted, and the vector of + /// [`IncompleteAt`] values that identify addresses at which [`Node::Nil`] nodes were + /// introduced to the tree, as well as whether or not those newly introduced nodes will need to + /// be filled with values in order to produce witnesses for inserted leaves with [`MARKED`] + /// retention. + #[allow(clippy::type_complexity)] + pub fn batch_insert)>>( + &mut self, + mut start: Position, + values: I, + ) -> Result)>, InsertionError> { + let mut values = values.peekable(); + let mut subtree_root_addr = Address::above_position(Self::subtree_level(), start); + let mut max_insert_position = None; + let mut all_incomplete = vec![]; + loop { + if values.peek().is_some() { + let empty = LocatedTree::empty(subtree_root_addr); + let mut res = self + .store + .get_shard(subtree_root_addr) + .unwrap_or(&empty) + .batch_insert(start, values)? 
+ .expect( + "Iterator containing leaf values to insert was verified to be nonempty.", + ); + self.store + .put_shard(res.subtree) + .map_err(InsertionError::Storage)?; + for (id, position) in res.checkpoints.into_iter() { + self.checkpoints + .insert(id, Checkpoint::at_position(position)); + } + + values = res.remainder; + subtree_root_addr = subtree_root_addr.next_at_level(); + max_insert_position = res.max_insert_position; + start = max_insert_position.unwrap() + 1; + all_incomplete.append(&mut res.incomplete); + } else { + break; + } + } + + self.prune_excess_checkpoints() + .map_err(InsertionError::Storage)?; + + Ok(max_insert_position.map(|p| (p, all_incomplete))) + } + + /// Insert a tree by decomposing it into its [`SHARD_HEIGHT`] or smaller parts (if necessary) + /// and inserting those at their appropriate locations. + pub fn insert_tree( + &mut self, + tree: LocatedPrunableTree, + ) -> Result, InsertionError> { + let mut all_incomplete = vec![]; + for subtree in tree.decompose_to_level(Self::subtree_level()).into_iter() { + let root_addr = subtree.root_addr; + let contains_marked = subtree.root.try_reduce(&contains_marked).is_left(); + let empty = LocatedTree::empty(root_addr); + let (new_subtree, mut incomplete) = self + .store + .get_shard(root_addr) + .unwrap_or(&empty) + .insert_subtree(subtree, contains_marked)?; + self.store + .put_shard(new_subtree) + .map_err(InsertionError::Storage)?; + all_incomplete.append(&mut incomplete); + } + Ok(all_incomplete) + } + /// Adds a checkpoint at the rightmost leaf state of the tree. pub fn checkpoint(&mut self, checkpoint_id: C) -> bool { fn go( From 0cb1cec21fd5d60c4b38ebf88fa25954863feb6b Mon Sep 17 00:00:00 2001 From: Kris Nuttycombe Date: Fri, 13 Jan 2023 08:40:57 -0700 Subject: [PATCH 09/16] Add `shardtree` witness operation & implement property tests. 
--- incrementalmerkletree/src/testing.rs | 235 +++++++++++--- .../src/testing/complete_tree.rs | 2 +- shardtree/proptest-regressions/lib.txt | 18 ++ shardtree/src/lib.rs | 287 +++++++++++++++++- 4 files changed, 489 insertions(+), 53 deletions(-) create mode 100644 shardtree/proptest-regressions/lib.txt diff --git a/incrementalmerkletree/src/testing.rs b/incrementalmerkletree/src/testing.rs index 8435157..51ed920 100644 --- a/incrementalmerkletree/src/testing.rs +++ b/incrementalmerkletree/src/testing.rs @@ -541,7 +541,7 @@ pub fn check_witnesses + std::fmt::Debug, F: Fn(usize) -> ]) ); - tree.append("b".to_string(), Retention::Ephemeral); + tree.append("b".to_string(), Ephemeral); assert_eq!( tree.witness(0.into(), 0), Some(vec![ @@ -552,7 +552,7 @@ pub fn check_witnesses + std::fmt::Debug, F: Fn(usize) -> ]) ); - tree.append("c".to_string(), Retention::Marked); + tree.append("c".to_string(), Marked); assert_eq!( tree.witness(Position::from(2), 0), Some(vec![ @@ -563,7 +563,7 @@ pub fn check_witnesses + std::fmt::Debug, F: Fn(usize) -> ]) ); - tree.append("d".to_string(), Retention::Ephemeral); + tree.append("d".to_string(), Ephemeral); assert_eq!( tree.witness(Position::from(2), 0), Some(vec![ @@ -574,7 +574,7 @@ pub fn check_witnesses + std::fmt::Debug, F: Fn(usize) -> ]) ); - tree.append("e".to_string(), Retention::Ephemeral); + tree.append("e".to_string(), Ephemeral); assert_eq!( tree.witness(Position::from(2), 0), Some(vec![ @@ -586,12 +586,12 @@ pub fn check_witnesses + std::fmt::Debug, F: Fn(usize) -> ); let mut tree = new_tree(100); - tree.append("a".to_string(), Retention::Marked); + tree.append("a".to_string(), Marked); for c in 'b'..'g' { - tree.append(c.to_string(), Retention::Ephemeral); + tree.append(c.to_string(), Ephemeral); } - tree.append("g".to_string(), Retention::Marked); - tree.append("h".to_string(), Retention::Ephemeral); + tree.append("g".to_string(), Marked); + tree.append("h".to_string(), Ephemeral); assert_eq!( tree.witness(0.into(), 
0), @@ -604,13 +604,13 @@ pub fn check_witnesses + std::fmt::Debug, F: Fn(usize) -> ); let mut tree = new_tree(100); - tree.append("a".to_string(), Retention::Marked); - tree.append("b".to_string(), Retention::Ephemeral); - tree.append("c".to_string(), Retention::Ephemeral); - tree.append("d".to_string(), Retention::Marked); - tree.append("e".to_string(), Retention::Marked); - tree.append("f".to_string(), Retention::Marked); - tree.append("g".to_string(), Retention::Ephemeral); + tree.append("a".to_string(), Marked); + tree.append("b".to_string(), Ephemeral); + tree.append("c".to_string(), Ephemeral); + tree.append("d".to_string(), Marked); + tree.append("e".to_string(), Marked); + tree.append("f".to_string(), Marked); + tree.append("g".to_string(), Ephemeral); assert_eq!( tree.witness(Position::from(5), 0), @@ -624,10 +624,10 @@ pub fn check_witnesses + std::fmt::Debug, F: Fn(usize) -> let mut tree = new_tree(100); for c in 'a'..'k' { - tree.append(c.to_string(), Retention::Ephemeral); + assert!(tree.append(c.to_string(), Ephemeral)); } - tree.append('k'.to_string(), Retention::Marked); - tree.append('l'.to_string(), Retention::Ephemeral); + assert!(tree.append('k'.to_string(), Marked)); + assert!(tree.append('l'.to_string(), Ephemeral)); assert_eq!( tree.witness(Position::from(10), 0), @@ -642,18 +642,18 @@ pub fn check_witnesses + std::fmt::Debug, F: Fn(usize) -> let mut tree = new_tree(100); assert!(tree.append( 'a'.to_string(), - Retention::Checkpoint { + Checkpoint { id: 1, is_marked: true } )); assert!(tree.rewind()); for c in 'b'..'e' { - tree.append(c.to_string(), Retention::Ephemeral); + tree.append(c.to_string(), Ephemeral); } - tree.append("e".to_string(), Retention::Marked); + tree.append("e".to_string(), Marked); for c in 'f'..'i' { - tree.append(c.to_string(), Retention::Ephemeral); + tree.append(c.to_string(), Ephemeral); } assert_eq!( tree.witness(0.into(), 0), @@ -666,20 +666,20 @@ pub fn check_witnesses + std::fmt::Debug, F: Fn(usize) -> ); let 
mut tree = new_tree(100); - tree.append('a'.to_string(), Retention::Ephemeral); - tree.append('b'.to_string(), Retention::Ephemeral); - tree.append('c'.to_string(), Retention::Marked); - tree.append('d'.to_string(), Retention::Ephemeral); - tree.append('e'.to_string(), Retention::Ephemeral); - tree.append('f'.to_string(), Retention::Ephemeral); + tree.append('a'.to_string(), Ephemeral); + tree.append('b'.to_string(), Ephemeral); + tree.append('c'.to_string(), Marked); + tree.append('d'.to_string(), Ephemeral); + tree.append('e'.to_string(), Ephemeral); + tree.append('f'.to_string(), Ephemeral); assert!(tree.append( 'g'.to_string(), - Retention::Checkpoint { + Checkpoint { id: 1, is_marked: true } )); - tree.append('h'.to_string(), Retention::Ephemeral); + tree.append('h'.to_string(), Ephemeral); assert!(tree.rewind()); assert_eq!( tree.witness(Position::from(2), 0), @@ -692,18 +692,18 @@ pub fn check_witnesses + std::fmt::Debug, F: Fn(usize) -> ); let mut tree = new_tree(100); - tree.append('a'.to_string(), Retention::Ephemeral); - tree.append('b'.to_string(), Retention::Marked); + tree.append('a'.to_string(), Ephemeral); + tree.append('b'.to_string(), Marked); assert_eq!(tree.witness(Position::from(0), 0), None); let mut tree = new_tree(100); for c in 'a'..'m' { - tree.append(c.to_string(), Retention::Ephemeral); + tree.append(c.to_string(), Ephemeral); } - tree.append('m'.to_string(), Retention::Marked); - tree.append('n'.to_string(), Retention::Marked); - tree.append('o'.to_string(), Retention::Ephemeral); - tree.append('p'.to_string(), Retention::Ephemeral); + tree.append('m'.to_string(), Marked); + tree.append('n'.to_string(), Marked); + tree.append('o'.to_string(), Ephemeral); + tree.append('p'.to_string(), Ephemeral); assert_eq!( tree.witness(Position::from(12), 0), @@ -717,9 +717,9 @@ pub fn check_witnesses + std::fmt::Debug, F: Fn(usize) -> let ops = ('a'..='l') .into_iter() - .map(|c| Append(c.to_string(), Retention::Marked)) - 
.chain(Some(Append('m'.to_string(), Retention::Ephemeral))) - .chain(Some(Append('n'.to_string(), Retention::Ephemeral))) + .map(|c| Append(c.to_string(), Marked)) + .chain(Some(Append('m'.to_string(), Ephemeral))) + .chain(Some(Append('n'.to_string(), Ephemeral))) .chain(Some(Witness(11usize.into(), 0))) .collect::>(); @@ -736,6 +736,153 @@ pub fn check_witnesses + std::fmt::Debug, F: Fn(usize) -> ] )) ); + + let ops = vec![ + Append("a".to_string(), Ephemeral), + Append("b".to_string(), Ephemeral), + Append("c".to_string(), Ephemeral), + Append( + "d".to_string(), + Checkpoint { + id: 1, + is_marked: true, + }, + ), + Append("e".to_string(), Marked), + Operation::Checkpoint(2), + Append( + "f".to_string(), + Checkpoint { + id: 3, + is_marked: false, + }, + ), + Append( + "g".to_string(), + Checkpoint { + id: 4, + is_marked: false, + }, + ), + Append( + "h".to_string(), + Checkpoint { + id: 5, + is_marked: false, + }, + ), + Witness(3usize.into(), 5), + ]; + let mut tree = new_tree(100); + assert_eq!( + Operation::apply_all(&ops, &mut tree), + Some(( + Position::from(3), + vec![ + "c".to_string(), + "ab".to_string(), + "____".to_string(), + "________".to_string() + ] + )) + ); + let ops = vec![ + Append("a".to_string(), Ephemeral), + Append("a".to_string(), Ephemeral), + Append("a".to_string(), Ephemeral), + Append( + "a".to_string(), + Checkpoint { + id: 1, + is_marked: true, + }, + ), + Append("a".to_string(), Ephemeral), + Append("a".to_string(), Ephemeral), + Append("a".to_string(), Ephemeral), + Append( + "a".to_string(), + Checkpoint { + id: 2, + is_marked: false, + }, + ), + Append("a".to_string(), Ephemeral), + Append("a".to_string(), Ephemeral), + Witness(Position(3), 1), + ]; + let mut tree = new_tree(100); + assert_eq!( + Operation::apply_all(&ops, &mut tree), + Some(( + Position::from(3), + vec![ + "a".to_string(), + "aa".to_string(), + "aaaa".to_string(), + "________".to_string() + ] + )) + ); + + let ops = vec![ + Append("a".to_string(), Marked), + 
Append("a".to_string(), Ephemeral), + Append("a".to_string(), Ephemeral), + Append("a".to_string(), Ephemeral), + Append("a".to_string(), Ephemeral), + Append("a".to_string(), Ephemeral), + Append("a".to_string(), Ephemeral), + Operation::Checkpoint(1), + Append("a".to_string(), Marked), + Operation::Checkpoint(2), + Operation::Checkpoint(3), + Append( + "a".to_string(), + Checkpoint { + id: 4, + is_marked: false, + }, + ), + Rewind, + Rewind, + Witness(Position(7), 2), + ]; + let mut tree = new_tree(100); + assert_eq!(Operation::apply_all(&ops, &mut tree), None); + + let ops = vec![ + Append("a".to_string(), Marked), + Append("a".to_string(), Ephemeral), + Append( + "a".to_string(), + Checkpoint { + id: 1, + is_marked: true, + }, + ), + Append( + "a".to_string(), + Checkpoint { + id: 4, + is_marked: false, + }, + ), + Witness(Position(2), 2), + ]; + let mut tree = new_tree(100); + assert_eq!( + Operation::apply_all(&ops, &mut tree), + Some(( + Position::from(2), + vec![ + "_".to_string(), + "aa".to_string(), + "____".to_string(), + "________".to_string() + ] + )) + ); } pub fn check_checkpoint_rewind, F: Fn(usize) -> T>(new_tree: F) { @@ -821,15 +968,15 @@ pub fn check_rewind_remove_mark, F: Fn(usize) -> T>(new_t // use a maximum number of checkpoints of 1 let mut tree = new_tree(1); - tree.append("e".to_string(), Retention::Marked); - tree.checkpoint(1); + assert!(tree.append("e".to_string(), Retention::Marked)); + assert!(tree.checkpoint(1)); assert!(tree.marked_positions().contains(&0usize.into())); - tree.append("f".to_string(), Retention::Ephemeral); + assert!(tree.append("f".to_string(), Retention::Ephemeral)); // simulate a spend of `e` at `f` assert!(tree.remove_mark(0usize.into())); // even though the mark has been staged for removal, it's not gone yet assert!(tree.marked_positions().contains(&0usize.into())); - tree.checkpoint(2); + assert!(tree.checkpoint(2)); // the newest checkpoint will have caused the oldest to roll off, and // so the forgotten node 
will be unmarked assert!(!tree.marked_positions().contains(&0usize.into())); diff --git a/incrementalmerkletree/src/testing/complete_tree.rs b/incrementalmerkletree/src/testing/complete_tree.rs index 53a2989..389883d 100644 --- a/incrementalmerkletree/src/testing/complete_tree.rs +++ b/incrementalmerkletree/src/testing/complete_tree.rs @@ -380,7 +380,7 @@ mod tests { } #[test] - fn witness() { + fn witnesses() { check_witnesses(|max_checkpoints| { CompleteTree::::new(max_checkpoints, 0) }); diff --git a/shardtree/proptest-regressions/lib.txt b/shardtree/proptest-regressions/lib.txt new file mode 100644 index 0000000..4054830 --- /dev/null +++ b/shardtree/proptest-regressions/lib.txt @@ -0,0 +1,18 @@ +# Seeds for failure cases proptest has generated in the past. It is +# automatically read and these particular cases re-run before any +# novel cases are generated. +# +# It is recommended to check this file in to source control so that +# everyone who runs the test benefits from these saved cases. 
+cc 38b4ca3c029291dfe2a6b5907c33a2e8ae7900f19c759c5db74b616ab19f6c5c # shrinks to ops = [Append(SipHashable(0), MC), Rewind, Rewind] +cc f1a96f73b9f3ba2a2e4b5271037322150450c573b6c3a5ef34f71a540ff0fad2 # shrinks to ops = [Append("a", C), Rewind, Rewind] +cc ad5f5d4276adea6e928376c6dc8c013745e60a8b507e6fa4e716f6c35477fe65 # shrinks to ops = [Append("a", E), Append("a", E), Append("a", E), Append("a", E), Append("a", E), Append("a", E), Append("a", E), Append("a", E), Append("a", E)] +cc ab2690ff3cf593d2e7a78b2f56d76699e525391b87852bbf15315a1f36742f48 # shrinks to ops = [Append(SipHashable(0), C), Append(SipHashable(0), E), Append(SipHashable(0), E), Unmark(Position(0))] +cc 1603359084b0c614a2ff9008036a5e10db9f32fb31b3333e59aaa517686e174d # shrinks to ops = [Append(SipHashable(0), Checkpoint { id: (), is_marked: false })] +cc faaf929be4b27e652712b705e297bfe15c53767102516e56882d177ac6fc58d9 # shrinks to ops = [CurrentPosition, Append("a", Ephemeral), Append("a", Ephemeral), Append("a", Ephemeral), Append("a", Checkpoint { id: (), is_marked: true }), Append("a", Marked), Checkpoint(()), Append("a", Checkpoint { id: (), is_marked: false }), Append("a", Checkpoint { id: (), is_marked: false }), Append("a", Checkpoint { id: (), is_marked: false }), Witness(Position(3), 5)] +cc 8836c27ead7afb8d10092bdaeed33eb31007eaa47e3c3fca248cc00fbce772a3 # shrinks to ops = [Append(SipHashable(0), Ephemeral), Append(SipHashable(0), Ephemeral), Append(SipHashable(0), Ephemeral), Append(SipHashable(0), Ephemeral), Append(SipHashable(0), Ephemeral), Append(SipHashable(0), Ephemeral), Append(SipHashable(0), Ephemeral), Append(SipHashable(0), Ephemeral), Append(SipHashable(0), Ephemeral), Append(SipHashable(0), Ephemeral), Append(SipHashable(0), Ephemeral), Append(SipHashable(0), Ephemeral), Append(SipHashable(0), Ephemeral), Append(SipHashable(0), Ephemeral), Append(SipHashable(0), Ephemeral), Append(SipHashable(0), Ephemeral), Append(SipHashable(0), Ephemeral), Append(SipHashable(0), 
Ephemeral)] +cc 2303f82f255c60d7bdd45e2d13ff526bdc1d7f5ce846a7c07b38ab7f255c0300 # shrinks to ops = [Append("a", Marked), Append("a", Ephemeral), Append("a", Ephemeral), Append("a", Ephemeral), Append("a", Ephemeral), Append("a", Ephemeral), Append("a", Ephemeral), Append("a", Ephemeral), Append("a", Ephemeral), Append("a", Ephemeral), Append("a", Ephemeral), Append("a", Ephemeral), Append("a", Ephemeral), Append("a", Ephemeral), Append("a", Ephemeral), Append("a", Ephemeral), Append("a", Ephemeral), Append("a", Ephemeral), Append("a", Ephemeral)] +cc cf1a33ef6df58bbf7cc199b8b1879c3a078e7784aa3edc0aba9ca03772bea5f2 # shrinks to ops = [Append(SipHashable(0), Checkpoint { id: (), is_marked: false }), Append(SipHashable(0), Checkpoint { id: (), is_marked: false }), Append(SipHashable(0), Checkpoint { id: (), is_marked: false }), Checkpoint(()), Append(SipHashable(0), Checkpoint { id: (), is_marked: false }), Checkpoint(()), Rewind, Rewind, Rewind, Append(SipHashable(0), Ephemeral), Append(SipHashable(0), Checkpoint { id: (), is_marked: false }), Append(SipHashable(0), Checkpoint { id: (), is_marked: false }), Append(SipHashable(0), Ephemeral), Append(SipHashable(0), Ephemeral), Append(SipHashable(0), Ephemeral), Append(SipHashable(0), Checkpoint { id: (), is_marked: true }), Witness(Position(8), 2)] +cc 544e027d994eaf7f97b1c8d9ee7b35522a64a610b1430d56d74ec947018b759d # shrinks to ops = [Append("a", Marked), Append("a", Ephemeral), Append("a", Ephemeral), Append("a", Ephemeral), Append("a", Ephemeral), Append("a", Ephemeral), Append("a", Ephemeral), Checkpoint(()), Append("a", Marked), Checkpoint(()), Checkpoint(()), Append("a", Checkpoint { id: (), is_marked: false }), Rewind, Rewind, Witness(Position(7), 2)] +cc 55d00b68a0f0a02f83ab53f18a29d16d0233153b69a01414a1622104e0eead31 # shrinks to ops = [Append("a", Marked), Append("a", Checkpoint { id: (), is_marked: false }), Append("a", Marked), Checkpoint(()), Checkpoint(()), Checkpoint(()), Append("a", Checkpoint { id: 
(), is_marked: false }), Append("a", Checkpoint { id: (), is_marked: false }), Witness(Position(0), 7)] +cc 9dd966ff1ab66965c5b84153ae13f684258560cdd5e84c7deb24f724cb12aba7 # shrinks to ops = [Append("a", Marked), Append("a", Ephemeral), Append("a", Checkpoint { id: (), is_marked: true }), Checkpoint(()), Append("a", Checkpoint { id: (), is_marked: false }), Rewind, Rewind, Append("a", Checkpoint { id: (), is_marked: false }), Append("a", Checkpoint { id: (), is_marked: false }), Checkpoint(()), Witness(Position(2), 4)] diff --git a/shardtree/src/lib.rs b/shardtree/src/lib.rs index 6840151..b42f91e 100644 --- a/shardtree/src/lib.rs +++ b/shardtree/src/lib.rs @@ -1951,6 +1951,61 @@ impl< Ok(()) } + /// Truncates the tree, discarding all information after the checkpoint at the specified depth. + /// + /// This will also discard all checkpoints with depth <= the specified depth. Returns `true` + /// if the truncation succeeds or has no effect, or `false` if no checkpoint exists at the + /// specified depth. 
+ pub fn truncate_removing_checkpoint(&mut self, checkpoint_depth: usize) -> bool { + if checkpoint_depth == 0 { + true + } else if self.checkpoints.len() > 1 { + match self.checkpoint_at_depth(checkpoint_depth) { + Some((checkpoint_id, c)) => { + let checkpoint_id = checkpoint_id.clone(); + match c.tree_state { + TreeState::Empty => { + if (self + .store + .truncate(Address::from_parts(Self::subtree_level(), 0))) + .is_err() + { + return false; + } + self.checkpoints.split_off(&checkpoint_id); + true + } + TreeState::AtPosition(position) => { + let subtree_addr = + Address::above_position(Self::subtree_level(), position); + let replacement = self + .store + .get_shard(subtree_addr) + .and_then(|s| s.truncate_to_position(position)); + match replacement { + Some(truncated) => { + if self.store.truncate(subtree_addr).is_err() + || self.store.put_shard(truncated).is_err() + { + false + } else { + self.checkpoints.split_off(&checkpoint_id); + true + } + } + None => false, + } + } + } + } + None => false, + } + } else { + // do not remove the first checkpoint. + false + } + } + /// Computes the root of any subtree of this tree rooted at the given address, with the overall /// tree truncated to the specified position. /// @@ -2109,6 +2164,68 @@ impl< |pos| self.root(Self::root_addr(), pos + 1), ) } + + /// Computes the witness for the leaf at the specified position. + /// + /// Returns the witness as of the most recently appended leaf if `checkpoint_depth == 0`. Note + /// that if the most recently appended leaf is also a checkpoint, this will return the same + /// result as `checkpoint_depth == 1`. 
+ pub fn witness( + &self, + position: Position, + checkpoint_depth: usize, + ) -> Result, QueryError> { + let max_leaf_position = self + .max_leaf_position(checkpoint_depth) + .and_then(|v| v.ok_or_else(|| QueryError::TreeIncomplete(vec![Self::root_addr()])))?; + + if position > max_leaf_position { + Err(QueryError::NotContained(Address::from_parts( + Level::from(0), + position.into(), + ))) + } else { + let subtree_addr = Address::above_position(Self::subtree_level(), position); + + // compute the witness for the specified position up to the subtree root + let mut witness = self.store.get_shard(subtree_addr).map_or_else( + || Err(QueryError::TreeIncomplete(vec![subtree_addr])), + |subtree| subtree.witness(position, max_leaf_position + 1), + )?; + + // compute the remaining parts of the witness up to the root + let root_addr = Self::root_addr(); + let mut cur_addr = subtree_addr; + while cur_addr != root_addr { + witness.push(self.root(cur_addr.sibling(), max_leaf_position + 1)?); + cur_addr = cur_addr.parent(); + } + + Ok(witness) + } + } + + /// Make a marked leaf at a position eligible to be pruned. + /// + /// If the checkpoint associated with the specified identifier does not exist because the + /// corresponding checkpoint would have been more than `max_checkpoints` deep, the removal + /// is recorded as of the first existing checkpoint and the associated leaves will be pruned + /// when that checkpoint is subsequently removed. 
+ pub fn remove_mark(&mut self, position: Position, as_of_checkpoint: &C) -> bool { + if self.get_marked_leaf(position).is_some() { + if let Some(checkpoint) = self.checkpoints.get_mut(as_of_checkpoint) { + checkpoint.marks_removed.insert(position); + return true; + } + + if let Some((_, checkpoint)) = self.checkpoints.iter_mut().next() { + checkpoint.marks_removed.insert(position); + return true; + } + } + + false + } } // We need an applicative functor for Result for this function so that we can correctly @@ -2174,16 +2291,20 @@ pub mod testing { #[cfg(test)] mod tests { use crate::{ - LocatedPrunableTree, LocatedTree, Node, PrunableTree, QueryError, ShardStore, ShardTree, - Tree, EPHEMERAL, MARKED, + IncompleteAt, LocatedPrunableTree, LocatedTree, Node, PrunableTree, QueryError, ShardStore, + ShardTree, Tree, EPHEMERAL, MARKED, }; + use assert_matches::assert_matches; use core::convert::Infallible; use incrementalmerkletree::{ testing::{ - self, check_append, check_root_hashes, complete_tree::CompleteTree, CombinedTree, + self, arb_operation, check_append, check_checkpoint_rewind, check_operations, + check_rewind_remove_mark, check_root_hashes, check_witnesses, + complete_tree::CompleteTree, CombinedTree, SipHashable, }, Address, Hashable, Level, Position, Retention, }; + use proptest::prelude::*; use std::collections::BTreeSet; use std::rc::Rc; @@ -2495,6 +2616,97 @@ mod tests { assert_eq!(complete.subtree.right_filled_root(), Ok("abcd".to_string())); } + #[test] + fn shardtree_insertion() { + let mut tree: ShardTree, 4, 3> = + ShardTree::new(vec![], 100, 0); + assert_matches!( + tree.batch_insert( + Position::from(1), + vec![ + ("b".to_string(), Retention::Checkpoint { id: 1, is_marked: false }), + ("c".to_string(), Retention::Ephemeral), + ("d".to_string(), Retention::Marked), + ].into_iter() + ), + Ok(Some((pos, incomplete))) if + pos == Position::from(3) && + incomplete == vec![ + IncompleteAt { + address: Address::from_parts(Level::from(0), 0), + 
required_for_witness: true + } + ] + ); + + assert_matches!( + tree.root_at_checkpoint(1), + Err(QueryError::TreeIncomplete(v)) if v == vec![Address::from_parts(Level::from(0), 0)] + ); + + assert_matches!( + tree.batch_insert( + Position::from(0), + vec![ + ("a".to_string(), Retention::Ephemeral), + ].into_iter() + ), + Ok(Some((pos, incomplete))) if + pos == Position::from(0) && + incomplete == vec![] + ); + + assert_matches!( + tree.root_at_checkpoint(0), + Ok(h) if h == *"abcd____________" + ); + + assert_matches!( + tree.root_at_checkpoint(1), + Ok(h) if h == *"ab______________" + ); + + assert_matches!( + tree.batch_insert( + Position::from(10), + vec![ + ("k".to_string(), Retention::Ephemeral), + ("l".to_string(), Retention::Checkpoint { id: 2, is_marked: false }), + ("m".to_string(), Retention::Ephemeral), + ].into_iter() + ), + Ok(Some((pos, incomplete))) if + pos == Position::from(12) && + incomplete == vec![ + IncompleteAt { + address: Address::from_parts(Level::from(1), 4), + required_for_witness: false + }, + IncompleteAt { + address: Address::from_parts(Level::from(0), 13), + required_for_witness: false + }, + IncompleteAt { + address: Address::from_parts(Level::from(1), 7), + required_for_witness: false + }, + ] + ); + + assert_matches!( + tree.root_at_checkpoint(0), + // The (0, 13) and (1, 7) incomplete subtrees are + // not considered incomplete here because they appear + // at the tip of the tree. 
+ Err(QueryError::TreeIncomplete(xs)) if xs == vec![ + Address::from_parts(Level::from(2), 1), + Address::from_parts(Level::from(1), 4), + ] + ); + + assert!(tree.truncate_removing_checkpoint(1)); + } + impl< H: Hashable + Ord + Clone, C: Clone + Ord + core::fmt::Debug, @@ -2527,12 +2739,16 @@ mod tests { ShardTree::root_at_checkpoint(self, checkpoint_depth).ok() } - fn witness(&self, _position: Position, _checkpoint_depth: usize) -> Option> { - todo!() + fn witness(&self, position: Position, checkpoint_depth: usize) -> Option> { + ShardTree::witness(self, position, checkpoint_depth).ok() } - fn remove_mark(&mut self, _position: Position) -> bool { - todo!() + fn remove_mark(&mut self, position: Position) -> bool { + if let Some(c) = self.checkpoints.iter().rev().map(|(c, _)| c.clone()).next() { + ShardTree::remove_mark(self, position, &c) + } else { + false + } } fn checkpoint(&mut self, checkpoint_id: C) -> bool { @@ -2540,7 +2756,7 @@ mod tests { } fn rewind(&mut self) -> bool { - todo!() + ShardTree::truncate_removing_checkpoint(self, 1) } } @@ -2557,6 +2773,28 @@ mod tests { ShardTree::, 4, 3>::new(vec![], m, 0) }); } + + #[test] + fn witnesses() { + check_witnesses(|m| { + ShardTree::, 4, 3>::new(vec![], m, 0) + }); + } + + #[test] + fn checkpoint_rewind() { + check_checkpoint_rewind(|m| { + ShardTree::, 4, 3>::new(vec![], m, 0) + }); + } + + #[test] + fn rewind_remove_mark() { + check_rewind_remove_mark(|m| { + ShardTree::, 4, 3>::new(vec![], m, 0) + }); + } + // Combined tree tests #[allow(clippy::type_complexity)] fn new_combined_tree( @@ -2577,4 +2815,37 @@ mod tests { fn combined_append() { check_append(new_combined_tree); } + + #[test] + fn combined_rewind_remove_mark() { + check_rewind_remove_mark(new_combined_tree); + } + + proptest! 
{ + #![proptest_config(ProptestConfig::with_cases(100000))] + + #[test] + fn check_randomized_u64_ops( + ops in proptest::collection::vec( + arb_operation((0..32u64).prop_map(SipHashable), 0usize..100), + 1..100 + ) + ) { + let tree = new_combined_tree(100); + let indexed_ops = ops.iter().enumerate().map(|(i, op)| op.map_checkpoint_id(|_| i)).collect::>(); + check_operations(tree, &indexed_ops)?; + } + + #[test] + fn check_randomized_str_ops( + ops in proptest::collection::vec( + arb_operation((97u8..123).prop_map(|c| char::from(c).to_string()), 0usize..100), + 1..100 + ) + ) { + let tree = new_combined_tree(100); + let indexed_ops = ops.iter().enumerate().map(|(i, op)| op.map_checkpoint_id(|_| i)).collect::>(); + check_operations(tree, &indexed_ops)?; + } + } } From fa7c6673fd18287789925f651cdeea473a7b2193 Mon Sep 17 00:00:00 2001 From: Kris Nuttycombe Date: Mon, 6 Feb 2023 16:04:16 -0700 Subject: [PATCH 10/16] Make `LocatedTree` and `LocatedPrunableTree` type aliases public. --- shardtree/src/lib.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/shardtree/src/lib.rs b/shardtree/src/lib.rs index b42f91e..4b4e56a 100644 --- a/shardtree/src/lib.rs +++ b/shardtree/src/lib.rs @@ -267,7 +267,7 @@ impl Tree { } } -type PrunableTree = Tree>, (H, RetentionFlags)>; +pub type PrunableTree = Tree>, (H, RetentionFlags)>; impl PrunableTree { /// Returns the the value if this is a leaf. @@ -690,7 +690,7 @@ impl LocatedTree { } } -type LocatedPrunableTree = LocatedTree>, (H, RetentionFlags)>; +pub type LocatedPrunableTree = LocatedTree>, (H, RetentionFlags)>; /// A data structure describing the nature of a [`Node::Nil`] node in the tree that was introduced /// as the consequence of an insertion. 
From 37be939c0f0d5e31058e500253de110ba3030504 Mon Sep 17 00:00:00 2001 From: Kris Nuttycombe Date: Thu, 9 Mar 2023 13:36:53 -0700 Subject: [PATCH 11/16] Apply suggestions from code review Co-authored-by: ebfull --- shardtree/src/lib.rs | 29 ++++++++--------------------- 1 file changed, 8 insertions(+), 21 deletions(-) diff --git a/shardtree/src/lib.rs b/shardtree/src/lib.rs index 4b4e56a..ca5a23f 100644 --- a/shardtree/src/lib.rs +++ b/shardtree/src/lib.rs @@ -409,8 +409,7 @@ impl PrunableTree { // a starting valid fill point that is outside the range of leaf positions. let no_default_fill = addr.position_range_end(); match (t0, t1) { - (Tree(Node::Nil), other) => Ok(other), - (other, Tree(Node::Nil)) => Ok(other), + (Tree(Node::Nil), other) | (other, Tree(Node::Nil)) => Ok(other), (Tree(Node::Leaf { value: vl }), Tree(Node::Leaf { value: vr })) => { if vl == vr { Ok(Tree(Node::Leaf { value: vl })) @@ -418,20 +417,8 @@ impl PrunableTree { Err(addr) } } - (Tree(Node::Leaf { value }), parent) => { - // `parent` is statically known to be a `Node::Parent` - if parent - .root_hash(addr, no_default_fill) - .iter() - .all(|r| r == &value.0) - { - Ok(parent.reannotate_root(Some(Rc::new(value.0)))) - } else { - Err(addr) - } - } - (parent, Tree(Node::Leaf { value })) => { - // `parent` is statically known to be a `Node::Parent` + (Tree(Node::Leaf { value }), parent @ Tree(Node::Parent { .. })) + | (parent @ Tree(Node::Parent { .. }), Tree(Node::Leaf { value })) => { if parent .root_hash(addr, no_default_fill) .iter() @@ -818,7 +805,7 @@ impl LocatedPrunableTree { /// Compute the witness for the leaf at the specified position. /// /// This tree will be truncated to the `truncate_at` position, and then empty - /// empty roots corresponding to later positions will be filled by [`H::empty_root`]. + /// roots corresponding to later positions will be filled by [`H::empty_root`]. 
/// /// Returns either the witness for the leaf at the specified position, or an error that /// describes the causes of failure. @@ -871,8 +858,8 @@ impl LocatedPrunableTree { } } else if left.is_marked_leaf() { // If we have the left-hand leaf and the right-hand leaf is empty, we - // can fill it with the empty leaf, but only if `fill_start` is None or - // it is located at `position + 1`. + // can fill it with the empty leaf, but only if we are truncating at + // a position to the left of the current position if truncate_at <= position + 1 { Ok(vec![H::empty_leaf()]) } else { @@ -1559,9 +1546,9 @@ pub struct ShardTree, const DEPTH: u8, const SHARD_H max_checkpoints: usize, /// A map from position to the count of checkpoints at this position. checkpoints: BTreeMap, - // /// A tree that is used to cache the known roots of subtrees in the "cap" of nodes between + // /// TODO: Add a tree that is used to cache the known roots of subtrees in the "cap" of nodes between // /// `SHARD_HEIGHT` and `DEPTH` that are otherwise not directly represented in the tree. This - // /// cache is automatically updated when computing roots and witnesses. Leaf nodes are empty + // /// cache will be automatically updated when computing roots and witnesses. Leaf nodes are empty // /// because the annotation slot is consistently used to store the subtree hashes at each node. // cap_cache: Tree>, ()> _hash_type: PhantomData, From 664cead68b48383eb2f94123ead326dc9744d24d Mon Sep 17 00:00:00 2001 From: Kris Nuttycombe Date: Thu, 9 Mar 2023 13:51:56 -0700 Subject: [PATCH 12/16] Use `bitflags` crate instead of hand-rolled `RetentionFlags` bit flags. 
--- shardtree/Cargo.toml | 1 + shardtree/src/lib.rs | 159 ++++++++++++++++++------------------------- 2 files changed, 67 insertions(+), 93 deletions(-) diff --git a/shardtree/Cargo.toml b/shardtree/Cargo.toml index e3f1c63..2707268 100644 --- a/shardtree/Cargo.toml +++ b/shardtree/Cargo.toml @@ -12,6 +12,7 @@ repository = "https://github.com/zcash/incrementalmerkletree" categories = ["algorithms", "data-structures"] [dependencies] +bitflags = "1.3" either = "1.8" incrementalmerkletree = { version = "0.3", path = "../incrementalmerkletree" } proptest = { version = "1.0.0", optional = true } diff --git a/shardtree/src/lib.rs b/shardtree/src/lib.rs index ca5a23f..e7cb212 100644 --- a/shardtree/src/lib.rs +++ b/shardtree/src/lib.rs @@ -1,67 +1,52 @@ +use bitflags::bitflags; use core::convert::Infallible; use core::fmt::Debug; use core::marker::PhantomData; -use core::ops::{BitAnd, BitOr, Deref, Not, Range}; +use core::ops::{Deref, Range}; use either::Either; use std::collections::{BTreeMap, BTreeSet}; use std::rc::Rc; use incrementalmerkletree::{Address, Hashable, Level, Position, Retention}; -/// A type for flags that determine when and how leaves can be pruned from a tree. -#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord)] -pub struct RetentionFlags(u8); +bitflags! { + pub struct RetentionFlags: u8 { + /// A leaf with `EPHEMERAL` retention can be pruned as soon as we are certain that it is not part + /// of the witness for a leaf with `CHECKPOINT` or `MARKED` retention. + const EPHEMERAL = 0b00000000; -impl BitOr for RetentionFlags { - type Output = Self; + /// A leaf with `CHECKPOINT` retention can be pruned when there are more than `max_checkpoints` + /// additional checkpoint leaves, if it is not also a marked leaf. + const CHECKPOINT = 0b00000001; - fn bitor(self, rhs: Self) -> Self { - RetentionFlags(self.0 | rhs.0) + /// A leaf with `MARKED` retention can be pruned only as a consequence of an explicit deletion + /// action.
+ const MARKED = 0b00000010; } } -impl BitAnd for RetentionFlags { - type Output = Self; - - fn bitand(self, rhs: Self) -> Self { - RetentionFlags(self.0 & rhs.0) - } -} - -/// An leaf with `EPHEMERAL` retention can be pruned as soon as we are certain that it is not part -/// of the witness for a leaf with `CHECKPOINT` or `MARKED` retention. -pub static EPHEMERAL: RetentionFlags = RetentionFlags(0b00000000); - -/// A leaf with `CHECKPOINT` retention can be pruned when there are more than `max_checkpoints` -/// additional checkpoint leaves, if it is not also a marked leaf. -pub static CHECKPOINT: RetentionFlags = RetentionFlags(0b00000001); - -/// A leaf with `MARKED` retention can be pruned only as a consequence of an explicit deletion -/// action. -pub static MARKED: RetentionFlags = RetentionFlags(0b00000010); - impl RetentionFlags { pub fn is_checkpoint(&self) -> bool { - (*self & CHECKPOINT) == CHECKPOINT + (*self & RetentionFlags::CHECKPOINT) == RetentionFlags::CHECKPOINT } pub fn is_marked(&self) -> bool { - (*self & MARKED) == MARKED + (*self & RetentionFlags::MARKED) == RetentionFlags::MARKED } } impl<'a, C> From<&'a Retention> for RetentionFlags { fn from(retention: &'a Retention) -> Self { match retention { - Retention::Ephemeral => EPHEMERAL, + Retention::Ephemeral => RetentionFlags::EPHEMERAL, Retention::Checkpoint { is_marked, .. } => { if *is_marked { - CHECKPOINT | MARKED + RetentionFlags::CHECKPOINT | RetentionFlags::MARKED } else { - CHECKPOINT + RetentionFlags::CHECKPOINT } } - Retention::Marked => MARKED, + Retention::Marked => RetentionFlags::MARKED, } } } @@ -72,26 +57,6 @@ impl From> for RetentionFlags { } } -/// A mask that may be used to unset one or more retention flags. 
-#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord)] -pub struct RetentionMask(u8); - -impl Not for RetentionFlags { - type Output = RetentionMask; - - fn not(self) -> Self::Output { - RetentionMask(!self.0) - } -} - -impl BitAnd for RetentionFlags { - type Output = Self; - - fn bitand(self, rhs: RetentionMask) -> Self { - RetentionFlags(self.0 & rhs.0) - } -} - /// A "pattern functor" for a single layer of a binary tree. #[derive(Clone, Debug, PartialEq, Eq)] pub enum Node { @@ -483,7 +448,7 @@ impl PrunableTree { // we can prune right-hand leaves that are not marked; if a leaf // is a checkpoint then that information will be propagated to // the replacement leaf - if lv.1 == EPHEMERAL && (rv.1 & MARKED) == EPHEMERAL => + if lv.1 == RetentionFlags::EPHEMERAL && (rv.1 & RetentionFlags::MARKED) == RetentionFlags::EPHEMERAL => { Tree( Node::Leaf { @@ -1639,7 +1604,7 @@ impl< Ok(Some(LocatedTree { root_addr: addr, root: Tree(Node::Leaf { - value: (value, EPHEMERAL), + value: (value, RetentionFlags::EPHEMERAL), }), })) } @@ -1821,7 +1786,7 @@ impl< } Tree(Node::Leaf { value: (h, r) }) => Some(( Tree(Node::Leaf { - value: (h.clone(), *r | CHECKPOINT), + value: (h.clone(), *r | RetentionFlags::CHECKPOINT), }), root_addr.max_position(), )), @@ -1897,10 +1862,10 @@ impl< .and_modify(|to_clear| { to_clear .entry(pos) - .and_modify(|flags| *flags = *flags | CHECKPOINT) - .or_insert(CHECKPOINT); + .and_modify(|flags| *flags = *flags | RetentionFlags::CHECKPOINT) + .or_insert(RetentionFlags::CHECKPOINT); }) - .or_insert_with(|| BTreeMap::from([(pos, CHECKPOINT)])); + .or_insert_with(|| BTreeMap::from([(pos, RetentionFlags::CHECKPOINT)])); } // clear the leaves that have been marked for removal @@ -1911,10 +1876,10 @@ impl< .and_modify(|to_clear| { to_clear .entry(*unmark_pos) - .and_modify(|flags| *flags = *flags | MARKED) - .or_insert(MARKED); + .and_modify(|flags| *flags = *flags | RetentionFlags::MARKED) + .or_insert(RetentionFlags::MARKED); }) - .or_insert_with(|| 
BTreeMap::from([(*unmark_pos, MARKED)])); + .or_insert_with(|| BTreeMap::from([(*unmark_pos, RetentionFlags::MARKED)])); } } @@ -2241,7 +2206,12 @@ pub mod testing { use proptest::sample::select; pub fn arb_retention_flags() -> impl Strategy { - select(vec![EPHEMERAL, CHECKPOINT, MARKED, MARKED | CHECKPOINT]) + select(vec![ + RetentionFlags::EPHEMERAL, + RetentionFlags::CHECKPOINT, + RetentionFlags::MARKED, + RetentionFlags::MARKED | RetentionFlags::CHECKPOINT, + ]) } pub fn arb_tree( @@ -2278,8 +2248,8 @@ pub mod testing { #[cfg(test)] mod tests { use crate::{ - IncompleteAt, LocatedPrunableTree, LocatedTree, Node, PrunableTree, QueryError, ShardStore, - ShardTree, Tree, EPHEMERAL, MARKED, + IncompleteAt, LocatedPrunableTree, LocatedTree, Node, PrunableTree, QueryError, + RetentionFlags, ShardStore, ShardTree, Tree, }; use assert_matches::assert_matches; use core::convert::Infallible; @@ -2344,8 +2314,8 @@ mod tests { #[test] fn tree_root() { let t: PrunableTree = parent( - leaf(("a".to_string(), EPHEMERAL)), - leaf(("b".to_string(), EPHEMERAL)), + leaf(("a".to_string(), RetentionFlags::EPHEMERAL)), + leaf(("b".to_string(), RetentionFlags::EPHEMERAL)), ); assert_eq!( @@ -2376,8 +2346,8 @@ mod tests { let t: LocatedPrunableTree = LocatedTree { root_addr: Address::from_parts(3.into(), 1), root: parent( - leaf(("abcd".to_string(), EPHEMERAL)), - parent(nil(), leaf(("gh".to_string(), EPHEMERAL))), + leaf(("abcd".to_string(), RetentionFlags::EPHEMERAL)), + parent(nil(), leaf(("gh".to_string(), RetentionFlags::EPHEMERAL))), ), }; @@ -2385,7 +2355,7 @@ mod tests { t.insert_subtree::( LocatedTree { root_addr: Address::from_parts(1.into(), 6), - root: parent(leaf(("e".to_string(), MARKED)), nil()) + root: parent(leaf(("e".to_string(), RetentionFlags::MARKED)), nil()) }, true ), @@ -2393,10 +2363,10 @@ mod tests { LocatedTree { root_addr: Address::from_parts(3.into(), 1), root: parent( - leaf(("abcd".to_string(), EPHEMERAL)), + leaf(("abcd".to_string(), 
RetentionFlags::EPHEMERAL)), parent( - parent(leaf(("e".to_string(), MARKED)), nil()), - leaf(("gh".to_string(), EPHEMERAL)) + parent(leaf(("e".to_string(), RetentionFlags::MARKED)), nil()), + leaf(("gh".to_string(), RetentionFlags::EPHEMERAL)) ) ) }, @@ -2410,13 +2380,13 @@ mod tests { let t: LocatedPrunableTree = LocatedTree { root_addr: Address::from_parts(3.into(), 0), root: parent( - leaf(("abcd".to_string(), EPHEMERAL)), + leaf(("abcd".to_string(), RetentionFlags::EPHEMERAL)), parent( parent( - leaf(("e".to_string(), MARKED)), - leaf(("f".to_string(), EPHEMERAL)), + leaf(("e".to_string(), RetentionFlags::MARKED)), + leaf(("f".to_string(), RetentionFlags::EPHEMERAL)), ), - leaf(("gh".to_string(), EPHEMERAL)), + leaf(("gh".to_string(), RetentionFlags::EPHEMERAL)), ), ), }; @@ -2449,8 +2419,8 @@ mod tests { #[test] fn tree_marked_positions() { let t: PrunableTree = parent( - leaf(("a".to_string(), EPHEMERAL)), - leaf(("b".to_string(), MARKED)), + leaf(("a".to_string(), RetentionFlags::EPHEMERAL)), + leaf(("b".to_string(), RetentionFlags::MARKED)), ); assert_eq!( t.marked_positions(Address::from_parts(Level::from(1), 0)), @@ -2467,38 +2437,41 @@ mod tests { #[test] fn tree_prune() { let t: PrunableTree = parent( - leaf(("a".to_string(), EPHEMERAL)), - leaf(("b".to_string(), EPHEMERAL)), + leaf(("a".to_string(), RetentionFlags::EPHEMERAL)), + leaf(("b".to_string(), RetentionFlags::EPHEMERAL)), ); assert_eq!( t.clone().prune(Level::from(1)), - leaf(("ab".to_string(), EPHEMERAL)) + leaf(("ab".to_string(), RetentionFlags::EPHEMERAL)) ); - let t0 = parent(leaf(("c".to_string(), MARKED)), t); + let t0 = parent(leaf(("c".to_string(), RetentionFlags::MARKED)), t); assert_eq!( t0.prune(Level::from(2)), parent( - leaf(("c".to_string(), MARKED)), - leaf(("ab".to_string(), EPHEMERAL)) + leaf(("c".to_string(), RetentionFlags::MARKED)), + leaf(("ab".to_string(), RetentionFlags::EPHEMERAL)) ) ); } #[test] fn tree_merge_checked() { - let t0: PrunableTree = 
parent(leaf(("a".to_string(), EPHEMERAL)), nil()); + let t0: PrunableTree = + parent(leaf(("a".to_string(), RetentionFlags::EPHEMERAL)), nil()); - let t1: PrunableTree = parent(nil(), leaf(("b".to_string(), EPHEMERAL))); + let t1: PrunableTree = + parent(nil(), leaf(("b".to_string(), RetentionFlags::EPHEMERAL))); assert_eq!( t0.clone() .merge_checked(Address::from_parts(1.into(), 0), t1.clone()), - Ok(leaf(("ab".to_string(), EPHEMERAL))) + Ok(leaf(("ab".to_string(), RetentionFlags::EPHEMERAL))) ); - let t2: PrunableTree = parent(leaf(("c".to_string(), EPHEMERAL)), nil()); + let t2: PrunableTree = + parent(leaf(("c".to_string(), RetentionFlags::EPHEMERAL)), nil()); assert_eq!( t0.clone() .merge_checked(Address::from_parts(1.into(), 0), t2.clone()), @@ -2510,7 +2483,7 @@ mod tests { assert_eq!( t3.merge_checked(Address::from_parts(2.into(), 0), t4), - Ok(leaf(("abcb".to_string(), EPHEMERAL))) + Ok(leaf(("abcb".to_string(), RetentionFlags::EPHEMERAL))) ); } @@ -2582,8 +2555,8 @@ mod tests { LocatedPrunableTree { root_addr: Address::from_parts(2.into(), 0), root: parent( - parent(leaf(("a".to_string(), EPHEMERAL)), nil()), - parent(nil(), leaf(("d".to_string(), EPHEMERAL))) + parent(leaf(("a".to_string(), RetentionFlags::EPHEMERAL)), nil()), + parent(nil(), leaf(("d".to_string(), RetentionFlags::EPHEMERAL))) ) } ); From ac6e8e8212869caf029ebcbf30c9852387a225ee Mon Sep 17 00:00:00 2001 From: Kris Nuttycombe Date: Thu, 9 Mar 2023 16:29:24 -0700 Subject: [PATCH 13/16] Use direct recursion in shardtree instead of reduce/try_reduce These more general functions weren't carrying their weight. 
--- shardtree/Cargo.toml | 6 -- shardtree/benches/shardtree.rs | 86 ---------------------- shardtree/src/lib.rs | 131 +++++++++++---------------------- 3 files changed, 44 insertions(+), 179 deletions(-) delete mode 100644 shardtree/benches/shardtree.rs diff --git a/shardtree/Cargo.toml b/shardtree/Cargo.toml index 2707268..d0c4bba 100644 --- a/shardtree/Cargo.toml +++ b/shardtree/Cargo.toml @@ -29,9 +29,3 @@ test-dependencies = ["proptest"] [target.'cfg(unix)'.dev-dependencies] pprof = { version = "0.9", features = ["criterion", "flamegraph"] } # MSRV 1.56 inferno = ">=0.11, <0.11.5" # MSRV 1.59 - -[[bench]] -name = "shardtree" -harness = false -required-features = ["test-dependencies"] - diff --git a/shardtree/benches/shardtree.rs b/shardtree/benches/shardtree.rs deleted file mode 100644 index ffba295..0000000 --- a/shardtree/benches/shardtree.rs +++ /dev/null @@ -1,86 +0,0 @@ -use criterion::{criterion_group, criterion_main, Criterion}; -use proptest::prelude::*; -use proptest::strategy::ValueTree; -use proptest::test_runner::TestRunner; - -use incrementalmerkletree::Address; -use shardtree::{testing::arb_tree, Node}; - -#[cfg(unix)] -use pprof::criterion::{Output, PProfProfiler}; - -// An algebra for computing the incomplete roots of a tree (the addresses at which nodes are -// `Nil`). This is used for benchmarking to determine the viability of "attribute grammars" for -// when you want to use `reduce` to compute a value that requires information to be passed top-down -// through the tree. -type RootFn = Box Vec
>; -pub fn incomplete_roots(node: Node) -> RootFn { - Box::new(move |addr| match &node { - Node::Parent { left, right, .. } => { - let (left_addr, right_addr) = addr - .children() - .expect("A parent node cannot appear at level 0"); - let mut left_result = left(left_addr); - let mut right_result = right(right_addr); - left_result.append(&mut right_result); - left_result - } - Node::Leaf { .. } => vec![], - Node::Nil { .. } => vec![addr], - }) -} - -pub fn bench_shardtree(c: &mut Criterion) { - { - //let mut group = c.benchmark_group("shardtree-incomplete"); - - let mut runner = TestRunner::deterministic(); - let input = arb_tree(Just(()), any::(), 16, 4096) - .new_tree(&mut runner) - .unwrap() - .current(); - println!( - "Benchmarking with {} leaves.", - input.reduce( - &(|node| match node { - Node::Parent { left, right } => left + right, - Node::Leaf { .. } => 1, - Node::Nil => 0, - }) - ) - ); - - let input_root = Address::from_parts( - input - .reduce( - &(|node| match node { - Node::Parent { left, right } => std::cmp::max(left, right) + 1, - Node::Leaf { .. } => 0, - Node::Nil => 0, - }), - ) - .into(), - 0, - ); - - c.bench_function("direct_recursion", |b| { - b.iter(|| input.incomplete(input_root)) - }); - - c.bench_function("reduce", |b| { - b.iter(|| input.reduce(&incomplete_roots)(input_root)) - }); - } -} - -#[cfg(unix)] -criterion_group! { - name = benches; - config = Criterion::default().with_profiler(PProfProfiler::new(100, Output::Flamegraph(None))); - targets = bench_shardtree -} - -#[cfg(not(unix))] -criterion_group!(benches, bench_shardtree); - -criterion_main!(benches); diff --git a/shardtree/src/lib.rs b/shardtree/src/lib.rs index e7cb212..817840d 100644 --- a/shardtree/src/lib.rs +++ b/shardtree/src/lib.rs @@ -109,33 +109,19 @@ impl Node { } } -/// An F-algebra for use with [`Tree::reduce`] for determining whether a tree has any `Nil` nodes. -/// -/// Returns `true` if no [`Node::Nil`] nodes are present in the tree. 
-pub fn is_complete(node: Node) -> bool { - match node { - Node::Parent { left, right, .. } => left && right, - Node::Leaf { .. } => true, - Node::Nil { .. } => false, - } -} - -/// An F-algebra for use with [`Tree::try_reduce`] for determining whether a tree has any `MARKED` nodes. -/// -/// `Tree::try_reduce` is preferred for this operation because it allows us to short-circuit as -/// soon as we find a marked node. Returns [`Either::Left(())`] if a marked node exists, -/// [`Either::Right(())`] otherwise. -pub fn contains_marked(node: Node<(), A, (V, RetentionFlags)>) -> Either<(), ()> { - match node { - Node::Parent { .. } => Either::Right(()), - Node::Leaf { value: (_, r) } => { - if r.is_marked() { - Either::Left(()) - } else { - Either::Right(()) - } +impl<'a, C: Clone, A: Clone, V: Clone> Node { + pub fn cloned(&self) -> Node { + match self { + Node::Parent { ann, left, right } => Node::Parent { + ann: (*ann).clone(), + left: left.clone(), + right: right.clone(), + }, + Node::Leaf { value } => Node::Leaf { + value: (*value).clone(), + }, + Node::Nil => Node::Nil, } - Node::Nil { .. } => Either::Right(()), } } @@ -157,11 +143,22 @@ impl Tree { Tree(self.0.reannotate(ann)) } + /// Returns `true` if no [`Node::Nil`] nodes are present in the tree, `false` otherwise. + pub fn is_complete(&self) -> bool { + match &self.0 { + Node::Parent { left, right, .. } => { + left.as_ref().is_complete() && right.as_ref().is_complete() + } + Node::Leaf { .. } => true, + Node::Nil { .. } => false, + } + } + /// Returns a vector of the addresses of [`Node::Nil`] subtree roots within this tree. /// /// The given address must correspond to the root of this tree, or this method will /// yield incorrect results or may panic. - pub fn incomplete(&self, root_addr: Address) -> Vec
{ + pub fn incomplete_nodes(&self, root_addr: Address) -> Vec
{ match &self.0 { Node::Parent { left, right, .. } => { // We should never construct parent nodes where both children are Nil. @@ -172,8 +169,8 @@ impl Tree { .children() .expect("A parent node cannot appear at level 0"); - let mut left_incomplete = left.incomplete(left_root); - let mut right_incomplete = right.incomplete(right_root); + let mut left_incomplete = left.incomplete_nodes(left_root); + let mut right_incomplete = right.incomplete_nodes(right_root); left_incomplete.append(&mut right_incomplete); left_incomplete } @@ -183,55 +180,6 @@ impl Tree { } } -impl Tree { - /// Folds over the tree from leaf to root with the given function. - /// - /// See [`is_complete`] for an example of a function that can be used with this method. - /// This operation will visit every node of the tree. See [`try_reduce`] for a variant - /// that can perform a depth-first, left-to-right traversal with the option to - /// short-circuit. - pub fn reduce) -> B>(&self, alg: &F) -> B { - match &self.0 { - Node::Parent { ann, left, right } => { - let left_result = left.reduce(alg); - let right_result = right.reduce(alg); - alg(Node::Parent { - ann: ann.clone(), - left: left_result, - right: right_result, - }) - } - Node::Leaf { value } => alg(Node::Leaf { - value: value.clone(), - }), - Node::Nil => alg(Node::Nil), - } - } - - /// Folds over the tree from leaf to root with the given function. - /// - /// This performs a left-to-right, depth-first traversal that halts on the first - /// [`Either::Left`] result, or builds an [`Either::Right`] from the results computed at every - /// node. 
- pub fn try_reduce) -> Either>(&self, alg: &F) -> Either { - match &self.0 { - Node::Parent { ann, left, right } => left.try_reduce(alg).right_and_then(|l_value| { - right.try_reduce(alg).right_and_then(move |r_value| { - alg(Node::Parent { - ann: ann.clone(), - left: l_value, - right: r_value, - }) - }) - }), - Node::Leaf { value } => alg(Node::Leaf { - value: value.clone(), - }), - Node::Nil => alg(Node::Nil), - } - } -} - pub type PrunableTree = Tree>, (H, RetentionFlags)>; impl PrunableTree { @@ -256,6 +204,15 @@ impl PrunableTree { .map_or(false, |(_, retention)| retention.is_marked()) } + /// Determines whether a tree has any `MARKED` nodes. + pub fn contains_marked(&self) -> bool { + match &self.0 { + Node::Parent { left, right, .. } => left.contains_marked() || right.contains_marked(), + Node::Leaf { value: (_, r) } => r.is_marked(), + Node::Nil => false, + } + } + /// Returns the Merkle root of this tree, given the address of the root node, or /// a vector of the addresses of `Nil` nodes that inhibited the computation of /// such a root. @@ -496,8 +453,8 @@ impl LocatedTree { /// Returns the set of incomplete subtree roots contained within this tree, ordered by /// increasing position. - pub fn incomplete(&self) -> Vec
{ - self.root.incomplete(self.root_addr) + pub fn incomplete_nodes(&self) -> Vec
{ + self.root.incomplete_nodes(self.root_addr) } /// Returns the maximum position at which a non-Nil leaf has been observed in the tree. @@ -1037,7 +994,7 @@ impl LocatedPrunableTree { let LocatedTree { root_addr, root } = self; if root_addr.contains(&subtree.root_addr) { - let complete = subtree.root.reduce(&is_complete); + let complete = subtree.root.is_complete(); go(*root_addr, root, subtree, complete, contains_marked).map(|(root, incomplete)| { ( LocatedTree { @@ -1635,7 +1592,7 @@ impl< let (append_result, position, checkpoint_id) = if let Some(subtree) = self.store.last_shard() { - if subtree.root.reduce(&is_complete) { + if subtree.root.is_complete() { let addr = subtree.root_addr; if addr.index() + 1 >= 0x1 << (SHARD_HEIGHT - 1) { @@ -1733,7 +1690,7 @@ impl< let mut all_incomplete = vec![]; for subtree in tree.decompose_to_level(Self::subtree_level()).into_iter() { let root_addr = subtree.root_addr; - let contains_marked = subtree.root.try_reduce(&contains_marked).is_left(); + let contains_marked = subtree.root.contains_marked(); let empty = LocatedTree::empty(root_addr); let (new_subtree, mut incomplete) = self .store @@ -2288,22 +2245,22 @@ mod tests { } #[test] - fn tree_incomplete() { + fn tree_incomplete_nodes() { let t: Tree<(), String> = parent(nil(), str_leaf("a")); assert_eq!( - t.incomplete(Address::from_parts(Level::from(1), 0)), + t.incomplete_nodes(Address::from_parts(Level::from(1), 0)), vec![Address::from_parts(Level::from(0), 0)] ); let t0 = parent(str_leaf("b"), t.clone()); assert_eq!( - t0.incomplete(Address::from_parts(Level::from(2), 1)), + t0.incomplete_nodes(Address::from_parts(Level::from(2), 1)), vec![Address::from_parts(Level::from(0), 6)] ); let t1 = parent(nil(), t); assert_eq!( - t1.incomplete(Address::from_parts(Level::from(2), 1)), + t1.incomplete_nodes(Address::from_parts(Level::from(2), 1)), vec![ Address::from_parts(Level::from(1), 2), Address::from_parts(Level::from(0), 6) From d7a04122ea5a87e18513ef2be89366ccb566b04f Mon 
Sep 17 00:00:00 2001 From: Kris Nuttycombe Date: Mon, 13 Mar 2023 11:42:57 -0600 Subject: [PATCH 14/16] Fix clippy complaints. --- incrementalmerkletree/src/lib.rs | 2 +- incrementalmerkletree/src/testing.rs | 1 - incrementalmerkletree/src/testing/complete_tree.rs | 4 ++-- shardtree/src/lib.rs | 4 ++-- 4 files changed, 5 insertions(+), 6 deletions(-) diff --git a/incrementalmerkletree/src/lib.rs b/incrementalmerkletree/src/lib.rs index 08382d4..83df365 100644 --- a/incrementalmerkletree/src/lib.rs +++ b/incrementalmerkletree/src/lib.rs @@ -136,7 +136,7 @@ impl Level { // TODO: replace with an instance for `Step` once `step_trait` // is stabilized pub fn iter_to(self, other: Level) -> impl Iterator { - (self.0..other.0).into_iter().map(Level) + (self.0..other.0).map(Level) } } diff --git a/incrementalmerkletree/src/testing.rs b/incrementalmerkletree/src/testing.rs index 51ed920..e34eccb 100644 --- a/incrementalmerkletree/src/testing.rs +++ b/incrementalmerkletree/src/testing.rs @@ -716,7 +716,6 @@ pub fn check_witnesses + std::fmt::Debug, F: Fn(usize) -> ); let ops = ('a'..='l') - .into_iter() .map(|c| Append(c.to_string(), Marked)) .chain(Some(Append('m'.to_string(), Ephemeral))) .chain(Some(Append('n'.to_string(), Ephemeral))) diff --git a/incrementalmerkletree/src/testing/complete_tree.rs b/incrementalmerkletree/src/testing/complete_tree.rs index 389883d..db4cb65 100644 --- a/incrementalmerkletree/src/testing/complete_tree.rs +++ b/incrementalmerkletree/src/testing/complete_tree.rs @@ -342,7 +342,7 @@ mod tests { #[test] fn correct_root() { const DEPTH: u8 = 3; - let values = (0..(1 << DEPTH)).into_iter().map(SipHashable); + let values = (0..(1 << DEPTH)).map(SipHashable); let mut tree = CompleteTree::::new(100, ()); for value in values { @@ -391,7 +391,7 @@ mod tests { use crate::{testing::Tree, Retention}; const DEPTH: u8 = 3; - let values = (0..(1 << DEPTH)).into_iter().map(SipHashable); + let values = (0..(1 << DEPTH)).map(SipHashable); let mut tree = 
CompleteTree::::new(100, ()); for value in values { diff --git a/shardtree/src/lib.rs b/shardtree/src/lib.rs index 817840d..aa0dcad 100644 --- a/shardtree/src/lib.rs +++ b/shardtree/src/lib.rs @@ -1819,7 +1819,7 @@ impl< .and_modify(|to_clear| { to_clear .entry(pos) - .and_modify(|flags| *flags = *flags | RetentionFlags::CHECKPOINT) + .and_modify(|flags| *flags |= RetentionFlags::CHECKPOINT) .or_insert(RetentionFlags::CHECKPOINT); }) .or_insert_with(|| BTreeMap::from([(pos, RetentionFlags::CHECKPOINT)])); @@ -1833,7 +1833,7 @@ impl< .and_modify(|to_clear| { to_clear .entry(*unmark_pos) - .and_modify(|flags| *flags = *flags | RetentionFlags::MARKED) + .and_modify(|flags| *flags |= RetentionFlags::MARKED) .or_insert(RetentionFlags::MARKED); }) .or_insert_with(|| BTreeMap::from([(*unmark_pos, RetentionFlags::MARKED)])); From 257402db532d49ac8a940e0500177094fcee5b94 Mon Sep 17 00:00:00 2001 From: Kris Nuttycombe Date: Mon, 20 Mar 2023 16:11:07 -0600 Subject: [PATCH 15/16] Address comments from review. --- shardtree/src/lib.rs | 35 +++++++++++++++-------------------- 1 file changed, 15 insertions(+), 20 deletions(-) diff --git a/shardtree/src/lib.rs b/shardtree/src/lib.rs index aa0dcad..5dd267d 100644 --- a/shardtree/src/lib.rs +++ b/shardtree/src/lib.rs @@ -1454,8 +1454,8 @@ impl ShardStore for Vec> { } } -/// A left-dense, sparse binary Merkle tree of the specified depth, represented as a vector of -/// subtrees (shards) of the given maximum height. +/// A sparse binary Merkle tree of the specified depth, represented as an ordered collection of +/// subtrees (shards) of a given maximum height. /// /// This tree maintains a collection of "checkpoints" which represent positions, usually near the /// front of the tree, that are maintained such that it's possible to truncate nodes to the right @@ -1466,7 +1466,7 @@ pub struct ShardTree, const DEPTH: u8, const SHARD_H store: S, /// The maximum number of checkpoints to retain before pruning. 
max_checkpoints: usize, - /// A map from position to the count of checkpoints at this position. + /// An ordered map from checkpoint identifier to checkpoint. checkpoints: BTreeMap, // /// TODO: Add a tree that is used to cache the known roots of subtrees in the "cap" of nodes between // /// `SHARD_HEIGHT` and `DEPTH` that are otherwise not directly represented in the tree. This @@ -1478,7 +1478,7 @@ pub struct ShardTree, const DEPTH: u8, const SHARD_H impl< H: Hashable + Clone + PartialEq, - C: Clone + Ord + core::fmt::Debug, + C: Clone + Ord, S: ShardStore, const DEPTH: u8, const SHARD_HEIGHT: u8, @@ -1722,7 +1722,7 @@ impl< l_addr.level(), ann.clone(), new_left, - right.as_ref().clone(), + Tree(Node::Nil), ), pos, ) @@ -1811,32 +1811,27 @@ impl< { checkpoints_to_delete.push(cid.clone()); - // clear the checkpoint leaf - if let TreeState::AtPosition(pos) = checkpoint.tree_state { + let mut clear_at = |pos, flags_to_clear| { let subtree_addr = Address::above_position(Self::subtree_level(), pos); clear_positions .entry(subtree_addr) .and_modify(|to_clear| { to_clear .entry(pos) - .and_modify(|flags| *flags |= RetentionFlags::CHECKPOINT) - .or_insert(RetentionFlags::CHECKPOINT); + .and_modify(|flags| *flags |= flags_to_clear) + .or_insert(flags_to_clear); }) - .or_insert_with(|| BTreeMap::from([(pos, RetentionFlags::CHECKPOINT)])); + .or_insert_with(|| BTreeMap::from([(pos, flags_to_clear)])); + }; + + // clear the checkpoint leaf + if let TreeState::AtPosition(pos) = checkpoint.tree_state { + clear_at(pos, RetentionFlags::CHECKPOINT) } // clear the leaves that have been marked for removal for unmark_pos in checkpoint.marks_removed.iter() { - let subtree_addr = Address::above_position(Self::subtree_level(), *unmark_pos); - clear_positions - .entry(subtree_addr) - .and_modify(|to_clear| { - to_clear - .entry(*unmark_pos) - .and_modify(|flags| *flags |= RetentionFlags::MARKED) - .or_insert(RetentionFlags::MARKED); - }) - .or_insert_with(|| 
BTreeMap::from([(*unmark_pos, RetentionFlags::MARKED)])); + clear_at(*unmark_pos, RetentionFlags::MARKED) } } From f7931ec31f7a1ed1ab13c519f9b90d0291bbdfdf Mon Sep 17 00:00:00 2001 From: Kris Nuttycombe Date: Fri, 24 Mar 2023 08:38:11 -0600 Subject: [PATCH 16/16] Apply suggestions from code review. --- bridgetree/src/lib.rs | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/bridgetree/src/lib.rs b/bridgetree/src/lib.rs index 49f8bbb..926dfab 100644 --- a/bridgetree/src/lib.rs +++ b/bridgetree/src/lib.rs @@ -477,7 +477,7 @@ impl MerkleBridge { } } -impl<'a, H: Hashable + Ord + Clone + 'a> MerkleBridge { +impl<'a, H: Hashable + Clone + Ord + 'a> MerkleBridge { /// Constructs a new bridge to follow this one. If `mark_current_leaf` is true, the successor /// will track the information necessary to create a witness for the leaf most /// recently appended to this bridge's frontier. @@ -653,8 +653,7 @@ impl Checkpoint { } } - /// The unique identifier for the checkpoint, which is simply an automatically incrementing - /// index over all checkpoints that have ever been created in the history of the tree. + /// The unique identifier for the checkpoint. pub fn id(&self) -> &C { &self.id } @@ -823,7 +822,7 @@ impl BridgeTree { } } -impl BridgeTree { +impl BridgeTree { /// Construct a new BridgeTree that will start recording changes from the state of /// the specified frontier. 
pub fn from_frontier(max_checkpoints: usize, frontier: NonEmptyFrontier) -> Self { @@ -1290,7 +1289,7 @@ mod tests { Hashable, }; - impl testing::Tree + impl testing::Tree for BridgeTree { fn append(&mut self, value: H, retention: Retention) -> bool { @@ -1468,7 +1467,7 @@ mod tests { max_count: usize, ) -> impl Strategy> where - G::Value: Hashable + Ord + Clone + Debug + 'static, + G::Value: Hashable + Clone + Ord + Debug + 'static, { proptest::collection::vec(arb_operation(item_gen, 0..max_count), 0..max_count).prop_map( |ops| { @@ -1587,7 +1586,7 @@ mod tests { } // Combined tree tests - fn new_combined_tree( + fn new_combined_tree( max_checkpoints: usize, ) -> CombinedTree, BridgeTree> { CombinedTree::new(