diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index f7b4fa1..bf4f97e 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -23,7 +23,7 @@ jobs: - uses: actions/checkout@v3 # Build benchmarks to prevent bitrot - name: Build benchmarks - run: cargo build --workspace --benches + run: cargo build --workspace --benches --all-features doc-links: name: Intra-doc links diff --git a/Cargo.toml b/Cargo.toml index fd76a54..98ebc88 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -2,4 +2,5 @@ members = [ "incrementalmerkletree", "bridgetree", + "shardtree", ] diff --git a/bridgetree/src/lib.rs b/bridgetree/src/lib.rs index 151e8ce..40860fa 100644 --- a/bridgetree/src/lib.rs +++ b/bridgetree/src/lib.rs @@ -185,7 +185,7 @@ impl MerkleBridge { } } -impl<'a, H: Hashable + Ord + Clone + 'a> MerkleBridge { +impl<'a, H: Hashable + Clone + Ord + 'a> MerkleBridge { /// Constructs a new bridge to follow this one. If `mark_current_leaf` is true, the successor /// will track the information necessary to create a witness for the leaf most /// recently appended to this bridge's frontier. @@ -363,8 +363,7 @@ impl Checkpoint { } } - /// The unique identifier for the checkpoint, which is simply an automatically incrementing - /// index over all checkpoints that have ever been created in the history of the tree. + /// The unique identifier for the checkpoint. pub fn id(&self) -> &C { &self.id } @@ -533,7 +532,7 @@ impl BridgeTree { } } -impl BridgeTree { +impl BridgeTree { /// Construct a new BridgeTree that will start recording changes from the state of /// the specified frontier. pub fn from_frontier(max_checkpoints: usize, frontier: NonEmptyFrontier, checkpoint_id: C) -> Self { @@ -1005,7 +1004,7 @@ mod tests { Hashable, }; - impl testing::Tree + impl testing::Tree for BridgeTree { fn append(&mut self, value: H, retention: Retention) -> bool { @@ -1088,7 +1087,7 @@ mod tests { max_count: usize, ) -> impl Strategy> where - G::Value: Hashable + Ord + Clone + Debug + 'static, + G::Value: Hashable + Clone + Ord + Debug + 'static, { proptest::collection::vec(arb_operation(item_gen, 0..max_count), 0..max_count).prop_map( |ops| { @@ -1207,7 +1206,7 @@ mod tests { } // Combined tree tests - fn new_combined_tree( + fn new_combined_tree( max_checkpoints: usize, ) -> CombinedTree, BridgeTree> { CombinedTree::new( diff --git a/incrementalmerkletree/src/lib.rs b/incrementalmerkletree/src/lib.rs index c88ddfc..1352379 100644 --- a/incrementalmerkletree/src/lib.rs +++ b/incrementalmerkletree/src/lib.rs @@ -193,7 +193,7 @@ impl Level { // TODO: replace with an instance for `Step` once `step_trait` // is stabilized pub fn iter_to(self, other: Level) -> impl Iterator { - (self.0..other.0).into_iter().map(Level) + (self.0..other.0).map(Level) } } diff --git a/incrementalmerkletree/src/testing.rs b/incrementalmerkletree/src/testing.rs index f511f51..e34eccb 100644 --- a/incrementalmerkletree/src/testing.rs +++ b/incrementalmerkletree/src/testing.rs @@ -65,17 +65,16 @@ pub trait Tree { /// Creates a new checkpoint for the current tree state. /// - /// It is valid to have multiple checkpoints for the same tree state, and - /// each `rewind` call will remove a single checkpoint. Returns `false` - /// if the checkpoint identifier provided is less than or equal to the - /// maximum checkpoint identifier observed. + /// It is valid to have multiple checkpoints for the same tree state, and each `rewind` call + /// will remove a single checkpoint. Returns `false` if the checkpoint identifier provided is + /// less than or equal to the maximum checkpoint identifier observed. fn checkpoint(&mut self, id: C) -> bool; - /// Rewinds the tree state to the previous checkpoint, and then removes - /// that checkpoint record. If there are multiple checkpoints at a given - /// tree state, the tree state will not be altered until all checkpoints - /// at that tree state have been removed using `rewind`. This function - /// return false and leave the tree unmodified if no checkpoints exist. + /// Rewinds the tree state to the previous checkpoint, and then removes that checkpoint record. + /// + /// If there are multiple checkpoints at a given tree state, the tree state will not be altered + /// until all checkpoints at that tree state have been removed using `rewind`. This function + /// will return false and leave the tree unmodified if no checkpoints exist. fn rewind(&mut self) -> bool; } @@ -288,7 +287,10 @@ pub fn check_operations>( tree_checkpoints.push(tree_size); } } else { - prop_assert_eq!(tree_size, 1 << tree.depth()); + prop_assert_eq!( + tree_size, + tree.current_position().map_or(0, |p| usize::from(p) + 1) + ); } } CurrentPosition => { @@ -375,7 +377,7 @@ pub fn compute_root_from_witness(value: H, position: Position, path // Types and utilities for cross-verification property tests // -#[derive(Clone)] +#[derive(Clone, Debug)] pub struct CombinedTree, E: Tree> { inefficient: I, efficient: E, @@ -498,9 +500,37 @@ pub fn check_root_hashes, F: Fn(usize) -> T>(new_tree: F) assert_eq!(t.root(0).unwrap(), "aaaa____________"); } -pub fn check_witnesses + std::fmt::Debug, F: Fn(usize) -> T>(new_tree: F) { +/// This test expects a depth-4 tree and verifies that the tree reports itself as full after 2^4 +/// appends. +pub fn check_append + std::fmt::Debug, F: Fn(usize) -> T>(new_tree: F) { + use Retention::*; + let mut tree = new_tree(100); - tree.append("a".to_string(), Retention::Marked); + assert_eq!(tree.depth(), 4); + + // 16 appends should succeed + for i in 0..16 { + assert!(tree.append(i.to_string(), Ephemeral)); + assert_eq!(tree.current_position(), Some(Position::from(i))); + } + + // 17th append should fail + assert!(!tree.append("16".to_string(), Ephemeral)); + + // The following checks a condition on state restoration in the case that an append fails. + // We want to ensure that a failed append does not cause a loss of information. + let ops = (0..17) + .map(|i| Append(i.to_string(), Ephemeral)) + .collect::>(); + let tree = new_tree(100); + check_operations(tree, &ops).unwrap(); +} + +pub fn check_witnesses + std::fmt::Debug, F: Fn(usize) -> T>(new_tree: F) { + use Retention::*; + + let mut tree = new_tree(100); + tree.append("a".to_string(), Marked); assert_eq!( tree.witness(Position::from(0), 0), Some(vec![ @@ -511,7 +541,7 @@ pub fn check_witnesses + std::fmt::Debug, F: Fn(usize) -> ]) ); - tree.append("b".to_string(), Retention::Ephemeral); + tree.append("b".to_string(), Ephemeral); assert_eq!( tree.witness(0.into(), 0), Some(vec![ @@ -522,7 +552,7 @@ pub fn check_witnesses + std::fmt::Debug, F: Fn(usize) -> ]) ); - tree.append("c".to_string(), Retention::Marked); + tree.append("c".to_string(), Marked); assert_eq!( tree.witness(Position::from(2), 0), Some(vec![ @@ -533,7 +563,7 @@ pub fn check_witnesses + std::fmt::Debug, F: Fn(usize) -> ]) ); - tree.append("d".to_string(), Retention::Ephemeral); + tree.append("d".to_string(), Ephemeral); assert_eq!( tree.witness(Position::from(2), 0), Some(vec![ @@ -544,7 +574,7 @@ pub fn check_witnesses + std::fmt::Debug, F: Fn(usize) -> ]) ); - tree.append("e".to_string(), Retention::Ephemeral); + tree.append("e".to_string(), Ephemeral); assert_eq!( tree.witness(Position::from(2), 0), Some(vec![ @@ -556,12 +586,12 @@ pub fn check_witnesses + std::fmt::Debug, F: Fn(usize) -> ); let mut tree = new_tree(100); - tree.append("a".to_string(), Retention::Marked); + tree.append("a".to_string(), Marked); for c in 'b'..'g' { - tree.append(c.to_string(), Retention::Ephemeral); + tree.append(c.to_string(), Ephemeral); } - tree.append("g".to_string(), Retention::Marked); - tree.append("h".to_string(), Retention::Ephemeral); + tree.append("g".to_string(), Marked); + tree.append("h".to_string(), Ephemeral); assert_eq!( tree.witness(0.into(), 0), @@ -574,13 +604,13 @@ pub fn check_witnesses + std::fmt::Debug, F: Fn(usize) -> ); let mut tree = new_tree(100); - tree.append("a".to_string(), Retention::Marked); - tree.append("b".to_string(), Retention::Ephemeral); - tree.append("c".to_string(), Retention::Ephemeral); - tree.append("d".to_string(), Retention::Marked); - tree.append("e".to_string(), Retention::Marked); - tree.append("f".to_string(), Retention::Marked); - tree.append("g".to_string(), Retention::Ephemeral); + tree.append("a".to_string(), Marked); + tree.append("b".to_string(), Ephemeral); + tree.append("c".to_string(), Ephemeral); + tree.append("d".to_string(), Marked); + tree.append("e".to_string(), Marked); + tree.append("f".to_string(), Marked); + tree.append("g".to_string(), Ephemeral); assert_eq!( tree.witness(Position::from(5), 0), @@ -594,10 +624,10 @@ pub fn check_witnesses + std::fmt::Debug, F: Fn(usize) -> let mut tree = new_tree(100); for c in 'a'..'k' { - tree.append(c.to_string(), Retention::Ephemeral); + assert!(tree.append(c.to_string(), Ephemeral)); } - tree.append('k'.to_string(), Retention::Marked); - tree.append('l'.to_string(), Retention::Ephemeral); + assert!(tree.append('k'.to_string(), Marked)); + assert!(tree.append('l'.to_string(), Ephemeral)); assert_eq!( tree.witness(Position::from(10), 0), @@ -612,18 +642,18 @@ pub fn check_witnesses + std::fmt::Debug, F: Fn(usize) -> let mut tree = new_tree(100); assert!(tree.append( 'a'.to_string(), - Retention::Checkpoint { + Checkpoint { id: 1, is_marked: true } )); assert!(tree.rewind()); for c in 'b'..'e' { - tree.append(c.to_string(), Retention::Ephemeral); + tree.append(c.to_string(), Ephemeral); } - tree.append("e".to_string(), Retention::Marked); + tree.append("e".to_string(), Marked); for c in 'f'..'i' { - tree.append(c.to_string(), Retention::Ephemeral); + tree.append(c.to_string(), Ephemeral); } assert_eq!( tree.witness(0.into(), 0), @@ -636,20 +666,20 @@ pub fn check_witnesses + std::fmt::Debug, F: Fn(usize) -> ); let mut tree = new_tree(100); - tree.append('a'.to_string(), Retention::Ephemeral); - tree.append('b'.to_string(), Retention::Ephemeral); - tree.append('c'.to_string(), Retention::Marked); - tree.append('d'.to_string(), Retention::Ephemeral); - tree.append('e'.to_string(), Retention::Ephemeral); - tree.append('f'.to_string(), Retention::Ephemeral); + tree.append('a'.to_string(), Ephemeral); + tree.append('b'.to_string(), Ephemeral); + tree.append('c'.to_string(), Marked); + tree.append('d'.to_string(), Ephemeral); + tree.append('e'.to_string(), Ephemeral); + tree.append('f'.to_string(), Ephemeral); assert!(tree.append( 'g'.to_string(), - Retention::Checkpoint { + Checkpoint { id: 1, is_marked: true } )); - tree.append('h'.to_string(), Retention::Ephemeral); + tree.append('h'.to_string(), Ephemeral); assert!(tree.rewind()); assert_eq!( tree.witness(Position::from(2), 0), @@ -662,18 +692,18 @@ pub fn check_witnesses + std::fmt::Debug, F: Fn(usize) -> ); let mut tree = new_tree(100); - tree.append('a'.to_string(), Retention::Ephemeral); - tree.append('b'.to_string(), Retention::Marked); + tree.append('a'.to_string(), Ephemeral); + tree.append('b'.to_string(), Marked); assert_eq!(tree.witness(Position::from(0), 0), None); let mut tree = new_tree(100); for c in 'a'..'m' { - tree.append(c.to_string(), Retention::Ephemeral); + tree.append(c.to_string(), Ephemeral); } - tree.append('m'.to_string(), Retention::Marked); - tree.append('n'.to_string(), Retention::Marked); - tree.append('o'.to_string(), Retention::Ephemeral); - tree.append('p'.to_string(), Retention::Ephemeral); + tree.append('m'.to_string(), Marked); + tree.append('n'.to_string(), Marked); + tree.append('o'.to_string(), Ephemeral); + tree.append('p'.to_string(), Ephemeral); assert_eq!( tree.witness(Position::from(12), 0), @@ -686,10 +716,9 @@ pub fn check_witnesses + std::fmt::Debug, F: Fn(usize) -> ); let ops = ('a'..='l') - .into_iter() - .map(|c| Append(c.to_string(), Retention::Marked)) - .chain(Some(Append('m'.to_string(), Retention::Ephemeral))) - .chain(Some(Append('n'.to_string(), Retention::Ephemeral))) + .map(|c| Append(c.to_string(), Marked)) + .chain(Some(Append('m'.to_string(), Ephemeral))) + .chain(Some(Append('n'.to_string(), Ephemeral))) .chain(Some(Witness(11usize.into(), 0))) .collect::>(); @@ -706,6 +735,153 @@ pub fn check_witnesses + std::fmt::Debug, F: Fn(usize) -> ] )) ); + + let ops = vec![ + Append("a".to_string(), Ephemeral), + Append("b".to_string(), Ephemeral), + Append("c".to_string(), Ephemeral), + Append( + "d".to_string(), + Checkpoint { + id: 1, + is_marked: true, + }, + ), + Append("e".to_string(), Marked), + Operation::Checkpoint(2), + Append( + "f".to_string(), + Checkpoint { + id: 3, + is_marked: false, + }, + ), + Append( + "g".to_string(), + Checkpoint { + id: 4, + is_marked: false, + }, + ), + Append( + "h".to_string(), + Checkpoint { + id: 5, + is_marked: false, + }, + ), + Witness(3usize.into(), 5), + ]; + let mut tree = new_tree(100); + assert_eq!( + Operation::apply_all(&ops, &mut tree), + Some(( + Position::from(3), + vec![ + "c".to_string(), + "ab".to_string(), + "____".to_string(), + "________".to_string() + ] + )) + ); + let ops = vec![ + Append("a".to_string(), Ephemeral), + Append("a".to_string(), Ephemeral), + Append("a".to_string(), Ephemeral), + Append( + "a".to_string(), + Checkpoint { + id: 1, + is_marked: true, + }, + ), + Append("a".to_string(), Ephemeral), + Append("a".to_string(), Ephemeral), + Append("a".to_string(), Ephemeral), + Append( + "a".to_string(), + Checkpoint { + id: 2, + is_marked: false, + }, + ), + Append("a".to_string(), Ephemeral), + Append("a".to_string(), Ephemeral), + Witness(Position(3), 1), + ]; + let mut tree = new_tree(100); + assert_eq!( + Operation::apply_all(&ops, &mut tree), + Some(( + Position::from(3), + vec![ + "a".to_string(), + "aa".to_string(), + "aaaa".to_string(), + "________".to_string() + ] + )) + ); + + let ops = vec![ + Append("a".to_string(), Marked), + Append("a".to_string(), Ephemeral), + Append("a".to_string(), Ephemeral), + Append("a".to_string(), Ephemeral), + Append("a".to_string(), Ephemeral), + Append("a".to_string(), Ephemeral), + Append("a".to_string(), Ephemeral), + Operation::Checkpoint(1), + Append("a".to_string(), Marked), + Operation::Checkpoint(2), + Operation::Checkpoint(3), + Append( + "a".to_string(), + Checkpoint { + id: 4, + is_marked: false, + }, + ), + Rewind, + Rewind, + Witness(Position(7), 2), + ]; + let mut tree = new_tree(100); + assert_eq!(Operation::apply_all(&ops, &mut tree), None); + + let ops = vec![ + Append("a".to_string(), Marked), + Append("a".to_string(), Ephemeral), + Append( + "a".to_string(), + Checkpoint { + id: 1, + is_marked: true, + }, + ), + Append( + "a".to_string(), + Checkpoint { + id: 4, + is_marked: false, + }, + ), + Witness(Position(2), 2), + ]; + let mut tree = new_tree(100); + assert_eq!( + Operation::apply_all(&ops, &mut tree), + Some(( + Position::from(2), + vec![ + "_".to_string(), + "aa".to_string(), + "____".to_string(), + "________".to_string() + ] + )) + ); } pub fn check_checkpoint_rewind, F: Fn(usize) -> T>(new_tree: F) { @@ -791,15 +967,15 @@ pub fn check_rewind_remove_mark, F: Fn(usize) -> T>(new_t // use a maximum number of checkpoints of 1 let mut tree = new_tree(1); - tree.append("e".to_string(), Retention::Marked); - tree.checkpoint(1); + assert!(tree.append("e".to_string(), Retention::Marked)); + assert!(tree.checkpoint(1)); assert!(tree.marked_positions().contains(&0usize.into())); - tree.append("f".to_string(), Retention::Ephemeral); + assert!(tree.append("f".to_string(), Retention::Ephemeral)); // simulate a spend of `e` at `f` assert!(tree.remove_mark(0usize.into())); // even though the mark has been staged for removal, it's not gone yet assert!(tree.marked_positions().contains(&0usize.into())); - tree.checkpoint(2); + assert!(tree.checkpoint(2)); // the newest checkpoint will have caused the oldest to roll off, and // so the forgotten node will be unmarked assert!(!tree.marked_positions().contains(&0usize.into())); diff --git a/incrementalmerkletree/src/testing/complete_tree.rs b/incrementalmerkletree/src/testing/complete_tree.rs index 073da85..db4cb65 100644 --- a/incrementalmerkletree/src/testing/complete_tree.rs +++ b/incrementalmerkletree/src/testing/complete_tree.rs @@ -321,8 +321,8 @@ mod tests { use super::CompleteTree; use crate::{ testing::{ - check_checkpoint_rewind, check_rewind_remove_mark, check_root_hashes, check_witnesses, - compute_root_from_witness, SipHashable, Tree, + check_append, check_checkpoint_rewind, check_rewind_remove_mark, check_root_hashes, + check_witnesses, compute_root_from_witness, SipHashable, Tree, }, Hashable, Level, Position, Retention, }; @@ -342,7 +342,7 @@ mod tests { #[test] fn correct_root() { const DEPTH: u8 = 3; - let values = (0..(1 << DEPTH)).into_iter().map(SipHashable); + let values = (0..(1 << DEPTH)).map(SipHashable); let mut tree = CompleteTree::::new(100, ()); for value in values { @@ -367,6 +367,11 @@ mod tests { assert_eq!(tree.root(0).unwrap(), expected); } + #[test] + fn append() { + check_append(|max_checkpoints| CompleteTree::::new(max_checkpoints, 0)); + } + #[test] fn root_hashes() { check_root_hashes(|max_checkpoints| { @@ -375,7 +380,7 @@ mod tests { } #[test] - fn witness() { + fn witnesses() { check_witnesses(|max_checkpoints| { CompleteTree::::new(max_checkpoints, 0) }); @@ -386,7 +391,7 @@ mod tests { use crate::{testing::Tree, Retention}; const DEPTH: u8 = 3; - let values = (0..(1 << DEPTH)).into_iter().map(SipHashable); + let values = (0..(1 << DEPTH)).map(SipHashable); let mut tree = CompleteTree::::new(100, ()); for value in values { diff --git a/shardtree/Cargo.toml b/shardtree/Cargo.toml new file mode 100644 index 0000000..d0c4bba --- /dev/null +++ b/shardtree/Cargo.toml @@ -0,0 +1,31 @@ +[package] +name = "shardtree" +version = "0.0.0" +authors = [ + "Kris Nuttycombe ", +] +edition = "2018" +license = "MIT OR Apache-2.0" +description = "A space-efficient Merkle tree with witnessing of marked leaves, checkpointing & state restoration." +homepage = "https://github.com/zcash/incrementalmerkletree" +repository = "https://github.com/zcash/incrementalmerkletree" +categories = ["algorithms", "data-structures"] + +[dependencies] +bitflags = "1.3" +either = "1.8" +incrementalmerkletree = { version = "0.3", path = "../incrementalmerkletree" } +proptest = { version = "1.0.0", optional = true } + +[dev-dependencies] +assert_matches = "1.5" +criterion = "0.3" +incrementalmerkletree = { version = "0.3", path = "../incrementalmerkletree", features = ["test-dependencies"] } +proptest = "1.0.0" + +[features] +test-dependencies = ["proptest"] + +[target.'cfg(unix)'.dev-dependencies] +pprof = { version = "0.9", features = ["criterion", "flamegraph"] } # MSRV 1.56 +inferno = ">=0.11, <0.11.5" # MSRV 1.59 diff --git a/shardtree/LICENSE-APACHE b/shardtree/LICENSE-APACHE new file mode 100644 index 0000000..1e5006d --- /dev/null +++ b/shardtree/LICENSE-APACHE @@ -0,0 +1,202 @@ + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + +TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + +1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + +2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + +3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + +4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + +5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + +6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + +7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + +8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + +9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + +END OF TERMS AND CONDITIONS + +APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + +Copyright [yyyy] [name of copyright owner] + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. + diff --git a/shardtree/LICENSE-MIT b/shardtree/LICENSE-MIT new file mode 100644 index 0000000..94ac1a7 --- /dev/null +++ b/shardtree/LICENSE-MIT @@ -0,0 +1,21 @@ +The MIT License (MIT) + +Copyright (c) 2015-2021 The Electric Coin Company + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. diff --git a/shardtree/proptest-regressions/lib.txt b/shardtree/proptest-regressions/lib.txt new file mode 100644 index 0000000..4054830 --- /dev/null +++ b/shardtree/proptest-regressions/lib.txt @@ -0,0 +1,18 @@ +# Seeds for failure cases proptest has generated in the past. It is +# automatically read and these particular cases re-run before any +# novel cases are generated. +# +# It is recommended to check this file in to source control so that +# everyone who runs the test benefits from these saved cases. +cc 38b4ca3c029291dfe2a6b5907c33a2e8ae7900f19c759c5db74b616ab19f6c5c # shrinks to ops = [Append(SipHashable(0), MC), Rewind, Rewind] +cc f1a96f73b9f3ba2a2e4b5271037322150450c573b6c3a5ef34f71a540ff0fad2 # shrinks to ops = [Append("a", C), Rewind, Rewind] +cc ad5f5d4276adea6e928376c6dc8c013745e60a8b507e6fa4e716f6c35477fe65 # shrinks to ops = [Append("a", E), Append("a", E), Append("a", E), Append("a", E), Append("a", E), Append("a", E), Append("a", E), Append("a", E), Append("a", E)] +cc ab2690ff3cf593d2e7a78b2f56d76699e525391b87852bbf15315a1f36742f48 # shrinks to ops = [Append(SipHashable(0), C), Append(SipHashable(0), E), Append(SipHashable(0), E), Unmark(Position(0))] +cc 1603359084b0c614a2ff9008036a5e10db9f32fb31b3333e59aaa517686e174d # shrinks to ops = [Append(SipHashable(0), Checkpoint { id: (), is_marked: false })] +cc faaf929be4b27e652712b705e297bfe15c53767102516e56882d177ac6fc58d9 # shrinks to ops = [CurrentPosition, Append("a", Ephemeral), Append("a", Ephemeral), Append("a", Ephemeral), Append("a", Checkpoint { id: (), is_marked: true }), Append("a", Marked), Checkpoint(()), Append("a", Checkpoint { id: (), is_marked: false }), Append("a", Checkpoint { id: (), is_marked: false }), Append("a", Checkpoint { id: (), is_marked: false }), Witness(Position(3), 5)] +cc 8836c27ead7afb8d10092bdaeed33eb31007eaa47e3c3fca248cc00fbce772a3 # shrinks to ops = [Append(SipHashable(0), Ephemeral), Append(SipHashable(0), Ephemeral), Append(SipHashable(0), Ephemeral), Append(SipHashable(0), Ephemeral), Append(SipHashable(0), Ephemeral), Append(SipHashable(0), Ephemeral), Append(SipHashable(0), Ephemeral), Append(SipHashable(0), Ephemeral), Append(SipHashable(0), Ephemeral), Append(SipHashable(0), Ephemeral), Append(SipHashable(0), Ephemeral), Append(SipHashable(0), Ephemeral), Append(SipHashable(0), Ephemeral), Append(SipHashable(0), Ephemeral), Append(SipHashable(0), Ephemeral), Append(SipHashable(0), Ephemeral), Append(SipHashable(0), Ephemeral), Append(SipHashable(0), Ephemeral)] +cc 2303f82f255c60d7bdd45e2d13ff526bdc1d7f5ce846a7c07b38ab7f255c0300 # shrinks to ops = [Append("a", Marked), Append("a", Ephemeral), Append("a", Ephemeral), Append("a", Ephemeral), Append("a", Ephemeral), Append("a", Ephemeral), Append("a", Ephemeral), Append("a", Ephemeral), Append("a", Ephemeral), Append("a", Ephemeral), Append("a", Ephemeral), Append("a", Ephemeral), Append("a", Ephemeral), Append("a", Ephemeral), Append("a", Ephemeral), Append("a", Ephemeral), Append("a", Ephemeral), Append("a", Ephemeral), Append("a", Ephemeral)] +cc cf1a33ef6df58bbf7cc199b8b1879c3a078e7784aa3edc0aba9ca03772bea5f2 # shrinks to ops = [Append(SipHashable(0), Checkpoint { id: (), is_marked: false }), Append(SipHashable(0), Checkpoint { id: (), is_marked: false }), Append(SipHashable(0), Checkpoint { id: (), is_marked: false }), Checkpoint(()), Append(SipHashable(0), Checkpoint { id: (), is_marked: false }), Checkpoint(()), Rewind, Rewind, Rewind, Append(SipHashable(0), Ephemeral), Append(SipHashable(0), Checkpoint { id: (), is_marked: false }), Append(SipHashable(0), Checkpoint { id: (), is_marked: false }), Append(SipHashable(0), Ephemeral), Append(SipHashable(0), Ephemeral), Append(SipHashable(0), Ephemeral), Append(SipHashable(0), Checkpoint { id: (), is_marked: true }), Witness(Position(8), 2)] +cc 544e027d994eaf7f97b1c8d9ee7b35522a64a610b1430d56d74ec947018b759d # shrinks to ops = [Append("a", Marked), Append("a", Ephemeral), Append("a", Ephemeral), Append("a", Ephemeral), Append("a", Ephemeral), Append("a", Ephemeral), Append("a", Ephemeral), Checkpoint(()), Append("a", Marked), Checkpoint(()), Checkpoint(()), Append("a", Checkpoint { id: (), is_marked: false }), Rewind, Rewind, Witness(Position(7), 2)] +cc 55d00b68a0f0a02f83ab53f18a29d16d0233153b69a01414a1622104e0eead31 # shrinks to ops = [Append("a", Marked), Append("a", Checkpoint { id: (), is_marked: false }), Append("a", Marked), Checkpoint(()), Checkpoint(()), Checkpoint(()), Append("a", Checkpoint { id: (), is_marked: false }), Append("a", Checkpoint { id: (), is_marked: false }), Witness(Position(0), 7)] +cc 9dd966ff1ab66965c5b84153ae13f684258560cdd5e84c7deb24f724cb12aba7 # shrinks to ops = [Append("a", Marked), Append("a", Ephemeral), Append("a", Checkpoint { id: (), is_marked: true }), Checkpoint(()), Append("a", Checkpoint { id: (), is_marked: false }), Rewind, Rewind, Append("a", Checkpoint { id: (), is_marked: false }), Append("a", Checkpoint { id: (), is_marked: false }), Checkpoint(()), Witness(Position(2), 4)] diff --git a/shardtree/src/lib.rs b/shardtree/src/lib.rs new file mode 100644 index 0000000..5dd267d --- /dev/null +++ b/shardtree/src/lib.rs @@ -0,0 +1,2763 @@ +use bitflags::bitflags; +use core::convert::Infallible; +use core::fmt::Debug; +use core::marker::PhantomData; +use core::ops::{Deref, Range}; +use either::Either; +use std::collections::{BTreeMap, BTreeSet}; +use std::rc::Rc; + +use incrementalmerkletree::{Address, Hashable, Level, Position, Retention}; + +bitflags! { + pub struct RetentionFlags: u8 { + /// An leaf with `EPHEMERAL` retention can be pruned as soon as we are certain that it is not part + /// of the witness for a leaf with `CHECKPOINT` or `MARKED` retention. + const EPHEMERAL = 0b00000000; + + /// A leaf with `CHECKPOINT` retention can be pruned when there are more than `max_checkpoints` + /// additional checkpoint leaves, if it is not also a marked leaf. + const CHECKPOINT = 0b00000001; + + /// A leaf with `MARKED` retention can be pruned only as a consequence of an explicit deletion + /// action. + const MARKED = 0b00000010; + } +} + +impl RetentionFlags { + pub fn is_checkpoint(&self) -> bool { + (*self & RetentionFlags::CHECKPOINT) == RetentionFlags::CHECKPOINT + } + + pub fn is_marked(&self) -> bool { + (*self & RetentionFlags::MARKED) == RetentionFlags::MARKED + } +} + +impl<'a, C> From<&'a Retention> for RetentionFlags { + fn from(retention: &'a Retention) -> Self { + match retention { + Retention::Ephemeral => RetentionFlags::EPHEMERAL, + Retention::Checkpoint { is_marked, .. } => { + if *is_marked { + RetentionFlags::CHECKPOINT | RetentionFlags::MARKED + } else { + RetentionFlags::CHECKPOINT + } + } + Retention::Marked => RetentionFlags::MARKED, + } + } +} + +impl From> for RetentionFlags { + fn from(retention: Retention) -> Self { + RetentionFlags::from(&retention) + } +} + +/// A "pattern functor" for a single layer of a binary tree. +#[derive(Clone, Debug, PartialEq, Eq)] +pub enum Node { + /// A parent node in the tree, annotated with a value of type `A` and with left and right + /// children of type `C`. + Parent { ann: A, left: C, right: C }, + /// A node of the tree that contains a value (usually a hash, sometimes with additional + /// metadata) and that has no children. + /// + /// Note that leaf nodes may appear at any position in the tree; i.e. they may contain computed + /// subtree root values and not just level-0 leaves. + Leaf { value: V }, + /// The empty tree; a subtree or leaf for which no information is available. + Nil, +} + +impl Node { + /// Returns whether or not this is the `Nil` tree. + /// + /// This is useful for cases where the compiler can automatically dereference an `Rc`, where + /// one would otherwise need additional ceremony to make an equality check. + pub fn is_nil(&self) -> bool { + matches!(self, Node::Nil) + } + + /// Returns the contained leaf value, if this is a leaf node. + pub fn leaf_value(&self) -> Option<&V> { + match self { + Node::Parent { .. } => None, + Node::Leaf { value } => Some(value), + Node::Nil { .. } => None, + } + } + + pub fn annotation(&self) -> Option<&A> { + match self { + Node::Parent { ann, .. } => Some(ann), + Node::Leaf { .. } => None, + Node::Nil => None, + } + } + + /// Replaces the annotation on this node, if it is a `Node::Parent`; otherwise + /// returns this node unaltered. + pub fn reannotate(self, ann: A) -> Self { + match self { + Node::Parent { left, right, .. } => Node::Parent { ann, left, right }, + other => other, + } + } +} + +impl<'a, C: Clone, A: Clone, V: Clone> Node { + pub fn cloned(&self) -> Node { + match self { + Node::Parent { ann, left, right } => Node::Parent { + ann: (*ann).clone(), + left: left.clone(), + right: right.clone(), + }, + Node::Leaf { value } => Node::Leaf { + value: (*value).clone(), + }, + Node::Nil => Node::Nil, + } + } +} + +/// An immutable binary tree with each of its nodes tagged with an annotation value. +#[derive(Clone, Debug, PartialEq, Eq)] +pub struct Tree(Node>, A, V>); + +impl Deref for Tree { + type Target = Node>, A, V>; + fn deref(&self) -> &Self::Target { + &self.0 + } +} + +impl Tree { + /// Replaces the annotation at the root of the tree, if the root is a `Node::Parent`; otherwise + /// returns this tree unaltered. + pub fn reannotate_root(self, ann: A) -> Tree { + Tree(self.0.reannotate(ann)) + } + + /// Returns `true` if no [`Node::Nil`] nodes are present in the tree, `false` otherwise. + pub fn is_complete(&self) -> bool { + match &self.0 { + Node::Parent { left, right, .. } => { + left.as_ref().is_complete() && right.as_ref().is_complete() + } + Node::Leaf { .. } => true, + Node::Nil { .. } => false, + } + } + + /// Returns a vector of the addresses of [`Node::Nil`] subtree roots within this tree. + /// + /// The given address must correspond to the root of this tree, or this method will + /// yield incorrect results or may panic. + pub fn incomplete_nodes(&self, root_addr: Address) -> Vec
{ + match &self.0 { + Node::Parent { left, right, .. } => { + // We should never construct parent nodes where both children are Nil. + // While we could handle that here, if we encountered that case it would + // be indicative of a programming error elsewhere and so we assert instead. + assert!(!(left.0.is_nil() && right.0.is_nil())); + let (left_root, right_root) = root_addr + .children() + .expect("A parent node cannot appear at level 0"); + + let mut left_incomplete = left.incomplete_nodes(left_root); + let mut right_incomplete = right.incomplete_nodes(right_root); + left_incomplete.append(&mut right_incomplete); + left_incomplete + } + Node::Leaf { .. } => vec![], + Node::Nil => vec![root_addr], + } + } +} + +pub type PrunableTree = Tree>, (H, RetentionFlags)>; + +impl PrunableTree { + /// Returns the the value if this is a leaf. + pub fn leaf_value(&self) -> Option<&H> { + self.0.leaf_value().map(|(h, _)| h) + } + + /// Returns the cached root value with which the tree has been annotated for this node if it is + /// available, otherwise return the value if this is a leaf. + pub fn node_value(&self) -> Option<&H> { + self.0.annotation().map_or_else( + || self.leaf_value(), + |rc_opt| rc_opt.as_ref().map(|rc| rc.as_ref()), + ) + } + + /// Returns whether or not this tree is a leaf with `Marked` retention. + pub fn is_marked_leaf(&self) -> bool { + self.0 + .leaf_value() + .map_or(false, |(_, retention)| retention.is_marked()) + } + + /// Determines whether a tree has any `MARKED` nodes. + pub fn contains_marked(&self) -> bool { + match &self.0 { + Node::Parent { left, right, .. } => left.contains_marked() || right.contains_marked(), + Node::Leaf { value: (_, r) } => r.is_marked(), + Node::Nil => false, + } + } + + /// Returns the Merkle root of this tree, given the address of the root node, or + /// a vector of the addresses of `Nil` nodes that inhibited the computation of + /// such a root. + /// + /// ### Parameters: + /// * `truncate_at` An inclusive lower bound on positions in the tree beyond which all leaf + /// values will be treated as `Nil`. + pub fn root_hash(&self, root_addr: Address, truncate_at: Position) -> Result> { + if truncate_at <= root_addr.position_range_start() { + // we are in the part of the tree where we're generating empty roots, + // so no need to inspect the tree + Ok(H::empty_root(root_addr.level())) + } else { + match self { + Tree(Node::Parent { ann, left, right }) => ann + .as_ref() + .filter(|_| truncate_at >= root_addr.position_range_end()) + .map_or_else( + || { + // Compute the roots of the left and right children and hash them + // together. + let (l_addr, r_addr) = root_addr.children().unwrap(); + accumulate_result_with( + left.root_hash(l_addr, truncate_at), + right.root_hash(r_addr, truncate_at), + |left_root, right_root| { + H::combine(l_addr.level(), &left_root, &right_root) + }, + ) + }, + |rc| { + // Since we have an annotation on the root, and we are not truncating + // within this subtree, we can just use the cached value. + Ok(rc.as_ref().clone()) + }, + ), + Tree(Node::Leaf { value }) => { + if truncate_at >= root_addr.position_range_end() { + // no truncation of this leaf is necessary, just use it + Ok(value.0.clone()) + } else { + // we have a leaf value that is a subtree root created by hashing together + // the roots of child subtrees, but truncation would require that that leaf + // value be "split" into its constituent parts, which we can't do so we + // return an error + Err(vec![root_addr]) + } + } + Tree(Node::Nil) => Err(vec![root_addr]), + } + } + } + + /// Returns a vector of the positions of [`Node::Leaf`] values in the tree having [`MARKED`] + /// retention. + /// + /// Computing the set of marked positions requires a full traversal of the tree, and so should + /// be considered to be a somewhat expensive operation. + pub fn marked_positions(&self, root_addr: Address) -> BTreeSet { + match &self.0 { + Node::Parent { left, right, .. } => { + // We should never construct parent nodes where both children are Nil. + // While we could handle that here, if we encountered that case it would + // be indicative of a programming error elsewhere and so we assert instead. + assert!(!(left.0.is_nil() && right.0.is_nil())); + let (left_root, right_root) = root_addr + .children() + .expect("A parent node cannot appear at level 0"); + + let mut left_incomplete = left.marked_positions(left_root); + let mut right_incomplete = right.marked_positions(right_root); + left_incomplete.append(&mut right_incomplete); + left_incomplete + } + Node::Leaf { + value: (_, retention), + } => { + let mut result = BTreeSet::new(); + if root_addr.level() == 0.into() && retention.is_marked() { + result.insert(Position::from(root_addr.index())); + } + result + } + Node::Nil => BTreeSet::new(), + } + } + + /// Prunes the tree by hashing together ephemeral sibling nodes. + /// + /// `level` must be the level of the root of the node being pruned. + pub fn prune(self, level: Level) -> Self { + match self { + Tree(Node::Parent { ann, left, right }) => Tree::unite( + level, + ann, + left.as_ref().clone().prune(level - 1), + right.as_ref().clone().prune(level - 1), + ), + other => other, + } + } + + /// Merge two subtrees having the same root address. + /// + /// The merge operation is checked to be strictly additive and returns an error if merging + /// would cause information loss or if a conflict between root hashes occurs at a node. The + /// returned error contains the address of the node where such a conflict occurred. + pub fn merge_checked(self, root_addr: Address, other: Self) -> Result { + #[allow(clippy::type_complexity)] + fn go( + addr: Address, + t0: PrunableTree, + t1: PrunableTree, + ) -> Result, Address> { + // Require that any roots the we compute will not be default-filled by picking + // a starting valid fill point that is outside the range of leaf positions. + let no_default_fill = addr.position_range_end(); + match (t0, t1) { + (Tree(Node::Nil), other) | (other, Tree(Node::Nil)) => Ok(other), + (Tree(Node::Leaf { value: vl }), Tree(Node::Leaf { value: vr })) => { + if vl == vr { + Ok(Tree(Node::Leaf { value: vl })) + } else { + Err(addr) + } + } + (Tree(Node::Leaf { value }), parent @ Tree(Node::Parent { .. })) + | (parent @ Tree(Node::Parent { .. }), Tree(Node::Leaf { value })) => { + if parent + .root_hash(addr, no_default_fill) + .iter() + .all(|r| r == &value.0) + { + Ok(parent.reannotate_root(Some(Rc::new(value.0)))) + } else { + Err(addr) + } + } + (lparent, rparent) => { + let lroot = lparent.root_hash(addr, no_default_fill).ok(); + let rroot = rparent.root_hash(addr, no_default_fill).ok(); + // If both parents share the same root hash (or if one of them is absent), + // they can be merged + if lroot.zip(rroot).iter().all(|(l, r)| l == r) { + // using `if let` here to bind variables; we need to borrow the trees for + // root hash calculation but binding the children of the parent node + // interferes with binding a reference to the parent. + if let ( + Tree(Node::Parent { + ann: lann, + left: ll, + right: lr, + }), + Tree(Node::Parent { + ann: rann, + left: rl, + right: rr, + }), + ) = (lparent, rparent) + { + let (l_addr, r_addr) = addr.children().unwrap(); + Ok(Tree::unite( + addr.level() - 1, + lann.or(rann), + go(l_addr, ll.as_ref().clone(), rl.as_ref().clone())?, + go(r_addr, lr.as_ref().clone(), rr.as_ref().clone())?, + )) + } else { + unreachable!() + } + } else { + Err(addr) + } + } + } + } + + go(root_addr, self, other) + } + + /// Unite two nodes by either constructing a new parent node, or, if both nodes are ephemeral + /// leaves or Nil, constructing a replacement root by hashing leaf values together (or a + /// replacement `Nil` value). + /// + /// `level` must be the level of the two nodes that are being joined. + fn unite(level: Level, ann: Option>, left: Self, right: Self) -> Self { + match (left, right) { + (Tree(Node::Nil), Tree(Node::Nil)) => Tree(Node::Nil), + (Tree(Node::Leaf { value: lv }), Tree(Node::Leaf { value: rv })) + // we can prune right-hand leaves that are not marked; if a leaf + // is a checkpoint then that information will be propagated to + // the replacement leaf + if lv.1 == RetentionFlags::EPHEMERAL && (rv.1 & RetentionFlags::MARKED) == RetentionFlags::EPHEMERAL => + { + Tree( + Node::Leaf { + value: (H::combine(level, &lv.0, &rv.0), rv.1), + }, + ) + } + (left, right) => Tree( + Node::Parent { + ann, + left: Rc::new(left), + right: Rc::new(right), + }, + ), + } + } +} + +/// A binary Merkle tree with its root at the given address. +#[derive(Clone, Debug, PartialEq, Eq)] +pub struct LocatedTree { + root_addr: Address, + root: Tree, +} + +impl LocatedTree { + /// Returns the root address of this tree. + pub fn root_addr(&self) -> Address { + self.root_addr + } + + /// Returns a reference to the root of the tree. + pub fn root(&self) -> &Tree { + &self.root + } + + /// Returns a new [`LocatedTree`] with the provided value replacing the annotation of its root + /// node, if that root node is a `Node::Parent`. Otherwise . + pub fn reannotate_root(self, value: A) -> Self { + LocatedTree { + root_addr: self.root_addr, + root: self.root.reannotate_root(value), + } + } + + /// Returns the set of incomplete subtree roots contained within this tree, ordered by + /// increasing position. + pub fn incomplete_nodes(&self) -> Vec
{ + self.root.incomplete_nodes(self.root_addr) + } + + /// Returns the maximum position at which a non-Nil leaf has been observed in the tree. + /// + /// Note that no actual leaf value may exist at this position, as it may have previously been + /// pruned. + pub fn max_position(&self) -> Option { + fn go(addr: Address, root: &Tree) -> Option { + match &root.0 { + Node::Nil => None, + Node::Leaf { .. } => Some(addr.position_range_end() - 1), + Node::Parent { left, right, .. } => { + let (l_addr, r_addr) = addr.children().unwrap(); + go(r_addr, right.as_ref()).or_else(|| go(l_addr, left.as_ref())) + } + } + } + + go(self.root_addr, &self.root) + } + + /// Returns the value at the specified position, if any. + pub fn value_at_position(&self, position: Position) -> Option<&V> { + fn go(pos: Position, addr: Address, root: &Tree) -> Option<&V> { + match &root.0 { + Node::Parent { left, right, .. } => { + let (l_addr, r_addr) = addr.children().unwrap(); + if l_addr.position_range().contains(&pos) { + go(pos, l_addr, left) + } else { + go(pos, r_addr, right) + } + } + Node::Leaf { value } if addr.level() == Level::from(0) => Some(value), + _ => None, + } + } + + if self.root_addr.position_range().contains(&position) { + go(position, self.root_addr, &self.root) + } else { + None + } + } +} + +impl LocatedTree { + /// Constructs a new empty tree with its root at the provided address. + pub fn empty(root_addr: Address) -> Self { + Self { + root_addr, + root: Tree(Node::Nil), + } + } + + /// Constructs a new tree consisting of a single leaf with the provided value, and the + /// specified root address. + pub fn with_root_value(root_addr: Address, value: V) -> Self { + Self { + root_addr, + root: Tree(Node::Leaf { value }), + } + } + + /// Traverses this tree to find the child node at the specified address and returns it. + /// + /// Returns `None` if the specified address is not a descendant of this tree's root address, or + /// if the tree is terminated by a [`Node::Nil`] or leaf node before the specified address can + /// be reached. + pub fn subtree(&self, addr: Address) -> Option { + fn go( + root_addr: Address, + root: &Tree, + addr: Address, + ) -> Option> { + if root_addr == addr { + Some(LocatedTree { + root_addr, + root: root.clone(), + }) + } else { + match &root.0 { + Node::Parent { left, right, .. } => { + let (l_addr, r_addr) = root_addr.children().unwrap(); + if l_addr.contains(&addr) { + go(l_addr, left.as_ref(), addr) + } else { + go(r_addr, right.as_ref(), addr) + } + } + _ => None, + } + } + } + + if self.root_addr.contains(&addr) { + go(self.root_addr, &self.root, addr) + } else { + None + } + } + + /// Decomposes this tree into the vector of its subtrees having height `level + 1`. + /// + /// If this root address of this tree is lower down in the tree than the level specified, + /// the entire tree is returned as the sole element of the result vector. + pub fn decompose_to_level(self, level: Level) -> Vec { + fn go( + level: Level, + root_addr: Address, + root: Tree, + ) -> Vec> { + if root_addr.level() == level { + vec![LocatedTree { root_addr, root }] + } else { + match root.0 { + Node::Parent { left, right, .. } => { + let (l_addr, r_addr) = root_addr.children().unwrap(); + let mut l_decomposed = go( + level, + l_addr, + Rc::try_unwrap(left).unwrap_or_else(|rc| (*rc).clone()), + ); + let mut r_decomposed = go( + level, + r_addr, + Rc::try_unwrap(right).unwrap_or_else(|rc| (*rc).clone()), + ); + l_decomposed.append(&mut r_decomposed); + l_decomposed + } + _ => vec![], + } + } + } + + if level >= self.root_addr.level() { + vec![self] + } else { + go(level, self.root_addr, self.root) + } + } +} + +pub type LocatedPrunableTree = LocatedTree>, (H, RetentionFlags)>; + +/// A data structure describing the nature of a [`Node::Nil`] node in the tree that was introduced +/// as the consequence of an insertion. +#[derive(Clone, Copy, Debug, PartialEq, Eq)] +pub struct IncompleteAt { + /// The address of the empty node. + pub address: Address, + /// A flag identifying whether or not the missing node is required in order to construct a + /// witness for a node with [`MARKED`] retention. + pub required_for_witness: bool, +} + +/// A type for the result of a batch insertion operation. +/// +/// This result type contains the newly constructed tree, the addresses any new incomplete internal +/// nodes within that tree that were introduced as a consequence of that insertion, and the +/// remainder of the iterator that provided the inserted values. +#[derive(Debug)] +pub struct BatchInsertionResult)>> { + /// The updated tree after all insertions have been performed. + pub subtree: LocatedPrunableTree, + /// A flag identifying whether the constructed subtree contains a marked node. + pub contains_marked: bool, + /// The vector of addresses of [`Node::Nil`] nodes that were inserted into the tree as part of + /// the insertion operation, for nodes that are required in order to construct a witness for + /// each inserted leaf with [`MARKED`] retention. + pub incomplete: Vec, + /// The maximum position at which a leaf was inserted. + pub max_insert_position: Option, + /// The positions of all leaves with [`CHECKPOINT`] retention that were inserted. + pub checkpoints: BTreeMap, + /// The unconsumed remainder of the iterator from which leaves were inserted, if the tree + /// was completely filled before the iterator was fully consumed. + pub remainder: I, +} + +/// An error prevented the insertion of values into the subtree. +#[derive(Clone, Debug, PartialEq, Eq)] +pub enum InsertionError { + /// The caller attempted to insert a subtree into a tree that does not contain + /// the subtree's root address. + NotContained, + /// The start of the range of positions provided for insertion is not included + /// in the range of positions within this subtree. + OutOfRange(Range), + /// An existing root hash conflicts with the root hash of a node being inserted. + Conflict(Address), + /// An out-of-order checkpoint was detected + /// + /// Checkpoint identifiers must be in nondecreasing order relative to tree positions. + CheckpointOutOfOrder, + /// An append operation has exceeded the capacity of the tree. + TreeFull, + /// An error was produced by the underlying [`ShardStore`] + Storage(S), +} + +/// Errors that may be returned in the process of querying a [`ShardTree`] +#[derive(Clone, Debug, PartialEq, Eq)] +pub enum QueryError { + /// The caller attempted to query the value at an address within a tree that does not contain + /// that address. + NotContained(Address), + /// A leaf required by a given checkpoint has been pruned, or is otherwise not accessible in + /// the tree. + CheckpointPruned, + /// It is not possible to compute a root for one or more subtrees because they contain + /// [`Node::Nil`] values at positions that cannot be replaced with default hashes. + TreeIncomplete(Vec
), +} + +/// Operations on [`LocatedTree`]s that are annotated with Merkle hashes. +impl LocatedPrunableTree { + /// Computes the root hash of this tree, truncated to the given position. + /// + /// If the tree contains any [`Node::Nil`] nodes corresponding to positions less than + /// `truncate_at`, this will return an error containing the addresses of those nodes within the + /// tree. + pub fn root_hash(&self, truncate_at: Position) -> Result> { + self.root.root_hash(self.root_addr, truncate_at) + } + + /// Compute the root hash of this subtree, filling empty nodes along the rightmost path of the + /// subtree with the empty root value for the given level. + /// + /// This should only be used for computing roots when it is known that no successor trees + /// exist. + /// + /// If the tree contains any [`Node::Nil`] nodes that are to the left of filled nodes in the + /// tree, this will return an error containing the addresses of those nodes. + pub fn right_filled_root(&self) -> Result> { + self.root_hash( + self.max_position() + .map_or_else(|| self.root_addr.position_range_start(), |pos| pos + 1), + ) + } + + /// Returns the positions of marked leaves in the tree. + pub fn marked_positions(&self) -> BTreeSet { + fn go( + root_addr: Address, + root: &PrunableTree, + acc: &mut BTreeSet, + ) { + match &root.0 { + Node::Parent { left, right, .. } => { + let (l_addr, r_addr) = root_addr.children().unwrap(); + go(l_addr, left.as_ref(), acc); + go(r_addr, right.as_ref(), acc); + } + Node::Leaf { value } => { + if value.1.is_marked() && root_addr.level() == 0.into() { + acc.insert(Position::from(root_addr.index())); + } + } + _ => {} + } + } + + let mut result = BTreeSet::new(); + go(self.root_addr, &self.root, &mut result); + result + } + + /// Compute the witness for the leaf at the specified position. + /// + /// This tree will be truncated to the `truncate_at` position, and then empty + /// roots corresponding to later positions will be filled by [`H::empty_root`]. + /// + /// Returns either the witness for the leaf at the specified position, or an error that + /// describes the causes of failure. + pub fn witness(&self, position: Position, truncate_at: Position) -> Result, QueryError> { + // traverse down to the desired leaf position, and then construct + // the authentication path on the way back up. + fn go( + root: &PrunableTree, + root_addr: Address, + position: Position, + truncate_at: Position, + ) -> Result, Vec
> { + match &root.0 { + Node::Parent { left, right, .. } => { + let (l_addr, r_addr) = root_addr.children().unwrap(); + if root_addr.level() > 1.into() { + let r_start = r_addr.position_range_start(); + if position < r_start { + accumulate_result_with( + go(left.as_ref(), l_addr, position, truncate_at), + right.as_ref().root_hash(r_addr, truncate_at), + |mut witness, sibling_root| { + witness.push(sibling_root); + witness + }, + ) + } else { + // if the position we're witnessing is down the right-hand branch then + // we always set the truncation bound outside the range of leaves on the + // left, because we don't allow any empty nodes to the left + accumulate_result_with( + left.as_ref().root_hash(l_addr, r_start), + go(right.as_ref(), r_addr, position, truncate_at), + |sibling_root, mut witness| { + witness.push(sibling_root); + witness + }, + ) + } + } else { + // we handle the level 0 leaves here by adding the sibling of our desired + // leaf to the witness + if position.is_odd() { + if right.is_marked_leaf() { + left.leaf_value() + .map(|v| vec![v.clone()]) + .ok_or_else(|| vec![l_addr]) + } else { + Err(vec![l_addr]) + } + } else if left.is_marked_leaf() { + // If we have the left-hand leaf and the right-hand leaf is empty, we + // can fill it with the empty leaf, but only if we are truncating at + // a position to the left of the current position + if truncate_at <= position + 1 { + Ok(vec![H::empty_leaf()]) + } else { + right + .leaf_value() + .map_or_else(|| Err(vec![r_addr]), |v| Ok(vec![v.clone()])) + } + } else { + Err(vec![r_addr]) + } + } + } + _ => { + // if we encounter a nil or leaf node, we were unable to descend + // to the leaf at the desired position. + Err(vec![root_addr]) + } + } + } + + if self.root_addr.position_range().contains(&position) { + go(&self.root, self.root_addr, position, truncate_at) + .map_err(QueryError::TreeIncomplete) + } else { + Err(QueryError::NotContained(self.root_addr)) + } + } + + /// Prunes this tree by replacing all nodes that are right-hand children along the path + /// to the specified position with [`Node::Nil`]. + /// + /// The leaf at the specified position is retained. + pub fn truncate_to_position(&self, position: Position) -> Option { + fn go( + position: Position, + root_addr: Address, + root: &PrunableTree, + ) -> Option> { + match &root.0 { + Node::Parent { ann, left, right } => { + let (l_child, r_child) = root_addr.children().unwrap(); + if position < r_child.position_range_start() { + // we are truncating within the range of the left node, so recurse + // to the left to truncate the left child and then reconstruct the + // node with `Nil` as the right sibling + go(position, l_child, left.as_ref()).map(|left| { + Tree::unite(l_child.level(), ann.clone(), left, Tree(Node::Nil)) + }) + } else { + // we are truncating within the range of the right node, so recurse + // to the right to truncate the right child and then reconstruct the + // node with the left sibling unchanged + go(position, r_child, right.as_ref()).map(|right| { + Tree::unite(r_child.level(), ann.clone(), left.as_ref().clone(), right) + }) + } + } + Node::Leaf { .. } => { + if root_addr.max_position() <= position { + Some(root.clone()) + } else { + None + } + } + Node::Nil => None, + } + } + + if self.root_addr.position_range().contains(&position) { + go(position, self.root_addr, &self.root).map(|root| LocatedTree { + root_addr: self.root_addr, + root, + }) + } else { + None + } + } + + /// Inserts a descendant subtree into this subtree, creating empty sibling nodes as necessary + /// to fill out the tree. + /// + /// In the case that a leaf node would be replaced by an incomplete subtree, the resulting + /// parent node will be annotated with the existing leaf value. + /// + /// Returns the updated tree, along with the addresses of any [`Node::Nil`] nodes that were + /// inserted in the process of creating the parent nodes down to the insertion point, or an + /// error if the specified subtree's root address is not in the range of valid descendants of + /// the root node of this tree or if the insertion would result in a conflict between computed + /// root hashes of complete subtrees. + pub fn insert_subtree( + &self, + subtree: Self, + contains_marked: bool, + ) -> Result<(Self, Vec), InsertionError> { + // A function to recursively dig into the tree, creating a path downward and introducing + // empty nodes as necessary until we can insert the provided subtree. + #[allow(clippy::type_complexity)] + fn go( + root_addr: Address, + into: &PrunableTree, + subtree: LocatedPrunableTree, + is_complete: bool, + contains_marked: bool, + ) -> Result<(PrunableTree, Vec), InsertionError> { + // In the case that we are replacing a node entirely, we need to extend the + // subtree up to the level of the node being replaced, adding Nil siblings + // and recording the presence of those incomplete nodes when necessary + let replacement = |ann: Option>, mut node: LocatedPrunableTree| { + // construct the replacement node bottom-up + let mut incomplete = vec![]; + while node.root_addr.level() < root_addr.level() { + incomplete.push(IncompleteAt { + address: node.root_addr.sibling(), + required_for_witness: contains_marked, + }); + node = LocatedTree { + root_addr: node.root_addr.parent(), + root: if node.root_addr.is_right_child() { + Tree(Node::Parent { + ann: None, + left: Rc::new(Tree(Node::Nil)), + right: Rc::new(node.root), + }) + } else { + Tree(Node::Parent { + ann: None, + left: Rc::new(node.root), + right: Rc::new(Tree(Node::Nil)), + }) + }, + }; + } + (node.root.reannotate_root(ann), incomplete) + }; + + match into { + Tree(Node::Nil) => Ok(replacement(None, subtree)), + Tree(Node::Leaf { value: (value, _) }) => { + if root_addr == subtree.root_addr { + if is_complete { + // It is safe to replace the existing root unannotated, because we + // can always recompute the root from a complete subtree. + Ok((subtree.root, vec![])) + } else if subtree + .root + .0 + .annotation() + .and_then(|ann| ann.as_ref()) + .iter() + .all(|v| v.as_ref() == value) + { + Ok(( + // at this point we statically know the root to be a parent + subtree.root.reannotate_root(Some(Rc::new(value.clone()))), + vec![], + )) + } else { + Err(InsertionError::Conflict(root_addr)) + } + } else { + Ok(replacement(Some(Rc::new(value.clone())), subtree)) + } + } + parent if root_addr == subtree.root_addr => { + // Merge the existing subtree with the subtree being inserted. + // A merge operation can't introduce any new incomplete roots. + parent + .clone() + .merge_checked(root_addr, subtree.root) + .map_err(InsertionError::Conflict) + .map(|tree| (tree, vec![])) + } + Tree(Node::Parent { ann, left, right }) => { + // In this case, we have an existing parent but we need to dig down farther + // before we can insert the subtree that we're carrying for insertion. + let (l_addr, r_addr) = root_addr.children().unwrap(); + if l_addr.contains(&subtree.root_addr) { + let (new_left, incomplete) = + go(l_addr, left.as_ref(), subtree, is_complete, contains_marked)?; + Ok(( + Tree::unite( + root_addr.level() - 1, + ann.clone(), + new_left, + right.as_ref().clone(), + ), + incomplete, + )) + } else { + let (new_right, incomplete) = go( + r_addr, + right.as_ref(), + subtree, + is_complete, + contains_marked, + )?; + Ok(( + Tree::unite( + root_addr.level() - 1, + ann.clone(), + left.as_ref().clone(), + new_right, + ), + incomplete, + )) + } + } + } + } + + let LocatedTree { root_addr, root } = self; + if root_addr.contains(&subtree.root_addr) { + let complete = subtree.root.is_complete(); + go(*root_addr, root, subtree, complete, contains_marked).map(|(root, incomplete)| { + ( + LocatedTree { + root_addr: *root_addr, + root, + }, + incomplete, + ) + }) + } else { + Err(InsertionError::NotContained) + } + } + + /// Append a single value at the first available position in the tree. + /// + /// Prefer to use [`Self::batch_append`] or [`Self::batch_insert`] when appending multiple + /// values, as these operations require fewer traversals of the tree than are necessary when + /// performing multiple sequential calls to [`Self::append`]. + pub fn append( + &self, + value: H, + retention: Retention, + ) -> Result<(Self, Position, Option), InsertionError> { + let checkpoint_id = if let Retention::Checkpoint { id, .. } = &retention { + Some(id.clone()) + } else { + None + }; + + self.batch_append(Some((value, retention)).into_iter()) + // We know that the max insert position will have been incremented by one. + .and_then(|r| { + let mut r = r.expect("We know the iterator to have been nonempty."); + if r.remainder.next().is_some() { + Err(InsertionError::TreeFull) + } else { + Ok((r.subtree, r.max_insert_position.unwrap(), checkpoint_id)) + } + }) + } + + /// Append a values from an iterator, beginning at the first available position in the tree. + /// + /// Returns an error if the tree is full. If the position at the end of the iterator is outside + /// of the subtree's range, the unconsumed part of the iterator will be returned as part of + /// the result. + pub fn batch_append)>, E>( + &self, + values: I, + ) -> Result>, InsertionError> { + let append_position = self + .max_position() + .map(|p| p + 1) + .unwrap_or_else(|| self.root_addr.position_range_start()); + self.batch_insert(append_position, values) + } + + /// Builds a [`LocatedPrunableTree`] from an iterator of level-0 leaves. + /// + /// This may be used in conjunction with [`ShardTree::insert_tree`] to support + /// partially-parallelizable tree construction. Multiple subtrees may be constructed in + /// parallel from iterators over (preferably, though not necessarily) disjoint leaf ranges, and + /// [`ShardTree::insert_tree`] may be used to insert those subtrees into the `ShardTree` in + /// arbitrary order. + /// + /// * `position_range` - The range of leaf positions at which values will be inserted. This + /// range is also used to place an upper bound on the number of items that will be consumed + /// from the `values` iterator. + /// * `prune_below` - Nodes with [`EPHEMERAL`] retention that are not required to be retained + /// in order to construct a witness for a marked node or to make it possible to rewind to a + /// checkpointed node may be pruned so long as their address is at less than the specified + /// level. + /// * `values` The iterator of `(H, Retention)` pairs from which to construct the tree. + pub fn from_iter)>>( + position_range: Range, + prune_below: Level, + mut values: I, + ) -> Option> { + // Unite two subtrees by either adding a parent node, or a leaf containing the Merkle root + // of such a parent if both nodes are ephemeral leaves. + // + // `unite` is only called when both root addrs have the same parent. `batch_insert` never + // constructs Nil nodes, so we don't create any incomplete root information here. + fn unite( + lroot: LocatedPrunableTree, + rroot: LocatedPrunableTree, + prune_below: Level, + ) -> LocatedTree>, (H, RetentionFlags)> { + LocatedTree { + root_addr: lroot.root_addr.parent(), + root: if lroot.root_addr.level() < prune_below { + Tree::unite(lroot.root_addr.level(), None, lroot.root, rroot.root) + } else { + Tree(Node::Parent { + ann: None, + left: Rc::new(lroot.root), + right: Rc::new(rroot.root), + }) + }, + } + } + + // Builds a single tree from the provided stack of subtrees, which must be non-overlapping + // and in position order. Returns the resulting tree, a flag indicating whether the + // resulting tree contains a `MARKED` node, and the vector of [`IncompleteAt`] values for + // [`Node::Nil`] nodes that were introduced in the process of constructing the tree. + fn build_minimal_tree( + mut xs: Vec<(LocatedPrunableTree, bool)>, + prune_below: Level, + ) -> Option<(LocatedPrunableTree, bool, Vec)> { + // First, consume the stack from the right, building up a single tree + // until we can't combine any more. + if let Some((mut cur, mut contains_marked)) = xs.pop() { + let mut incomplete = vec![]; + while let Some((top, top_marked)) = xs.pop() { + while cur.root_addr.level() < top.root_addr.level() { + let sibling_addr = cur.root_addr.sibling(); + incomplete.push(IncompleteAt { + address: sibling_addr, + required_for_witness: top_marked, + }); + cur = unite( + cur, + LocatedTree { + root_addr: sibling_addr, + root: Tree(Node::Nil), + }, + prune_below, + ); + } + + if cur.root_addr.level() == top.root_addr.level() { + contains_marked = contains_marked || top_marked; + if cur.root_addr.is_right_child() { + // We have a left child and a right child, so unite them. + cur = unite(top, cur, prune_below); + } else { + // This is a left child, so we build it up one more level and then + // we've merged as much as we can from the right and need to work from + // the left + xs.push((top, top_marked)); + let sibling_addr = cur.root_addr.sibling(); + incomplete.push(IncompleteAt { + address: sibling_addr, + required_for_witness: top_marked, + }); + cur = unite( + cur, + LocatedTree { + root_addr: sibling_addr, + root: Tree(Node::Nil), + }, + prune_below, + ); + break; + } + } else { + // top.root_addr.level < cur.root_addr.level, so we've merged as much as we + // can from the right and now need to work from the left. + xs.push((top, top_marked)); + break; + } + } + + // push our accumulated max-height right hand node back on to the stack. + xs.push((cur, contains_marked)); + + // From the stack of subtrees, construct a single sparse tree that can be + // inserted/merged into the existing tree + let res_tree = xs.into_iter().fold( + None, + |acc: Option>, (next_tree, next_marked)| { + if let Some(mut prev_tree) = acc { + // add nil branches to build up the left tree until we can merge it + // with the right + while prev_tree.root_addr.level() < next_tree.root_addr.level() { + let sibling_addr = prev_tree.root_addr.sibling(); + contains_marked = contains_marked || next_marked; + incomplete.push(IncompleteAt { + address: sibling_addr, + required_for_witness: next_marked, + }); + prev_tree = unite( + LocatedTree { + root_addr: sibling_addr, + root: Tree(Node::Nil), + }, + prev_tree, + prune_below, + ); + } + + // at this point, prev_tree.level == next_tree.level + Some(unite(prev_tree, next_tree, prune_below)) + } else { + Some(next_tree) + } + }, + ); + + res_tree.map(|t| (t, contains_marked, incomplete)) + } else { + None + } + } + + // A stack of complete subtrees to be inserted as descendants into the subtree labeled + // with the addresses at which they will be inserted, along with their root hashes. + let mut fragments: Vec<(Self, bool)> = vec![]; + let mut position = position_range.start; + let mut checkpoints: BTreeMap = BTreeMap::new(); + while position < position_range.end { + if let Some((value, retention)) = values.next() { + if let Retention::Checkpoint { id, .. } = &retention { + checkpoints.insert(id.clone(), position); + } + + let rflags = RetentionFlags::from(retention); + let mut subtree = LocatedTree { + root_addr: Address::from(position), + root: Tree(Node::Leaf { + value: (value.clone(), rflags), + }), + }; + + if position.is_odd() { + // At odd positions, we are completing a subtree and so we unite fragments + // up the stack until we get the largest possible subtree + while let Some((potential_sibling, marked)) = fragments.pop() { + if potential_sibling.root_addr.parent() == subtree.root_addr.parent() { + subtree = unite(potential_sibling, subtree, prune_below); + } else { + // this is not a sibling node, so we push it back on to the stack + // and are done + fragments.push((potential_sibling, marked)); + break; + } + } + } + + fragments.push((subtree, rflags.is_marked())); + position += 1; + } else { + break; + } + } + + build_minimal_tree(fragments, prune_below).map( + |(to_insert, contains_marked, incomplete)| BatchInsertionResult { + subtree: to_insert, + contains_marked, + incomplete, + max_insert_position: Some(position - 1), + checkpoints, + remainder: values, + }, + ) + } + + /// Put a range of values into the subtree by consuming the given iterator, starting at the + /// specified position. + /// + /// The start position must exist within the position range of this subtree. If the position at + /// the end of the iterator is outside of the subtree's range, the unconsumed part of the + /// iterator will be returned as part of the result. + /// + /// Returns `Ok(None)` if the provided iterator is empty, `Ok(Some)` if + /// values were successfully inserted, or an error if the start position provided is outside + /// of this tree's position range or if a conflict with an existing subtree root is detected. + pub fn batch_insert)>, E>( + &self, + start: Position, + values: I, + ) -> Result>, InsertionError> { + let subtree_range = self.root_addr.position_range(); + let contains_start = subtree_range.contains(&start); + if contains_start { + let position_range = Range { + start, + end: subtree_range.end, + }; + Self::from_iter(position_range, self.root_addr.level(), values) + .map(|mut res| { + let (subtree, mut incomplete) = self + .clone() + .insert_subtree(res.subtree, res.contains_marked)?; + res.subtree = subtree; + res.incomplete.append(&mut incomplete); + Ok(res) + }) + .transpose() + } else { + Err(InsertionError::OutOfRange(subtree_range)) + } + } + + /// Clears the specified retention flags at all positions specified, pruning any branches + /// that no longer need to be retained. + pub fn clear_flags(&self, to_clear: BTreeMap) -> Self { + fn go( + to_clear: &[(Position, RetentionFlags)], + root_addr: Address, + root: &PrunableTree, + ) -> PrunableTree { + if to_clear.is_empty() { + // nothing to do, so we just return the root + root.clone() + } else { + match root { + Tree(Node::Parent { ann, left, right }) => { + let (l_addr, r_addr) = root_addr.children().unwrap(); + + let p = to_clear.partition_point(|(p, _)| p < &l_addr.position_range_end()); + Tree::unite( + l_addr.level(), + ann.clone(), + go(&to_clear[0..p], l_addr, left), + go(&to_clear[p..], r_addr, right), + ) + } + Tree(Node::Leaf { value: (h, r) }) => { + // When we reach a leaf, we should be down to just a single position + // which should correspond to the last level-0 child of the address's + // subtree range; if it's a checkpoint this will always be the case for + // a partially-pruned branch, and if it's a marked node then it will + // be a level-0 leaf. + match to_clear { + [(pos, flags)] => { + assert_eq!(*pos, root_addr.max_position()); + Tree(Node::Leaf { + value: (h.clone(), *r & !*flags), + }) + } + _ => { + panic!("Tree state inconsistent with checkpoints."); + } + } + } + Tree(Node::Nil) => Tree(Node::Nil), + } + } + } + + let to_clear = to_clear.into_iter().collect::>(); + Self { + root_addr: self.root_addr, + root: go(&to_clear, self.root_addr, &self.root), + } + } +} + +/// An enumeration of possible checkpoint locations. +#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord)] +pub enum TreeState { + /// Checkpoints of the empty tree. + Empty, + /// Checkpoint at a (possibly pruned) leaf state corresponding to the + /// wrapped leaf position. + AtPosition(Position), +} + +#[derive(Clone, Debug)] +pub struct Checkpoint { + tree_state: TreeState, + marks_removed: BTreeSet, +} + +impl Checkpoint { + pub fn tree_empty() -> Self { + Checkpoint { + tree_state: TreeState::Empty, + marks_removed: BTreeSet::new(), + } + } + + pub fn at_position(position: Position) -> Self { + Checkpoint { + tree_state: TreeState::AtPosition(position), + marks_removed: BTreeSet::new(), + } + } + + pub fn is_tree_empty(&self) -> bool { + matches!(self.tree_state, TreeState::Empty) + } + + pub fn position(&self) -> Option { + match self.tree_state { + TreeState::Empty => None, + TreeState::AtPosition(pos) => Some(pos), + } + } +} + +/// A capability for storage of fragment subtrees of the `ShardTree` type. +/// +/// All fragment subtrees must have roots at level `SHARD_HEIGHT - 1` +pub trait ShardStore { + type Error; + + /// Returns the subtree at the given root address, if any such subtree exists. + fn get_shard(&self, shard_root: Address) -> Option<&LocatedPrunableTree>; + + /// Returns the subtree containing the maximum inserted leaf position. + fn last_shard(&self) -> Option<&LocatedPrunableTree>; + + /// Inserts or replaces the subtree having the same root address as the provided tree. + /// + /// Implementations of this method MUST enforce the constraint that the root address + /// of the provided subtree has level `SHARD_HEIGHT - 1`. + fn put_shard(&mut self, subtree: LocatedPrunableTree) -> Result<(), Self::Error>; + + /// Returns the vector of addresses corresponding to the roots of subtrees stored in this + /// store. + fn get_shard_roots(&self) -> Vec
; + + /// Removes subtrees from the underlying store having root addresses at indices greater + /// than or equal to that of the specified address. + /// + /// Implementations of this method MUST enforce the constraint that the root address + /// provided has level `SHARD_HEIGHT - 1`. + fn truncate(&mut self, from: Address) -> Result<(), Self::Error>; +} + +impl ShardStore for Vec> { + type Error = Infallible; + + fn get_shard(&self, shard_root: Address) -> Option<&LocatedPrunableTree> { + self.get(shard_root.index()) + } + + fn last_shard(&self) -> Option<&LocatedPrunableTree> { + self.last() + } + + fn put_shard(&mut self, subtree: LocatedPrunableTree) -> Result<(), Self::Error> { + let subtree_addr = subtree.root_addr; + for subtree_idx in self.last().map_or(0, |s| s.root_addr.index() + 1)..=subtree_addr.index() + { + self.push(LocatedTree { + root_addr: Address::from_parts(subtree_addr.level(), subtree_idx), + root: Tree(Node::Nil), + }) + } + self[subtree_addr.index()] = subtree; + Ok(()) + } + + fn get_shard_roots(&self) -> Vec
{ + self.iter().map(|s| s.root_addr).collect() + } + + fn truncate(&mut self, from: Address) -> Result<(), Self::Error> { + self.truncate(from.index()); + Ok(()) + } +} + +/// A sparse binary Merkle tree of the specified depth, represented as an ordered collection of +/// subtrees (shards) of a given maximum height. +/// +/// This tree maintains a collection of "checkpoints" which represent positions, usually near the +/// front of the tree, that are maintained such that it's possible to truncate nodes to the right +/// of the specified position. +#[derive(Debug)] +pub struct ShardTree, const DEPTH: u8, const SHARD_HEIGHT: u8> { + /// The vector of tree shards. + store: S, + /// The maximum number of checkpoints to retain before pruning. + max_checkpoints: usize, + /// An ordered map from checkpoint identifier to checkpoint. + checkpoints: BTreeMap, + // /// TODO: Add a tree that is used to cache the known roots of subtrees in the "cap" of nodes between + // /// `SHARD_HEIGHT` and `DEPTH` that are otherwise not directly represented in the tree. This + // /// cache will be automatically updated when computing roots and witnesses. Leaf nodes are empty + // /// because the annotation slot is consistently used to store the subtree hashes at each node. + // cap_cache: Tree>, ()> + _hash_type: PhantomData, +} + +impl< + H: Hashable + Clone + PartialEq, + C: Clone + Ord, + S: ShardStore, + const DEPTH: u8, + const SHARD_HEIGHT: u8, + > ShardTree +{ + /// Creates a new empty tree. + pub fn new(store: S, max_checkpoints: usize, initial_checkpoint_id: C) -> Self { + Self { + store, + max_checkpoints, + checkpoints: BTreeMap::from([(initial_checkpoint_id, Checkpoint::tree_empty())]), + //cap_cache: Tree(None, ()) + _hash_type: PhantomData, + } + } + + /// Returns the root address of the tree. + pub fn root_addr() -> Address { + Address::from_parts(Level::from(DEPTH), 0) + } + + /// Returns the fixed level of subtree roots within the vector of subtrees used as this tree's + /// representation. + pub fn subtree_level() -> Level { + Level::from(SHARD_HEIGHT - 1) + } + + /// Returns the position and checkpoint count for each checkpointed position in the tree. + pub fn checkpoints(&self) -> &BTreeMap { + &self.checkpoints + } + + /// Returns the leaf value at the specified position, if it is a marked leaf. + pub fn get_marked_leaf(&self, position: Position) -> Option<&H> { + self.store + .get_shard(Address::above_position(Self::subtree_level(), position)) + .and_then(|t| t.value_at_position(position)) + .and_then(|(v, r)| if r.is_marked() { Some(v) } else { None }) + } + + /// Returns the positions of marked leaves in the tree. + pub fn marked_positions(&self) -> BTreeSet { + let mut result = BTreeSet::new(); + for subtree_addr in &self.store.get_shard_roots() { + if let Some(subtree) = self.store.get_shard(*subtree_addr) { + result.append(&mut subtree.marked_positions()); + } + } + result + } + + /// Inserts a new root into the tree at the given address. + /// + /// This will pad from the left until the tree's subtrees vector contains enough trees to reach + /// the specified address, which must be at the [`Self::subtree_level`] level. If a subtree + /// already exists at this address, its root will be annotated with the specified hash value. + /// + /// This will return an error if the specified hash conflicts with any existing annotation. + pub fn put_root(&mut self, addr: Address, value: H) -> Result<(), InsertionError> { + let updated_subtree = match self.store.get_shard(addr) { + Some(s) if !s.root.is_nil() => s.root.node_value().map_or_else( + || { + Ok(Some( + s.clone().reannotate_root(Some(Rc::new(value.clone()))), + )) + }, + |v| { + if v == &value { + // the existing root is already correctly annotated, so no need to + // do anything + Ok(None) + } else { + // the provided value conflicts with the existing root value + Err(InsertionError::Conflict(addr)) + } + }, + ), + _ => { + // there is no existing subtree root, so construct a new one. + Ok(Some(LocatedTree { + root_addr: addr, + root: Tree(Node::Leaf { + value: (value, RetentionFlags::EPHEMERAL), + }), + })) + } + }?; + + if let Some(s) = updated_subtree { + self.store.put_shard(s).map_err(InsertionError::Storage)?; + } + + Ok(()) + } + + /// Append a single value at the first available position in the tree. + /// + /// Prefer to use [`Self::batch_insert`] when appending multiple values, as these operations + /// require fewer traversals of the tree than are necessary when performing multiple sequential + /// calls to [`Self::append`]. + pub fn append( + &mut self, + value: H, + retention: Retention, + ) -> Result<(), InsertionError> { + if let Retention::Checkpoint { id, .. } = &retention { + if self.checkpoints.keys().last() >= Some(id) { + return Err(InsertionError::CheckpointOutOfOrder); + } + } + + let (append_result, position, checkpoint_id) = + if let Some(subtree) = self.store.last_shard() { + if subtree.root.is_complete() { + let addr = subtree.root_addr; + + if addr.index() + 1 >= 0x1 << (SHARD_HEIGHT - 1) { + return Err(InsertionError::OutOfRange(addr.position_range())); + } else { + LocatedTree::empty(addr.next_at_level()).append(value, retention)? + } + } else { + subtree.append(value, retention)? + } + } else { + let root_addr = Address::from_parts(Self::subtree_level(), 0); + LocatedTree::empty(root_addr).append(value, retention)? + }; + + self.store + .put_shard(append_result) + .map_err(InsertionError::Storage)?; + if let Some(c) = checkpoint_id { + self.checkpoints + .insert(c, Checkpoint::at_position(position)); + } + + self.prune_excess_checkpoints() + .map_err(InsertionError::Storage)?; + + Ok(()) + } + + /// Put a range of values into the subtree to fill leaves starting from the given position. + /// + /// This operation will pad the tree until it contains enough subtrees to reach the starting + /// position. It will fully consume the provided iterator, constructing successive subtrees + /// until no more values are available. It aggressively prunes the tree as it goes, retaining + /// only nodes that either have [`MARKED`] retention, are required to construct a witness for + /// such marked nodes, or that must be retained in order to make it possible to truncate the + /// tree to any position with [`CHECKPOINT`] retention. + /// + /// This operation returns the final position at which a leaf was inserted, and the vector of + /// [`IncompleteAt`] values that identify addresses at which [`Node::Nil`] nodes were + /// introduced to the tree, as well as whether or not those newly introduced nodes will need to + /// be filled with values in order to produce witnesses for inserted leaves with [`MARKED`] + /// retention. + #[allow(clippy::type_complexity)] + pub fn batch_insert)>>( + &mut self, + mut start: Position, + values: I, + ) -> Result)>, InsertionError> { + let mut values = values.peekable(); + let mut subtree_root_addr = Address::above_position(Self::subtree_level(), start); + let mut max_insert_position = None; + let mut all_incomplete = vec![]; + loop { + if values.peek().is_some() { + let empty = LocatedTree::empty(subtree_root_addr); + let mut res = self + .store + .get_shard(subtree_root_addr) + .unwrap_or(&empty) + .batch_insert(start, values)? + .expect( + "Iterator containing leaf values to insert was verified to be nonempty.", + ); + self.store + .put_shard(res.subtree) + .map_err(InsertionError::Storage)?; + for (id, position) in res.checkpoints.into_iter() { + self.checkpoints + .insert(id, Checkpoint::at_position(position)); + } + + values = res.remainder; + subtree_root_addr = subtree_root_addr.next_at_level(); + max_insert_position = res.max_insert_position; + start = max_insert_position.unwrap() + 1; + all_incomplete.append(&mut res.incomplete); + } else { + break; + } + } + + self.prune_excess_checkpoints() + .map_err(InsertionError::Storage)?; + + Ok(max_insert_position.map(|p| (p, all_incomplete))) + } + + /// Insert a tree by decomposing it into its [`SHARD_HEIGHT`] or smaller parts (if necessary) + /// and inserting those at their appropriate locations. + pub fn insert_tree( + &mut self, + tree: LocatedPrunableTree, + ) -> Result, InsertionError> { + let mut all_incomplete = vec![]; + for subtree in tree.decompose_to_level(Self::subtree_level()).into_iter() { + let root_addr = subtree.root_addr; + let contains_marked = subtree.root.contains_marked(); + let empty = LocatedTree::empty(root_addr); + let (new_subtree, mut incomplete) = self + .store + .get_shard(root_addr) + .unwrap_or(&empty) + .insert_subtree(subtree, contains_marked)?; + self.store + .put_shard(new_subtree) + .map_err(InsertionError::Storage)?; + all_incomplete.append(&mut incomplete); + } + Ok(all_incomplete) + } + + /// Adds a checkpoint at the rightmost leaf state of the tree. + pub fn checkpoint(&mut self, checkpoint_id: C) -> bool { + fn go( + root_addr: Address, + root: &PrunableTree, + ) -> Option<(PrunableTree, Position)> { + match root { + Tree(Node::Parent { ann, left, right }) => { + let (l_addr, r_addr) = root_addr.children().unwrap(); + go(r_addr, right).map_or_else( + || { + go(l_addr, left).map(|(new_left, pos)| { + ( + Tree::unite( + l_addr.level(), + ann.clone(), + new_left, + Tree(Node::Nil), + ), + pos, + ) + }) + }, + |(new_right, pos)| { + Some(( + Tree::unite( + l_addr.level(), + ann.clone(), + left.as_ref().clone(), + new_right, + ), + pos, + )) + }, + ) + } + Tree(Node::Leaf { value: (h, r) }) => Some(( + Tree(Node::Leaf { + value: (h.clone(), *r | RetentionFlags::CHECKPOINT), + }), + root_addr.max_position(), + )), + Tree(Node::Nil) => None, + } + } + + // checkpoint identifiers at the tip must be in increasing order + if self.checkpoints.keys().last() >= Some(&checkpoint_id) { + return false; + } + + // Search backward from the end of the subtrees iter to find a non-empty subtree. + // When we find one, update the subtree to add the `CHECKPOINT` flag to the + // right-most leaf (which need not be a level-0 leaf; it's fine to rewind to a + // pruned state). + for subtree_addr in self.store.get_shard_roots().iter().rev() { + let subtree = self.store.get_shard(*subtree_addr).expect( + "The store should not return root addresses for subtrees it cannot provide.", + ); + if let Some((replacement, checkpoint_position)) = go(*subtree_addr, &subtree.root) { + if self + .store + .put_shard(LocatedTree { + root_addr: *subtree_addr, + root: replacement, + }) + .is_err() + { + return false; + } + self.checkpoints + .insert(checkpoint_id, Checkpoint::at_position(checkpoint_position)); + + // early return once we've updated the tree state + return self + .prune_excess_checkpoints() + .map_err(InsertionError::Storage) + .is_ok(); + } + } + + self.checkpoints + .insert(checkpoint_id, Checkpoint::tree_empty()); + + // TODO: it should not be necessary to do this on every checkpoint, + // but currently that's how the reference tree behaves so we're maintaining + // those semantics for test compatibility. + self.prune_excess_checkpoints() + .map_err(InsertionError::Storage) + .is_ok() + } + + fn prune_excess_checkpoints(&mut self) -> Result<(), S::Error> { + if self.checkpoints.len() > self.max_checkpoints { + // Batch removals by subtree & create a list of the checkpoint identifiers that + // will be removed from the checkpoints map. + let mut checkpoints_to_delete = vec![]; + let mut clear_positions: BTreeMap> = + BTreeMap::new(); + for (cid, checkpoint) in self + .checkpoints + .iter() + .take(self.checkpoints.len() - self.max_checkpoints) + { + checkpoints_to_delete.push(cid.clone()); + + let mut clear_at = |pos, flags_to_clear| { + let subtree_addr = Address::above_position(Self::subtree_level(), pos); + clear_positions + .entry(subtree_addr) + .and_modify(|to_clear| { + to_clear + .entry(pos) + .and_modify(|flags| *flags |= flags_to_clear) + .or_insert(flags_to_clear); + }) + .or_insert_with(|| BTreeMap::from([(pos, flags_to_clear)])); + }; + + // clear the checkpoint leaf + if let TreeState::AtPosition(pos) = checkpoint.tree_state { + clear_at(pos, RetentionFlags::CHECKPOINT) + } + + // clear the leaves that have been marked for removal + for unmark_pos in checkpoint.marks_removed.iter() { + clear_at(*unmark_pos, RetentionFlags::MARKED) + } + } + + // Prune each affected subtree + for (subtree_addr, positions) in clear_positions.into_iter() { + let cleared = self + .store + .get_shard(subtree_addr) + .map(|subtree| subtree.clear_flags(positions)); + if let Some(cleared) = cleared { + self.store.put_shard(cleared)?; + } + } + + // Now that the leaves have been pruned, actually remove the checkpoints + for c in checkpoints_to_delete { + self.checkpoints.remove(&c); + } + } + + Ok(()) + } + + /// Truncates the tree, discarding all information after the checkpoint at the specified depth. + /// + /// This will also discard all checkpoints with depth <= the specified depth. Returns `true` + /// if the truncation succeeds or has no effect, or `false` if no checkpoint exists at the + /// specified depth. + pub fn truncate_removing_checkpoint(&mut self, checkpoint_depth: usize) -> bool { + if checkpoint_depth == 0 { + true + } else if self.checkpoints.len() > 1 { + match self.checkpoint_at_depth(checkpoint_depth) { + Some((checkpoint_id, c)) => { + let checkpoint_id = checkpoint_id.clone(); + match c.tree_state { + TreeState::Empty => { + if (self + .store + .truncate(Address::from_parts(Self::subtree_level(), 0))) + .is_err() + { + return false; + } + self.checkpoints.split_off(&checkpoint_id); + true + } + TreeState::AtPosition(position) => { + let subtree_addr = + Address::above_position(Self::subtree_level(), position); + let replacement = self + .store + .get_shard(subtree_addr) + .and_then(|s| s.truncate_to_position(position)); + match replacement { + Some(truncated) => { + if self.store.truncate(subtree_addr).is_err() + || self.store.put_shard(truncated).is_err() + { + false + } else { + self.checkpoints.split_off(&checkpoint_id); + true + } + } + None => false, + } + } + } + } + None => false, + } + } else { + // do not remove the first checkpoint. + false + } + } + + /// Computes the root of any subtree of this tree rooted at the given address, with the overall + /// tree truncated to the specified position. + /// + /// The specified address is not required to be at any particular level, though it cannot + /// exceed the level corresponding to the maximum depth of the tree. Nodes to the right of the + /// given position, and parents of such nodes, will be replaced by the empty root for the + /// associated level. + /// + /// Use [`Self::root_at_checkpoint`] to obtain the root of the overall tree. + pub fn root(&self, address: Address, truncate_at: Position) -> Result { + match address.context(Self::subtree_level()) { + Either::Left(subtree_addr) => { + // The requested root address is fully contained within one of the subtrees. + if truncate_at <= address.position_range_start() { + Ok(H::empty_root(address.level())) + } else { + // get the child of the subtree with its root at `address` + self.store + .get_shard(subtree_addr) + .ok_or_else(|| vec![subtree_addr]) + .and_then(|subtree| { + subtree.subtree(address).map_or_else( + || Err(vec![address]), + |child| child.root_hash(truncate_at), + ) + }) + .map_err(QueryError::TreeIncomplete) + } + } + Either::Right(subtree_range) => { + // The requested root requires hashing together the roots of several subtrees. + let mut root_stack = vec![]; + let mut incomplete = vec![]; + + for subtree_idx in subtree_range { + let subtree_addr = Address::from_parts(Self::subtree_level(), subtree_idx); + if truncate_at <= subtree_addr.position_range_start() { + break; + } + + let subtree_root = self + .store + .get_shard(subtree_addr) + .ok_or_else(|| vec![subtree_addr]) + .and_then(|s| s.root_hash(truncate_at)); + + match subtree_root { + Ok(mut cur_hash) => { + if subtree_addr.index() % 2 == 0 { + root_stack.push((subtree_addr, cur_hash)) + } else { + let mut cur_addr = subtree_addr; + while let Some((addr, hash)) = root_stack.pop() { + if addr.parent() == cur_addr.parent() { + cur_hash = H::combine(cur_addr.level(), &hash, &cur_hash); + cur_addr = cur_addr.parent(); + } else { + root_stack.push((addr, hash)); + break; + } + } + root_stack.push((cur_addr, cur_hash)); + } + } + Err(mut new_incomplete) => { + // Accumulate incomplete root information and continue, so that we can + // return the complete set of incomplete results. + incomplete.append(&mut new_incomplete); + } + } + } + + if !incomplete.is_empty() { + return Err(QueryError::TreeIncomplete(incomplete)); + } + + // Now hash with empty roots to obtain the root at maximum height + if let Some((mut cur_addr, mut cur_hash)) = root_stack.pop() { + while let Some((addr, hash)) = root_stack.pop() { + while addr.level() > cur_addr.level() { + cur_hash = H::combine( + cur_addr.level(), + &cur_hash, + &H::empty_root(cur_addr.level()), + ); + cur_addr = cur_addr.parent(); + } + cur_hash = H::combine(cur_addr.level(), &hash, &cur_hash); + cur_addr = cur_addr.parent(); + } + + while cur_addr.level() < address.level() { + cur_hash = H::combine( + cur_addr.level(), + &cur_hash, + &H::empty_root(cur_addr.level()), + ); + cur_addr = cur_addr.parent(); + } + + Ok(cur_hash) + } else { + // if the stack is empty, we just return the default root at max height + Ok(H::empty_root(address.level())) + } + } + } + } + + /// Returns the position of the checkpoint, if any, along with the number of subsequent + /// checkpoints at the same position. Returns `None` if `checkpoint_depth == 0` or if + /// insufficient checkpoints exist to seek back to the requested depth. + pub fn checkpoint_at_depth(&self, checkpoint_depth: usize) -> Option<(&C, &Checkpoint)> { + if checkpoint_depth == 0 { + None + } else { + self.checkpoints.iter().rev().nth(checkpoint_depth - 1) + } + } + + /// Returns the position of the rightmost leaf inserted as of the given checkpoint. + /// + /// Returns the maximum leaf position if `checkpoint_depth == 0` (or `Ok(None)` in this + /// case if the tree is empty) or an error if the checkpointed position cannot be restored + /// because it has been pruned. Note that no actual level-0 leaf may exist at this position. + pub fn max_leaf_position( + &self, + checkpoint_depth: usize, + ) -> Result, QueryError> { + if checkpoint_depth == 0 { + // TODO: This relies on the invariant that the last shard in the subtrees vector is + // never created without a leaf then being added to it. However, this may be a + // difficult invariant to maintain when adding empty roots, so perhaps we need a + // better way of tracking the actual max position of the tree; we might want to + // just store it directly. + Ok(self.store.last_shard().and_then(|t| t.max_position())) + } else { + match self.checkpoint_at_depth(checkpoint_depth) { + Some((_, c)) => Ok(c.position()), + None => { + // There is no checkpoint at the specified depth, so we report it as pruned. + Err(QueryError::CheckpointPruned) + } + } + } + } + + /// Computes the root of the tree as of the checkpointed position at the specified depth. + /// + /// Returns the root as of the most recently appended leaf if `checkpoint_depth == 0`. Note + /// that if the most recently appended leaf is also a checkpoint, this will return the same + /// result as `checkpoint_depth == 1`. + pub fn root_at_checkpoint(&self, checkpoint_depth: usize) -> Result { + self.max_leaf_position(checkpoint_depth)?.map_or_else( + || Ok(H::empty_root(Self::root_addr().level())), + |pos| self.root(Self::root_addr(), pos + 1), + ) + } + + /// Computes the witness for the leaf at the specified position. + /// + /// Returns the witness as of the most recently appended leaf if `checkpoint_depth == 0`. Note + /// that if the most recently appended leaf is also a checkpoint, this will return the same + /// result as `checkpoint_depth == 1`. + pub fn witness( + &self, + position: Position, + checkpoint_depth: usize, + ) -> Result, QueryError> { + let max_leaf_position = self + .max_leaf_position(checkpoint_depth) + .and_then(|v| v.ok_or_else(|| QueryError::TreeIncomplete(vec![Self::root_addr()])))?; + + if position > max_leaf_position { + Err(QueryError::NotContained(Address::from_parts( + Level::from(0), + position.into(), + ))) + } else { + let subtree_addr = Address::above_position(Self::subtree_level(), position); + + // compute the witness for the specified position up to the subtree root + let mut witness = self.store.get_shard(subtree_addr).map_or_else( + || Err(QueryError::TreeIncomplete(vec![subtree_addr])), + |subtree| subtree.witness(position, max_leaf_position + 1), + )?; + + // compute the remaining parts of the witness up to the root + let root_addr = Self::root_addr(); + let mut cur_addr = subtree_addr; + while cur_addr != root_addr { + witness.push(self.root(cur_addr.sibling(), max_leaf_position + 1)?); + cur_addr = cur_addr.parent(); + } + + Ok(witness) + } + } + + /// Make a marked leaf at a position eligible to be pruned. + /// + /// If the checkpoint associated with the specified identifier does not exist because the + /// corresponding checkpoint would have been more than `max_checkpoints` deep, the removal + /// is recorded as of the first existing checkpoint and the associated leaves will be pruned + /// when that checkpoint is subsequently removed. + pub fn remove_mark(&mut self, position: Position, as_of_checkpoint: &C) -> bool { + if self.get_marked_leaf(position).is_some() { + if let Some(checkpoint) = self.checkpoints.get_mut(as_of_checkpoint) { + checkpoint.marks_removed.insert(position); + return true; + } + + if let Some((_, checkpoint)) = self.checkpoints.iter_mut().next() { + checkpoint.marks_removed.insert(position); + return true; + } + } + + false + } +} + +// We need an applicative functor for Result for this function so that we can correctly +// accumulate errors, but we don't have one so we just write a special- cased version here. +fn accumulate_result_with( + left: Result>, + right: Result>, + combine_success: impl FnOnce(A, B) -> C, +) -> Result> { + match (left, right) { + (Ok(a), Ok(b)) => Ok(combine_success(a, b)), + (Err(mut xs), Err(mut ys)) => { + xs.append(&mut ys); + Err(xs) + } + (Ok(_), Err(xs)) => Err(xs), + (Err(xs), Ok(_)) => Err(xs), + } +} + +#[cfg(any(bench, test, feature = "test-dependencies"))] +pub mod testing { + use super::*; + use incrementalmerkletree::Hashable; + use proptest::prelude::*; + use proptest::sample::select; + + pub fn arb_retention_flags() -> impl Strategy { + select(vec![ + RetentionFlags::EPHEMERAL, + RetentionFlags::CHECKPOINT, + RetentionFlags::MARKED, + RetentionFlags::MARKED | RetentionFlags::CHECKPOINT, + ]) + } + + pub fn arb_tree( + arb_annotation: A, + arb_leaf: V, + depth: u32, + size: u32, + ) -> impl Strategy> + where + A::Value: Clone + 'static, + V::Value: Hashable + Clone + 'static, + { + let leaf = prop_oneof![ + Just(Tree(Node::Nil)), + arb_leaf.prop_map(|value| Tree(Node::Leaf { value })) + ]; + + leaf.prop_recursive(depth, size, 2, move |inner| { + (arb_annotation.clone(), inner.clone(), inner).prop_map(|(ann, left, right)| { + Tree(if left.is_nil() && right.is_nil() { + Node::Nil + } else { + Node::Parent { + ann, + left: Rc::new(left), + right: Rc::new(right), + } + }) + }) + }) + } +} + +#[cfg(test)] +mod tests { + use crate::{ + IncompleteAt, LocatedPrunableTree, LocatedTree, Node, PrunableTree, QueryError, + RetentionFlags, ShardStore, ShardTree, Tree, + }; + use assert_matches::assert_matches; + use core::convert::Infallible; + use incrementalmerkletree::{ + testing::{ + self, arb_operation, check_append, check_checkpoint_rewind, check_operations, + check_rewind_remove_mark, check_root_hashes, check_witnesses, + complete_tree::CompleteTree, CombinedTree, SipHashable, + }, + Address, Hashable, Level, Position, Retention, + }; + use proptest::prelude::*; + use std::collections::BTreeSet; + use std::rc::Rc; + + fn nil() -> Tree { + Tree(Node::Nil) + } + + fn str_leaf(c: &str) -> Tree { + Tree(Node::Leaf { + value: c.to_string(), + }) + } + + fn leaf(value: B) -> Tree { + Tree(Node::Leaf { value }) + } + + fn parent(left: Tree, right: Tree) -> Tree { + Tree(Node::Parent { + ann: A::default(), + left: Rc::new(left), + right: Rc::new(right), + }) + } + + #[test] + fn tree_incomplete_nodes() { + let t: Tree<(), String> = parent(nil(), str_leaf("a")); + assert_eq!( + t.incomplete_nodes(Address::from_parts(Level::from(1), 0)), + vec![Address::from_parts(Level::from(0), 0)] + ); + + let t0 = parent(str_leaf("b"), t.clone()); + assert_eq!( + t0.incomplete_nodes(Address::from_parts(Level::from(2), 1)), + vec![Address::from_parts(Level::from(0), 6)] + ); + + let t1 = parent(nil(), t); + assert_eq!( + t1.incomplete_nodes(Address::from_parts(Level::from(2), 1)), + vec![ + Address::from_parts(Level::from(1), 2), + Address::from_parts(Level::from(0), 6) + ] + ); + } + + #[test] + fn tree_root() { + let t: PrunableTree = parent( + leaf(("a".to_string(), RetentionFlags::EPHEMERAL)), + leaf(("b".to_string(), RetentionFlags::EPHEMERAL)), + ); + + assert_eq!( + t.root_hash(Address::from_parts(Level::from(1), 0), Position::from(2)), + Ok("ab".to_string()) + ); + + let t0 = parent(nil(), t.clone()); + assert_eq!( + t0.root_hash(Address::from_parts(Level::from(2), 0), Position::from(4)), + Err(vec![Address::from_parts(Level::from(1), 0)]) + ); + + // Check root computation with truncation + let t1 = parent(t, nil()); + assert_eq!( + t1.root_hash(Address::from_parts(Level::from(2), 0), Position::from(2)), + Ok("ab__".to_string()) + ); + assert_eq!( + t1.root_hash(Address::from_parts(Level::from(2), 0), Position::from(3)), + Err(vec![Address::from_parts(Level::from(1), 1)]) + ); + } + + #[test] + fn located_prunable_tree_insert_subtree() { + let t: LocatedPrunableTree = LocatedTree { + root_addr: Address::from_parts(3.into(), 1), + root: parent( + leaf(("abcd".to_string(), RetentionFlags::EPHEMERAL)), + parent(nil(), leaf(("gh".to_string(), RetentionFlags::EPHEMERAL))), + ), + }; + + assert_eq!( + t.insert_subtree::( + LocatedTree { + root_addr: Address::from_parts(1.into(), 6), + root: parent(leaf(("e".to_string(), RetentionFlags::MARKED)), nil()) + }, + true + ), + Ok(( + LocatedTree { + root_addr: Address::from_parts(3.into(), 1), + root: parent( + leaf(("abcd".to_string(), RetentionFlags::EPHEMERAL)), + parent( + parent(leaf(("e".to_string(), RetentionFlags::MARKED)), nil()), + leaf(("gh".to_string(), RetentionFlags::EPHEMERAL)) + ) + ) + }, + vec![] + )) + ); + } + + #[test] + fn located_prunable_tree_witness() { + let t: LocatedPrunableTree = LocatedTree { + root_addr: Address::from_parts(3.into(), 0), + root: parent( + leaf(("abcd".to_string(), RetentionFlags::EPHEMERAL)), + parent( + parent( + leaf(("e".to_string(), RetentionFlags::MARKED)), + leaf(("f".to_string(), RetentionFlags::EPHEMERAL)), + ), + leaf(("gh".to_string(), RetentionFlags::EPHEMERAL)), + ), + ), + }; + + assert_eq!( + t.witness(4.into(), 8.into()), + Ok(vec!["f", "gh", "abcd"] + .into_iter() + .map(|s| s.to_string()) + .collect()) + ); + assert_eq!( + t.witness(4.into(), 6.into()), + Ok(vec!["f", "__", "abcd"] + .into_iter() + .map(|s| s.to_string()) + .collect()) + ); + assert_eq!( + t.witness(4.into(), 7.into()), + Err(QueryError::TreeIncomplete(vec![Address::from_parts( + 1.into(), + 3 + )])) + ); + } + + type VecShardStore = Vec>; + + #[test] + fn tree_marked_positions() { + let t: PrunableTree = parent( + leaf(("a".to_string(), RetentionFlags::EPHEMERAL)), + leaf(("b".to_string(), RetentionFlags::MARKED)), + ); + assert_eq!( + t.marked_positions(Address::from_parts(Level::from(1), 0)), + BTreeSet::from([Position::from(1)]) + ); + + let t0 = parent(t.clone(), t); + assert_eq!( + t0.marked_positions(Address::from_parts(Level::from(2), 1)), + BTreeSet::from([Position::from(5), Position::from(7)]) + ); + } + + #[test] + fn tree_prune() { + let t: PrunableTree = parent( + leaf(("a".to_string(), RetentionFlags::EPHEMERAL)), + leaf(("b".to_string(), RetentionFlags::EPHEMERAL)), + ); + + assert_eq!( + t.clone().prune(Level::from(1)), + leaf(("ab".to_string(), RetentionFlags::EPHEMERAL)) + ); + + let t0 = parent(leaf(("c".to_string(), RetentionFlags::MARKED)), t); + assert_eq!( + t0.prune(Level::from(2)), + parent( + leaf(("c".to_string(), RetentionFlags::MARKED)), + leaf(("ab".to_string(), RetentionFlags::EPHEMERAL)) + ) + ); + } + + #[test] + fn tree_merge_checked() { + let t0: PrunableTree = + parent(leaf(("a".to_string(), RetentionFlags::EPHEMERAL)), nil()); + + let t1: PrunableTree = + parent(nil(), leaf(("b".to_string(), RetentionFlags::EPHEMERAL))); + + assert_eq!( + t0.clone() + .merge_checked(Address::from_parts(1.into(), 0), t1.clone()), + Ok(leaf(("ab".to_string(), RetentionFlags::EPHEMERAL))) + ); + + let t2: PrunableTree = + parent(leaf(("c".to_string(), RetentionFlags::EPHEMERAL)), nil()); + assert_eq!( + t0.clone() + .merge_checked(Address::from_parts(1.into(), 0), t2.clone()), + Err(Address::from_parts(0.into(), 0)) + ); + + let t3: PrunableTree = parent(t0, t2); + let t4: PrunableTree = parent(t1.clone(), t1); + + assert_eq!( + t3.merge_checked(Address::from_parts(2.into(), 0), t4), + Ok(leaf(("abcb".to_string(), RetentionFlags::EPHEMERAL))) + ); + } + + #[test] + fn located_tree() { + let l = parent(str_leaf("a"), str_leaf("b")); + let r = parent(str_leaf("c"), str_leaf("d")); + + let t: LocatedTree<(), String> = LocatedTree { + root_addr: Address::from_parts(2.into(), 1), + root: parent(l.clone(), r.clone()), + }; + + assert_eq!(t.max_position(), Some(7.into())); + assert_eq!(t.value_at_position(5.into()), Some(&"b".to_string())); + assert_eq!(t.value_at_position(8.into()), None); + assert_eq!(t.subtree(Address::from_parts(0.into(), 1)), None); + assert_eq!(t.subtree(Address::from_parts(3.into(), 0)), None); + + let subtree_addr = Address::from_parts(1.into(), 3); + assert_eq!( + t.subtree(subtree_addr), + Some(LocatedTree { + root_addr: subtree_addr, + root: r.clone() + }) + ); + + assert_eq!( + t.decompose_to_level(1.into()), + vec![ + LocatedTree { + root_addr: Address::from_parts(1.into(), 2), + root: l, + }, + LocatedTree { + root_addr: Address::from_parts(1.into(), 3), + root: r, + } + ] + ); + } + + #[test] + fn located_prunable_tree_insert() { + let tree = LocatedPrunableTree::empty(Address::from_parts(Level::from(2), 0)); + let (base, _, _) = tree + .append::<(), Infallible>("a".to_string(), Retention::Ephemeral) + .unwrap(); + assert_eq!(base.right_filled_root(), Ok("a___".to_string())); + + // Perform an in-order insertion. + let (in_order, pos, _) = base + .append::<(), Infallible>("b".to_string(), Retention::Ephemeral) + .unwrap(); + assert_eq!(pos, 1.into()); + assert_eq!(in_order.right_filled_root(), Ok("ab__".to_string())); + + // On the same tree, perform an out-of-order insertion. + let out_of_order = base + .batch_insert::<(), _, Infallible>( + Position::from(3), + vec![("d".to_string(), Retention::Ephemeral)].into_iter(), + ) + .unwrap() + .unwrap(); + assert_eq!( + out_of_order.subtree, + LocatedPrunableTree { + root_addr: Address::from_parts(2.into(), 0), + root: parent( + parent(leaf(("a".to_string(), RetentionFlags::EPHEMERAL)), nil()), + parent(nil(), leaf(("d".to_string(), RetentionFlags::EPHEMERAL))) + ) + } + ); + + let complete = out_of_order + .subtree + .batch_insert::<(), _, Infallible>( + Position::from(1), + vec![ + ("b".to_string(), Retention::Ephemeral), + ("c".to_string(), Retention::Ephemeral), + ] + .into_iter(), + ) + .unwrap() + .unwrap(); + assert_eq!(complete.subtree.right_filled_root(), Ok("abcd".to_string())); + } + + #[test] + fn shardtree_insertion() { + let mut tree: ShardTree, 4, 3> = + ShardTree::new(vec![], 100, 0); + assert_matches!( + tree.batch_insert( + Position::from(1), + vec![ + ("b".to_string(), Retention::Checkpoint { id: 1, is_marked: false }), + ("c".to_string(), Retention::Ephemeral), + ("d".to_string(), Retention::Marked), + ].into_iter() + ), + Ok(Some((pos, incomplete))) if + pos == Position::from(3) && + incomplete == vec![ + IncompleteAt { + address: Address::from_parts(Level::from(0), 0), + required_for_witness: true + } + ] + ); + + assert_matches!( + tree.root_at_checkpoint(1), + Err(QueryError::TreeIncomplete(v)) if v == vec![Address::from_parts(Level::from(0), 0)] + ); + + assert_matches!( + tree.batch_insert( + Position::from(0), + vec![ + ("a".to_string(), Retention::Ephemeral), + ].into_iter() + ), + Ok(Some((pos, incomplete))) if + pos == Position::from(0) && + incomplete == vec![] + ); + + assert_matches!( + tree.root_at_checkpoint(0), + Ok(h) if h == *"abcd____________" + ); + + assert_matches!( + tree.root_at_checkpoint(1), + Ok(h) if h == *"ab______________" + ); + + assert_matches!( + tree.batch_insert( + Position::from(10), + vec![ + ("k".to_string(), Retention::Ephemeral), + ("l".to_string(), Retention::Checkpoint { id: 2, is_marked: false }), + ("m".to_string(), Retention::Ephemeral), + ].into_iter() + ), + Ok(Some((pos, incomplete))) if + pos == Position::from(12) && + incomplete == vec![ + IncompleteAt { + address: Address::from_parts(Level::from(1), 4), + required_for_witness: false + }, + IncompleteAt { + address: Address::from_parts(Level::from(0), 13), + required_for_witness: false + }, + IncompleteAt { + address: Address::from_parts(Level::from(1), 7), + required_for_witness: false + }, + ] + ); + + assert_matches!( + tree.root_at_checkpoint(0), + // The (0, 13) and (1, 7) incomplete subtrees are + // not considered incomplete here because they appear + // at the tip of the tree. + Err(QueryError::TreeIncomplete(xs)) if xs == vec![ + Address::from_parts(Level::from(2), 1), + Address::from_parts(Level::from(1), 4), + ] + ); + + assert!(tree.truncate_removing_checkpoint(1)); + } + + impl< + H: Hashable + Ord + Clone, + C: Clone + Ord + core::fmt::Debug, + S: ShardStore, + const DEPTH: u8, + const SHARD_HEIGHT: u8, + > testing::Tree for ShardTree + { + fn depth(&self) -> u8 { + DEPTH + } + + fn append(&mut self, value: H, retention: Retention) -> bool { + ShardTree::append(self, value, retention).is_ok() + } + + fn current_position(&self) -> Option { + ShardTree::max_leaf_position(self, 0).ok().flatten() + } + + fn get_marked_leaf(&self, position: Position) -> Option<&H> { + ShardTree::get_marked_leaf(self, position) + } + + fn marked_positions(&self) -> BTreeSet { + ShardTree::marked_positions(self) + } + + fn root(&self, checkpoint_depth: usize) -> Option { + ShardTree::root_at_checkpoint(self, checkpoint_depth).ok() + } + + fn witness(&self, position: Position, checkpoint_depth: usize) -> Option> { + ShardTree::witness(self, position, checkpoint_depth).ok() + } + + fn remove_mark(&mut self, position: Position) -> bool { + if let Some(c) = self.checkpoints.iter().rev().map(|(c, _)| c.clone()).next() { + ShardTree::remove_mark(self, position, &c) + } else { + false + } + } + + fn checkpoint(&mut self, checkpoint_id: C) -> bool { + ShardTree::checkpoint(self, checkpoint_id) + } + + fn rewind(&mut self) -> bool { + ShardTree::truncate_removing_checkpoint(self, 1) + } + } + + #[test] + fn append() { + check_append(|m| { + ShardTree::, 4, 3>::new(vec![], m, 0) + }); + } + + #[test] + fn root_hashes() { + check_root_hashes(|m| { + ShardTree::, 4, 3>::new(vec![], m, 0) + }); + } + + #[test] + fn witnesses() { + check_witnesses(|m| { + ShardTree::, 4, 3>::new(vec![], m, 0) + }); + } + + #[test] + fn checkpoint_rewind() { + check_checkpoint_rewind(|m| { + ShardTree::, 4, 3>::new(vec![], m, 0) + }); + } + + #[test] + fn rewind_remove_mark() { + check_rewind_remove_mark(|m| { + ShardTree::, 4, 3>::new(vec![], m, 0) + }); + } + + // Combined tree tests + #[allow(clippy::type_complexity)] + fn new_combined_tree( + max_checkpoints: usize, + ) -> CombinedTree< + H, + usize, + CompleteTree, + ShardTree, 4, 3>, + > { + CombinedTree::new( + CompleteTree::new(max_checkpoints, 0), + ShardTree::new(vec![], max_checkpoints, 0), + ) + } + + #[test] + fn combined_append() { + check_append(new_combined_tree); + } + + #[test] + fn combined_rewind_remove_mark() { + check_rewind_remove_mark(new_combined_tree); + } + + proptest! { + #![proptest_config(ProptestConfig::with_cases(100000))] + + #[test] + fn check_randomized_u64_ops( + ops in proptest::collection::vec( + arb_operation((0..32u64).prop_map(SipHashable), 0usize..100), + 1..100 + ) + ) { + let tree = new_combined_tree(100); + let indexed_ops = ops.iter().enumerate().map(|(i, op)| op.map_checkpoint_id(|_| i)).collect::>(); + check_operations(tree, &indexed_ops)?; + } + + #[test] + fn check_randomized_str_ops( + ops in proptest::collection::vec( + arb_operation((97u8..123).prop_map(|c| char::from(c).to_string()), 0usize..100), + 1..100 + ) + ) { + let tree = new_combined_tree(100); + let indexed_ops = ops.iter().enumerate().map(|(i, op)| op.map_checkpoint_id(|_| i)).collect::>(); + check_operations(tree, &indexed_ops)?; + } + } +}