diff --git a/merkle/README.md b/merkle/README.md new file mode 100644 index 00000000..c4497836 --- /dev/null +++ b/merkle/README.md @@ -0,0 +1,4 @@ +## Simple Merkle Tree + +For smaller static data structures that don't require immutable snapshots or mutability; +for instance the transactions and validation signatures of a block can be hashed using this simple merkle tree logic. diff --git a/merkle/doc.go b/merkle/doc.go new file mode 100644 index 00000000..da65dd85 --- /dev/null +++ b/merkle/doc.go @@ -0,0 +1,31 @@ +/* +Package merkle computes a deterministic minimal height Merkle tree hash. +If the number of items is not a power of two, some leaves +will be at different levels. Tries to keep both sides of +the tree the same size, but the left may be one greater. + +Use this for short deterministic trees, such as the validator list. +For larger datasets, use IAVLTree. + +Be aware that the current implementation by itself does not prevent +second pre-image attacks. Hence, use this library with caution. +Otherwise you might run into similar issues as, e.g., in early Bitcoin: +https://bitcointalk.org/?topic=102395 + + * + / \ + / \ + / \ + / \ + * * + / \ / \ + / \ / \ + / \ / \ + * * * h6 + / \ / \ / \ + h0 h1 h2 h3 h4 h5 + +TODO(ismail): add 2nd pre-image protection or clarify further on how we use this and why this secure. + +*/ +package merkle \ No newline at end of file diff --git a/merkle/simple_map.go b/merkle/simple_map.go new file mode 100644 index 00000000..cde5924f --- /dev/null +++ b/merkle/simple_map.go @@ -0,0 +1,91 @@ +package merkle + +import ( + "github.com/tendermint/go-crypto/tmhash" + cmn "github.com/tendermint/tmlibs/common" +) + +// Merkle tree from a map. +// Leaves are `hash(key) | hash(value)`. +// Leaves are sorted before Merkle hashing. +type simpleMap struct { + kvs cmn.KVPairs + sorted bool +} + +func newSimpleMap() *simpleMap { + return &simpleMap{ + kvs: nil, + sorted: false, + } +} + +// Set hashes the key and value and appends it to the kv pairs. +func (sm *simpleMap) Set(key string, value Hasher) { + sm.sorted = false + + // Hash the key to blind it... why not? + khash := tmhash.Sum([]byte(key)) + + // And the value is hashed too, so you can + // check for equality with a cached value (say) + // and make a determination to fetch or not. + vhash := value.Hash() + + sm.kvs = append(sm.kvs, cmn.KVPair{ + Key: khash, + Value: vhash, + }) +} + +// Hash Merkle root hash of items sorted by key +// (UNSTABLE: and by value too if duplicate key). +func (sm *simpleMap) Hash() []byte { + sm.Sort() + return hashKVPairs(sm.kvs) +} + +func (sm *simpleMap) Sort() { + if sm.sorted { + return + } + sm.kvs.Sort() + sm.sorted = true +} + +// Returns a copy of sorted KVPairs. +// NOTE these contain the hashed key and value. +func (sm *simpleMap) KVPairs() cmn.KVPairs { + sm.Sort() + kvs := make(cmn.KVPairs, len(sm.kvs)) + copy(kvs, sm.kvs) + return kvs +} + +//---------------------------------------- + +// A local extension to KVPair that can be hashed. +// Key and value are length prefixed and concatenated, +// then hashed. +type kvPair cmn.KVPair + +func (kv kvPair) Hash() []byte { + hasher := tmhash.New() + err := encodeByteSlice(hasher, kv.Key) + if err != nil { + panic(err) + } + err = encodeByteSlice(hasher, kv.Value) + if err != nil { + panic(err) + } + return hasher.Sum(nil) +} + +func hashKVPairs(kvs cmn.KVPairs) []byte { + kvsH := make([]Hasher, len(kvs)) + for i, kvp := range kvs { + kvsH[i] = kvPair(kvp) + } + return SimpleHashFromHashers(kvsH) +} diff --git a/merkle/simple_map_test.go b/merkle/simple_map_test.go new file mode 100644 index 00000000..a89289a8 --- /dev/null +++ b/merkle/simple_map_test.go @@ -0,0 +1,54 @@ +package merkle + +import ( + "fmt" + "testing" + + "github.com/stretchr/testify/assert" + "github.com/tendermint/go-crypto/tmhash" +) + +type strHasher string + +func (str strHasher) Hash() []byte { + return tmhash.Sum([]byte(str)) +} + +func TestSimpleMap(t *testing.T) { + { + db := newSimpleMap() + db.Set("key1", strHasher("value1")) + assert.Equal(t, "3dafc06a52039d029be57c75c9d16356a4256ef4", fmt.Sprintf("%x", db.Hash()), "Hash didn't match") + } + { + db := newSimpleMap() + db.Set("key1", strHasher("value2")) + assert.Equal(t, "03eb5cfdff646bc4e80fec844e72fd248a1c6b2c", fmt.Sprintf("%x", db.Hash()), "Hash didn't match") + } + { + db := newSimpleMap() + db.Set("key1", strHasher("value1")) + db.Set("key2", strHasher("value2")) + assert.Equal(t, "acc3971eab8513171cc90ce8b74f368c38f9657d", fmt.Sprintf("%x", db.Hash()), "Hash didn't match") + } + { + db := newSimpleMap() + db.Set("key2", strHasher("value2")) // NOTE: out of order + db.Set("key1", strHasher("value1")) + assert.Equal(t, "acc3971eab8513171cc90ce8b74f368c38f9657d", fmt.Sprintf("%x", db.Hash()), "Hash didn't match") + } + { + db := newSimpleMap() + db.Set("key1", strHasher("value1")) + db.Set("key2", strHasher("value2")) + db.Set("key3", strHasher("value3")) + assert.Equal(t, "0cd117ad14e6cd22edcd9aa0d84d7063b54b862f", fmt.Sprintf("%x", db.Hash()), "Hash didn't match") + } + { + db := newSimpleMap() + db.Set("key2", strHasher("value2")) // NOTE: out of order + db.Set("key1", strHasher("value1")) + db.Set("key3", strHasher("value3")) + assert.Equal(t, "0cd117ad14e6cd22edcd9aa0d84d7063b54b862f", fmt.Sprintf("%x", db.Hash()), "Hash didn't match") + } +} diff --git a/merkle/simple_proof.go b/merkle/simple_proof.go new file mode 100644 index 00000000..f52d1ad9 --- /dev/null +++ b/merkle/simple_proof.go @@ -0,0 +1,152 @@ +package merkle + +import ( + "bytes" + "fmt" +) + +// SimpleProof represents a simple merkle proof. +type SimpleProof struct { + Aunts [][]byte `json:"aunts"` // Hashes from leaf's sibling to a root's child. +} + +// SimpleProofsFromHashers computes inclusion proof for given items. +// proofs[0] is the proof for items[0]. +func SimpleProofsFromHashers(items []Hasher) (rootHash []byte, proofs []*SimpleProof) { + trails, rootSPN := trailsFromHashers(items) + rootHash = rootSPN.Hash + proofs = make([]*SimpleProof, len(items)) + for i, trail := range trails { + proofs[i] = &SimpleProof{ + Aunts: trail.FlattenAunts(), + } + } + return +} + +// SimpleProofsFromMap generates proofs from a map. The keys/values of the map will be used as the keys/values +// in the underlying key-value pairs. +// The keys are sorted before the proofs are computed. +func SimpleProofsFromMap(m map[string]Hasher) (rootHash []byte, proofs []*SimpleProof) { + sm := newSimpleMap() + for k, v := range m { + sm.Set(k, v) + } + sm.Sort() + kvs := sm.kvs + kvsH := make([]Hasher, 0, len(kvs)) + for _, kvp := range kvs { + kvsH = append(kvsH, kvPair(kvp)) + } + return SimpleProofsFromHashers(kvsH) +} + +// Verify that leafHash is a leaf hash of the simple-merkle-tree +// which hashes to rootHash. +func (sp *SimpleProof) Verify(index int, total int, leafHash []byte, rootHash []byte) bool { + computedHash := computeHashFromAunts(index, total, leafHash, sp.Aunts) + return computedHash != nil && bytes.Equal(computedHash, rootHash) +} + +// String implements the stringer interface for SimpleProof. +// It is a wrapper around StringIndented. +func (sp *SimpleProof) String() string { + return sp.StringIndented("") +} + +// StringIndented generates a canonical string representation of a SimpleProof. +func (sp *SimpleProof) StringIndented(indent string) string { + return fmt.Sprintf(`SimpleProof{ +%s Aunts: %X +%s}`, + indent, sp.Aunts, + indent) +} + +// Use the leafHash and innerHashes to get the root merkle hash. +// If the length of the innerHashes slice isn't exactly correct, the result is nil. +// Recursive impl. +func computeHashFromAunts(index int, total int, leafHash []byte, innerHashes [][]byte) []byte { + if index >= total || index < 0 || total <= 0 { + return nil + } + switch total { + case 0: + panic("Cannot call computeHashFromAunts() with 0 total") + case 1: + if len(innerHashes) != 0 { + return nil + } + return leafHash + default: + if len(innerHashes) == 0 { + return nil + } + numLeft := (total + 1) / 2 + if index < numLeft { + leftHash := computeHashFromAunts(index, numLeft, leafHash, innerHashes[:len(innerHashes)-1]) + if leftHash == nil { + return nil + } + return SimpleHashFromTwoHashes(leftHash, innerHashes[len(innerHashes)-1]) + } + rightHash := computeHashFromAunts(index-numLeft, total-numLeft, leafHash, innerHashes[:len(innerHashes)-1]) + if rightHash == nil { + return nil + } + return SimpleHashFromTwoHashes(innerHashes[len(innerHashes)-1], rightHash) + } +} + +// SimpleProofNode is a helper structure to construct merkle proof. +// The node and the tree is thrown away afterwards. +// Exactly one of node.Left and node.Right is nil, unless node is the root, in which case both are nil. +// node.Parent.Hash = hash(node.Hash, node.Right.Hash) or +// hash(node.Left.Hash, node.Hash), depending on whether node is a left/right child. +type SimpleProofNode struct { + Hash []byte + Parent *SimpleProofNode + Left *SimpleProofNode // Left sibling (only one of Left,Right is set) + Right *SimpleProofNode // Right sibling (only one of Left,Right is set) +} + +// FlattenAunts will return the inner hashes for the item corresponding to the leaf, +// starting from a leaf SimpleProofNode. +func (spn *SimpleProofNode) FlattenAunts() [][]byte { + // Nonrecursive impl. + innerHashes := [][]byte{} + for spn != nil { + if spn.Left != nil { + innerHashes = append(innerHashes, spn.Left.Hash) + } else if spn.Right != nil { + innerHashes = append(innerHashes, spn.Right.Hash) + } else { + break + } + spn = spn.Parent + } + return innerHashes +} + +// trails[0].Hash is the leaf hash for items[0]. +// trails[i].Parent.Parent....Parent == root for all i. +func trailsFromHashers(items []Hasher) (trails []*SimpleProofNode, root *SimpleProofNode) { + // Recursive impl. + switch len(items) { + case 0: + return nil, nil + case 1: + trail := &SimpleProofNode{items[0].Hash(), nil, nil, nil} + return []*SimpleProofNode{trail}, trail + default: + lefts, leftRoot := trailsFromHashers(items[:(len(items)+1)/2]) + rights, rightRoot := trailsFromHashers(items[(len(items)+1)/2:]) + rootHash := SimpleHashFromTwoHashes(leftRoot.Hash, rightRoot.Hash) + root := &SimpleProofNode{rootHash, nil, nil, nil} + leftRoot.Parent = root + leftRoot.Right = rightRoot + rightRoot.Parent = root + rightRoot.Left = leftRoot + return append(lefts, rights...), root + } +} diff --git a/merkle/simple_tree.go b/merkle/simple_tree.go new file mode 100644 index 00000000..c23f8426 --- /dev/null +++ b/merkle/simple_tree.go @@ -0,0 +1,58 @@ +package merkle + +import ( + "github.com/tendermint/go-crypto/tmhash" +) + +// SimpleHashFromTwoHashes is the basic operation of the Merkle tree: Hash(left | right). +func SimpleHashFromTwoHashes(left, right []byte) []byte { + var hasher = tmhash.New() + err := encodeByteSlice(hasher, left) + if err != nil { + panic(err) + } + err = encodeByteSlice(hasher, right) + if err != nil { + panic(err) + } + return hasher.Sum(nil) +} + +// SimpleHashFromHashers computes a Merkle tree from items that can be hashed. +func SimpleHashFromHashers(items []Hasher) []byte { + hashes := make([][]byte, len(items)) + for i, item := range items { + hash := item.Hash() + hashes[i] = hash + } + return simpleHashFromHashes(hashes) +} + +// SimpleHashFromMap computes a Merkle tree from sorted map. +// Like calling SimpleHashFromHashers with +// `item = []byte(Hash(key) | Hash(value))`, +// sorted by `item`. +func SimpleHashFromMap(m map[string]Hasher) []byte { + sm := newSimpleMap() + for k, v := range m { + sm.Set(k, v) + } + return sm.Hash() +} + +//---------------------------------------------------------------- + +// Expects hashes! +func simpleHashFromHashes(hashes [][]byte) []byte { + // Recursive impl. + switch len(hashes) { + case 0: + return nil + case 1: + return hashes[0] + default: + left := simpleHashFromHashes(hashes[:(len(hashes)+1)/2]) + right := simpleHashFromHashes(hashes[(len(hashes)+1)/2:]) + return SimpleHashFromTwoHashes(left, right) + } +} diff --git a/merkle/simple_tree_test.go b/merkle/simple_tree_test.go new file mode 100644 index 00000000..db8e3d7f --- /dev/null +++ b/merkle/simple_tree_test.go @@ -0,0 +1,88 @@ +package merkle + +import ( + "bytes" + + cmn "github.com/tendermint/tmlibs/common" + . "github.com/tendermint/tmlibs/test" + + "testing" + "github.com/tendermint/go-crypto/tmhash" +) + +type testItem []byte + +func (tI testItem) Hash() []byte { + return []byte(tI) +} + +func TestSimpleProof(t *testing.T) { + + total := 100 + + items := make([]Hasher, total) + for i := 0; i < total; i++ { + items[i] = testItem(cmn.RandBytes(tmhash.Size)) + } + + rootHash := SimpleHashFromHashers(items) + + rootHash2, proofs := SimpleProofsFromHashers(items) + + if !bytes.Equal(rootHash, rootHash2) { + t.Errorf("Unmatched root hashes: %X vs %X", rootHash, rootHash2) + } + + // For each item, check the trail. + for i, item := range items { + itemHash := item.Hash() + proof := proofs[i] + + // Verify success + ok := proof.Verify(i, total, itemHash, rootHash) + if !ok { + t.Errorf("Verification failed for index %v.", i) + } + + // Wrong item index should make it fail + { + ok = proof.Verify((i+1)%total, total, itemHash, rootHash) + if ok { + t.Errorf("Expected verification to fail for wrong index %v.", i) + } + } + + // Trail too long should make it fail + origAunts := proof.Aunts + proof.Aunts = append(proof.Aunts, cmn.RandBytes(32)) + { + ok = proof.Verify(i, total, itemHash, rootHash) + if ok { + t.Errorf("Expected verification to fail for wrong trail length.") + } + } + proof.Aunts = origAunts + + // Trail too short should make it fail + proof.Aunts = proof.Aunts[0 : len(proof.Aunts)-1] + { + ok = proof.Verify(i, total, itemHash, rootHash) + if ok { + t.Errorf("Expected verification to fail for wrong trail length.") + } + } + proof.Aunts = origAunts + + // Mutating the itemHash should make it fail. + ok = proof.Verify(i, total, MutateByteSlice(itemHash), rootHash) + if ok { + t.Errorf("Expected verification to fail for mutated leaf hash") + } + + // Mutating the rootHash should make it fail. + ok = proof.Verify(i, total, itemHash, MutateByteSlice(rootHash)) + if ok { + t.Errorf("Expected verification to fail for mutated root hash") + } + } +} diff --git a/merkle/types.go b/merkle/types.go new file mode 100644 index 00000000..2fcb3f39 --- /dev/null +++ b/merkle/types.go @@ -0,0 +1,38 @@ +package merkle + +import ( + "io" + + amino "github.com/tendermint/go-amino" +) + +// Tree is a Merkle tree interface. +type Tree interface { + Size() (size int) + Height() (height int8) + Has(key []byte) (has bool) + Proof(key []byte) (value []byte, proof []byte, exists bool) // TODO make it return an index + Get(key []byte) (index int, value []byte, exists bool) + GetByIndex(index int) (key []byte, value []byte) + Set(key []byte, value []byte) (updated bool) + Remove(key []byte) (value []byte, removed bool) + HashWithCount() (hash []byte, count int) + Hash() (hash []byte) + Save() (hash []byte) + Load(hash []byte) + Copy() Tree + Iterate(func(key []byte, value []byte) (stop bool)) (stopped bool) + IterateRange(start []byte, end []byte, ascending bool, fx func(key []byte, value []byte) (stop bool)) (stopped bool) +} + +// Hasher represents a hashable piece of data which can be hashed in the Tree. +type Hasher interface { + Hash() []byte +} + +//----------------------------------------------------------------------- + +// Uvarint length prefixed byteslice +func encodeByteSlice(w io.Writer, bz []byte) (err error) { + return amino.EncodeByteSlice(w, bz) +} diff --git a/tmhash/hash.go b/tmhash/hash.go new file mode 100644 index 00000000..1b29d868 --- /dev/null +++ b/tmhash/hash.go @@ -0,0 +1,48 @@ +package tmhash + +import ( + "crypto/sha256" + "hash" +) + +const ( + Size = 20 + BlockSize = sha256.BlockSize +) + +type sha256trunc struct { + sha256 hash.Hash +} + +func (h sha256trunc) Write(p []byte) (n int, err error) { + return h.sha256.Write(p) +} +func (h sha256trunc) Sum(b []byte) []byte { + shasum := h.sha256.Sum(b) + return shasum[:Size] +} + +func (h sha256trunc) Reset() { + h.sha256.Reset() +} + +func (h sha256trunc) Size() int { + return Size +} + +func (h sha256trunc) BlockSize() int { + return h.sha256.BlockSize() +} + +// New returns a new hash.Hash. +func New() hash.Hash { + return sha256trunc{ + sha256: sha256.New(), + } +} + +// Sum returns the first 20 bytes of SHA256 of the bz. +func Sum(bz []byte) []byte { + hash := sha256.Sum256(bz) + return hash[:Size] +} diff --git a/tmhash/hash_test.go b/tmhash/hash_test.go new file mode 100644 index 00000000..abf0247a --- /dev/null +++ b/tmhash/hash_test.go @@ -0,0 +1,23 @@ +package tmhash_test + +import ( + "crypto/sha256" + "testing" + + "github.com/stretchr/testify/assert" + "github.com/tendermint/go-crypto/tmhash" +) + +func TestHash(t *testing.T) { + testVector := []byte("abc") + hasher := tmhash.New() + hasher.Write(testVector) + bz := hasher.Sum(nil) + + hasher = sha256.New() + hasher.Write(testVector) + bz2 := hasher.Sum(nil) + bz2 = bz2[:20] + + assert.Equal(t, bz, bz2) +}