perf: Make CacheKV store interleaved iterator and insertion not O(n^2) (#10026)

This commit is contained in:
Dev Ojha 2021-09-10 15:42:56 -04:00 committed by GitHub
parent 3d3bc7c432
commit 28bf2c124b
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 79 additions and 118 deletions

View File

@ -87,6 +87,7 @@ Ref: https://keepachangelog.com/en/1.0.0/
### Improvements
* (store) [\#10026](https://github.com/cosmos/cosmos-sdk/pull/10026) Improve CacheKVStore datastructures / algorithms, to no longer take O(N^2) time when interleaving iterators and insertions.
* (types) [\#10076](https://github.com/cosmos/cosmos-sdk/pull/10076) Significantly speedup and lower allocations for `Coins.String()`.
* (x/bank) [\#10022](https://github.com/cosmos/cosmos-sdk/pull/10022) `BankKeeper.SendCoins` now takes less execution time.
* (deps) [\#9956](https://github.com/cosmos/cosmos-sdk/pull/9956) Bump Tendermint to [v0.34.12](https://github.com/tendermint/tendermint/releases/tag/v0.34.12).

View File

@ -1,107 +1,50 @@
package cachekv
import (
"errors"
dbm "github.com/tendermint/tm-db"
"github.com/cosmos/cosmos-sdk/types/kv"
"github.com/cosmos/cosmos-sdk/store/types"
)
// Iterates over iterKVCache items.
// if key is nil, means it was deleted.
// Implements Iterator.
type memIterator struct {
start, end []byte
items []*kv.Pair
ascending bool
types.Iterator
deleted map[string]struct{}
}
func newMemIterator(start, end []byte, items *kv.List, ascending bool) *memIterator {
itemsInDomain := make([]*kv.Pair, 0, items.Len())
func newMemIterator(start, end []byte, items *dbm.MemDB, deleted map[string]struct{}, ascending bool) *memIterator {
var iter types.Iterator
var err error
var entered bool
for e := items.Front(); e != nil; e = e.Next() {
item := e.Value
if !dbm.IsKeyInDomain(item.Key, start, end) {
if entered {
break
if ascending {
iter, err = items.Iterator(start, end)
} else {
iter, err = items.ReverseIterator(start, end)
}
continue
if err != nil {
panic(err)
}
itemsInDomain = append(itemsInDomain, item)
entered = true
newDeleted := make(map[string]struct{})
for k, v := range deleted {
newDeleted[k] = v
}
return &memIterator{
start: start,
end: end,
items: itemsInDomain,
ascending: ascending,
Iterator: iter,
deleted: newDeleted,
}
}
func (mi *memIterator) Domain() ([]byte, []byte) {
return mi.start, mi.end
}
func (mi *memIterator) Valid() bool {
return len(mi.items) > 0
}
func (mi *memIterator) assertValid() {
if err := mi.Error(); err != nil {
panic(err)
}
}
func (mi *memIterator) Next() {
mi.assertValid()
if mi.ascending {
mi.items = mi.items[1:]
} else {
mi.items = mi.items[:len(mi.items)-1]
}
}
func (mi *memIterator) Key() []byte {
mi.assertValid()
if mi.ascending {
return mi.items[0].Key
}
return mi.items[len(mi.items)-1].Key
}
func (mi *memIterator) Value() []byte {
mi.assertValid()
if mi.ascending {
return mi.items[0].Value
}
return mi.items[len(mi.items)-1].Value
}
func (mi *memIterator) Close() error {
mi.start = nil
mi.end = nil
mi.items = nil
key := mi.Iterator.Key()
if _, ok := mi.deleted[string(key)]; ok {
return nil
}
// Error returns an error if the memIterator is invalid defined by the Valid
// method.
func (mi *memIterator) Error() error {
if !mi.Valid() {
return errors.New("invalid memIterator")
}
return nil
return mi.Iterator.Value()
}

View File

@ -21,7 +21,6 @@ import (
// key. (No need to delete upon Write())
type cValue struct {
value []byte
deleted bool
dirty bool
}
@ -29,8 +28,9 @@ type cValue struct {
type Store struct {
mtx sync.Mutex
cache map[string]*cValue
deleted map[string]struct{}
unsortedCache map[string]struct{}
sortedCache *kv.List // always ascending sorted
sortedCache *dbm.MemDB // always ascending sorted
parent types.KVStore
}
@ -40,8 +40,9 @@ var _ types.CacheKVStore = (*Store)(nil)
func NewStore(parent types.KVStore) *Store {
return &Store{
cache: make(map[string]*cValue),
deleted: make(map[string]struct{}),
unsortedCache: make(map[string]struct{}),
sortedCache: kv.NewList(),
sortedCache: dbm.NewMemDB(),
parent: parent,
}
}
@ -120,7 +121,7 @@ func (store *Store) Write() {
cacheValue := store.cache[key]
switch {
case cacheValue.deleted:
case store.isDeleted(key):
store.parent.Delete([]byte(key))
case cacheValue.value == nil:
// Skip, it already doesn't exist in parent.
@ -131,8 +132,9 @@ func (store *Store) Write() {
// Clear the cache
store.cache = make(map[string]*cValue)
store.deleted = make(map[string]struct{})
store.unsortedCache = make(map[string]struct{})
store.sortedCache = kv.NewList()
store.sortedCache = dbm.NewMemDB()
}
// CacheWrap implements CacheWrapper.
@ -176,23 +178,40 @@ func (store *Store) iterator(start, end []byte, ascending bool) types.Iterator {
}
store.dirtyItems(start, end)
cache = newMemIterator(start, end, store.sortedCache, ascending)
cache = newMemIterator(start, end, store.sortedCache, store.deleted, ascending)
return newCacheMergeIterator(parent, cache, ascending)
}
// Constructs a slice of dirty items, to use w/ memIterator.
func (store *Store) dirtyItems(start, end []byte) {
unsorted := make([]*kv.Pair, 0)
n := len(store.unsortedCache)
unsorted := make([]*kv.Pair, 0)
// If the unsortedCache is too big, its costs too much to determine
// whats in the subset we are concerned about.
// If you are interleaving iterator calls with writes, this can easily become an
// O(N^2) overhead.
// Even without that, too many range checks eventually becomes more expensive
// than just not having the cache.
if n >= 1024 {
for key := range store.unsortedCache {
cacheValue := store.cache[key]
unsorted = append(unsorted, &kv.Pair{Key: []byte(key), Value: cacheValue.value})
}
} else {
// else do a linear scan to determine if the unsorted pairs are in the pool.
for key := range store.unsortedCache {
if dbm.IsKeyInDomain(conv.UnsafeStrToBytes(key), start, end) {
cacheValue := store.cache[key]
unsorted = append(unsorted, &kv.Pair{Key: []byte(key), Value: cacheValue.value})
}
}
}
store.clearUnsortedCacheSubset(unsorted)
}
func (store *Store) clearUnsortedCacheSubset(unsorted []*kv.Pair) {
n := len(store.unsortedCache)
if len(unsorted) == n { // This pattern allows the Go compiler to emit the map clearing idiom for the entire map.
for key := range store.unsortedCache {
delete(store.unsortedCache, key)
@ -202,32 +221,21 @@ func (store *Store) dirtyItems(start, end []byte) {
delete(store.unsortedCache, conv.UnsafeBytesToStr(kv.Key))
}
}
sort.Slice(unsorted, func(i, j int) bool {
return bytes.Compare(unsorted[i].Key, unsorted[j].Key) < 0
})
for e := store.sortedCache.Front(); e != nil && len(unsorted) != 0; {
uitem := unsorted[0]
sitem := e.Value
comp := bytes.Compare(uitem.Key, sitem.Key)
switch comp {
case -1:
unsorted = unsorted[1:]
store.sortedCache.InsertBefore(uitem, e)
case 1:
e = e.Next()
case 0:
unsorted = unsorted[1:]
e.Value = uitem
e = e.Next()
for _, item := range unsorted {
if item.Value == nil {
// deleted element, tracked by store.deleted
// setting arbitrary value
store.sortedCache.Set(item.Key, []byte{})
continue
}
err := store.sortedCache.Set(item.Key, item.Value)
if err != nil {
panic(err)
}
for _, kvp := range unsorted {
store.sortedCache.PushBack(kvp)
}
}
@ -239,10 +247,19 @@ func (store *Store) setCacheValue(key, value []byte, deleted bool, dirty bool) {
keyStr := conv.UnsafeBytesToStr(key)
store.cache[keyStr] = &cValue{
value: value,
deleted: deleted,
dirty: dirty,
}
if deleted {
store.deleted[keyStr] = struct{}{}
} else {
delete(store.deleted, keyStr)
}
if dirty {
store.unsortedCache[keyStr] = struct{}{}
}
}
func (store *Store) isDeleted(key string) bool {
_, ok := store.deleted[key]
return ok
}