perf: Make CacheKV store interleaved iterator and insertion not O(n^2) (backport #10026) (#10167)

* perf: Make CacheKV store interleaved iterator and insertion not O(n^2) (#10026)

(cherry picked from commit 28bf2c124b)

# Conflicts:
#	CHANGELOG.md
#	store/cachekv/store.go

* fix changelog conflict

* Update store.go

Co-authored-by: Dev Ojha <ValarDragon@users.noreply.github.com>
Co-authored-by: Robert Zaremba <robert@zaremba.ch>
This commit is contained in:
mergify[bot] 2021-09-17 01:22:38 +02:00 committed by GitHub
parent b4d7c1f5cb
commit 22a4f50f7b
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 82 additions and 119 deletions

View File

@ -38,7 +38,9 @@ Ref: https://keepachangelog.com/en/1.0.0/
## [Unreleased] ## [Unreleased]
### Improvements ### Improvements
* (types) [\#10021](https://github.com/cosmos/cosmos-sdk/pull/10021) Speedup coins.AmountOf(), by removing many intermittent regex calls. * (types) [\#10021](https://github.com/cosmos/cosmos-sdk/pull/10021) Speedup coins.AmountOf(), by removing many intermittent regex calls.
* (store) [\#10026](https://github.com/cosmos/cosmos-sdk/pull/10026) Improve CacheKVStore datastructures / algorithms, to no longer take O(N^2) time when interleaving iterators and insertions.
### Bug Fixes ### Bug Fixes

View File

@ -1,107 +1,50 @@
package cachekv package cachekv
import ( import (
"errors"
dbm "github.com/tendermint/tm-db" dbm "github.com/tendermint/tm-db"
"github.com/cosmos/cosmos-sdk/types/kv" "github.com/cosmos/cosmos-sdk/store/types"
) )
// Iterates over iterKVCache items. // Iterates over iterKVCache items.
// if key is nil, means it was deleted. // if key is nil, means it was deleted.
// Implements Iterator. // Implements Iterator.
type memIterator struct { type memIterator struct {
start, end []byte types.Iterator
items []*kv.Pair
ascending bool deleted map[string]struct{}
} }
func newMemIterator(start, end []byte, items *kv.List, ascending bool) *memIterator { func newMemIterator(start, end []byte, items *dbm.MemDB, deleted map[string]struct{}, ascending bool) *memIterator {
itemsInDomain := make([]*kv.Pair, 0, items.Len()) var iter types.Iterator
var err error
var entered bool if ascending {
iter, err = items.Iterator(start, end)
} else {
iter, err = items.ReverseIterator(start, end)
}
for e := items.Front(); e != nil; e = e.Next() { if err != nil {
item := e.Value panic(err)
if !dbm.IsKeyInDomain(item.Key, start, end) { }
if entered {
break
}
continue newDeleted := make(map[string]struct{})
} for k, v := range deleted {
newDeleted[k] = v
itemsInDomain = append(itemsInDomain, item)
entered = true
} }
return &memIterator{ return &memIterator{
start: start, Iterator: iter,
end: end,
items: itemsInDomain, deleted: newDeleted,
ascending: ascending,
} }
} }
func (mi *memIterator) Domain() ([]byte, []byte) {
return mi.start, mi.end
}
func (mi *memIterator) Valid() bool {
return len(mi.items) > 0
}
func (mi *memIterator) assertValid() {
if err := mi.Error(); err != nil {
panic(err)
}
}
func (mi *memIterator) Next() {
mi.assertValid()
if mi.ascending {
mi.items = mi.items[1:]
} else {
mi.items = mi.items[:len(mi.items)-1]
}
}
func (mi *memIterator) Key() []byte {
mi.assertValid()
if mi.ascending {
return mi.items[0].Key
}
return mi.items[len(mi.items)-1].Key
}
func (mi *memIterator) Value() []byte { func (mi *memIterator) Value() []byte {
mi.assertValid() key := mi.Iterator.Key()
if _, ok := mi.deleted[string(key)]; ok {
if mi.ascending { return nil
return mi.items[0].Value
} }
return mi.Iterator.Value()
return mi.items[len(mi.items)-1].Value
}
func (mi *memIterator) Close() error {
mi.start = nil
mi.end = nil
mi.items = nil
return nil
}
// Error returns an error if the memIterator is invalid defined by the Valid
// method.
func (mi *memIterator) Error() error {
if !mi.Valid() {
return errors.New("invalid memIterator")
}
return nil
} }

View File

@ -20,17 +20,17 @@ import (
// If value is nil but deleted is false, it means the parent doesn't have the // If value is nil but deleted is false, it means the parent doesn't have the
// key. (No need to delete upon Write()) // key. (No need to delete upon Write())
type cValue struct { type cValue struct {
value []byte value []byte
deleted bool dirty bool
dirty bool
} }
// Store wraps an in-memory cache around an underlying types.KVStore. // Store wraps an in-memory cache around an underlying types.KVStore.
type Store struct { type Store struct {
mtx sync.Mutex mtx sync.Mutex
cache map[string]*cValue cache map[string]*cValue
deleted map[string]struct{}
unsortedCache map[string]struct{} unsortedCache map[string]struct{}
sortedCache *kv.List // always ascending sorted sortedCache *dbm.MemDB // always ascending sorted
parent types.KVStore parent types.KVStore
} }
@ -40,8 +40,9 @@ var _ types.CacheKVStore = (*Store)(nil)
func NewStore(parent types.KVStore) *Store { func NewStore(parent types.KVStore) *Store {
return &Store{ return &Store{
cache: make(map[string]*cValue), cache: make(map[string]*cValue),
deleted: make(map[string]struct{}),
unsortedCache: make(map[string]struct{}), unsortedCache: make(map[string]struct{}),
sortedCache: kv.NewList(), sortedCache: dbm.NewMemDB(),
parent: parent, parent: parent,
} }
} }
@ -122,7 +123,7 @@ func (store *Store) Write() {
cacheValue := store.cache[key] cacheValue := store.cache[key]
switch { switch {
case cacheValue.deleted: case store.isDeleted(key):
store.parent.Delete([]byte(key)) store.parent.Delete([]byte(key))
case cacheValue.value == nil: case cacheValue.value == nil:
// Skip, it already doesn't exist in parent. // Skip, it already doesn't exist in parent.
@ -133,8 +134,9 @@ func (store *Store) Write() {
// Clear the cache // Clear the cache
store.cache = make(map[string]*cValue) store.cache = make(map[string]*cValue)
store.deleted = make(map[string]struct{})
store.unsortedCache = make(map[string]struct{}) store.unsortedCache = make(map[string]struct{})
store.sortedCache = kv.NewList() store.sortedCache = dbm.NewMemDB()
} }
// CacheWrap implements CacheWrapper. // CacheWrap implements CacheWrapper.
@ -178,23 +180,40 @@ func (store *Store) iterator(start, end []byte, ascending bool) types.Iterator {
} }
store.dirtyItems(start, end) store.dirtyItems(start, end)
cache = newMemIterator(start, end, store.sortedCache, ascending) cache = newMemIterator(start, end, store.sortedCache, store.deleted, ascending)
return newCacheMergeIterator(parent, cache, ascending) return newCacheMergeIterator(parent, cache, ascending)
} }
// Constructs a slice of dirty items, to use w/ memIterator. // Constructs a slice of dirty items, to use w/ memIterator.
func (store *Store) dirtyItems(start, end []byte) { func (store *Store) dirtyItems(start, end []byte) {
unsorted := make([]*kv.Pair, 0)
n := len(store.unsortedCache) n := len(store.unsortedCache)
for key := range store.unsortedCache { unsorted := make([]*kv.Pair, 0)
if dbm.IsKeyInDomain(conv.UnsafeStrToBytes(key), start, end) { // If the unsortedCache is too big, its costs too much to determine
// whats in the subset we are concerned about.
// If you are interleaving iterator calls with writes, this can easily become an
// O(N^2) overhead.
// Even without that, too many range checks eventually becomes more expensive
// than just not having the cache.
if n >= 1024 {
for key := range store.unsortedCache {
cacheValue := store.cache[key] cacheValue := store.cache[key]
unsorted = append(unsorted, &kv.Pair{Key: []byte(key), Value: cacheValue.value}) unsorted = append(unsorted, &kv.Pair{Key: []byte(key), Value: cacheValue.value})
} }
} else {
// else do a linear scan to determine if the unsorted pairs are in the pool.
for key := range store.unsortedCache {
if dbm.IsKeyInDomain(conv.UnsafeStrToBytes(key), start, end) {
cacheValue := store.cache[key]
unsorted = append(unsorted, &kv.Pair{Key: []byte(key), Value: cacheValue.value})
}
}
} }
store.clearUnsortedCacheSubset(unsorted)
}
func (store *Store) clearUnsortedCacheSubset(unsorted []*kv.Pair) {
n := len(store.unsortedCache)
if len(unsorted) == n { // This pattern allows the Go compiler to emit the map clearing idiom for the entire map. if len(unsorted) == n { // This pattern allows the Go compiler to emit the map clearing idiom for the entire map.
for key := range store.unsortedCache { for key := range store.unsortedCache {
delete(store.unsortedCache, key) delete(store.unsortedCache, key)
@ -204,32 +223,21 @@ func (store *Store) dirtyItems(start, end []byte) {
delete(store.unsortedCache, conv.UnsafeBytesToStr(kv.Key)) delete(store.unsortedCache, conv.UnsafeBytesToStr(kv.Key))
} }
} }
sort.Slice(unsorted, func(i, j int) bool { sort.Slice(unsorted, func(i, j int) bool {
return bytes.Compare(unsorted[i].Key, unsorted[j].Key) < 0 return bytes.Compare(unsorted[i].Key, unsorted[j].Key) < 0
}) })
for e := store.sortedCache.Front(); e != nil && len(unsorted) != 0; { for _, item := range unsorted {
uitem := unsorted[0] if item.Value == nil {
sitem := e.Value // deleted element, tracked by store.deleted
comp := bytes.Compare(uitem.Key, sitem.Key) // setting arbitrary value
store.sortedCache.Set(item.Key, []byte{})
switch comp { continue
case -1: }
unsorted = unsorted[1:] err := store.sortedCache.Set(item.Key, item.Value)
if err != nil {
store.sortedCache.InsertBefore(uitem, e) panic(err)
case 1:
e = e.Next()
case 0:
unsorted = unsorted[1:]
e.Value = uitem
e = e.Next()
} }
}
for _, kvp := range unsorted {
store.sortedCache.PushBack(kvp)
} }
} }
@ -238,12 +246,22 @@ func (store *Store) dirtyItems(start, end []byte) {
// Only entrypoint to mutate store.cache. // Only entrypoint to mutate store.cache.
func (store *Store) setCacheValue(key, value []byte, deleted bool, dirty bool) { func (store *Store) setCacheValue(key, value []byte, deleted bool, dirty bool) {
store.cache[conv.UnsafeBytesToStr(key)] = &cValue{ keyStr := conv.UnsafeBytesToStr(key)
value: value, store.cache[keyStr] = &cValue{
deleted: deleted, value: value,
dirty: dirty, dirty: dirty,
}
if deleted {
store.deleted[keyStr] = struct{}{}
} else {
delete(store.deleted, keyStr)
} }
if dirty { if dirty {
store.unsortedCache[conv.UnsafeBytesToStr(key)] = struct{}{} store.unsortedCache[conv.UnsafeBytesToStr(key)] = struct{}{}
} }
} }
func (store *Store) isDeleted(key string) bool {
_, ok := store.deleted[key]
return ok
}