perf: Make CacheKV store interleaved iterator and insertion not O(n^2) (backport #10026) (#10167)

* perf: Make CacheKV store interleaved iterator and insertion not O(n^2) (#10026)

(cherry picked from commit 28bf2c124b)

# Conflicts:
#	CHANGELOG.md
#	store/cachekv/store.go

* fix changelog conflict

* Update store.go

Co-authored-by: Dev Ojha <ValarDragon@users.noreply.github.com>
Co-authored-by: Robert Zaremba <robert@zaremba.ch>
This commit is contained in:
mergify[bot] 2021-09-17 01:22:38 +02:00 committed by GitHub
parent b4d7c1f5cb
commit 22a4f50f7b
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 82 additions and 119 deletions

View File

@ -38,7 +38,9 @@ Ref: https://keepachangelog.com/en/1.0.0/
## [Unreleased]
### Improvements
* (types) [\#10021](https://github.com/cosmos/cosmos-sdk/pull/10021) Speedup coins.AmountOf(), by removing many intermittent regex calls.
* (store) [\#10026](https://github.com/cosmos/cosmos-sdk/pull/10026) Improve CacheKVStore datastructures / algorithms, to no longer take O(N^2) time when interleaving iterators and insertions.
### Bug Fixes

View File

@ -1,107 +1,50 @@
package cachekv
import (
"errors"
dbm "github.com/tendermint/tm-db"
"github.com/cosmos/cosmos-sdk/types/kv"
"github.com/cosmos/cosmos-sdk/store/types"
)
// Iterates over iterKVCache items.
// if key is nil, means it was deleted.
// Implements Iterator.
type memIterator struct {
start, end []byte
items []*kv.Pair
ascending bool
types.Iterator
deleted map[string]struct{}
}
func newMemIterator(start, end []byte, items *kv.List, ascending bool) *memIterator {
itemsInDomain := make([]*kv.Pair, 0, items.Len())
func newMemIterator(start, end []byte, items *dbm.MemDB, deleted map[string]struct{}, ascending bool) *memIterator {
var iter types.Iterator
var err error
var entered bool
if ascending {
iter, err = items.Iterator(start, end)
} else {
iter, err = items.ReverseIterator(start, end)
}
for e := items.Front(); e != nil; e = e.Next() {
item := e.Value
if !dbm.IsKeyInDomain(item.Key, start, end) {
if entered {
break
}
if err != nil {
panic(err)
}
continue
}
itemsInDomain = append(itemsInDomain, item)
entered = true
newDeleted := make(map[string]struct{})
for k, v := range deleted {
newDeleted[k] = v
}
return &memIterator{
start: start,
end: end,
items: itemsInDomain,
ascending: ascending,
Iterator: iter,
deleted: newDeleted,
}
}
func (mi *memIterator) Domain() ([]byte, []byte) {
return mi.start, mi.end
}
func (mi *memIterator) Valid() bool {
return len(mi.items) > 0
}
func (mi *memIterator) assertValid() {
if err := mi.Error(); err != nil {
panic(err)
}
}
func (mi *memIterator) Next() {
mi.assertValid()
if mi.ascending {
mi.items = mi.items[1:]
} else {
mi.items = mi.items[:len(mi.items)-1]
}
}
func (mi *memIterator) Key() []byte {
mi.assertValid()
if mi.ascending {
return mi.items[0].Key
}
return mi.items[len(mi.items)-1].Key
}
func (mi *memIterator) Value() []byte {
mi.assertValid()
if mi.ascending {
return mi.items[0].Value
key := mi.Iterator.Key()
if _, ok := mi.deleted[string(key)]; ok {
return nil
}
return mi.items[len(mi.items)-1].Value
}
func (mi *memIterator) Close() error {
mi.start = nil
mi.end = nil
mi.items = nil
return nil
}
// Error returns an error if the memIterator is invalid defined by the Valid
// method.
func (mi *memIterator) Error() error {
if !mi.Valid() {
return errors.New("invalid memIterator")
}
return nil
return mi.Iterator.Value()
}

View File

@ -20,17 +20,17 @@ import (
// If value is nil but deleted is false, it means the parent doesn't have the
// key. (No need to delete upon Write())
type cValue struct {
value []byte
deleted bool
dirty bool
value []byte
dirty bool
}
// Store wraps an in-memory cache around an underlying types.KVStore.
type Store struct {
mtx sync.Mutex
cache map[string]*cValue
deleted map[string]struct{}
unsortedCache map[string]struct{}
sortedCache *kv.List // always ascending sorted
sortedCache *dbm.MemDB // always ascending sorted
parent types.KVStore
}
@ -40,8 +40,9 @@ var _ types.CacheKVStore = (*Store)(nil)
func NewStore(parent types.KVStore) *Store {
return &Store{
cache: make(map[string]*cValue),
deleted: make(map[string]struct{}),
unsortedCache: make(map[string]struct{}),
sortedCache: kv.NewList(),
sortedCache: dbm.NewMemDB(),
parent: parent,
}
}
@ -122,7 +123,7 @@ func (store *Store) Write() {
cacheValue := store.cache[key]
switch {
case cacheValue.deleted:
case store.isDeleted(key):
store.parent.Delete([]byte(key))
case cacheValue.value == nil:
// Skip, it already doesn't exist in parent.
@ -133,8 +134,9 @@ func (store *Store) Write() {
// Clear the cache
store.cache = make(map[string]*cValue)
store.deleted = make(map[string]struct{})
store.unsortedCache = make(map[string]struct{})
store.sortedCache = kv.NewList()
store.sortedCache = dbm.NewMemDB()
}
// CacheWrap implements CacheWrapper.
@ -178,23 +180,40 @@ func (store *Store) iterator(start, end []byte, ascending bool) types.Iterator {
}
store.dirtyItems(start, end)
cache = newMemIterator(start, end, store.sortedCache, ascending)
cache = newMemIterator(start, end, store.sortedCache, store.deleted, ascending)
return newCacheMergeIterator(parent, cache, ascending)
}
// Constructs a slice of dirty items, to use w/ memIterator.
func (store *Store) dirtyItems(start, end []byte) {
unsorted := make([]*kv.Pair, 0)
n := len(store.unsortedCache)
for key := range store.unsortedCache {
if dbm.IsKeyInDomain(conv.UnsafeStrToBytes(key), start, end) {
unsorted := make([]*kv.Pair, 0)
// If the unsortedCache is too big, its costs too much to determine
// whats in the subset we are concerned about.
// If you are interleaving iterator calls with writes, this can easily become an
// O(N^2) overhead.
// Even without that, too many range checks eventually becomes more expensive
// than just not having the cache.
if n >= 1024 {
for key := range store.unsortedCache {
cacheValue := store.cache[key]
unsorted = append(unsorted, &kv.Pair{Key: []byte(key), Value: cacheValue.value})
}
} else {
// else do a linear scan to determine if the unsorted pairs are in the pool.
for key := range store.unsortedCache {
if dbm.IsKeyInDomain(conv.UnsafeStrToBytes(key), start, end) {
cacheValue := store.cache[key]
unsorted = append(unsorted, &kv.Pair{Key: []byte(key), Value: cacheValue.value})
}
}
}
store.clearUnsortedCacheSubset(unsorted)
}
func (store *Store) clearUnsortedCacheSubset(unsorted []*kv.Pair) {
n := len(store.unsortedCache)
if len(unsorted) == n { // This pattern allows the Go compiler to emit the map clearing idiom for the entire map.
for key := range store.unsortedCache {
delete(store.unsortedCache, key)
@ -204,32 +223,21 @@ func (store *Store) dirtyItems(start, end []byte) {
delete(store.unsortedCache, conv.UnsafeBytesToStr(kv.Key))
}
}
sort.Slice(unsorted, func(i, j int) bool {
return bytes.Compare(unsorted[i].Key, unsorted[j].Key) < 0
})
for e := store.sortedCache.Front(); e != nil && len(unsorted) != 0; {
uitem := unsorted[0]
sitem := e.Value
comp := bytes.Compare(uitem.Key, sitem.Key)
switch comp {
case -1:
unsorted = unsorted[1:]
store.sortedCache.InsertBefore(uitem, e)
case 1:
e = e.Next()
case 0:
unsorted = unsorted[1:]
e.Value = uitem
e = e.Next()
for _, item := range unsorted {
if item.Value == nil {
// deleted element, tracked by store.deleted
// setting arbitrary value
store.sortedCache.Set(item.Key, []byte{})
continue
}
err := store.sortedCache.Set(item.Key, item.Value)
if err != nil {
panic(err)
}
}
for _, kvp := range unsorted {
store.sortedCache.PushBack(kvp)
}
}
@ -238,12 +246,22 @@ func (store *Store) dirtyItems(start, end []byte) {
// Only entrypoint to mutate store.cache.
func (store *Store) setCacheValue(key, value []byte, deleted bool, dirty bool) {
store.cache[conv.UnsafeBytesToStr(key)] = &cValue{
value: value,
deleted: deleted,
dirty: dirty,
keyStr := conv.UnsafeBytesToStr(key)
store.cache[keyStr] = &cValue{
value: value,
dirty: dirty,
}
if deleted {
store.deleted[keyStr] = struct{}{}
} else {
delete(store.deleted, keyStr)
}
if dirty {
store.unsortedCache[conv.UnsafeBytesToStr(key)] = struct{}{}
}
}
func (store *Store) isDeleted(key string) bool {
_, ok := store.deleted[key]
return ok
}