// cosmos-sdk/store/cachekv/store.go
package cachekv
import (
	"bytes"
	"io"
	"sort"
	"sync"

	dbm "github.com/tendermint/tm-db"

	"github.com/cosmos/cosmos-sdk/internal/conv"
	"github.com/cosmos/cosmos-sdk/store/listenkv"
	"github.com/cosmos/cosmos-sdk/store/tracekv"
	"github.com/cosmos/cosmos-sdk/store/types"
	"github.com/cosmos/cosmos-sdk/types/kv"
)
// cValue represents a cached value. If value is nil and the key is not in the
// store's deleted set, it means the parent doesn't have the key. (No need to
// delete upon Write())
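// Illustrative states, assuming the setCacheValue call sites in this file
// (deletions are additionally tracked in the Store.deleted set):
//
//	{value: v, dirty: true}    buffered Set(key, v), flushed on Write()
//	{value: nil, dirty: true}  buffered Delete(key)
//	{value: v, dirty: false}   read-through copy of the parent's value
//	{value: nil, dirty: false} cached "the parent has no such key"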
type cValue struct {
value []byte
dirty bool
}
// Store wraps an in-memory cache around an underlying types.KVStore.
type Store struct {
	mtx           sync.Mutex
	cache         map[string]*cValue  // cached reads from the parent plus buffered writes
	deleted       map[string]struct{} // keys deleted in this cache, pending Write()
	unsortedCache map[string]struct{} // dirty keys not yet merged into sortedCache
	sortedCache   *dbm.MemDB          // always ascending sorted
	parent        types.KVStore
}
var _ types.CacheKVStore = (*Store)(nil)
// NewStore creates a new Store object
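// A minimal usage sketch (identifiers are illustrative; parent is any concrete
// types.KVStore):
//
//	cacheStore := cachekv.NewStore(parent)
//	cacheStore.Set([]byte("k"), []byte("v")) // buffered in memory only
//	_ = cacheStore.Get([]byte("k"))          // served from the cache
//	cacheStore.Write()                       // flush dirty entries to parent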
func NewStore(parent types.KVStore) *Store {
return &Store{
cache: make(map[string]*cValue),
deleted: make(map[string]struct{}),
unsortedCache: make(map[string]struct{}),
sortedCache: dbm.NewMemDB(),
parent: parent,
}
}
// GetStoreType implements Store.
func (store *Store) GetStoreType() types.StoreType {
return store.parent.GetStoreType()
}
// Get implements types.KVStore.
func (store *Store) Get(key []byte) (value []byte) {
store.mtx.Lock()
defer store.mtx.Unlock()
types.AssertValidKey(key)
cacheValue, ok := store.cache[conv.UnsafeBytesToStr(key)]
if !ok {
value = store.parent.Get(key)
store.setCacheValue(key, value, false, false)
} else {
value = cacheValue.value
}
return value
}
// Set implements types.KVStore.
func (store *Store) Set(key []byte, value []byte) {
store.mtx.Lock()
defer store.mtx.Unlock()
types.AssertValidKey(key)
types.AssertValidValue(value)
store.setCacheValue(key, value, false, true)
}
// Has implements types.KVStore.
func (store *Store) Has(key []byte) bool {
value := store.Get(key)
return value != nil
}
// Delete implements types.KVStore.
func (store *Store) Delete(key []byte) {
store.mtx.Lock()
defer store.mtx.Unlock()
types.AssertValidKey(key)
store.setCacheValue(key, nil, true, true)
}
// Write implements types.CacheKVStore.
func (store *Store) Write() {
store.mtx.Lock()
defer store.mtx.Unlock()
	// We need a copy of all of the keys.
	// Not the best, but probably not a bottleneck in practice.
keys := make([]string, 0, len(store.cache))
for key, dbValue := range store.cache {
if dbValue.dirty {
keys = append(keys, key)
}
}
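	// Sort the dirty keys so writes reach the parent in a deterministic,
	// ascending order.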
sort.Strings(keys)
// TODO: Consider allowing usage of Batch, which would allow the write to
// at least happen atomically.
for _, key := range keys {
if store.isDeleted(key) {
			// We use []byte(key) instead of conv.UnsafeStrToBytes because we cannot
			// be sure whether the underlying store will retain the byteslice it is
			// handed. Once we get confirmation that .Delete is guaranteed not to
			// retain the byteslice, a read-only copy will be sufficient.
store.parent.Delete([]byte(key))
continue
}
cacheValue := store.cache[key]
if cacheValue.value != nil {
		// The key is not deleted and has a value, so write it to the parent.
store.parent.Set([]byte(key), cacheValue.value)
}
}
	// Clear the cache using the map clearing idiom rather than allocating
	// fresh maps.
	// Please see https://bencher.orijtech.com/perfclinic/mapclearing/
for key := range store.cache {
delete(store.cache, key)
}
for key := range store.deleted {
delete(store.deleted, key)
}
for key := range store.unsortedCache {
delete(store.unsortedCache, key)
}
store.sortedCache = dbm.NewMemDB()
}
// CacheWrap implements CacheWrapper.
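// Cache wraps can be nested to build discardable branches; a sketch with
// illustrative identifiers:
//
//	branch := store.CacheWrap() // another cachekv.Store layered on top of store
//	// ... speculative writes against branch ...
//	branch.Write() // commit into store; drop branch instead to discard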
func (store *Store) CacheWrap() types.CacheWrap {
return NewStore(store)
}
// CacheWrapWithTrace implements the CacheWrapper interface.
func (store *Store) CacheWrapWithTrace(w io.Writer, tc types.TraceContext) types.CacheWrap {
return NewStore(tracekv.NewStore(store, w, tc))
}
// CacheWrapWithListeners implements the CacheWrapper interface.
func (store *Store) CacheWrapWithListeners(storeKey types.StoreKey, listeners []types.WriteListener) types.CacheWrap {
return NewStore(listenkv.NewStore(store, storeKey, listeners))
}
//----------------------------------------
// Iteration
// Iterator implements types.KVStore.
func (store *Store) Iterator(start, end []byte) types.Iterator {
return store.iterator(start, end, true)
}
// ReverseIterator implements types.KVStore.
func (store *Store) ReverseIterator(start, end []byte) types.Iterator {
return store.iterator(start, end, false)
}
func (store *Store) iterator(start, end []byte, ascending bool) types.Iterator {
store.mtx.Lock()
defer store.mtx.Unlock()
var parent, cache types.Iterator
if ascending {
parent = store.parent.Iterator(start, end)
} else {
parent = store.parent.ReverseIterator(start, end)
}
store.dirtyItems(start, end)
cache = newMemIterator(start, end, store.sortedCache, store.deleted, ascending)
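	// The merged iterator overlays the cache on the parent: cached writes shadow
	// the parent's values and cached deletions hide them, so the caller observes
	// the store as if Write() had already been applied.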
return newCacheMergeIterator(parent, cache, ascending)
}
func findStartIndex(strL []string, startQ string) int {
	// Modified binary search to find the very first element >= startQ.
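	// For example (illustrative): for strL = ["a", "b", "b", "d"] and startQ = "b"
	// this returns 1; it returns -1 when every element is < startQ.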
if len(strL) == 0 {
return -1
}
var left, right, mid int
right = len(strL) - 1
for left <= right {
mid = (left + right) >> 1
midStr := strL[mid]
if midStr == startQ {
			// Handle the condition where there might be multiple values equal to startQ.
			// We are looking for the very last value < midStr, so that i+1 is the
			// first element >= midStr (i.e. the first occurrence of startQ).
for i := mid - 1; i >= 0; i-- {
if strL[i] != midStr {
return i + 1
}
}
return 0
}
if midStr < startQ {
left = mid + 1
		} else { // midStr > startQ
right = mid - 1
}
}
if left >= 0 && left < len(strL) && strL[left] >= startQ {
return left
}
return -1
}
func findEndIndex(strL []string, endQ string) int {
if len(strL) == 0 {
return -1
}
	// Modified binary search to find the index of the first element equal to
	// endQ or, if endQ is absent, the index of the last element < endQ.
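	// For example (illustrative): for strL = ["a", "b", "b", "d"] and endQ = "c"
	// this returns 2; for endQ = "b" it returns 1 (the first occurrence of "b").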
var left, right, mid int
right = len(strL) - 1
for left <= right {
mid = (left + right) >> 1
midStr := strL[mid]
if midStr == endQ {
			// Handle the condition where there might be multiple values equal to endQ.
			// We are looking for the very last value < midStr, so that i+1 is the
			// first occurrence of endQ.
for i := mid - 1; i >= 0; i-- {
if strL[i] < midStr {
return i + 1
}
}
return 0
}
if midStr < endQ {
left = mid + 1
		} else { // midStr > endQ
right = mid - 1
}
}
// Binary search failed, now let's find a value less than endQ.
for i := right; i >= 0; i-- {
if strL[i] < endQ {
return i
}
}
return -1
}
// sortState denotes whether the keys passed to clearUnsortedCacheSubset are
// already sorted in ascending order.
type sortState int

const (
	stateUnsorted sortState = iota
	stateAlreadySorted
)
// Constructs a slice of dirty items, to use w/ memIterator.
func (store *Store) dirtyItems(start, end []byte) {
startStr, endStr := conv.UnsafeBytesToStr(start), conv.UnsafeBytesToStr(end)
if startStr > endStr {
// Nothing to do here.
return
}
n := len(store.unsortedCache)
unsorted := make([]*kv.Pair, 0)
	// If the unsortedCache is too big, it costs too much to determine
	// what's in the subset we are concerned about.
	// If you are interleaving iterator calls with writes, this can easily become an
	// O(N^2) overhead.
	// Even without that, too many range checks eventually become more expensive
	// than just not having the cache.
if n < 1024 {
for key := range store.unsortedCache {
if dbm.IsKeyInDomain(conv.UnsafeStrToBytes(key), start, end) {
cacheValue := store.cache[key]
unsorted = append(unsorted, &kv.Pair{Key: []byte(key), Value: cacheValue.value})
}
}
store.clearUnsortedCacheSubset(unsorted, stateUnsorted)
return
}
	// Otherwise the cache is large, so perform a modified binary search to find
	// the range of keys that we should be looking for.
strL := make([]string, 0, n)
for key := range store.unsortedCache {
strL = append(strL, key)
}
sort.Strings(strL)
// Now find the values within the domain
// [start, end)
startIndex := findStartIndex(strL, startStr)
endIndex := findEndIndex(strL, endStr)
if endIndex < 0 {
endIndex = len(strL) - 1
}
if startIndex < 0 {
startIndex = 0
}
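	// At this point [startIndex, endIndex] brackets the domain. For example
	// (illustrative): with sorted keys ["a", "b", "c", "x"] and domain
	// ["b", "d"), startIndex is 1 and endIndex is 2, so only the pairs for
	// "b" and "c" are collected below.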
kvL := make([]*kv.Pair, 0)
for i := startIndex; i <= endIndex; i++ {
key := strL[i]
cacheValue := store.cache[key]
kvL = append(kvL, &kv.Pair{Key: []byte(key), Value: cacheValue.value})
}
// kvL was already sorted so pass it in as is.
store.clearUnsortedCacheSubset(kvL, stateAlreadySorted)
}
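// clearUnsortedCacheSubset removes the given key/value pairs from
// store.unsortedCache and merges them into store.sortedCache, sorting the
// batch first unless the caller reports it already arrived in key order.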
func (store *Store) clearUnsortedCacheSubset(unsorted []*kv.Pair, sortState sortState) {
n := len(store.unsortedCache)
if len(unsorted) == n { // This pattern allows the Go compiler to emit the map clearing idiom for the entire map.
for key := range store.unsortedCache {
delete(store.unsortedCache, key)
}
} else { // Otherwise, normally delete the unsorted keys from the map.
for _, pair := range unsorted { // `pair` avoids shadowing the imported kv package
delete(store.unsortedCache, conv.UnsafeBytesToStr(pair.Key))
}
}
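// NOTE: clearing the whole map via `for key := range m { delete(m, key) }`
// is recognized by the Go compiler (since Go 1.11) and lowered to a single
// internal map-clear operation, which is why the full-map case above is
// handled separately from deleting keys one at a time.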
if sortState == stateUnsorted {
sort.Slice(unsorted, func(i, j int) bool {
return bytes.Compare(unsorted[i].Key, unsorted[j].Key) < 0
})
}
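// Only pay for the sort when the caller could not guarantee that `unsorted`
// already arrived in ascending key order.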
for _, item := range unsorted {
if item.Value == nil {
// Deleted element, tracked by store.deleted; write an arbitrary
// non-nil placeholder so the key still appears in the sorted cache.
if err := store.sortedCache.Set(item.Key, []byte{}); err != nil {
panic(err)
}
continue
}
err := store.sortedCache.Set(item.Key, item.Value)
if err != nil {
panic(err)
}
}
}
//----------------------------------------
// etc
// setCacheValue is the only entrypoint to mutate store.cache.
func (store *Store) setCacheValue(key, value []byte, deleted bool, dirty bool) {
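// conv.UnsafeBytesToStr avoids copying key when building the map key string;
// it relies on key not being mutated after this call.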
keyStr := conv.UnsafeBytesToStr(key)
store.cache[keyStr] = &cValue{
value: value,
dirty: dirty,
}
if deleted {
store.deleted[keyStr] = struct{}{}
} else {
delete(store.deleted, keyStr)
}
if dirty {
store.unsortedCache[keyStr] = struct{}{}
}
}
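// isDeleted reports whether key has been marked as deleted in this cache layer.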
func (store *Store) isDeleted(key string) bool {
_, ok := store.deleted[key]
return ok
}
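// A minimal sketch of how the flags passed to setCacheValue drive isDeleted
// (hypothetical direct calls, shown only to illustrate the invariants; in
// practice these unexported helpers are invoked internally by the store):
//
//	store.setCacheValue(k, nil, true, true)  // deleted=true records k in store.deleted
//	_ = store.isDeleted(string(k))           // true
//	store.setCacheValue(k, v, false, true)   // deleted=false removes k from store.deleted
//	_ = store.isDeleted(string(k))           // false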