store/cachekv, x/bank/types: algorithmically fix pathologically slow code (#8719)
After continuously profiling InitGensis with 100K accounts, it showed pathologically slow code, that was the result of a couple of patterns: * Unconditional and not always necessary map lookups * O(n^2) sdk.AccAddressFromBech32 retrievals when the code is expensive, during a quicksort The remedy involved 4 parts: * O(n) sdk.AccAddressFromBech32 invocations, down from O(n^2) in the quicksort * Only doing map lookups when the domain key check has passed * Using a black magic compiler technique of the map clearing idiom * Zero allocation []byte<->string conversion With 100K accounts, this brings InitGenesis down to ~6min, instead of 20+min, it reduces the sort code from ~7sec down to 50ms. Also some simple benchmark reflect the change: ```shell name old time/op new time/op delta SanitizeBalances500-8 19.3ms ±10% 1.5ms ± 5% -92.46% (p=0.000 n=20+20) SanitizeBalances1000-8 41.9ms ± 8% 3.0ms ±12% -92.92% (p=0.000 n=20+20) name old alloc/op new alloc/op delta SanitizeBalances500-8 9.05MB ± 6% 0.56MB ± 0% -93.76% (p=0.000 n=20+18) SanitizeBalances1000-8 20.2MB ± 3% 1.1MB ± 0% -94.37% (p=0.000 n=20+19) name old allocs/op new allocs/op delta SanitizeBalances500-8 72.4k ± 6% 4.5k ± 0% -93.76% (p=0.000 n=20+20) SanitizeBalances1000-8 162k ± 3% 9k ± 0% -94.40% (p=0.000 n=20+20) ``` The CPU profiles show the radical change as per https://github.com/cosmos/cosmos-sdk/issues/7766#issuecomment-786671734 Later on, we shall do more profiling and fixes but for now this brings down the run-time for InitGenesis. Fixes #7766 Co-authored-by: Alessio Treglia <alessio@tendermint.com>
This commit is contained in:
parent
68e7a3adf7
commit
dbb9923917
|
@ -66,6 +66,7 @@ Ref: https://keepachangelog.com/en/1.0.0/
|
|||
* (x/bank) [\#8614](https://github.com/cosmos/cosmos-sdk/issues/8614) Add `Name` and `Symbol` fields to denom metadata
|
||||
* (x/auth) [\#8522](https://github.com/cosmos/cosmos-sdk/pull/8522) Allow to query all stored accounts
|
||||
* (crypto/types) [\#8600](https://github.com/cosmos/cosmos-sdk/pull/8600) `CompactBitArray`: optimize the `NumTrueBitsBefore` method and add an `Equal` method.
|
||||
* (store/cachekv), (x/bank/types) [\#8719](https://github.com/cosmos/cosmos-sdk/pull/8719) algorithmically fix pathologically slow code
|
||||
|
||||
|
||||
|
||||
|
|
|
@ -4,9 +4,11 @@ import (
|
|||
"bytes"
|
||||
"container/list"
|
||||
"io"
|
||||
"reflect"
|
||||
"sort"
|
||||
"sync"
|
||||
"time"
|
||||
"unsafe"
|
||||
|
||||
dbm "github.com/tendermint/tm-db"
|
||||
|
||||
|
@ -177,18 +179,48 @@ func (store *Store) iterator(start, end []byte, ascending bool) types.Iterator {
|
|||
return newCacheMergeIterator(parent, cache, ascending)
|
||||
}
|
||||
|
||||
// strToByte is meant to make a zero allocation conversion
|
||||
// from string -> []byte to speed up operations, it is not meant
|
||||
// to be used generally, but for a specific pattern to check for available
|
||||
// keys within a domain.
|
||||
func strToByte(s string) []byte {
|
||||
var b []byte
|
||||
hdr := (*reflect.SliceHeader)(unsafe.Pointer(&b))
|
||||
hdr.Cap = len(s)
|
||||
hdr.Len = len(s)
|
||||
hdr.Data = (*reflect.StringHeader)(unsafe.Pointer(&s)).Data
|
||||
return b
|
||||
}
|
||||
|
||||
// byteSliceToStr is meant to make a zero allocation conversion
|
||||
// from []byte -> string to speed up operations, it is not meant
|
||||
// to be used generally, but for a specific pattern to delete keys
|
||||
// from a map.
|
||||
func byteSliceToStr(b []byte) string {
|
||||
hdr := (*reflect.StringHeader)(unsafe.Pointer(&b))
|
||||
return *(*string)(unsafe.Pointer(hdr))
|
||||
}
|
||||
|
||||
// Constructs a slice of dirty items, to use w/ memIterator.
|
||||
func (store *Store) dirtyItems(start, end []byte) {
|
||||
unsorted := make([]*kv.Pair, 0)
|
||||
|
||||
n := len(store.unsortedCache)
|
||||
for key := range store.unsortedCache {
|
||||
cacheValue := store.cache[key]
|
||||
|
||||
if dbm.IsKeyInDomain([]byte(key), start, end) {
|
||||
if dbm.IsKeyInDomain(strToByte(key), start, end) {
|
||||
cacheValue := store.cache[key]
|
||||
unsorted = append(unsorted, &kv.Pair{Key: []byte(key), Value: cacheValue.value})
|
||||
}
|
||||
}
|
||||
|
||||
if len(unsorted) == n { // This pattern allows the Go compiler to emit the map clearing idiom for the entire map.
|
||||
for key := range store.unsortedCache {
|
||||
delete(store.unsortedCache, key)
|
||||
}
|
||||
} else { // Otherwise, normally delete the unsorted keys from the map.
|
||||
for _, kv := range unsorted {
|
||||
delete(store.unsortedCache, byteSliceToStr(kv.Key))
|
||||
}
|
||||
}
|
||||
|
||||
sort.Slice(unsorted, func(i, j int) bool {
|
||||
|
|
|
@ -62,16 +62,47 @@ func (b Balance) Validate() error {
|
|||
|
||||
// SanitizeGenesisBalances sorts addresses and coin sets.
|
||||
func SanitizeGenesisBalances(balances []Balance) []Balance {
|
||||
sort.Slice(balances, func(i, j int) bool {
|
||||
addr1, _ := sdk.AccAddressFromBech32(balances[i].Address)
|
||||
addr2, _ := sdk.AccAddressFromBech32(balances[j].Address)
|
||||
return bytes.Compare(addr1.Bytes(), addr2.Bytes()) < 0
|
||||
})
|
||||
// Given that this function sorts balances, using the standard library's
|
||||
// Quicksort based algorithms, we have algorithmic complexities of:
|
||||
// * Best case: O(nlogn)
|
||||
// * Worst case: O(n^2)
|
||||
// The comparator used MUST be cheap to use lest we incur expenses like we had
|
||||
// before whereby sdk.AccAddressFromBech32, which is a very expensive operation
|
||||
// compared n * n elements yet discarded computations each time, as per:
|
||||
// https://github.com/cosmos/cosmos-sdk/issues/7766#issuecomment-786671734
|
||||
// with this change the first step is to extract out and singly produce the values
|
||||
// that'll be used for comparisons and keep them cheap and fast.
|
||||
|
||||
// 1. Retrieve the byte equivalents for each Balance's address and maintain a mapping of
|
||||
// its Balance, as the mapper will be used in sorting.
|
||||
type addrToBalance struct {
|
||||
// We use a pointer here to avoid averse effects of value copying
|
||||
// wasting RAM all around.
|
||||
balance *Balance
|
||||
accAddr []byte
|
||||
}
|
||||
adL := make([]*addrToBalance, 0, len(balances))
|
||||
for _, balance := range balances {
|
||||
balance.Coins = balance.Coins.Sort()
|
||||
balance := balance
|
||||
addr, _ := sdk.AccAddressFromBech32(balance.Address)
|
||||
adL = append(adL, &addrToBalance{
|
||||
balance: &balance,
|
||||
accAddr: []byte(addr),
|
||||
})
|
||||
}
|
||||
|
||||
// 2. Sort with the cheap mapping, using the mapper's
|
||||
// already accAddr bytes values which is a cheaper operation.
|
||||
sort.Slice(adL, func(i, j int) bool {
|
||||
ai, aj := adL[i], adL[j]
|
||||
return bytes.Compare(ai.accAddr, aj.accAddr) < 0
|
||||
})
|
||||
|
||||
// 3. To complete the sorting, we have to now just insert
|
||||
// back the balances in the order returned by the sort.
|
||||
for i, ad := range adL {
|
||||
balances[i] = *ad.balance
|
||||
}
|
||||
return balances
|
||||
}
|
||||
|
||||
|
|
|
@ -1,10 +1,12 @@
|
|||
package types_test
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"testing"
|
||||
|
||||
"github.com/stretchr/testify/require"
|
||||
|
||||
"github.com/cosmos/cosmos-sdk/crypto/keys/ed25519"
|
||||
sdk "github.com/cosmos/cosmos-sdk/types"
|
||||
bank "github.com/cosmos/cosmos-sdk/x/bank/types"
|
||||
)
|
||||
|
@ -101,3 +103,79 @@ func TestBalance_GetAddress(t *testing.T) {
|
|||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestSanitizeBalances(t *testing.T) {
|
||||
// 1. Generate balances
|
||||
tokens := sdk.TokensFromConsensusPower(81)
|
||||
coin := sdk.NewCoin("benchcoin", tokens)
|
||||
coins := sdk.Coins{coin}
|
||||
addrs, _ := makeRandomAddressesAndPublicKeys(20)
|
||||
|
||||
var balances []bank.Balance
|
||||
for _, addr := range addrs {
|
||||
balances = append(balances, bank.Balance{
|
||||
Address: addr.String(),
|
||||
Coins: coins,
|
||||
})
|
||||
}
|
||||
// 2. Sort the values.
|
||||
sorted := bank.SanitizeGenesisBalances(balances)
|
||||
|
||||
// 3. Compare and ensure that all the values are sorted in ascending order.
|
||||
// Invariant after sorting:
|
||||
// a[i] <= a[i+1...n]
|
||||
for i := 0; i < len(sorted); i++ {
|
||||
ai := sorted[i]
|
||||
// Ensure that every single value that comes after i is less than it.
|
||||
for j := i + 1; j < len(sorted); j++ {
|
||||
aj := sorted[j]
|
||||
|
||||
if got := bytes.Compare(ai.GetAddress(), aj.GetAddress()); got > 0 {
|
||||
t.Errorf("Balance(%d) > Balance(%d)", i, j)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func makeRandomAddressesAndPublicKeys(n int) (accL []sdk.AccAddress, pkL []*ed25519.PubKey) {
|
||||
for i := 0; i < n; i++ {
|
||||
pk := ed25519.GenPrivKey().PubKey().(*ed25519.PubKey)
|
||||
pkL = append(pkL, pk)
|
||||
accL = append(accL, sdk.AccAddress(pk.Address()))
|
||||
}
|
||||
return accL, pkL
|
||||
}
|
||||
|
||||
var sink, revert interface{}
|
||||
|
||||
func BenchmarkSanitizeBalances500(b *testing.B) {
|
||||
benchmarkSanitizeBalances(b, 500)
|
||||
}
|
||||
|
||||
func BenchmarkSanitizeBalances1000(b *testing.B) {
|
||||
benchmarkSanitizeBalances(b, 1000)
|
||||
}
|
||||
|
||||
func benchmarkSanitizeBalances(b *testing.B, nAddresses int) {
|
||||
b.ReportAllocs()
|
||||
tokens := sdk.TokensFromConsensusPower(81)
|
||||
coin := sdk.NewCoin("benchcoin", tokens)
|
||||
coins := sdk.Coins{coin}
|
||||
addrs, _ := makeRandomAddressesAndPublicKeys(nAddresses)
|
||||
|
||||
b.ResetTimer()
|
||||
for i := 0; i < b.N; i++ {
|
||||
var balances []bank.Balance
|
||||
for _, addr := range addrs {
|
||||
balances = append(balances, bank.Balance{
|
||||
Address: addr.String(),
|
||||
Coins: coins,
|
||||
})
|
||||
}
|
||||
sink = bank.SanitizeGenesisBalances(balances)
|
||||
}
|
||||
if sink == nil {
|
||||
b.Fatal("Benchmark did not run")
|
||||
}
|
||||
sink = revert
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue