cosmos-sdk/store/cachekv/search_test.go

package cachekv

import "testing"

// TestFindStartIndex is a table-driven check of findStartIndex: every row
// supplies an already-sorted slice of keys plus a lookup key, and the index
// returned by findStartIndex must equal the row's expected index.
//
// The expected values in the table are consistent with "index of the first
// element that is >= the lookup key, or -1 when no such element exists".
func TestFindStartIndex(t *testing.T) {
	type row struct {
		label  string
		keys   []string
		lookup string
		expect int
	}

	rows := []row{
		{
			// "o" is absent; "u" at index 8 is the first key that sorts after it.
			label:  "non-existent value",
			keys:   []string{"a", "b", "c", "d", "e", "l", "m", "n", "u", "v", "w", "x", "y", "z"},
			lookup: "o",
			expect: 8,
		},
		{
			// A run of equal keys at the very front: the first one is expected.
			label:  "dupes start at index 0",
			keys:   []string{"a", "a", "a", "b", "c", "d", "e", "l", "m", "n", "u", "v", "w", "x", "y", "z"},
			lookup: "a",
			expect: 0,
		},
		{
			// A run of equal keys in the interior: the first of the run is expected.
			label:  "dupes start at non-index 0",
			keys:   []string{"a", "c", "c", "c", "c", "d", "e", "l", "m", "n", "u", "v", "w", "x", "y", "z"},
			lookup: "c",
			expect: 1,
		},
		{
			// The lookup key is the last element.
			label:  "at end",
			keys:   []string{"a", "e", "u", "v", "w", "x", "y", "z"},
			lookup: "z",
			expect: 7,
		},
		{
			// A run of equal keys at the tail: the first of the run is expected.
			label:  "dupes at end",
			keys:   []string{"a", "e", "u", "v", "w", "x", "y", "z", "z", "z", "z"},
			lookup: "z",
			expect: 7,
		},
		{
			// Every element equals the lookup key.
			label:  "entirely dupes",
			keys:   []string{"z", "z", "z", "z", "z"},
			lookup: "z",
			expect: 0,
		},
		{
			// "p" is absent but sorts before every element, so index 0 is expected.
			label:  "non-existent but within >=start",
			keys:   []string{"z", "z", "z", "z", "z"},
			lookup: "p",
			expect: 0,
		},
		{
			// "z" sorts after every element, so -1 is expected.
			label:  "non-existent and out of range",
			keys:   []string{"d", "e", "f", "g", "h"},
			lookup: "z",
			expect: -1,
		},
	}

	for i := range rows {
		// Per-iteration copy so the closure below never observes a later row.
		r := rows[i]
		t.Run(r.label, func(t *testing.T) {
			if idx := findStartIndex(r.keys, r.lookup); idx != r.expect {
				t.Fatalf("Got: %d, want: %d", idx, r.expect)
			}
		})
	}
}

// TestFindEndIndex is a table-driven check of findEndIndex: each case passes
// an already-sorted slice and a query string and compares the returned index
// with the expected one.
//
// The expected values in the table are consistent with: the index of the first
// occurrence when the query is present; otherwise the index of the last
// element that sorts before the query; and -1 when every element sorts after
// the query.
//
// Every subtest name is unique so that a failure report, or a -run filter,
// identifies exactly one table entry.
func TestFindEndIndex(t *testing.T) {
	tests := []struct {
		name    string
		sortedL []string
		query   string
		want    int
	}{
		{
			// "o" is absent; "n" at index 7 is the last key that sorts before it.
			name:    "non-existent value",
			sortedL: []string{"a", "b", "c", "d", "e", "l", "m", "n", "u", "v", "w", "x", "y", "z"},
			query:   "o",
			want:    7,
		},
		{
			// Run of equal keys at the front: the first of the run is expected.
			name:    "dupes start at index 0",
			sortedL: []string{"a", "a", "a", "b", "c", "d", "e", "l", "m", "n", "u", "v", "w", "x", "y", "z"},
			query:   "a",
			want:    0,
		},
		{
			// Run of equal keys in the interior: the first of the run is expected.
			name:    "dupes start at non-index 0",
			sortedL: []string{"a", "c", "c", "c", "c", "d", "e", "l", "m", "n", "u", "v", "w", "x", "y", "z"},
			query:   "c",
			want:    1,
		},
		{
			// The query is the last element.
			name:    "at end",
			sortedL: []string{"a", "e", "u", "v", "w", "x", "y", "z"},
			query:   "z",
			want:    7,
		},
		{
			// Run of equal keys at the tail: the first of the run is expected.
			name:    "dupes at end",
			sortedL: []string{"a", "e", "u", "v", "w", "x", "y", "z", "z", "z", "z"},
			query:   "z",
			want:    7,
		},
		{
			// Every element equals the query.
			name:    "entirely dupes",
			sortedL: []string{"z", "z", "z", "z", "z"},
			query:   "z",
			want:    0,
		},
		{
			// "p" sorts before every element, so -1 is expected.
			name:    "non-existent and out of range",
			sortedL: []string{"z", "z", "z", "z", "z"},
			query:   "p",
			want:    -1,
		},
		{
			// "z" is absent but sorts after every element, so the last index is
			// expected. Named to mirror "non-existent but within >=start" in
			// TestFindStartIndex rather than reusing the previous case's name.
			name:    "non-existent but within <=end",
			sortedL: []string{"d", "e", "f", "g", "h"},
			query:   "z",
			want:    4,
		},
	}

	for _, tt := range tests {
		// Per-iteration copy: required for closures on Go versions before 1.22.
		tt := tt
		t.Run(tt.name, func(t *testing.T) {
			got := findEndIndex(tt.sortedL, tt.query)
			if got != tt.want {
				t.Fatalf("Got: %d, want: %d", got, tt.want)
			}
		})
	}
}
fix!: store/cachekv: reduce growth factor for iterator ranging using binary searches (#10024) This change builds on the observation that the previous dbm.IsKeyInDomain check, which searches for [start, end), was performing too many byteslice comparisons. Instead we start off by sorting all the values in the store.unsortedCache, and then apply a modified binary search to look for values that fall within the domain [start, end). The procedure involves: * iterating over all items to build a list of all keys -- O(n) * invoking sort.Strings immediately, given that we would eventually invoke sort.Slice(unsorted, ...) anyway, which uses Quicksort -- O(nlog(n)) or O(n^2) worst case * invoking modified binary search which is O(log(n)) * 2 ~ O(log(n)) to search for the [start, end) range indices for a total approximate complexity of: Best case: O(n) + O(n(log(n))) + O(log(n)) ~= O(nlog(n)) Worst case: O(n) + O(n^2) + O(log(n)) ~= O(n^2) instead of previously: * iterating over all the unsorted items and invoking dbm.IsKeyInDomain: bytes.Compare ~ O(n) + O(nse) where s -- len(start), e -- len(end) for overall complexity of O(nse) * invoking sort.Slice(unsorted, ...) which uses Quicksort -- O(nlog(n)) or O(n^2) worst case for a total approximate complexity of: Best case: O(n) + O(nse) + O(nlog(n)) ~= O(nse) ~ O(n^2) Worst case: O(n) + O(nse) + O(n^2) ~= O(nse) ~ O(n^2) Ordinarily we'd combine the nse to be nm, but the comparisons between (start & key, end & key) are significant enough that it makes sense to keep them as separate factors. 
The overall benchmark results vindicate our choice of isolating the factors (nse) The benchmarks show that as the number of keys to iterate grows, the new code grows gracefully in a somewhat linear growth, notice for CAcheKVStoreIterator, when we go from: * 1,000 to 10,000 keys: 120us->1,600us (13X) old vs 95us->900us (9.47X) new * 50,000 to 100,000 keys: 19ms->100ms (5.3X) old vs 5.5ms->17ms (3X) new ```shell time/op GetValidator-8 5.8ms ± 2% 4.7ms ± 1% -17.69% (p=0.000 n=10+10) OneBankSendTxPerBlock-8 3.2ms ± 2% 2.8ms ± 1% -10.80% (p=0.000 n=7+10) OneBankMultiSendTxPerBlock-8 3.1ms ± 3% 2.9ms ± 2% -8.36% (p=0.000 n=10+10) AccountMapperSetAccount-8 8.6µs ± 1% 7.8µs ± 1% -9.74% (p=0.000 n=10+10) CacheKVStoreIterator500-8 64µs ± 6% 51µs ± 6% -19.22% (p=0.000 n=10+9) CacheKVStoreIterator1000-8 0.12ms ± 4% 95µs ± 4% -19.55% (p=0.000 n=10+10) CacheKVStoreIterator10000-8 1.6ms ± 4% 0.90ms ± 1% -42.11% (p=0.000 n=10+10) CacheKVStoreIterator50000-8 19ms ± 5% 5.5ms ± 1% -71.35% (p=0.000 n=10+10) CacheKVStoreIterator100000-8 0.10s ± 23% 17ms ± 7% -83.44% (p=0.000 n=10+10) CacheKVStoreGetNoKeyFound-8 1.3µs ± 6% 0.90µs ± 3% -31.19% (p=0.000 n=9+9) CacheKVStoreGetKeyFound-8 0.66µs ± 6% 0.56µs ± 2% -14.81% (p=0.000 n=10+9) alloc/op B/op BlockProvision-8 0.11kB ± 0% 0.10kB ± 0% -7.14% (p=0.000 n=10+10) CacheKVStoreIterator50000-8 0.89MB ± 6% 0.53MB ± 1% -40.85% (p=0.000 n=10+10) CacheKVStoreIterator100000-8 6.3MB ± 23% 1.6MB ± 6% -74.17% (p=0.000 n=10+10) CacheKVStoreGetNoKeyFound-8 0.26kB ± 0% 0.23kB ± 1% -11.53% (p=0.000 n=10+8) allocs/op (count) AccountMapperSetAccount-8 42 ± 0% 38 ± 0% -9.52% (p=0.000 n=10+10) BlockProvision-8 6.0 ± 0% 5.0 ± 0% -16.67% (p=0.000 n=10+10) CacheKVStoreIterator1000-8 14 ± 0% 13 ± 0% -7.14% (p=0.002 n=8+10) CacheKVStoreIterator10000-8 0.15k ± 2% 76 ± 1% -49.00% (p=0.000 n=7+10) CacheKVStoreIterator50000-8 8.9k ± 11% 2.0k ± 2% -77.60% (p=0.000 n=10+10) CacheKVStoreIterator100000-8 0.10M ± 26% 13k ± 12% -86.89% (p=0.000 n=10+10) 
CacheKVStoreGetNoKeyFound-8 5.0 ± 0% 4.0 ± 0% -20.00% (p=0.000 n=10+10) ``` Note: Purposefully using a commit off master that doesn't include the buggy code that caused x/bank.BenchmarkOneBank* to fail per issue https://github.com/cosmos/cosmos-sdk/issues/10023 Updates #9876 /cc @cuonglm @kirbyquerby <!-- The default pull request template is for types feat, fix, or refactor. For other templates, add one of the following parameters to the url: - template=docs.md - template=other.md --> ## Description Closes: #XXXX <!-- Add a description of the changes that this PR introduces and the files that are the most critical to review. --> --- ### Author Checklist All items are required. Please add a note to the item if the item is not applicable and please add links to any relevant follow up issues. I have... - [x] included the correct [type prefix](https://github.com/commitizen/conventional-commit-types/blob/v3.0.0/index.json) in the PR title - [x] added `!` to the type prefix if API or client breaking change - [x] targeted the correct branch (see [PR Targeting](https://github.com/cosmos/cosmos-sdk/blob/master/CONTRIBUTING.md#pr-targeting)) - [x] provided a link to the relevant issue or specification - [x] followed the guidelines for [building modules](https://github.com/cosmos/cosmos-sdk/blob/master/docs/building-modules) - [x] included the necessary unit and integration [tests](https://github.com/cosmos/cosmos-sdk/blob/master/CONTRIBUTING.md#testing) - [ ] added a changelog entry to `CHANGELOG.md` - [x] included comments for [documenting Go code](https://blog.golang.org/godoc) - [ ] updated the relevant documentation or specification - [ ] reviewed "Files changed" and left comments if necessary - [ ] confirmed all CI checks have passed ### Reviewers Checklist All items are required. Please add a note if the item is not applicable and please add your handle next to the items reviewed if you only reviewed selected items. I have... 
- [ ] confirmed the correct [type prefix](https://github.com/commitizen/conventional-commit-types/blob/v3.0.0/index.json) in the PR title - [ ] confirmed `!` in the type prefix if API or client breaking change - [ ] confirmed all author checklist items have been addressed - [ ] reviewed state machine logic - [ ] reviewed API design and naming - [ ] reviewed documentation is accurate - [ ] reviewed tests and test coverage - [ ] manually tested (if applicable) 2021-10-14 14:58:25 -07:00			`package cachekv`

			`import "testing"`

			`func TestFindStartIndex(t *testing.T) {`
			`tests := []struct {`
			`name string`
			`sortedL []string`
			`query string`
			`want int`
			`}{`
			`{`
			`name: "non-existent value",`
			`sortedL: []string{"a", "b", "c", "d", "e", "l", "m", "n", "u", "v", "w", "x", "y", "z"},`
			`query: "o",`
			`want: 8,`
			`},`
			`{`
			`name: "dupes start at index 0",`
			`sortedL: []string{"a", "a", "a", "b", "c", "d", "e", "l", "m", "n", "u", "v", "w", "x", "y", "z"},`
			`query: "a",`
			`want: 0,`
			`},`
			`{`
			`name: "dupes start at non-index 0",`
			`sortedL: []string{"a", "c", "c", "c", "c", "d", "e", "l", "m", "n", "u", "v", "w", "x", "y", "z"},`
			`query: "c",`
			`want: 1,`
			`},`
			`{`
			`name: "at end",`
			`sortedL: []string{"a", "e", "u", "v", "w", "x", "y", "z"},`
			`query: "z",`
			`want: 7,`
			`},`
			`{`
			`name: "dupes at end",`
			`sortedL: []string{"a", "e", "u", "v", "w", "x", "y", "z", "z", "z", "z"},`
			`query: "z",`
			`want: 7,`
			`},`
			`{`
			`name: "entirely dupes",`
			`sortedL: []string{"z", "z", "z", "z", "z"},`
			`query: "z",`
			`want: 0,`
			`},`
			`{`
			`name: "non-existent but within >=start",`
			`sortedL: []string{"z", "z", "z", "z", "z"},`
			`query: "p",`
			`want: 0,`
			`},`
			`{`
			`name: "non-existent and out of range",`
			`sortedL: []string{"d", "e", "f", "g", "h"},`
			`query: "z",`
			`want: -1,`
			`},`
			`}`

			`for _, tt := range tests {`
			`tt := tt`
			`t.Run(tt.name, func(t *testing.T) {`
			`body := tt.sortedL`
			`got := findStartIndex(body, tt.query)`
			`if got != tt.want {`
			`t.Fatalf("Got: %d, want: %d", got, tt.want)`
			`}`
			`})`
			`}`
			`}`

			`func TestFindEndIndex(t *testing.T) {`
			`tests := []struct {`
			`name string`
			`sortedL []string`
			`query string`
			`want int`
			`}{`
			`{`
			`name: "non-existent value",`
			`sortedL: []string{"a", "b", "c", "d", "e", "l", "m", "n", "u", "v", "w", "x", "y", "z"},`
			`query: "o",`
			`want: 7,`
			`},`
			`{`
			`name: "dupes start at index 0",`
			`sortedL: []string{"a", "a", "a", "b", "c", "d", "e", "l", "m", "n", "u", "v", "w", "x", "y", "z"},`
			`query: "a",`
			`want: 0,`
			`},`
			`{`
			`name: "dupes start at non-index 0",`
			`sortedL: []string{"a", "c", "c", "c", "c", "d", "e", "l", "m", "n", "u", "v", "w", "x", "y", "z"},`
			`query: "c",`
			`want: 1,`
			`},`
			`{`
			`name: "at end",`
			`sortedL: []string{"a", "e", "u", "v", "w", "x", "y", "z"},`
			`query: "z",`
			`want: 7,`
			`},`
			`{`
			`name: "dupes at end",`
			`sortedL: []string{"a", "e", "u", "v", "w", "x", "y", "z", "z", "z", "z"},`
			`query: "z",`
			`want: 7,`
			`},`
			`{`
			`name: "entirely dupes",`
			`sortedL: []string{"z", "z", "z", "z", "z"},`
			`query: "z",`
			`want: 0,`
			`},`
			`{`
			`name: "non-existent and out of range",`
			`sortedL: []string{"z", "z", "z", "z", "z"},`
			`query: "p",`
			`want: -1,`
			`},`
			`{`
			`name: "non-existent and out of range",`
			`sortedL: []string{"d", "e", "f", "g", "h"},`
			`query: "z",`
			`want: 4,`
			`},`
			`}`

			`for _, tt := range tests {`
			`tt := tt`
			`t.Run(tt.name, func(t *testing.T) {`
			`body := tt.sortedL`
			`got := findEndIndex(body, tt.query)`
			`if got != tt.want {`
			`t.Fatalf("Got: %d, want: %d", got, tt.want)`
			`}`
			`})`
			`}`
			`}`