radiance/pkg/compactindex/build_test.go

package compactindex

import (
	"context"
	"encoding/binary"
	"errors"
	"io"
	"io/fs"
	"math"
	"math/rand"
	"os"
	"testing"
	"time"

	"github.com/stretchr/testify/assert"
	"github.com/stretchr/testify/require"
	"github.com/vbauerster/mpb/v8/decor"
)
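
// TestBuilder drives the full Builder lifecycle against a tiny three-bucket
// index: it inserts three entries, seals the index, checks the resulting file
// byte-for-byte, then re-opens it and verifies the parsed headers and entries.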
func TestBuilder(t *testing.T) {
	const numBuckets = 3
	const maxValue = math.MaxUint64

	// Create a table with 3 buckets.
	builder, err := NewBuilder("", numBuckets*targetEntriesPerBucket, maxValue)
	require.NoError(t, err)
	require.NotNil(t, builder)
	assert.Len(t, builder.buckets, 3)
	defer builder.Close()

	// Insert a few entries.
	require.NoError(t, builder.Insert([]byte("hello"), 1))
	require.NoError(t, builder.Insert([]byte("world"), 2))
	require.NoError(t, builder.Insert([]byte("blub"), 3))

	// Create index file.
	targetFile, err := os.CreateTemp("", "compactindex-final-")
	require.NoError(t, err)
	defer os.Remove(targetFile.Name())
	defer targetFile.Close()

	// Seal index.
	require.NoError(t, builder.Seal(context.TODO(), targetFile))

	// Assert binary content.
	buf, err := os.ReadFile(targetFile.Name())
	require.NoError(t, err)
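
	// Expected layout: an 8-byte magic, 8-byte max file size, 4-byte bucket
	// count, and 12 bytes of padding (32 bytes total), followed by three
	// 16-byte bucket headers, so entry data starts at offset 32+3*16 = 80 = 0x50.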
	assert.Equal(t, []byte{
		// --- File header
		// magic
		0x72, 0x64, 0x63, 0x65, 0x63, 0x69, 0x64, 0x78,
		// max file size
		0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
		// num buckets
		0x03, 0x00, 0x00, 0x00,
		// padding
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		// --- Bucket header 0
		// hash domain
		0x00, 0x00, 0x00, 0x00,
		// num entries
		0x01, 0x00, 0x00, 0x00,
		// hash len
		0x03,
		// padding
		0x00,
		// file offset
		0x50, 0x00, 0x00, 0x00, 0x00, 0x00,
		// --- Bucket header 1
		// hash domain
		0x00, 0x00, 0x00, 0x00,
		// num entries
		0x00, 0x00, 0x00, 0x00,
		// hash len
		0x03,
		// padding
		0x00,
		// file offset
		0x5b, 0x00, 0x00, 0x00, 0x00, 0x00,
		// --- Bucket header 2
		// hash domain
		0x00, 0x00, 0x00, 0x00,
		// num entries
		0x02, 0x00, 0x00, 0x00,
		// hash len
		0x03,
		// padding
		0x00,
		// file offset
		0x5b, 0x00, 0x00, 0x00, 0x00, 0x00,
		// --- Bucket 0
		// hash
		0xe2, 0xdb, 0x55,
		// value
		0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		// --- Bucket 2
		// hash
		0xe3, 0x09, 0x6b,
		// value
		0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		// hash
		0x92, 0xcd, 0xbb,
		// value
		0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
	}, buf)
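
	// Bucket 0 holds one 11-byte entry (3-byte hash + 8-byte value), so the
	// empty bucket 1 and bucket 2 both point at offset 91 = 0x5b; the sealed
	// file totals 113 bytes.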
	// Reset file offset.
	_, seekErr := targetFile.Seek(0, io.SeekStart)
	require.NoError(t, seekErr)

	// Open index.
	db, err := Open(targetFile)
	require.NoError(t, err, "Failed to open generated index")
	require.NotNil(t, db)

	// File header assertions.
	assert.Equal(t, Header{
		FileSize:   maxValue,
		NumBuckets: numBuckets,
	}, db.Header)

	// Get bucket handles.
	buckets := make([]*Bucket, numBuckets)
	for i := range buckets {
		buckets[i], err = db.GetBucket(uint(i))
		require.NoError(t, err)
	}

	// Ensure out-of-bounds bucket accesses fail.
	_, wantErr := db.GetBucket(numBuckets)
	assert.EqualError(t, wantErr, "out of bounds bucket index: 3 >= 3")

	// Bucket header assertions.
	assert.Equal(t, BucketDescriptor{
		BucketHeader: BucketHeader{
			HashDomain: 0x00,
			NumEntries: 1,
			HashLen:    3,
			FileOffset: 0x50,
		},
		Stride:      11, // 3 + 8
		OffsetWidth: 8,
	}, buckets[0].BucketDescriptor)
	assert.Equal(t, BucketHeader{
		HashDomain: 0x00,
		NumEntries: 0,
		HashLen:    3,
		FileOffset: 0x5b,
	}, buckets[1].BucketHeader)
	assert.Equal(t, BucketHeader{
		HashDomain: 0x00,
		NumEntries: 2,
		HashLen:    3,
		FileOffset: 0x5b,
	}, buckets[2].BucketHeader)

	// Test lookups.
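	// The two entries come back ordered by hash value (0x6b09e3 before
	// 0xbbcd92), not in insertion order.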
	entries, err := buckets[2].Load( /*batchSize*/ 4)
	require.NoError(t, err)
	assert.Equal(t, []Entry{
		{
			Hash:  0x6b09e3,
			Value: 3,
		},
		{
			Hash:  0xbbcd92,
			Value: 2,
		},
	}, entries)
}
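
// TestBuilder_Random stress-tests the builder with 500k sequential keys and
// random values, logs sealing and query throughput, and verifies that every
// inserted key can be looked up again from the sealed index.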
func TestBuilder_Random(t *testing.T) {
	if testing.Short() {
		t.Skip("Skipping long test")
	}

	const numKeys = uint(500000)
	const keySize = uint(16)
	const maxOffset = uint64(1000000)
	const queries = int(10000)

	// Create new builder session.
	builder, err := NewBuilder("", numKeys, maxOffset)
	require.NoError(t, err)
	require.NotNil(t, builder)
	require.NotEmpty(t, builder.buckets)

	// Ensure we cleaned up after ourselves.
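	// (Defers run last-in-first-out, so this check executes after the
	// deferred builder.Close() below has removed the temp directory.)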
	defer func() {
		_, statErr := os.Stat(builder.dir)
		assert.Truef(t, errors.Is(statErr, fs.ErrNotExist), "Delete failed: %v", statErr)
	}()
	defer builder.Close()

	// Insert items to temp buckets. Only the low 8 bytes of each 16-byte key
	// vary; values are drawn from [1, maxOffset) so the lookup assertions
	// below can require a strictly positive result.
	preInsert := time.Now()
	key := make([]byte, keySize)
	for i := uint(0); i < numKeys; i++ {
		binary.LittleEndian.PutUint64(key, uint64(i))
		err := builder.Insert(key, 1+uint64(rand.Int63n(int64(maxOffset-1))))
		require.NoError(t, err)
	}
	t.Logf("Inserted %d keys in %s", numKeys, time.Since(preInsert))

	// Create file for final index.
	targetFile, err := os.CreateTemp("", "compactindex-final-")
	require.NoError(t, err)
	defer os.Remove(targetFile.Name())
	defer targetFile.Close()

	// Seal to final index.
	preSeal := time.Now()
	sealErr := builder.Seal(context.TODO(), targetFile)
	require.NoError(t, sealErr, "Seal failed")
	t.Logf("Sealed in %s", time.Since(preSeal))

	// Print some stats.
	targetStat, err := targetFile.Stat()
	require.NoError(t, err)
	t.Logf("Index size: %d (% .2f)", targetStat.Size(), decor.SizeB1000(targetStat.Size()))
	t.Logf("Bytes per entry: %f", float64(targetStat.Size())/float64(numKeys))
	t.Logf("Indexing speed: %f/s", float64(numKeys)/time.Since(preInsert).Seconds())

	// Open index.
	_, seekErr := targetFile.Seek(0, io.SeekStart)
	require.NoError(t, seekErr)
	db, err := Open(targetFile)
	require.NoError(t, err, "Failed to open generated index")

	// Run query benchmark. Every key in [0, numKeys) was inserted above, so
	// each random lookup must succeed and return a nonzero value.
	preQuery := time.Now()
	for i := queries; i != 0; i-- {
		keyN := uint64(rand.Int63n(int64(numKeys)))
		binary.LittleEndian.PutUint64(key, keyN)
		bucket, err := db.LookupBucket(key)
		require.NoError(t, err)
		value, err := bucket.Lookup(key)
		require.NoError(t, err)
		require.True(t, value > 0)
	}
	t.Logf("Queried %d items", queries)
	t.Logf("Query speed: %f/s", float64(queries)/time.Since(preQuery).Seconds())
}