251 lines
5.9 KiB
Go
251 lines
5.9 KiB
Go
package compactindex
|
|
|
|
import (
|
|
"context"
|
|
"encoding/binary"
|
|
"errors"
|
|
"io"
|
|
"io/fs"
|
|
"math"
|
|
"math/rand"
|
|
"os"
|
|
"testing"
|
|
"time"
|
|
|
|
"github.com/stretchr/testify/assert"
|
|
"github.com/stretchr/testify/require"
|
|
"github.com/vbauerster/mpb/v8/decor"
|
|
)
|
|
|
|
// TestBuilder exercises the full build → seal → open → read cycle on a tiny
// three-bucket index and pins the exact on-disk byte layout produced by Seal.
// If the file format changes, the golden byte slice below must be updated in
// lockstep.
func TestBuilder(t *testing.T) {
	const numBuckets = 3
	const maxValue = math.MaxUint64

	// Create a table with 3 buckets.
	builder, err := NewBuilder("", numBuckets*targetEntriesPerBucket, maxValue)
	require.NoError(t, err)
	require.NotNil(t, builder)
	assert.Len(t, builder.buckets, 3)
	defer builder.Close()

	// Insert a few entries.
	require.NoError(t, builder.Insert([]byte("hello"), 1))
	require.NoError(t, builder.Insert([]byte("world"), 2))
	require.NoError(t, builder.Insert([]byte("blub"), 3))

	// Create index file.
	targetFile, err := os.CreateTemp("", "compactindex-final-")
	require.NoError(t, err)
	defer os.Remove(targetFile.Name())
	defer targetFile.Close()

	// Seal index.
	require.NoError(t, builder.Seal(context.TODO(), targetFile))

	// Assert binary content.
	// Golden file: header, three bucket headers, then the bucket entry lists.
	// The hash/value pairs below are the hashes of "hello"/"world"/"blub"
	// truncated to 3 bytes, paired with their 8-byte little-endian values.
	buf, err := os.ReadFile(targetFile.Name())
	require.NoError(t, err)
	assert.Equal(t, []byte{
		// --- File header
		// magic
		0x72, 0x64, 0x63, 0x65, 0x63, 0x69, 0x64, 0x78,
		// max file size
		0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
		// num buckets
		0x03, 0x00, 0x00, 0x00,
		// padding
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,

		// --- Bucket header 0
		// hash domain
		0x00, 0x00, 0x00, 0x00,
		// num entries
		0x01, 0x00, 0x00, 0x00,
		// hash len
		0x03,
		// padding
		0x00,
		// file offset
		0x50, 0x00, 0x00, 0x00, 0x00, 0x00,

		// --- Bucket header 1
		// hash domain
		0x00, 0x00, 0x00, 0x00,
		// num entries
		0x00, 0x00, 0x00, 0x00,
		// hash len
		0x03,
		// padding
		0x00,
		// file offset
		0x5b, 0x00, 0x00, 0x00, 0x00, 0x00,

		// --- Bucket header 2
		// hash domain
		0x00, 0x00, 0x00, 0x00,
		// num entries
		0x02, 0x00, 0x00, 0x00,
		// hash len
		0x03,
		// padding
		0x00,
		// file offset
		0x5b, 0x00, 0x00, 0x00, 0x00, 0x00,

		// --- Bucket 0
		// hash
		0xe2, 0xdb, 0x55,
		// value
		0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,

		// --- Bucket 2
		// hash
		0xe3, 0x09, 0x6b,
		// value
		0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		// hash
		0x92, 0xcd, 0xbb,
		// value
		0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
	}, buf)

	// Reset file offset so Open reads from the start of the sealed file.
	_, seekErr := targetFile.Seek(0, io.SeekStart)
	require.NoError(t, seekErr)

	// Open index.
	db, err := Open(targetFile)
	require.NoError(t, err, "Failed to open generated index")
	require.NotNil(t, db)

	// File header assertions.
	assert.Equal(t, Header{
		FileSize:   maxValue,
		NumBuckets: numBuckets,
	}, db.Header)

	// Get bucket handles.
	buckets := make([]*Bucket, numBuckets)
	for i := range buckets {
		buckets[i], err = db.GetBucket(uint(i))
		require.NoError(t, err)
	}

	// Ensure out-of-bounds bucket accesses fail.
	_, wantErr := db.GetBucket(numBuckets)
	assert.EqualError(t, wantErr, "out of bounds bucket index: 3 >= 3")

	// Bucket header assertions. These offsets must agree with the golden
	// bytes above (0x50 = first entry list, 0x5b = second).
	assert.Equal(t, BucketDescriptor{
		BucketHeader: BucketHeader{
			HashDomain: 0x00,
			NumEntries: 1,
			HashLen:    3,
			FileOffset: 0x50,
		},
		Stride:      11, // 3 + 8
		OffsetWidth: 8,
	}, buckets[0].BucketDescriptor)
	assert.Equal(t, BucketHeader{
		HashDomain: 0x00,
		NumEntries: 0,
		HashLen:    3,
		FileOffset: 0x5b,
	}, buckets[1].BucketHeader)
	assert.Equal(t, BucketHeader{
		HashDomain: 0x00,
		NumEntries: 2,
		HashLen:    3,
		FileOffset: 0x5b,
	}, buckets[2].BucketHeader)

	// Test lookups. Entry hashes are the little-endian decoding of the
	// 3-byte hashes in the golden file above.
	entries, err := buckets[2].Load( /*batchSize*/ 4)
	require.NoError(t, err)
	assert.Equal(t, []Entry{
		{
			Hash:  0x6b09e3,
			Value: 3,
		},
		{
			Hash:  0xbbcd92,
			Value: 2,
		},
	}, entries)
}
|
|
|
|
func TestBuilder_Random(t *testing.T) {
|
|
if testing.Short() {
|
|
t.Skip("Skipping long test")
|
|
}
|
|
|
|
const numKeys = uint(500000)
|
|
const keySize = uint(16)
|
|
const maxOffset = uint64(1000000)
|
|
const queries = int(10000)
|
|
|
|
// Create new builder session.
|
|
builder, err := NewBuilder("", numKeys, maxOffset)
|
|
require.NoError(t, err)
|
|
require.NotNil(t, builder)
|
|
require.NotEmpty(t, builder.buckets)
|
|
|
|
// Ensure we cleaned up after ourselves.
|
|
defer func() {
|
|
_, statErr := os.Stat(builder.dir)
|
|
assert.Truef(t, errors.Is(statErr, fs.ErrNotExist), "Delete failed: %v", statErr)
|
|
}()
|
|
defer builder.Close()
|
|
|
|
// Insert items to temp buckets.
|
|
preInsert := time.Now()
|
|
key := make([]byte, keySize)
|
|
for i := uint(0); i < numKeys; i++ {
|
|
binary.LittleEndian.PutUint64(key, uint64(i))
|
|
err := builder.Insert(key, uint64(rand.Int63n(int64(maxOffset))))
|
|
require.NoError(t, err)
|
|
}
|
|
t.Logf("Inserted %d keys in %s", numKeys, time.Since(preInsert))
|
|
|
|
// Create file for final index.
|
|
targetFile, err := os.CreateTemp("", "compactindex-final-")
|
|
require.NoError(t, err)
|
|
defer os.Remove(targetFile.Name())
|
|
defer targetFile.Close()
|
|
|
|
// Seal to final index.
|
|
preSeal := time.Now()
|
|
sealErr := builder.Seal(context.TODO(), targetFile)
|
|
require.NoError(t, sealErr, "Seal failed")
|
|
t.Logf("Sealed in %s", time.Since(preSeal))
|
|
|
|
// Print some stats.
|
|
targetStat, err := targetFile.Stat()
|
|
require.NoError(t, err)
|
|
t.Logf("Index size: %d (% .2f)", targetStat.Size(), decor.SizeB1000(targetStat.Size()))
|
|
t.Logf("Bytes per entry: %f", float64(targetStat.Size())/float64(numKeys))
|
|
t.Logf("Indexing speed: %f/s", float64(numKeys)/time.Since(preInsert).Seconds())
|
|
|
|
// Open index.
|
|
_, seekErr := targetFile.Seek(0, io.SeekStart)
|
|
require.NoError(t, seekErr)
|
|
db, err := Open(targetFile)
|
|
require.NoError(t, err, "Failed to open generated index")
|
|
|
|
// Run query benchmark.
|
|
preQuery := time.Now()
|
|
for i := queries; i != 0; i-- {
|
|
keyN := uint64(rand.Int63n(int64(numKeys)))
|
|
binary.LittleEndian.PutUint64(key, keyN)
|
|
|
|
bucket, err := db.LookupBucket(key)
|
|
require.NoError(t, err)
|
|
|
|
value, err := bucket.Lookup(key)
|
|
require.NoError(t, err)
|
|
require.True(t, value > 0)
|
|
}
|
|
t.Logf("Queried %d items", queries)
|
|
t.Logf("Query speed: %f/s", float64(queries)/time.Since(preQuery).Seconds())
|
|
}
|