// Package compactindex implements an immutable hash table index format inspired by djb's constant database (cdb).
//
// # Design
//
// Compactindex is used to create secondary indexes over arbitrary flat files.
// Each index is a single, immutable flat file.
//
// Index files consist of a space-optimized and query-optimized key-value-like table.
//
// Instead of storing actual keys, the format stores FKS dynamic perfect hashes.
// And instead of storing values, the format contains offsets into some file.
//
// As a result, the database effectively only supports two operations, similarly to cdb.
// (Note that the actual Go interface is a bit more flexible).
//
//	func Create(kv map[[]byte]uint64) *Index
//	func (*Index) Lookup(key []byte) (value uint64, exist bool)
//
// # Buckets
//
// The set of items is split into buckets of approx 10000 records.
// The number of buckets is unlimited.
//
// The key-to-bucket assignment is determined by xxHash64 using uniform discrete hashing over the key space.
//
// The index file header also mentions the number of buckets and the file offset of each bucket.
//
// # Tables
//
// Each bucket contains a table of entries, indexed by a collision-free hash function.
//
// The hash function used in the entry table is xxHash.
// A 32-bit hash domain is prefixed to mine collision-free sets of hashes (FKS scheme).
// This hash domain is also recorded at the bucket header.
//
// Each bucket entry is a constant-size record consisting of a 3-byte hash and an offset to the value.
// The size of the offset integer is the minimal byte-aligned integer width that can represent the target file size.
//
// # Querying
//
// The query interface (DB) is backend-agnostic, supporting any storage medium that provides random reads.
// To name a few: Memory buffers, local files, arbitrary embedded buffers, HTTP range requests, plan9, etc...
//
// The DB struct itself performs zero memory allocations, and consequently does not cache.
// It is therefore recommended to provide an io.ReaderAt backed by a cache to improve performance.
//
// Given a key, the query strategy is simple:
//
//  1. Hash key to bucket using global hash function
//  2. Retrieve bucket offset from bucket header table
//  3. Hash key to entry using per-bucket hash function
//  4. Search for entry in bucket (binary search)
//
// The search strategy for locating entries in buckets can be adjusted to fit the latency/bandwidth profile of the underlying storage medium.
//
// For example, the fastest lookup strategy in memory is a binary search retrieving double cache lines at a time.
// When doing range requests against high-latency remote storage (e.g. S3 buckets),
// it is typically faster to retrieve and scan through large parts of a bucket (multiple kilobytes) at once.
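//
// Schematically, a lookup composes the primitives defined in this file.
// The sketch below is illustrative only; the I/O helpers named here are
// hypothetical, and the real entry point is the DB query interface:
//
//	bucket := header.BucketHash(key)              // 1. key -> bucket index
//	desc := readBucketDescriptor(r, bucket)       // 2. (hypothetical I/O helper)
//	target := desc.Hash(key)                      // 3. key -> per-bucket hash
//	entries := readEntries(r, desc)               //    (hypothetical I/O helper)
//	entry := SearchSortedEntries(entries, target) // 4. binary search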
//
// # Construction
//
// Constructing a compactindex requires upfront knowledge of the number of items and highest possible target offset (read: target file size).
//
// The process requires scratch space of around 16 bytes per entry. During generation, data is offloaded to disk for memory efficiency.
//
// The process works as follows:
//
//  1. Determine the number of buckets and the offset integer width
//     based on known input params (item count and target file size).
//  2. Linear pass over input data, populating temporary files that
//     contain the unsorted entries of each bucket.
//  3. For each bucket, brute force a perfect hash function that
//     defines a bijection between hash values and keys in the bucket.
//  4. For each bucket, sort by hash values.
//  5. Store to index.
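//
// Schematically, that flow maps onto a builder like so (identifiers here
// are illustrative, not the exact builder API):
//
//	b, _ := NewBuilder(scratchDir, numItems, targetFileSize)
//	for _, kv := range pairs {
//		b.Insert(kv.Key, kv.Value) // step 2: append to per-bucket scratch file
//	}
//	b.Seal(out) // steps 3-5: mine hash domains, sort, write the index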
//
// An alternative construction approach is available when the number of items or target file size is unknown.
// In this case, a set of keys is first serialized to a flat file.
package compactindex

import (
	"encoding/binary"
	"fmt"
	"math"
	"math/bits"
	"sort"

	"github.com/cespare/xxhash/v2"
)

// Magic are the first eight bytes of an index.
var Magic = [8]byte{'r', 'd', 'c', 'e', 'c', 'i', 'd', 'x'}

// Header occurs once at the beginning of the index.
type Header struct {
	FileSize   uint64
	NumBuckets uint32
}

// headerSize is the size of the header at the beginning of the file.
const headerSize = 32

// Load checks the Magic sequence and loads the header fields.
func (h *Header) Load(buf *[headerSize]byte) error {
	// Use a magic byte sequence to bail fast when user passes a corrupted/unrelated stream.
	if *(*[8]byte)(buf[:8]) != Magic {
		return fmt.Errorf("not a radiance compactindex file")
	}
	*h = Header{
		FileSize:   binary.LittleEndian.Uint64(buf[8:16]),
		NumBuckets: binary.LittleEndian.Uint32(buf[16:20]),
	}
	// 12 bytes to spare for now. Might use it in the future.
	// Force to zero for now.
	for _, b := range buf[20:32] {
		if b != 0x00 {
			return fmt.Errorf("unsupported index version")
		}
	}
	return nil
}
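
// Store writes the header fields and zeroes the reserved area.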
func (h *Header) Store(buf *[headerSize]byte) {
	copy(buf[0:8], Magic[:])
	binary.LittleEndian.PutUint64(buf[8:16], h.FileSize)
	binary.LittleEndian.PutUint32(buf[16:20], h.NumBuckets)
	for i := 20; i < 32; i++ {
		buf[i] = 0
	}
}

// BucketHash returns the bucket index for the given key.
//
// Uses a truncated xxHash64 rotated until the result fits.
// For example, with NumBuckets = 10 the mask is 0xF; a masked sum of
// 10..15 misses, so the sum is rotated and masked again until a value
// below 10 comes up.
func (h *Header) BucketHash(key []byte) uint {
	xsum := xxhash.Sum64(key)
	mask := maxCls64(uint64(h.NumBuckets))
	for {
		index := xsum & mask
		if index < uint64(h.NumBuckets) {
			return uint(index)
		}
		xsum = bits.RotateLeft64(xsum, bits.LeadingZeros64(mask))
	}
}

// BucketHeader occurs at the beginning of each bucket.
type BucketHeader struct {
	HashDomain uint32
	NumEntries uint32
	HashLen    uint8
	FileOffset uint64
}

// bucketHdrLen is the size of the header preceding the hash table entries.
const bucketHdrLen = 16
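
// Store serializes the bucket header; the file offset is packed into
// six little-endian bytes, with one reserved byte left zero.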
func (b *BucketHeader) Store(buf *[bucketHdrLen]byte) {
	binary.LittleEndian.PutUint32(buf[0:4], b.HashDomain)
	binary.LittleEndian.PutUint32(buf[4:8], b.NumEntries)
	buf[8] = b.HashLen
	buf[9] = 0
	putUintLe(buf[10:16], b.FileOffset)
}
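
// Load deserializes a bucket header written by Store.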
func (b *BucketHeader) Load(buf *[bucketHdrLen]byte) {
	b.HashDomain = binary.LittleEndian.Uint32(buf[0:4])
	b.NumEntries = binary.LittleEndian.Uint32(buf[4:8])
	b.HashLen = buf[8]
	b.FileOffset = uintLe(buf[10:16])
}

// Hash returns the per-bucket hash of a key.
func (b *BucketHeader) Hash(key []byte) uint64 {
	xsum := EntryHash64(b.HashDomain, key)
	// Mask the sum down to the low HashLen*8 bits; with the usual
	// HashLen of 3 this keeps 24 bits, matching the 3-byte per-entry
	// hash described in the package docs.
	return xsum & (math.MaxUint64 >> (64 - b.HashLen*8))
}

// BucketDescriptor extends a bucket header with the on-disk layout
// parameters needed to (de)serialize its entries.
type BucketDescriptor struct {
	BucketHeader
	Stride      uint8 // size of one entry in bucket
	OffsetWidth uint8 // width of offset field in bucket
}

func (b *BucketDescriptor) unmarshalEntry(buf []byte) (e Entry) {
	e.Hash = uintLe(buf[0:b.HashLen])
	e.Value = uintLe(buf[b.HashLen : b.HashLen+b.OffsetWidth])
	return
}

func (b *BucketDescriptor) marshalEntry(buf []byte, e Entry) {
	if len(buf) < int(b.Stride) {
		panic("marshalEntry: buf too small")
	}
	putUintLe(buf[0:b.HashLen], e.Hash)
	putUintLe(buf[b.HashLen:b.HashLen+b.OffsetWidth], e.Value)
}

// SearchSortedEntries performs an in-memory binary search for a given hash.
func SearchSortedEntries(entries []Entry, hash uint64) *Entry {
	i, found := sort.Find(len(entries), func(i int) int {
		other := entries[i].Hash
		// Note: This is safe because neither side exceeds 2^24.
		return int(hash) - int(other)
	})
	if !found {
		return nil
	}
	if i >= len(entries) || entries[i].Hash != hash {
		return nil
	}
	return &entries[i]
}
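
// lookupInBucket is an illustrative sketch, not part of the package API:
// given an already-decoded bucket, it runs steps 3 and 4 of the query
// strategy from the package docs. Loading the bucket descriptor and its
// entries (steps 1 and 2) is left to the caller.
func lookupInBucket(desc BucketDescriptor, entries []Entry, key []byte) (uint64, bool) {
	// Step 3: hash the key with the bucket's collision-free hash domain.
	target := desc.Hash(key)
	// Step 4: binary search the sorted entry table.
	if e := SearchSortedEntries(entries, target); e != nil {
		return e.Value, true // e.Value is an offset into the target file
	}
	return 0, false
}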

// EntryHash64 is a xxHash-based hash function using an arbitrary prefix.
func EntryHash64(prefix uint32, key []byte) uint64 {
	// Pad the 4-byte hash domain to one full 32-byte xxHash block,
	// presumably so the prefix is fully absorbed into the digest state
	// before any key bytes are written.
	const blockSize = 32
	var prefixBlock [blockSize]byte
	binary.LittleEndian.PutUint32(prefixBlock[:4], prefix)

	var digest xxhash.Digest
	digest.Reset() // a zero-value Digest must be Reset before use
	digest.Write(prefixBlock[:])
	digest.Write(key)
	return digest.Sum64()
}

// Entry is a single element in a hash table.
type Entry struct {
	Hash  uint64
	Value uint64
}

// intWidth returns the number of bytes minimally required to represent the given integer.
// For example, intWidth(0xFF) == 1, intWidth(0x100) == 2, and any offset
// below 16 MiB fits in intWidth(0xFFFFFF) == 3 bytes.
func intWidth(n uint64) uint8 {
	msb := 64 - bits.LeadingZeros64(n)
	return uint8((msb + 7) / 8)
}

// maxCls64 returns the max integer that has the same number of leading zeros as n.
func maxCls64(n uint64) uint64 {
	return math.MaxUint64 >> bits.LeadingZeros64(n)
}

// uintLe decodes an unsigned little-endian integer without bounds assertions.
// Bytes beyond the end of the buffer are treated as zero.
func uintLe(buf []byte) uint64 {
	var full [8]byte
	copy(full[:], buf)
	return binary.LittleEndian.Uint64(full[:])
}

// putUintLe encodes an unsigned little-endian integer without bounds assertions.
// It returns true if the integer fully fits in the provided buffer.
func putUintLe(buf []byte, x uint64) bool {
	var full [8]byte
	binary.LittleEndian.PutUint64(full[:], x)
	copy(buf, full[:])
	return int(intWidth(x)) <= len(buf)
}