diff --git a/pkg/sbf/cu.go b/pkg/sbf/cu.go
new file mode 100644
index 0000000..50a2e59
--- /dev/null
+++ b/pkg/sbf/cu.go
@@ -0,0 +1,3 @@
+package sbf
+
+// This file contains helper routines for the calculation of compute units.
diff --git a/pkg/sbf/interpreter.go b/pkg/sbf/interpreter.go
index c6771db..10f0475 100644
--- a/pkg/sbf/interpreter.go
+++ b/pkg/sbf/interpreter.go
@@ -9,17 +9,18 @@ import (
 
 // Interpreter implements the SBF core in pure Go.
 type Interpreter struct {
-	text  []byte
-	ro    []byte
-	stack []byte
-	heap  []byte
-	input []byte
+	textVA uint64
+	text   []byte
+	ro     []byte
+	stack  Stack
+	heap   []byte
+	input  []byte
 
 	entry uint64
-
 	cuMax uint64
 
 	syscalls map[uint32]Syscall
+	funcs    map[uint32]int64
 
 	vmContext any
 }
@@ -31,7 +32,7 @@ func NewInterpreter(p *Program, opts *VMOpts) *Interpreter {
 	return &Interpreter{
 		text:  p.Text,
 		ro:    p.RO,
-		stack: make([]byte, opts.StackSize),
+		stack: NewStack(),
 		heap:  make([]byte, opts.HeapSize),
 		input: opts.Input,
 		entry: p.Entrypoint,
@@ -45,16 +46,18 @@ func NewInterpreter(p *Program, opts *VMOpts) *Interpreter {
 //
 // This function may panic given code that doesn't pass the static verifier.
 func (i *Interpreter) Run() (err error) {
-	// Deliberately implementing the entire core in a single function here
-	// to give the compiler more creative liberties.
-
 	var r [11]uint64
 	r[1] = VaddrInput
 	// TODO frame pointer
 	pc := int64(i.entry)
 	cuLeft := int64(i.cuMax)
 
-	// TODO step to next instruction
+	// Design notes
+	// - The interpreter is deliberately implemented in a single big loop,
+	//   to give the compiler more creative liberties, and avoid escaping hot data to the heap.
+	// - uint64(int32(x)) performs sign extension. Most ALU64 instructions make use of this.
+	// - The static verifier imposes invariants on the bytecode.
+	//   The interpreter may panic when it notices these invariants are violated (e.g. invalid opcode).
 
 mainLoop:
 	for {
@@ -367,21 +370,59 @@ mainLoop:
 			// TODO use src reg hint
 			if sc, ok := i.syscalls[ins.Uimm()]; ok {
 				r[0], cuLeft, err = sc.Invoke(i, r[1], r[2], r[3], r[4], r[5], cuLeft)
+			} else if target, ok := i.funcs[ins.Uimm()]; ok {
+				// Only transfer control once the frame exists, so a call-depth
+				// fault is reported at the call site rather than at the callee.
+				if r[10], ok = i.stack.Push((*[4]uint64)(r[6:10]), pc+1); ok {
+					// NOTE(review): the shared pc++ below still runs after this jump;
+					// confirm i.funcs stores the slot preceding the callee entry.
+					pc = target
+				} else {
+					err = ExcCallDepth
+				}
 			} else {
-				panic("bpf function calls not implemented")
+				err = ExcCallDest
 			}
 		case OpCallx:
-			panic("callx not implemented")
+			target := r[ins.Uimm()]
+			target &= ^(uint64(0x7))
+			if target < i.textVA || target >= VaddrStack || target >= i.textVA+uint64(len(i.text)) {
+				// Validate first: a bad target must neither allocate a frame
+				// nor be turned into a bogus pc for the exception report.
+				err = NewExcBadAccess(target, 8, false, "jump out-of-bounds")
+			} else if fp, ok := i.stack.Push((*[4]uint64)(r[6:10]), pc+1); !ok {
+				err = ExcCallDepth
+			} else {
+				r[10] = fp
+				// Land one slot before the callee; the shared pc++ below
+				// then steps onto its first instruction.
+				pc = int64((target-i.textVA)/8) - 1
+			}
 		case OpExit:
-			// TODO implement function returns
-			break mainLoop
+			var ok bool
+			r[10], pc, ok = i.stack.Pop((*[4]uint64)(r[6:10]))
+			if !ok {
+				break mainLoop
+			}
+			// Pop yields the slot after the call; compensate for the shared
+			// pc++ below so execution resumes exactly there.
+			pc--
 		default:
 			panic(fmt.Sprintf("unimplemented opcode %#02x", ins.Op()))
 		}
 		// Post execute
+		if cuLeft < 0 {
+			err = ExcOutOfCU
+		}
 		if err != nil {
-			// TODO return CPU exception error type here
-			return err
+			exc := &Exception{
+				PC:     pc,
+				Detail: err,
+			}
+			if IsLongIns(ins.Op()) {
+				exc.PC-- // fix reported PC
+			}
+			return exc
 		}
 		pc++
 	}
@@ -412,7 +453,13 @@ func (i *Interpreter) Translate(addr uint64, size uint32, write bool) (unsafe.Po
 		}
 		return unsafe.Pointer(&i.ro[lo]), nil
 	case VaddrStack >> 32:
-		panic("todo implement stack access check")
+		mem := i.stack.GetFrame(uint32(addr))
+		// GetFrame yields nil for gaps and out-of-range addresses; reject an
+		// empty slice explicitly so a zero-size access cannot index mem[0].
+		if len(mem) == 0 || uint32(len(mem)) < size {
+			return nil, NewExcBadAccess(addr, size, write, "out-of-bounds stack access")
+		}
+		return unsafe.Pointer(&mem[0]), nil
 	case VaddrHeap >> 32:
 		panic("todo implement heap access check")
 	case VaddrInput >> 32:
diff --git a/pkg/sbf/sbf.go b/pkg/sbf/sbf.go
index 6c9ebcc..f5a4142 100644
--- a/pkg/sbf/sbf.go
+++ b/pkg/sbf/sbf.go
@@ -20,8 +20,6 @@ const (
 	MaxInsSize = 2 * SlotSize
) -const StackFrameSize = 0x1000 - func IsLongIns(op uint8) bool { return op == OpLddw } diff --git a/pkg/sbf/stack.go b/pkg/sbf/stack.go new file mode 100644 index 0000000..f67ec52 --- /dev/null +++ b/pkg/sbf/stack.go @@ -0,0 +1,118 @@ +package sbf + +// Stack is the VM's call frame stack. +// +// # Memory stack +// +// The memory stack resides in addressable memory at VaddrStack. +// +// It is split into statically sized stack frames (StackFrameSize). +// Each frame stores spilled function arguments and local variables. +// The frame pointer (r10) points to the highest address in the current frame. +// +// New frames get allocated upwards. +// Each frame is followed by a gap of size StackFrameSize. +// +// [0x1_0000_0000]: Frame +// [0x1_0000_1000]: Gap +// [0x1_0000_2000]: Frame +// [0x1_0000_3000]: Gap +// ... +// +// # Shadow stack +// +// The shadow stack is not directly accessible from SBF. +// It stores return addresses and caller-preserved registers. +type Stack struct { + mem []byte + sp uint64 + shadow []Frame +} + +// Frame is an entry on the shadow stack. +type Frame struct { + FramePtr uint64 + NVRegs [4]uint64 + RetAddr int64 +} + +// StackFrameSize is the addressable memory within a stack frame. +// +// Note that this constant cannot be changed trivially. +const StackFrameSize = 0x1000 + +// StackDepth is the max frame count of the stack. +const StackDepth = 64 + +func NewStack() Stack { + s := Stack{ + mem: make([]byte, StackDepth*StackFrameSize), + sp: VaddrStack, + shadow: make([]Frame, 1, StackDepth), + } + s.shadow[0] = Frame{ + FramePtr: VaddrStack + StackFrameSize, + } + return s +} + +// GetFramePtr returns the current frame pointer. +func (s *Stack) GetFramePtr() uint64 { + return s.shadow[len(s.shadow)-1].FramePtr +} + +// GetFrame returns the stack frame memory slice containing the frame pointer. +// +// The returned slice starts at the location within the frame as indicated by the address. 
+// To get the full frame, align the provided address by StackFrameSize. +// +// Returns nil if the program tries to address a gap or out-of-bounds memory. +func (s *Stack) GetFrame(addr uint32) []byte { + hi, lo := addr/StackFrameSize, addr%StackFrameSize + if hi > StackDepth || hi%2 == 1 { + return nil + } + pos := hi / 2 + off := pos * StackFrameSize + return s.mem[off+lo : off+StackFrameSize] +} + +// Push allocates a new call frame. +// +// Saves the given nonvolatile regs and return address. +// Returns the new frame pointer. +func (s *Stack) Push(nvRegs *[4]uint64, ret int64) (fp uint64, ok bool) { + if ok = len(s.shadow) < cap(s.shadow); !ok { + return + } + + fp = s.GetFramePtr() + 2*StackFrameSize + s.shadow = s.shadow[len(s.shadow)+1:] + s.shadow[len(s.shadow)-1] = Frame{ + FramePtr: fp, + NVRegs: *nvRegs, + RetAddr: ret, + } + s.sp = fp - StackFrameSize + return +} + +// Pop exits the last call frame. +// +// Writes saved nonvolatile regs into provided slice. +// Returns saved return address, new frame pointer. +// Sets `ok` to false if no call frames are left. +func (s *Stack) Pop(nvRegs *[4]uint64) (fp uint64, ret int64, ok bool) { + if len(s.shadow) <= 1 { + ok = false + return + } + + var frame Frame + frame, s.shadow = s.shadow[0], s.shadow[1:] + + fp = s.GetFramePtr() + *nvRegs = frame.NVRegs + ret = frame.RetAddr + return +} diff --git a/pkg/sbf/syscalls.go b/pkg/sbf/syscalls.go index 8fe004b..0ae49ee 100644 --- a/pkg/sbf/syscalls.go +++ b/pkg/sbf/syscalls.go @@ -26,6 +26,11 @@ func PCHash(addr uint64) uint32 { return murmur3.Sum32(key[:]) } +// Syscall are callback handles from VM to Go. 
(work in progress) +type Syscall interface { + Invoke(vm VM, r1, r2, r3, r4, r5 uint64, cuIn int64) (r0 uint64, cuOut int64, err error) +} + type SyscallRegistry map[uint32]Syscall func NewSyscallRegistry() SyscallRegistry { @@ -41,3 +46,41 @@ func (s SyscallRegistry) Register(name string, syscall Syscall) (hash uint32, ok ok = true return } + +// Convenience Methods + +type SyscallFunc0 func(vm VM, cuIn int64) (r0 uint64, cuOut int64, err error) + +func (f SyscallFunc0) Invoke(vm VM, _, _, _, _, _ uint64, cuIn int64) (r0 uint64, cuOut int64, err error) { + return f(vm, cuIn) +} + +type SyscallFunc1 func(vm VM, r1 uint64, cuIn int64) (r0 uint64, cuOut int64, err error) + +func (f SyscallFunc1) Invoke(vm VM, r1, _, _, _, _ uint64, cuIn int64) (r0 uint64, cuOut int64, err error) { + return f(vm, r1, cuIn) +} + +type SyscallFunc2 func(vm VM, r1, r2 uint64, cuIn int64) (r0 uint64, cuOut int64, err error) + +func (f SyscallFunc2) Invoke(vm VM, r1, r2, _, _, _ uint64, cuIn int64) (r0 uint64, cuOut int64, err error) { + return f(vm, r1, r2, cuIn) +} + +type SyscallFunc3 func(vm VM, r1, r2, r3 uint64, cuIn int64) (r0 uint64, cuOut int64, err error) + +func (f SyscallFunc3) Invoke(vm VM, r1, r2, r3, _, _ uint64, cuIn int64) (r0 uint64, cuOut int64, err error) { + return f(vm, r1, r2, r3, cuIn) +} + +type SyscallFunc4 func(vm VM, r1, r2, r3, r4 uint64, cuIn int64) (r0 uint64, cuOut int64, err error) + +func (f SyscallFunc4) Invoke(vm VM, r1, r2, r3, r4, _ uint64, cuIn int64) (r0 uint64, cuOut int64, err error) { + return f(vm, r1, r2, r3, r4, cuIn) +} + +type SyscallFunc5 func(vm VM, r1, r2, r3, r4, r5 uint64, cuIn int64) (r0 uint64, cuOut int64, err error) + +func (f SyscallFunc5) Invoke(vm VM, r1, r2, r3, r4, r5 uint64, cuIn int64) (r0 uint64, cuOut int64, err error) { + return f(vm, r1, r2, r3, r4, r5, cuIn) +} diff --git a/pkg/sbf/vm.go b/pkg/sbf/vm.go index 73e0f4b..7818d9f 100644 --- a/pkg/sbf/vm.go +++ b/pkg/sbf/vm.go @@ -35,15 +35,26 @@ type VMOpts struct { Input 
[]byte // mapped at VaddrInput } -// Syscall are callback handles from VM to Go. (work in progress) -type Syscall interface { - Invoke(vm VM, r1, r2, r3, r4, r5 uint64, cuIn int64) (r0 uint64, cuOut int64, err error) +type Exception struct { + PC int64 + Detail error +} + +func (e *Exception) Error() string { + return fmt.Sprintf("exception at %d: %s", e.PC, e.Detail) +} + +func (e *Exception) Unwrap() error { + return e.Detail } // Exception codes. var ( ExcDivideByZero = errors.New("division by zero") ExcDivideOverflow = errors.New("divide overflow") + ExcOutOfCU = errors.New("compute unit overrun") + ExcCallDepth = errors.New("call depth exceeded") + ExcCallDest = errors.New("unknown symbol or syscall") ) type ExcBadAccess struct { @@ -65,41 +76,3 @@ func NewExcBadAccess(addr uint64, size uint32, write bool, reason string) ExcBad func (e ExcBadAccess) Error() string { return fmt.Sprintf("bad memory access at %#x (size=%d write=%v), reason: %s", e.Addr, e.Size, e.Write, e.Reason) } - -// Convenience Methods - -type SyscallFunc0 func(vm VM, cuIn int64) (r0 uint64, cuOut int64, err error) - -func (f SyscallFunc0) Invoke(vm VM, _, _, _, _, _ uint64, cuIn int64) (r0 uint64, cuOut int64, err error) { - return f(vm, cuIn) -} - -type SyscallFunc1 func(vm VM, r1 uint64, cuIn int64) (r0 uint64, cuOut int64, err error) - -func (f SyscallFunc1) Invoke(vm VM, r1, _, _, _, _ uint64, cuIn int64) (r0 uint64, cuOut int64, err error) { - return f(vm, r1, cuIn) -} - -type SyscallFunc2 func(vm VM, r1, r2 uint64, cuIn int64) (r0 uint64, cuOut int64, err error) - -func (f SyscallFunc2) Invoke(vm VM, r1, r2, _, _, _ uint64, cuIn int64) (r0 uint64, cuOut int64, err error) { - return f(vm, r1, r2, cuIn) -} - -type SyscallFunc3 func(vm VM, r1, r2, r3 uint64, cuIn int64) (r0 uint64, cuOut int64, err error) - -func (f SyscallFunc3) Invoke(vm VM, r1, r2, r3, _, _ uint64, cuIn int64) (r0 uint64, cuOut int64, err error) { - return f(vm, r1, r2, r3, cuIn) -} - -type SyscallFunc4 func(vm VM, 
r1, r2, r3, r4 uint64, cuIn int64) (r0 uint64, cuOut int64, err error) - -func (f SyscallFunc4) Invoke(vm VM, r1, r2, r3, r4, _ uint64, cuIn int64) (r0 uint64, cuOut int64, err error) { - return f(vm, r1, r2, r3, r4, cuIn) -} - -type SyscallFunc5 func(vm VM, r1, r2, r3, r4, r5 uint64, cuIn int64) (r0 uint64, cuOut int64, err error) - -func (f SyscallFunc5) Invoke(vm VM, r1, r2, r3, r4, r5 uint64, cuIn int64) (r0 uint64, cuOut int64, err error) { - return f(vm, r1, r2, r3, r4, r5, cuIn) -}