// clampAddUint64 returns x+y, saturating at math.MaxUint64 instead of
// wrapping around when the sum does not fit in 64 bits.
func clampAddUint64(x uint64, y uint64) uint64 {
	sum, carry := bits.Add64(x, y, 0)
	if carry != 0 {
		// The true sum needs a 65th bit: clamp rather than wrap.
		return math.MaxUint64
	}
	return sum
}

// addrRange is a half-open interval [min, max) of addresses.
//
// Any value with min >= max is treated as empty (see len).
type addrRange struct {
	min, max uint64
}

// newAddrRange returns an empty range.
//
// min starts at the largest and max at the smallest possible value, so the
// first non-empty range passed to insert replaces both bounds.
func newAddrRange() addrRange {
	return addrRange{min: math.MaxUint64, max: 0}
}

// len returns the number of addresses covered by a, or 0 if a is empty
// (min >= max). The guard also keeps the subtraction from underflowing.
func (a addrRange) len() uint64 {
	if a.min >= a.max {
		return 0
	}
	return a.max - a.min
}

/*
func (a addrRange) contains(addr uint64) bool {
	if a.len() == 0 {
		return false
	}
	return a.min <= addr && addr < a.max
}
*/

// containsRange reports whether b lies entirely within a.
//
// If either range is empty the result is false. This is a full-containment
// test, not an overlap test: a range that only partially intersects a also
// yields false.
func (a addrRange) containsRange(b addrRange) bool {
	if a.len() == 0 || b.len() == 0 {
		return false
	}
	return a.min <= b.min && a.max >= b.max
}

// extendToFit widens a so that x is not outside its bounds: min is lowered to
// x if x is below it, and max is raised to x if x is above it.
//
// Because max is exclusive, raising max to x does not make x itself a covered
// address; x acts as a boundary here.
func (a *addrRange) extendToFit(x uint64) {
	if x < a.min {
		a.min = x
	}
	if x > a.max {
		a.max = x
	}
}

// insert grows a to the smallest range that covers both a and b.
//
// An empty b is ignored, so it can never move the bounds of a.
func (a *addrRange) insert(b addrRange) {
	if b.len() == 0 {
		return
	}
	if b.min < a.min {
		a.min = b.min
	}
	if b.max > a.max {
		a.max = b.max
	}
}

// The following ELF loading rules seem mostly arbitrary.
// For the sake of cleanliness, this loader doesn't process
// some badly malformed ELFs that would pass on Solana mainnet.
// The Solana protocol is being improved in this area.

// copy allocates program buffers and copies ELF contents.
+func (l *Loader) copy() error { + l.progRange = newAddrRange() + l.rodatas = make([]addrRange, 0, 4) + if err := l.getText(); err != nil { + return err + } + if err := l.mapSections(); err != nil { + return err + } + if err := l.copySections(); err != nil { + return err + } + return nil +} + +// getText remembers the range of .text in the program buffer +func (l *Loader) getText() error { + if err := l.checkSectionAddrs(l.shText); err != nil { + return fmt.Errorf("invalid .text: %w", err) + } + l.text = addrRange{min: l.shText.Off, max: l.shText.Off + l.shText.Size} + return nil +} + +// mapRodataLike reserves ranges for sections in the program buffer +func (l *Loader) mapSections() error { + // Walk all non-standard rodata sections + iter := l.newShTableIter() + for iter.Next() && iter.Err() == nil { + i, sh := iter.Index(), iter.Item() + + // Skip standard sections + sectionName, err := l.getString(&l.shShstrtab, sh.Name, maxSectionNameLen) + if err != nil { + return fmt.Errorf("getString: %w", err) + } + switch sectionName { + case ".text", ".rodata", ".data.rel.ro", ".eh_frame": + // ok + default: + continue + } + + if err := l.checkSectionAddrs(&sh); err != nil { + return fmt.Errorf("invalid rodata-like section %d: %w", i, err) + } + + // Section overlap check & bounds tracking + section := addrRange{min: sh.Off, max: sh.Off + sh.Size} + if section.len() == 0 { + continue + } + if l.progRange.containsRange(section) { + // TODO rbpf probably doesn't have this restriction + return fmt.Errorf("rodata section %d overlaps with other section", i) + } + l.progRange.insert(section) + + if section.min != l.text.min { + l.rodatas = append(l.rodatas, section) + } + } + return iter.Err() +} + +func (l *Loader) checkSectionAddrs(sh *elf.Section64) error { + // TODO Support true vaddr ELFs + + if sh.Size > l.fileSize { + return io.ErrUnexpectedEOF + } + if sh.Addr != sh.Off { + return fmt.Errorf("section physical address out-of-place") + } + + // Ensure section within VM 
program range + vaddr := clampAddUint64(VaddrProgram, sh.Addr) + vaddrEnd := vaddr + sh.Size + if vaddrEnd < vaddr || vaddrEnd > VaddrStack { + return fmt.Errorf("section virtual address out-of-bounds") + } + + return nil +} + +// copySections copies text and rodata-like sections from the ELF into VM memory. +func (l *Loader) copySections() error { + if l.progRange.len() == 0 { + // TODO what is the correct behavior here? + return fmt.Errorf("program is empty (???)") + } + l.progRange.extendToFit(0) + + // Allocate! + l.program = make([]byte, l.progRange.len()) + + // Read data from ELF file + for _, section := range l.rodatas { + if err := l.copySection(section); err != nil { + return err + } + } + if err := l.copySection(l.text); err != nil { + return err + } + + return nil +} + +func (l *Loader) copySection(section addrRange) (err error) { + off, size := int64(section.min), int64(section.len()) + rd := io.NewSectionReader(l.rd, off, size) + _, err = io.ReadFull(rd, l.program[section.min:section.max]) + return +} diff --git a/pkg/sbf/loader/loader.go b/pkg/sbf/loader/loader.go new file mode 100644 index 0000000..9729ce2 --- /dev/null +++ b/pkg/sbf/loader/loader.go @@ -0,0 +1,95 @@ +package loader + +import ( + "bytes" + "debug/elf" + "fmt" + "io" + + "github.com/certusone/radiance/pkg/sbf" +) + +// Loader is based on solana_rbpf::elf_parser +type Loader struct { + // File containing ELF + rd io.ReaderAt + fileSize uint64 + + // ELF data structures + eh elf.Header64 + phLoad elf.Prog64 + phDynamic *elf.Prog64 + shShstrtab elf.Section64 + shText *elf.Section64 + shSymtab *elf.Section64 + shStrtab *elf.Section64 + shDynstr *elf.Section64 + shDynamic *elf.Section64 + dynamic [DT_NUM]uint64 + relocsIter *tableIter[elf.Rel64] + dynSymIter *tableIter[elf.Sym64] + + // Program section/segment mappings + // Uses physical addressing + rodatas []addrRange + text addrRange + progRange addrRange + + // Contains most of ELF (.text and rodata-like) + // Non-loaded sections are 
zeroed + program []byte + + // Symbols + //funcs map[uint32]symbol + //syscalls map[uint32]string +} + +// Bounds checks +const ( + // 64 MiB max program size. + // Allows loader to use unchecked math when adding 32-bit offsets. + maxFileLen = 1 << 26 + + maxSectionNameLen = 16 + maxSymbolNameLen = 1024 +) + +// EF_SBF_V2 is the SBFv2 ELF flag +const EF_SBF_V2 = 0x20 + +// DT_NUM is the number of ELF generic dynamic entry types +const DT_NUM = 35 + +// Hardcoded addresses. +const ( + VaddrProgram = uint64(0x1_0000_0000) + VaddrStack = uint64(0x2_0000_0000) + VaddrHeap = uint64(0x3_0000_0000) + VaddrInput = uint64(0x4_0000_0000) +) + +// NewLoaderFromBytes creates an ELF loader from a byte slice. +func NewLoaderFromBytes(buf []byte) (*Loader, error) { + if len(buf) > maxFileLen { + return nil, fmt.Errorf("ELF file too large") + } + l := &Loader{ + rd: bytes.NewReader(buf), + fileSize: uint64(len(buf)), + } + return l, nil +} + +// Load parses, loads, and relocates an SBF program. +func (l *Loader) Load() (*sbf.Program, error) { + if err := l.parse(); err != nil { + return nil, err + } + if err := l.copy(); err != nil { + return nil, err + } + //if err := l.relocate(); err != nil { + // return nil, err + //} + panic("unimplemented") +} diff --git a/pkg/sbf/loader/loader_test.go b/pkg/sbf/loader/loader_test.go index f1a22e0..c2a009d 100644 --- a/pkg/sbf/loader/loader_test.go +++ b/pkg/sbf/loader/loader_test.go @@ -15,10 +15,11 @@ var ( ) func TestLoadProgram_Noop(t *testing.T) { - loader, err := newLoader(soNoop) + loader, err := NewLoaderFromBytes(soNoop) require.NoError(t, err) err = loader.parse() + require.NoError(t, err) assert.Equal(t, elf.Header64{ Ident: [16]byte{ @@ -147,4 +148,45 @@ func TestLoadProgram_Noop(t *testing.T) { dynamic[elf.DT_FLAGS] = 0x04 assert.Equal(t, dynamic, loader.dynamic) + + err = loader.copy() + require.NoError(t, err) + + assert.Equal(t, []addrRange{ + { + // .rodata + min: 0x2b8, + max: 0x2c3, + }, + }, loader.rodatas) + + 
assert.Equal(t, addrRange{ + min: 0x1000, + max: 0x1060, + }, loader.text) + + assertZeroBytes(t, loader.program[:loader.rodatas[0].min]) + assert.Equal(t, + soNoop[loader.rodatas[0].min:loader.rodatas[0].max], + loader.program[loader.rodatas[0].min:loader.rodatas[0].max]) + assertZeroBytes(t, loader.program[loader.rodatas[0].max:loader.text.min]) + assert.Equal(t, + soNoop[loader.text.min:loader.text.max], + loader.program[loader.text.min:loader.text.max]) + assertZeroBytes(t, loader.program[loader.text.max:]) +} + +func assertZeroBytes(t *testing.T, b []byte) { + if !isZeroBytes(b) { + t.Fatal("Should be zero") + } +} + +func isZeroBytes(b []byte) bool { + for _, v := range b { + if v != 0x00 { + return false + } + } + return true } diff --git a/pkg/sbf/loader/parse.go b/pkg/sbf/loader/parse.go index d5bf049..82a5559 100644 --- a/pkg/sbf/loader/parse.go +++ b/pkg/sbf/loader/parse.go @@ -18,52 +18,8 @@ import ( // TODO Fuzz // TODO Differential fuzz against rbpf -const EF_SBF_V2 = 0x20 - -const DT_NUM = 35 - -// Bounds checks -const ( - // 64 MiB max program size. - // Allows loader to use unchecked math when adding 32-bit offsets. - maxFileLen = 1 << 26 - - maxSectionNameLen = 16 - maxSymbolNameLen = 1024 -) - -// loader is based on solana_rbpf::elf_parser -type loader struct { - rd io.ReaderAt - fileSize uint64 - - eh elf.Header64 - phLoad elf.Prog64 - phDynamic *elf.Prog64 - shShstrtab elf.Section64 - shText *elf.Section64 - shSymtab *elf.Section64 - shStrtab *elf.Section64 - shDynstr *elf.Section64 - shDynamic *elf.Section64 - dynamic [DT_NUM]uint64 - relocsIter *tableIter[elf.Rel64] - dynSymIter *tableIter[elf.Sym64] -} - -func newLoader(buf []byte) (*loader, error) { - if len(buf) > maxFileLen { - return nil, fmt.Errorf("ELF file too large") - } - l := &loader{ - rd: bytes.NewReader(buf), - fileSize: uint64(len(buf)), - } - return l, nil -} - // parse checks ELF file for validity and loads metadata with minimal allocations. 
-func (l *loader) parse() error { +func (l *Loader) parse() error { if err := l.readHeader(); err != nil { return err } @@ -82,6 +38,9 @@ func (l *loader) parse() error { if err := l.parseDynamic(); err != nil { return err } + if err := l.validate(); err != nil { + return err + } return nil } @@ -94,17 +53,17 @@ const ( symLen = 0x18 // sizeof(elf.Sym64) ) -func (l *loader) newPhTableIter() *tableIter[elf.Prog64] { +func (l *Loader) newPhTableIter() *tableIter[elf.Prog64] { eh := &l.eh return newTableIterator[elf.Prog64](l, eh.Phoff, uint32(eh.Phnum), phEntLen) } -func (l *loader) newShTableIter() *tableIter[elf.Section64] { +func (l *Loader) newShTableIter() *tableIter[elf.Section64] { eh := &l.eh return newTableIterator[elf.Section64](l, eh.Shoff, uint32(eh.Shnum), shEntLen) } -func (l *loader) readHeader() error { +func (l *Loader) readHeader() error { var hdrBuf [ehLen]byte if _, err := io.ReadFull(io.NewSectionReader(l.rd, 0, ehLen), hdrBuf[:]); err != nil { return err @@ -112,7 +71,7 @@ func (l *loader) readHeader() error { return binary.Read(bytes.NewReader(hdrBuf[:]), binary.LittleEndian, &l.eh) } -func (l *loader) validateHeader() error { +func (l *Loader) validateHeader() error { eh := &l.eh ident := &eh.Ident @@ -152,7 +111,7 @@ func (l *loader) validateHeader() error { } // scan the program header table and remember the last PT_LOAD segment -func (l *loader) loadProgramHeaderTable() error { +func (l *Loader) loadProgramHeaderTable() error { iter := l.newPhTableIter() for iter.Next() && iter.Err() == nil { ph := iter.Item() @@ -166,7 +125,7 @@ func (l *loader) loadProgramHeaderTable() error { } continue case elf.PT_LOAD: - break + // ok default: continue } @@ -188,7 +147,7 @@ func (l *loader) loadProgramHeaderTable() error { // reads and validates the section header table. // remembers the section header table. 
-func (l *loader) readSectionHeaderTable() error { +func (l *Loader) readSectionHeaderTable() error { eh := &l.eh iter := l.newShTableIter() sectionDataOff := uint64(0) @@ -212,7 +171,7 @@ func (l *loader) readSectionHeaderTable() error { *l.shDynamic = sh } default: - break + // ok } // Ensure section data is not overlapping with ELF headers @@ -252,7 +211,7 @@ func (l *loader) readSectionHeaderTable() error { return iter.Err() } -func (l *loader) getString(strtab *elf.Section64, stroff uint32, maxLen uint16) (string, error) { +func (l *Loader) getString(strtab *elf.Section64, stroff uint32, maxLen uint16) (string, error) { if elf.SectionType(strtab.Type) != elf.SHT_STRTAB { return "", fmt.Errorf("invalid strtab") } @@ -275,8 +234,8 @@ func (l *loader) getString(strtab *elf.Section64, stroff uint32, maxLen uint16) return builder.String(), nil } -// Iterate sections and remember special sections by name. -func (l *loader) parseSections() error { +// Iterate sections, validate them, and remember special sections by name. 
+func (l *Loader) parseSections() error { shShstrtab := &l.shShstrtab iter := l.newShTableIter() for iter.Next() && iter.Err() == nil { @@ -296,6 +255,8 @@ func (l *loader) parseSections() error { return nil } switch sectionName { + case ".bss": + return fmt.Errorf("unsupported section .bss") case ".text": err = setSection(&l.shText) case ".symtab": @@ -308,11 +269,25 @@ func (l *loader) parseSections() error { if err != nil { return err } + + if strings.HasPrefix(sectionName, ".bss") { + return fmt.Errorf("unsupported bss-like section") + } + if (sh.Flags&uint64(elf.SHF_ALLOC|elf.SHF_WRITE)) == uint64(elf.SHF_ALLOC|elf.SHF_WRITE) && + strings.HasPrefix(sectionName, ".data") && + !strings.HasPrefix(sectionName, ".data.rel") { + return fmt.Errorf("unsupported data-like section") + } + + // bounds check + if sh.Off+sh.Size < sh.Off || sh.Off+sh.Size > l.fileSize { + return io.ErrUnexpectedEOF + } } return iter.Err() } -func (l *loader) newDynamicIter() (*tableIter[elf.Dyn64], error) { +func (l *Loader) newDynamicIter() (*tableIter[elf.Dyn64], error) { var off uint64 var size uint64 if ph := l.phDynamic; ph != nil { @@ -334,7 +309,7 @@ func (l *loader) newDynamicIter() (*tableIter[elf.Dyn64], error) { return iter, nil } -func (l *loader) parseDynamicTable() error { +func (l *Loader) parseDynamicTable() error { iter, err := l.newDynamicIter() if err != nil { return err @@ -358,7 +333,7 @@ func (l *loader) parseDynamicTable() error { } // sectionAt finds the section that has a start address matching vaddr. -func (l *loader) sectionAt(vaddr uint64) (*elf.Section64, error) { +func (l *Loader) sectionAt(vaddr uint64) (*elf.Section64, error) { iter := l.newShTableIter() for iter.Next() && iter.Err() == nil { sh := iter.Item() @@ -370,7 +345,7 @@ func (l *loader) sectionAt(vaddr uint64) (*elf.Section64, error) { } // segmentByVaddr finds the segment which vaddr lies within. 
-func (l *loader) segmentByVaddr(vaddr uint64) (*elf.Prog64, error) { +func (l *Loader) segmentByVaddr(vaddr uint64) (*elf.Prog64, error) { iter := l.newPhTableIter() for iter.Next() && iter.Err() == nil { ph := iter.Item() @@ -384,7 +359,7 @@ func (l *loader) segmentByVaddr(vaddr uint64) (*elf.Prog64, error) { return nil, iter.Err() } -func (l *loader) parseRelocs() error { +func (l *Loader) parseRelocs() error { vaddr := l.dynamic[elf.DT_REL] if vaddr == 0 { return nil @@ -429,7 +404,7 @@ func (l *loader) parseRelocs() error { // getSymtab returns an iterator over the symbols in a symtab-like section. // // Performs necessary bounds checking. -func (l *loader) getSymtab(sh *elf.Section64) (*tableIter[elf.Sym64], error) { +func (l *Loader) getSymtab(sh *elf.Section64) (*tableIter[elf.Sym64], error) { switch elf.SectionType(sh.Type) { case elf.SHT_SYMTAB, elf.SHT_DYNSYM: break @@ -439,7 +414,7 @@ func (l *loader) getSymtab(sh *elf.Section64) (*tableIter[elf.Sym64], error) { return newTableIteratorChecked[elf.Sym64](l, sh.Off, sh.Off+sh.Size, symLen) } -func (l *loader) parseDynSymtab() error { +func (l *Loader) parseDynSymtab() error { vaddr := l.dynamic[elf.DT_SYMTAB] if vaddr == 0 { return nil @@ -457,7 +432,7 @@ func (l *loader) parseDynSymtab() error { return err } -func (l *loader) parseDynamic() error { +func (l *Loader) parseDynamic() error { if err := l.parseDynamicTable(); err != nil { return err } @@ -470,10 +445,31 @@ func (l *loader) parseDynamic() error { return nil } +// validate performs additional checks after parsing. 
+func (l *Loader) validate() error { + if l.shText == nil { + return fmt.Errorf("missing .text section") + } + if !l.checkEntrypoint() { + return fmt.Errorf("invalid entrypoint") + } + return nil +} + +func (l *Loader) checkEntrypoint() bool { + start := l.shText.Addr + end, overflow := bits.Add64(start, l.shText.Size, 0) + if overflow != 0 { + end = math.MaxUint64 + } + entry := l.eh.Entry + return start <= entry && entry < end && (entry-start)%8 == 0 +} + // tableIter is a memory-efficient iterator over densely packed tables of statically sized items. // Such as the ELF program header and section header tables. type tableIter[T any] struct { - l *loader + l *Loader off uint64 i uint32 // one ahead count uint32 @@ -483,7 +479,7 @@ type tableIter[T any] struct { } // newTableIteratorChecked is like newTableIterator, but with all necessary bounds checks. -func newTableIteratorChecked[T any](l *loader, start uint64, end uint64, elemSize uint16) (*tableIter[T], error) { +func newTableIteratorChecked[T any](l *Loader, start uint64, end uint64, elemSize uint16) (*tableIter[T], error) { if end < start || end > l.fileSize { return nil, io.ErrUnexpectedEOF } @@ -499,7 +495,7 @@ func newTableIteratorChecked[T any](l *loader, start uint64, end uint64, elemSiz } // newTableIterator creates a new tableIter at `off` for `count` elements of `elemSize` len. -func newTableIterator[T any](l *loader, off uint64, count uint32, elemSize uint16) *tableIter[T] { +func newTableIterator[T any](l *Loader, off uint64, count uint32, elemSize uint16) *tableIter[T] { return &tableIter[T]{ l: l, off: off, diff --git a/pkg/sbf/program.go b/pkg/sbf/program.go new file mode 100644 index 0000000..460bb74 --- /dev/null +++ b/pkg/sbf/program.go @@ -0,0 +1,5 @@ +package sbf + +// Program is a loaded SBF program. +type Program struct { +}