sbf: finish ELF parser

This commit is contained in:
Richard Patel 2022-09-02 03:52:15 +02:00
parent fade5aa0b8
commit 8cd54f8e82
2 changed files with 390 additions and 144 deletions

View File

@ -15,14 +15,9 @@ import (
// TODO Fuzz // TODO Fuzz
// TODO Differential fuzz against rbpf // TODO Differential fuzz against rbpf
type Executable struct { const EF_SBF_V2 = 0x20
Header elf.Header64
Load elf.Prog64 const DT_NUM = 35
ShShstrtab elf.Section64
ShSymtab *elf.Section64
ShStrtab *elf.Section64
ShDynstr *elf.Section64
}
// Bounds checks // Bounds checks
const ( const (
@ -34,72 +29,87 @@ const (
maxSymbolNameLen = 1024 maxSymbolNameLen = 1024
) )
func LoadProgram(buf []byte) (*Executable, error) { // loader is based on solana_rbpf::elf_parser
if len(buf) > maxFileLen {
return nil, fmt.Errorf("ELF file too large")
}
l := loader{
rd: bytes.NewReader(buf),
fileSize: uint64(len(buf)),
}
return l.load()
}
const EF_SBF_V2 = 0x20
type loader struct { type loader struct {
rd io.ReaderAt rd io.ReaderAt
fileSize uint64 fileSize uint64
elf *Executable
eh elf.Header64
phLoad elf.Prog64
phDynamic *elf.Prog64
shShstrtab elf.Section64
shSymtab *elf.Section64
shStrtab *elf.Section64
shDynstr *elf.Section64
shDynamic *elf.Section64
dynamic [DT_NUM]uint64
relocsIter *tableIter[elf.Rel64]
dynSymIter *tableIter[elf.Sym64]
} }
func (l *loader) load() (*Executable, error) { func newLoader(buf []byte) (*loader, error) {
l.elf = new(Executable) if len(buf) > maxFileLen {
return nil, fmt.Errorf("ELF file too large")
}
l := &loader{
rd: bytes.NewReader(buf),
fileSize: uint64(len(buf)),
}
return l, nil
}
// parse checks ELF file for validity and loads metadata with minimal allocations.
func (l *loader) parse() error {
if err := l.readHeader(); err != nil { if err := l.readHeader(); err != nil {
return nil, err return err
} }
if err := l.validateHeader(); err != nil { if err := l.validateHeader(); err != nil {
return nil, err return err
} }
if err := l.loadProgramHeaderTable(); err != nil { if err := l.loadProgramHeaderTable(); err != nil {
return nil, err return err
} }
if err := l.readSectionHeaderTable(); err != nil { if err := l.readSectionHeaderTable(); err != nil {
return nil, err return err
} }
if err := l.parseSections(); err != nil { if err := l.parseSections(); err != nil {
return nil, err return err
} }
// TODO parse dynamic segment if err := l.parseDynamic(); err != nil {
return l.elf, nil return err
}
return nil
} }
const ( const (
ehsize = 0x40 ehLen = 0x40 // sizeof(elf.Header64)
phentsize = 0x38 phEntLen = 0x38 // sizeof(elf.Prog64)
shentsize = 0x40 shEntLen = 0x40 // sizeof(elf.Section64)
dynLen = 0x10 // sizeof(elf.Dyn64)
relLen = 0x10 // sizeof(elf.Rel64)
symLen = 0x18 // sizeof(elf.Sym64)
) )
func (l *loader) newPhTableIter() *tableIter[elf.Prog64] { func (l *loader) newPhTableIter() *tableIter[elf.Prog64] {
eh := &l.elf.Header eh := &l.eh
return newTableIterator[elf.Prog64](l, eh.Phoff, eh.Phnum, phentsize) return newTableIterator[elf.Prog64](l, eh.Phoff, uint32(eh.Phnum), phEntLen)
} }
func (l *loader) newShTableIter() *tableIter[elf.Section64] { func (l *loader) newShTableIter() *tableIter[elf.Section64] {
eh := &l.elf.Header eh := &l.eh
return newTableIterator[elf.Section64](l, eh.Shoff, eh.Shnum, shentsize) return newTableIterator[elf.Section64](l, eh.Shoff, uint32(eh.Shnum), shEntLen)
} }
func (l *loader) readHeader() error { func (l *loader) readHeader() error {
var hdrBuf [ehsize]byte var hdrBuf [ehLen]byte
if _, err := io.ReadFull(io.NewSectionReader(l.rd, 0, ehsize), hdrBuf[:]); err != nil { if _, err := io.ReadFull(io.NewSectionReader(l.rd, 0, ehLen), hdrBuf[:]); err != nil {
return err return err
} }
return binary.Read(bytes.NewReader(hdrBuf[:]), binary.LittleEndian, &l.elf.Header) return binary.Read(bytes.NewReader(hdrBuf[:]), binary.LittleEndian, &l.eh)
} }
func (l *loader) validateHeader() error { func (l *loader) validateHeader() error {
eh := &l.elf.Header eh := &l.eh
ident := &eh.Ident ident := &eh.Ident
if string(ident[:elf.EI_CLASS]) != elf.ELFMAG { if string(ident[:elf.EI_CLASS]) != elf.ELFMAG {
@ -117,20 +127,20 @@ func (l *loader) validateHeader() error {
// note: EI_PAD and EI_ABIVERSION are ignored // note: EI_PAD and EI_ABIVERSION are ignored
if eh.Version != uint32(elf.EV_CURRENT) || if eh.Version != uint32(elf.EV_CURRENT) ||
eh.Ehsize != ehsize || eh.Ehsize != ehLen ||
eh.Phentsize != phentsize || eh.Phentsize != phEntLen ||
eh.Shentsize != shentsize || eh.Shentsize != shEntLen ||
eh.Shstrndx >= eh.Shnum { eh.Shstrndx >= eh.Shnum {
return fmt.Errorf("invalid ELF file") return fmt.Errorf("invalid ELF file")
} }
if eh.Phoff < ehsize { if eh.Phoff < ehLen {
return fmt.Errorf("program header overlaps with file header") return fmt.Errorf("program header overlaps with file header")
} }
if eh.Shoff < ehsize { if eh.Shoff < ehLen {
return fmt.Errorf("section header overlaps with file header") return fmt.Errorf("section header overlaps with file header")
} }
if isOverlap(eh.Phoff, uint64(eh.Phnum)*phentsize, eh.Shoff, uint64(eh.Shnum)*shentsize) { if isOverlap(eh.Phoff, uint64(eh.Phnum)*phEntLen, eh.Shoff, uint64(eh.Shnum)*shEntLen) {
return fmt.Errorf("program and section header overlap") return fmt.Errorf("program and section header overlap")
} }
@ -143,12 +153,22 @@ func (l *loader) loadProgramHeaderTable() error {
for iter.Next() && iter.Err() == nil { for iter.Next() && iter.Err() == nil {
ph := iter.Item() ph := iter.Item()
if elf.ProgType(ph.Type) != elf.PT_LOAD { switch elf.ProgType(ph.Type) {
case elf.PT_DYNAMIC:
// remember first segment with PT_DYNAMIC in case we need it later
if l.phDynamic == nil {
l.phDynamic = new(elf.Prog64)
*l.phDynamic = ph
}
continue
case elf.PT_LOAD:
break
default:
continue continue
} }
// vaddr must be ascending // vaddr must be ascending
if ph.Vaddr < l.elf.Load.Vaddr { if ph.Vaddr < l.phLoad.Vaddr {
return fmt.Errorf("invalid program header") return fmt.Errorf("invalid program header")
} }
@ -157,7 +177,7 @@ func (l *loader) loadProgramHeaderTable() error {
return fmt.Errorf("segment out of bounds") return fmt.Errorf("segment out of bounds")
} }
l.elf.Load = ph l.phLoad = ph
} }
return iter.Err() return iter.Err()
} }
@ -165,7 +185,7 @@ func (l *loader) loadProgramHeaderTable() error {
// reads and validates the section header table. // reads and validates the section header table.
// remembers the section header table. // remembers the section header table.
func (l *loader) readSectionHeaderTable() error { func (l *loader) readSectionHeaderTable() error {
eh := &l.elf.Header eh := &l.eh
iter := l.newShTableIter() iter := l.newShTableIter()
sectionDataOff := uint64(0) sectionDataOff := uint64(0)
@ -178,8 +198,17 @@ func (l *loader) readSectionHeaderTable() error {
for iter.Next() && iter.Err() == nil { for iter.Next() && iter.Err() == nil {
i, sh := iter.Index(), iter.Item() i, sh := iter.Index(), iter.Item()
if elf.SectionType(sh.Type) == elf.SHT_NOBITS { switch elf.SectionType(sh.Type) {
case elf.SHT_NOBITS:
continue continue
case elf.SHT_DYNAMIC:
// remember first section with SHT_DYNAMIC in case we need it later
if l.shDynamic == nil {
l.shDynamic = new(elf.Section64)
*l.shDynamic = sh
}
default:
break
} }
// Ensure section data is not overlapping with ELF headers // Ensure section data is not overlapping with ELF headers
@ -187,13 +216,13 @@ func (l *loader) readSectionHeaderTable() error {
if overflow != 0 { if overflow != 0 {
return fmt.Errorf("integer overflow in section %d", i) return fmt.Errorf("integer overflow in section %d", i)
} }
if sh.Off < ehsize { if sh.Off < ehLen {
return fmt.Errorf("section %d overlaps with file header", i) return fmt.Errorf("section %d overlaps with file header", i)
} }
if isOverlap(eh.Phoff, uint64(eh.Phnum)*phentsize, sh.Off, sh.Size) { if isOverlap(eh.Phoff, uint64(eh.Phnum)*phEntLen, sh.Off, sh.Size) {
return fmt.Errorf("section %d overlaps with program header", i) return fmt.Errorf("section %d overlaps with program header", i)
} }
if isOverlap(eh.Shoff, uint64(eh.Shnum)*shentsize, sh.Off, sh.Size) { if isOverlap(eh.Shoff, uint64(eh.Shnum)*shEntLen, sh.Off, sh.Size) {
return fmt.Errorf("section %d overlaps with section header", i) return fmt.Errorf("section %d overlaps with section header", i)
} }
@ -206,14 +235,14 @@ func (l *loader) readSectionHeaderTable() error {
} }
// Remember section header string table. // Remember section header string table.
if eh.Shstrndx != uint16(elf.SHN_UNDEF) && eh.Shstrndx == i { if eh.Shstrndx != uint16(elf.SHN_UNDEF) && uint32(eh.Shstrndx) == i {
l.elf.ShShstrtab = sh l.shShstrtab = sh
} }
sectionDataOff = shend sectionDataOff = shend
} }
// TODO validate offset and size (?) // TODO validate offset and size (?)
if elf.SectionType(l.elf.ShShstrtab.Type) != elf.SHT_STRTAB { if elf.SectionType(l.shShstrtab.Type) != elf.SHT_STRTAB {
return fmt.Errorf("invalid .shstrtab") return fmt.Errorf("invalid .shstrtab")
} }
return iter.Err() return iter.Err()
@ -244,7 +273,7 @@ func (l *loader) getString(strtab *elf.Section64, stroff uint32, maxLen uint16)
// Iterate sections and remember special sections by name. // Iterate sections and remember special sections by name.
func (l *loader) parseSections() error { func (l *loader) parseSections() error {
shShstrtab := &l.elf.ShShstrtab shShstrtab := &l.shShstrtab
iter := l.newShTableIter() iter := l.newShTableIter()
for iter.Next() && iter.Err() == nil { for iter.Next() && iter.Err() == nil {
sh := iter.Item() sh := iter.Item()
@ -264,11 +293,11 @@ func (l *loader) parseSections() error {
} }
switch sectionName { switch sectionName {
case ".symtab": case ".symtab":
err = setSection(&l.elf.ShSymtab) err = setSection(&l.shSymtab)
case ".strtab": case ".strtab":
err = setSection(&l.elf.ShStrtab) err = setSection(&l.shStrtab)
case ".dynstr": case ".dynstr":
err = setSection(&l.elf.ShDynstr) err = setSection(&l.shDynstr)
} }
if err != nil { if err != nil {
return err return err
@ -277,20 +306,194 @@ func (l *loader) parseSections() error {
return iter.Err() return iter.Err()
} }
// newDynamicIter returns an iterator over the entries of the dynamic table.
//
// The table location is taken from the PT_DYNAMIC program header remembered in
// l.phDynamic if there is one, otherwise from the SHT_DYNAMIC section header
// remembered in l.shDynamic. If neither is set, it returns (nil, nil) so the
// caller can treat the file as having no dynamic table.
//
// Returns an error if the table size is not a multiple of the entry size, or
// if the table does not lie entirely within the file.
func (l *loader) newDynamicIter() (*tableIter[elf.Dyn64], error) {
	var off, size uint64
	switch {
	case l.phDynamic != nil:
		off, size = l.phDynamic.Off, l.phDynamic.Filesz
	case l.shDynamic != nil:
		off, size = l.shDynamic.Off, l.shDynamic.Size
	default:
		return nil, nil
	}

	if size%dynLen != 0 {
		return nil, fmt.Errorf("odd .dynamic size")
	}
	// Reject tables that wrap around uint64 or extend past the end of the file.
	end := off + size
	if end < off || end > l.fileSize {
		return nil, io.ErrUnexpectedEOF
	}

	// The number of entries follows from the table size, not from its offset.
	// size is bounded by l.fileSize at this point.
	iter := newTableIterator[elf.Dyn64](l, off, uint32(size/dynLen), dynLen)
	return iter, nil
}
// parseDynamicTable reads the dynamic table and stores each entry's value in
// l.dynamic, indexed by the entry's tag.
//
// Iteration stops at the first DT_NULL entry. Entries whose tag cannot be used
// as an index into l.dynamic (negative, or beyond its length) are skipped.
// If a tag appears more than once, the value of the last occurrence is kept.
//
// Returns nil without touching l.dynamic if the file has no dynamic table.
func (l *loader) parseDynamicTable() error {
	iter, err := l.newDynamicIter()
	if err != nil {
		return err
	}
	if iter == nil {
		// static file, nothing to do
		return nil
	}

	for iter.Next() && iter.Err() == nil {
		dyn := iter.Item()
		if dyn.Tag == int64(elf.DT_NULL) {
			break
		}
		// Tag is a signed 64-bit value read from the file: guard both ends,
		// otherwise a negative tag would panic when used as an array index.
		if dyn.Tag < 0 || dyn.Tag >= int64(len(l.dynamic)) {
			continue
		}
		l.dynamic[dyn.Tag] = dyn.Val
	}
	return iter.Err()
}
// sectionAt scans the section header table for the first section whose
// Addr field equals vaddr.
//
// Returns (nil, nil) if no section matches, or (nil, err) if reading the
// section header table failed.
func (l *loader) sectionAt(vaddr uint64) (*elf.Section64, error) {
	sections := l.newShTableIter()
	for {
		if !sections.Next() || sections.Err() != nil {
			break
		}
		if candidate := sections.Item(); candidate.Addr == vaddr {
			return &candidate, nil
		}
	}
	return nil, sections.Err()
}
// segmentByVaddr scans the program header table for the first segment whose
// virtual address range [Vaddr, Vaddr+Memsz) contains vaddr.
//
// Returns (nil, nil) if no segment contains vaddr. Fails if a segment visited
// before a match has an address range that overflows uint64, or if reading
// the program header table failed.
func (l *loader) segmentByVaddr(vaddr uint64) (*elf.Prog64, error) {
	segments := l.newPhTableIter()
	for {
		if !segments.Next() || segments.Err() != nil {
			break
		}
		seg := segments.Item()
		segEnd := seg.Vaddr + seg.Memsz
		if segEnd < seg.Vaddr {
			// Addition wrapped around.
			return nil, fmt.Errorf("segment ends past math.MaxUint64")
		}
		if vaddr >= seg.Vaddr && vaddr < segEnd {
			return &seg, nil
		}
	}
	return nil, segments.Err()
}
// parseRelocs locates the relocation table described by the DT_REL, DT_RELSZ
// and DT_RELENT dynamic entries and stores an iterator over it in l.relocsIter.
//
// DT_REL is treated as a virtual address and translated to a file offset:
//   - if a program segment contains the address, the offset is the address
//     relative to the segment start plus the segment's file offset;
//   - otherwise the file offset of the section starting at that address is used.
//
// Returns nil without setting l.relocsIter if DT_REL is zero. Returns an error
// if DT_RELENT or DT_RELSZ are invalid, if the address cannot be mapped to a
// file offset, or if the resulting table fails the bounds checks.
func (l *loader) parseRelocs() error {
	vaddr := l.dynamic[elf.DT_REL]
	if vaddr == 0 {
		return nil
	}

	if l.dynamic[elf.DT_RELENT] != relLen {
		return fmt.Errorf("invalid DT_RELENT")
	}

	size := l.dynamic[elf.DT_RELSZ]
	if size == 0 || size%relLen != 0 || size > math.MaxUint32 {
		return fmt.Errorf("invalid DT_RELSZ")
	}

	ph, err := l.segmentByVaddr(vaddr)
	if err != nil {
		return err
	}

	var offset uint64
	if ph != nil {
		// Translate vaddr to a file offset: (vaddr - segment vaddr) + segment file offset.
		rel, borrow := bits.Sub64(vaddr, ph.Vaddr, 0)
		if borrow != 0 {
			return fmt.Errorf("offset underflow")
		}
		sum, carry := bits.Add64(rel, ph.Off, 0)
		if carry != 0 {
			return fmt.Errorf("offset overflow")
		}
		offset = sum
	} else {
		// Handle invalid dynamic sections where DT_REL is not in any program segment.
		sh, err := l.sectionAt(vaddr)
		if err != nil {
			return err
		}
		if sh == nil {
			return fmt.Errorf("cannot find physical address of relocation table")
		}
		offset = sh.Off
	}

	// A wrapped offset+size yields end < start, which the checked constructor rejects.
	l.relocsIter, err = newTableIteratorChecked[elf.Rel64](l, offset, offset+size, relLen)
	return err
}
// getSymtab builds an iterator over the symbol entries stored in section sh.
//
// Only sections of type SHT_SYMTAB or SHT_DYNSYM are accepted; any other type
// yields an error. Bounds and alignment of the table are validated by
// newTableIteratorChecked.
func (l *loader) getSymtab(sh *elf.Section64) (*tableIter[elf.Sym64], error) {
	kind := elf.SectionType(sh.Type)
	if kind != elf.SHT_SYMTAB && kind != elf.SHT_DYNSYM {
		return nil, fmt.Errorf("not a symtab section")
	}
	begin := sh.Off
	end := begin + sh.Size
	return newTableIteratorChecked[elf.Sym64](l, begin, end, symLen)
}
// parseDynSymtab resolves the DT_SYMTAB dynamic entry to a section and stores
// an iterator over that section's symbols in l.dynSymIter.
//
// Does nothing if DT_SYMTAB is zero. Fails if no section starts at the
// DT_SYMTAB address or if that section is not a valid symbol table.
func (l *loader) parseDynSymtab() error {
	symtabAddr := l.dynamic[elf.DT_SYMTAB]
	if symtabAddr == 0 {
		return nil
	}

	sh, err := l.sectionAt(symtabAddr)
	switch {
	case err != nil:
		return err
	case sh == nil:
		return fmt.Errorf("cannot find DT_SYMTAB section")
	}

	symbols, err := l.getSymtab(sh)
	l.dynSymIter = symbols
	return err
}
// parseDynamic runs the dynamic-linking related parsing steps in order:
// the dynamic table, then the relocation table, then the dynamic symbol table.
// It stops at, and returns, the first error encountered.
func (l *loader) parseDynamic() error {
	err := l.parseDynamicTable()
	if err == nil {
		err = l.parseRelocs()
	}
	if err == nil {
		err = l.parseDynSymtab()
	}
	return err
}
// tableIter is a memory-efficient iterator over densely packed tables of statically sized items. // tableIter is a memory-efficient iterator over densely packed tables of statically sized items.
// Such as the ELF program header and section header tables. // Such as the ELF program header and section header tables.
type tableIter[T any] struct { type tableIter[T any] struct {
l *loader l *loader
off uint64 off uint64
i uint16 // one ahead i uint32 // one ahead
count uint16 count uint32
elemSize uint16 elemSize uint16
elem T elem T
err error err error
} }
// newTableIteratorChecked builds a tableIter over the file range [start, end)
// after validating that range.
//
// It fails with io.ErrUnexpectedEOF if the range is inverted, extends past the
// end of the file, or is larger than math.MaxInt32 bytes, and with a
// "misaligned table" error if the range length is not a multiple of elemSize.
func newTableIteratorChecked[T any](l *loader, start uint64, end uint64, elemSize uint16) (*tableIter[T], error) {
	if start > end || l.fileSize < end {
		return nil, io.ErrUnexpectedEOF
	}

	span := end - start
	stride := uint64(elemSize)
	switch {
	case span%stride != 0:
		return nil, fmt.Errorf("misaligned table")
	case span > math.MaxInt32:
		return nil, io.ErrUnexpectedEOF
	}

	return newTableIterator[T](l, start, uint32(span/stride), elemSize), nil
}
// newTableIterator creates a new tableIter at `off` for `count` elements of `elemSize` len. // newTableIterator creates a new tableIter at `off` for `count` elements of `elemSize` len.
func newTableIterator[T any](l *loader, off uint64, count uint16, elemSize uint16) *tableIter[T] { func newTableIterator[T any](l *loader, off uint64, count uint32, elemSize uint16) *tableIter[T] {
return &tableIter[T]{ return &tableIter[T]{
l: l, l: l,
off: off, off: off,
@ -312,7 +515,7 @@ func (it *tableIter[T]) Next() (ok bool) {
} }
// Index returns the current table index. // Index returns the current table index.
func (it *tableIter[T]) Index() uint16 { func (it *tableIter[T]) Index() uint32 {
return it.i - 1 return it.i - 1
} }

View File

@ -15,83 +15,126 @@ var (
) )
func TestLoadProgram_Noop(t *testing.T) { func TestLoadProgram_Noop(t *testing.T) {
exe, err := LoadProgram(soNoop) loader, err := newLoader(soNoop)
require.NoError(t, err) require.NoError(t, err)
assert.Equal(t, &Executable{
Header: elf.Header64{ err = loader.parse()
Ident: [16]byte{
0x7f, 0x45, 0x4c, 0x46, assert.Equal(t, elf.Header64{
0x02, 0x01, 0x01, 0x00, Ident: [16]byte{
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x7f, 0x45, 0x4c, 0x46,
}, 0x02, 0x01, 0x01, 0x00,
Type: uint16(elf.ET_DYN), 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
Machine: uint16(elf.EM_BPF),
Version: uint32(elf.EV_CURRENT),
Entry: 4096,
Phoff: 64,
Shoff: 8792,
Flags: 0,
Ehsize: 64,
Phentsize: 56,
Phnum: 7,
Shentsize: 64,
Shnum: 13,
Shstrndx: 11,
}, },
Load: elf.Prog64{ Type: uint16(elf.ET_DYN),
Type: uint32(elf.PT_LOAD), Machine: uint16(elf.EM_BPF),
Flags: 06, Version: uint32(elf.EV_CURRENT),
Off: 8192, Entry: 4096,
Vaddr: 8192, Phoff: 64,
Paddr: 8192, Shoff: 8792,
Filesz: 208, Flags: 0,
Memsz: 208, Ehsize: 64,
Align: 4096, Phentsize: 56,
}, Phnum: 7,
ShShstrtab: elf.Section64{ Shentsize: 64,
Name: 82, Shnum: 13,
Type: uint32(elf.SHT_STRTAB), Shstrndx: 11,
Flags: 0, }, loader.eh)
Addr: 0,
Off: 8648, assert.Equal(t, elf.Prog64{
Size: 100, Type: uint32(elf.PT_LOAD),
Addralign: 1, Flags: 06,
}, Off: 8192,
ShSymtab: &elf.Section64{ Vaddr: 8192,
Name: 74, Paddr: 8192,
Type: uint32(elf.SHT_SYMTAB), Filesz: 208,
Flags: 0, Memsz: 208,
Addr: 0, Align: 4096,
Off: 8504, }, loader.phLoad)
Size: 144,
Link: 12, assert.Equal(t, elf.Section64{
Info: 3, Name: 82,
Addralign: 8, Type: uint32(elf.SHT_STRTAB),
Entsize: 24, Flags: 0,
}, Addr: 0,
ShStrtab: &elf.Section64{ Off: 8648,
Name: 92, Size: 100,
Type: uint32(elf.SHT_STRTAB), Addralign: 1,
Flags: 0, }, loader.shShstrtab)
Addr: 0,
Off: 8748, assert.Equal(t, &elf.Section64{
Size: 39, Name: 74,
Link: 0, Type: uint32(elf.SHT_SYMTAB),
Info: 0, Flags: 0,
Addralign: 1, Addr: 0,
Entsize: 0, Off: 8504,
}, Size: 144,
ShDynstr: &elf.Section64{ Link: 12,
Name: 25, Info: 3,
Type: uint32(elf.SHT_STRTAB), Addralign: 8,
Flags: uint64(elf.DF_SYMBOLIC), Entsize: 24,
Addr: 624, }, loader.shSymtab)
Off: 624,
Size: 23, assert.Equal(t, &elf.Section64{
Link: 0, Name: 92,
Info: 0, Type: uint32(elf.SHT_STRTAB),
Addralign: 1, Flags: 0,
Entsize: 0, Addr: 0,
}, Off: 8748,
}, exe) Size: 39,
Link: 0,
Info: 0,
Addralign: 1,
Entsize: 0,
}, loader.shStrtab)
assert.Equal(t, &elf.Section64{
Name: 25,
Type: uint32(elf.SHT_STRTAB),
Flags: uint64(elf.DF_SYMBOLIC),
Addr: 624,
Off: 624,
Size: 23,
Link: 0,
Info: 0,
Addralign: 1,
Entsize: 0,
}, loader.shDynstr)
assert.Equal(t, &elf.Prog64{
Type: uint32(elf.PT_DYNAMIC),
Flags: uint32(elf.DF_TEXTREL | elf.DF_SYMBOLIC),
Off: 8192,
Vaddr: 8192,
Paddr: 8192,
Filesz: 208,
Memsz: 208,
Align: 8,
}, loader.phDynamic)
assert.Equal(t, &elf.Section64{
Name: 56,
Type: uint32(elf.SHT_DYNAMIC),
Flags: 3,
Addr: 8192,
Off: 8192,
Size: 208,
Link: 4,
Info: 0,
Addralign: 8,
Entsize: 16,
}, loader.shDynamic)
var dynamic [DT_NUM]uint64
dynamic[elf.DT_HASH] = 0x248
dynamic[elf.DT_STRTAB] = 0x270
dynamic[elf.DT_SYMTAB] = 0x1c8
dynamic[elf.DT_STRSZ] = 0x17
dynamic[elf.DT_SYMENT] = 0x18
dynamic[elf.DT_REL] = 0x288
dynamic[elf.DT_RELSZ] = 0x30
dynamic[elf.DT_RELENT] = 0x10
dynamic[elf.DT_FLAGS] = 0x04
assert.Equal(t, dynamic, loader.dynamic)
} }