1 files changed, 472 insertions, 236 deletions
diff --git a/vendor/github.com/cilium/ebpf/map.go b/vendor/github.com/cilium/ebpf/map.go
index 316fc37b1..e4a6c87e9 100644
--- a/vendor/github.com/cilium/ebpf/map.go
+++ b/vendor/github.com/cilium/ebpf/map.go
@@ -1,15 +1,19 @@
 package ebpf
 
 import (
+	"bytes"
 	"errors"
 	"fmt"
 	"io"
+	"math/rand"
 	"path/filepath"
 	"reflect"
-	"strings"
+	"time"
+	"unsafe"
 
+	"github.com/cilium/ebpf/btf"
 	"github.com/cilium/ebpf/internal"
-	"github.com/cilium/ebpf/internal/btf"
+	"github.com/cilium/ebpf/internal/sys"
 	"github.com/cilium/ebpf/internal/unix"
 )
 
@@ -18,6 +22,8 @@ var (
 	ErrKeyNotExist      = errors.New("key does not exist")
 	ErrKeyExist         = errors.New("key already exists")
 	ErrIterationAborted = errors.New("iteration aborted")
+	ErrMapIncompatible  = errors.New("map spec is incompatible with existing map")
+	errMapNoBTFValue    = errors.New("map spec does not contain a BTF Value")
 )
 
 // MapOptions control loading a map into the kernel.
@@ -25,7 +31,8 @@ type MapOptions struct {
 	// The base path to pin maps in if requested via PinByName.
 	// Existing maps will be re-used if they are compatible, otherwise an
 	// error is returned.
-	PinPath string
+	PinPath        string
+	LoadPinOptions LoadPinOptions
 }
 
 // MapID represents the unique ID of an eBPF map
@@ -40,7 +47,10 @@ type MapSpec struct {
 	KeySize    uint32
 	ValueSize  uint32
 	MaxEntries uint32
-	Flags      uint32
+
+	// Flags is passed to the kernel and specifies additional map
+	// creation attributes.
+	Flags uint32
 
 	// Automatically pin and load a map from MapOptions.PinPath.
 	// Generates an error if an existing pinned map is incompatible with the MapSpec.
@@ -60,8 +70,16 @@ type MapSpec struct {
 	// InnerMap is used as a template for ArrayOfMaps and HashOfMaps
 	InnerMap *MapSpec
 
+	// Extra trailing bytes found in the ELF map definition when using structs
+	// larger than libbpf's bpf_map_def. nil if no trailing bytes were present.
+	// Must be nil or empty before instantiating the MapSpec into a Map.
+	Extra *bytes.Reader
+
+	// The key and value type of this map. May be nil.
+	Key, Value btf.Type
+
 	// The BTF associated with this map.
-	BTF *btf.Map
+	BTF *btf.Spec
 }
 
 func (ms *MapSpec) String() string {
@@ -77,12 +95,63 @@ func (ms *MapSpec) Copy() *MapSpec {
 	}
 
 	cpy := *ms
+
 	cpy.Contents = make([]MapKV, len(ms.Contents))
 	copy(cpy.Contents, ms.Contents)
+
 	cpy.InnerMap = ms.InnerMap.Copy()
+
 	return &cpy
 }
 
+// hasBTF returns true if the MapSpec has a valid BTF spec and if its
+// map type supports associated BTF metadata in the kernel.
+func (ms *MapSpec) hasBTF() bool {
+	return ms.BTF != nil && ms.Type.hasBTF()
+}
+
+func (ms *MapSpec) clampPerfEventArraySize() error {
+	if ms.Type != PerfEventArray {
+		return nil
+	}
+
+	n, err := internal.PossibleCPUs()
+	if err != nil {
+		return fmt.Errorf("perf event array: %w", err)
+	}
+
+	if n := uint32(n); ms.MaxEntries > n {
+		ms.MaxEntries = n
+	}
+
+	return nil
+}
+
+// dataSection returns the contents and BTF Datasec descriptor of the spec.
+func (ms *MapSpec) dataSection() ([]byte, *btf.Datasec, error) {
+
+	if ms.Value == nil {
+		return nil, nil, errMapNoBTFValue
+	}
+
+	ds, ok := ms.Value.(*btf.Datasec)
+	if !ok {
+		return nil, nil, fmt.Errorf("map value BTF is a %T, not a *btf.Datasec", ms.Value)
+	}
+
+	if n := len(ms.Contents); n != 1 {
+		return nil, nil, fmt.Errorf("expected one key, found %d", n)
+	}
+
+	kv := ms.Contents[0]
+	value, ok := kv.Value.([]byte)
+	if !ok {
+		return nil, nil, fmt.Errorf("value at first map key is %T, not []byte", kv.Value)
+	}
+
+	return value, ds, nil
+}
+
 // MapKV is used to initialize the contents of a Map.
 type MapKV struct {
 	Key   interface{}
@@ -92,19 +161,20 @@ type MapKV struct {
 func (ms *MapSpec) checkCompatibility(m *Map) error {
 	switch {
 	case m.typ != ms.Type:
-		return fmt.Errorf("expected type %v, got %v", ms.Type, m.typ)
+		return fmt.Errorf("expected type %v, got %v: %w", ms.Type, m.typ, ErrMapIncompatible)
 
 	case m.keySize != ms.KeySize:
-		return fmt.Errorf("expected key size %v, got %v", ms.KeySize, m.keySize)
+		return fmt.Errorf("expected key size %v, got %v: %w", ms.KeySize, m.keySize, ErrMapIncompatible)
 
 	case m.valueSize != ms.ValueSize:
-		return fmt.Errorf("expected value size %v, got %v", ms.ValueSize, m.valueSize)
+		return fmt.Errorf("expected value size %v, got %v: %w", ms.ValueSize, m.valueSize, ErrMapIncompatible)
 
-	case m.maxEntries != ms.MaxEntries:
-		return fmt.Errorf("expected max entries %v, got %v", ms.MaxEntries, m.maxEntries)
+	case !(ms.Type == PerfEventArray && ms.MaxEntries == 0) &&
+		m.maxEntries != ms.MaxEntries:
+		return fmt.Errorf("expected max entries %v, got %v: %w", ms.MaxEntries, m.maxEntries, ErrMapIncompatible)
 
 	case m.flags != ms.Flags:
-		return fmt.Errorf("expected flags %v, got %v", ms.Flags, m.flags)
+		return fmt.Errorf("expected flags %v, got %v: %w", ms.Flags, m.flags, ErrMapIncompatible)
 	}
 	return nil
 }
@@ -120,7 +190,7 @@ func (ms *MapSpec) checkCompatibility(m *Map) error {
 // if you require custom encoding.
 type Map struct {
 	name       string
-	fd         *internal.FD
+	fd         *sys.FD
 	typ        MapType
 	keySize    uint32
 	valueSize  uint32
@@ -135,18 +205,19 @@ type Map struct {
 //
 // You should not use fd after calling this function.
 func NewMapFromFD(fd int) (*Map, error) {
-	if fd < 0 {
-		return nil, errors.New("invalid fd")
+	f, err := sys.NewFD(fd)
+	if err != nil {
+		return nil, err
 	}
 
-	return newMapFromFD(internal.NewFD(uint32(fd)))
+	return newMapFromFD(f)
 }
 
-func newMapFromFD(fd *internal.FD) (*Map, error) {
+func newMapFromFD(fd *sys.FD) (*Map, error) {
 	info, err := newMapInfoFromFd(fd)
 	if err != nil {
 		fd.Close()
-		return nil, fmt.Errorf("get map info: %s", err)
+		return nil, fmt.Errorf("get map info: %w", err)
 	}
 
 	return newMap(fd, info.Name, info.Type, info.KeySize, info.ValueSize, info.MaxEntries, info.Flags)
@@ -166,32 +237,55 @@ func NewMap(spec *MapSpec) (*Map, error) {
 //
 // The caller is responsible for ensuring the process' rlimit is set
 // sufficiently high for locking memory during map creation. This can be done
-// by calling unix.Setrlimit with unix.RLIMIT_MEMLOCK prior to calling NewMapWithOptions.
+// by calling rlimit.RemoveMemlock() prior to calling NewMapWithOptions.
+//
+// May return an error wrapping ErrMapIncompatible.
 func NewMapWithOptions(spec *MapSpec, opts MapOptions) (*Map, error) {
-	btfs := make(btfHandleCache)
-	defer btfs.close()
+	handles := newHandleCache()
+	defer handles.close()
+
+	m, err := newMapWithOptions(spec, opts, handles)
+	if err != nil {
+		return nil, fmt.Errorf("creating map: %w", err)
+	}
+
+	if err := m.finalize(spec); err != nil {
+		m.Close()
+		return nil, fmt.Errorf("populating map: %w", err)
+	}
 
-	return newMapWithOptions(spec, opts, btfs)
+	return m, nil
 }
 
-func newMapWithOptions(spec *MapSpec, opts MapOptions, btfs btfHandleCache) (*Map, error) {
+func newMapWithOptions(spec *MapSpec, opts MapOptions, handles *handleCache) (_ *Map, err error) {
+	closeOnError := func(c io.Closer) {
+		if err != nil {
+			c.Close()
+		}
+	}
+
 	switch spec.Pinning {
 	case PinByName:
-		if spec.Name == "" || opts.PinPath == "" {
-			return nil, fmt.Errorf("pin by name: missing Name or PinPath")
+		if spec.Name == "" {
+			return nil, fmt.Errorf("pin by name: missing Name")
+		}
+
+		if opts.PinPath == "" {
+			return nil, fmt.Errorf("pin by name: missing MapOptions.PinPath")
 		}
 
-		m, err := LoadPinnedMap(filepath.Join(opts.PinPath, spec.Name))
+		path := filepath.Join(opts.PinPath, spec.Name)
+		m, err := LoadPinnedMap(path, &opts.LoadPinOptions)
 		if errors.Is(err, unix.ENOENT) {
 			break
 		}
 		if err != nil {
-			return nil, fmt.Errorf("load pinned map: %s", err)
+			return nil, fmt.Errorf("load pinned map: %w", err)
 		}
+		defer closeOnError(m)
 
 		if err := spec.checkCompatibility(m); err != nil {
-			m.Close()
-			return nil, fmt.Errorf("use pinned map %s: %s", spec.Name, err)
+			return nil, fmt.Errorf("use pinned map %s: %w", spec.Name, err)
 		}
 
 		return m, nil
@@ -200,10 +294,10 @@ func newMapWithOptions(spec *MapSpec, opts MapOptions, btfs btfHandleCache) (*Ma
 		// Nothing to do here
 
 	default:
-		return nil, fmt.Errorf("unsupported pin type %d", int(spec.Pinning))
+		return nil, fmt.Errorf("pin type %d: %w", int(spec.Pinning), ErrNotSupported)
 	}
 
-	var innerFd *internal.FD
+	var innerFd *sys.FD
 	if spec.Type == ArrayOfMaps || spec.Type == HashOfMaps {
 		if spec.InnerMap == nil {
 			return nil, fmt.Errorf("%s requires InnerMap", spec.Type)
@@ -213,31 +307,37 @@ func newMapWithOptions(spec *MapSpec, opts MapOptions, btfs btfHandleCache) (*Ma
 			return nil, errors.New("inner maps cannot be pinned")
 		}
 
-		template, err := createMap(spec.InnerMap, nil, opts, btfs)
+		template, err := spec.InnerMap.createMap(nil, opts, handles)
 		if err != nil {
-			return nil, err
+			return nil, fmt.Errorf("inner map: %w", err)
 		}
 		defer template.Close()
 
+		// Intentionally skip populating and freezing (finalizing)
+		// the inner map template since it will be removed shortly.
+
 		innerFd = template.fd
 	}
 
-	m, err := createMap(spec, innerFd, opts, btfs)
+	m, err := spec.createMap(innerFd, opts, handles)
 	if err != nil {
 		return nil, err
 	}
+	defer closeOnError(m)
 
 	if spec.Pinning == PinByName {
-		if err := m.Pin(filepath.Join(opts.PinPath, spec.Name)); err != nil {
-			m.Close()
-			return nil, fmt.Errorf("pin map: %s", err)
+		path := filepath.Join(opts.PinPath, spec.Name)
+		if err := m.Pin(path); err != nil {
+			return nil, fmt.Errorf("pin map: %w", err)
 		}
 	}
 
 	return m, nil
 }
 
-func createMap(spec *MapSpec, inner *internal.FD, opts MapOptions, btfs btfHandleCache) (_ *Map, err error) {
+// createMap validates the spec's properties and creates the map in the kernel
+// using the given opts. It does not populate or freeze the map.
+func (spec *MapSpec) createMap(inner *sys.FD, opts MapOptions, handles *handleCache) (_ *Map, err error) {
 	closeOnError := func(closer io.Closer) {
 		if err != nil {
 			closer.Close()
@@ -246,10 +346,18 @@ func createMap(spec *MapSpec, inner *internal.FD, opts MapOptions, btfs btfHandl
 
 	spec = spec.Copy()
 
+	// Kernels 4.13 through 5.4 used a struct bpf_map_def that contained
+	// additional 'inner_map_idx' and later 'numa_node' fields.
+	// In order to support loading these definitions, tolerate the presence of
+	// extra bytes, but require them to be zeroes.
+	if spec.Extra != nil {
+		if _, err := io.Copy(internal.DiscardZeroes{}, spec.Extra); err != nil {
+			return nil, errors.New("extra contains unhandled non-zero bytes, drain before creating map")
+		}
+	}
+
 	switch spec.Type {
-	case ArrayOfMaps:
-		fallthrough
-	case HashOfMaps:
+	case ArrayOfMaps, HashOfMaps:
 		if err := haveNestedMaps(); err != nil {
 			return nil, err
 		}
@@ -284,51 +392,73 @@ func createMap(spec *MapSpec, inner *internal.FD, opts MapOptions, btfs btfHandl
 			return nil, fmt.Errorf("map create: %w", err)
 		}
 	}
+	if spec.Flags&unix.BPF_F_MMAPABLE > 0 {
+		if err := haveMmapableMaps(); err != nil {
+			return nil, fmt.Errorf("map create: %w", err)
+		}
+	}
+	if spec.Flags&unix.BPF_F_INNER_MAP > 0 {
+		if err := haveInnerMaps(); err != nil {
+			return nil, fmt.Errorf("map create: %w", err)
+		}
+	}
+	if spec.Flags&unix.BPF_F_NO_PREALLOC > 0 {
+		if err := haveNoPreallocMaps(); err != nil {
+			return nil, fmt.Errorf("map create: %w", err)
+		}
+	}
 
-	attr := bpfMapCreateAttr{
-		mapType:    spec.Type,
-		keySize:    spec.KeySize,
-		valueSize:  spec.ValueSize,
-		maxEntries: spec.MaxEntries,
-		flags:      spec.Flags,
-		numaNode:   spec.NumaNode,
+	attr := sys.MapCreateAttr{
+		MapType:    sys.MapType(spec.Type),
+		KeySize:    spec.KeySize,
+		ValueSize:  spec.ValueSize,
+		MaxEntries: spec.MaxEntries,
+		MapFlags:   spec.Flags,
+		NumaNode:   spec.NumaNode,
 	}
 
 	if inner != nil {
-		var err error
-		attr.innerMapFd, err = inner.Value()
-		if err != nil {
-			return nil, fmt.Errorf("map create: %w", err)
-		}
+		attr.InnerMapFd = inner.Uint()
 	}
 
 	if haveObjName() == nil {
-		attr.mapName = newBPFObjName(spec.Name)
+		attr.MapName = sys.NewObjName(spec.Name)
 	}
 
-	var btfDisabled bool
-	if spec.BTF != nil {
-		handle, err := btfs.load(btf.MapSpec(spec.BTF))
-		btfDisabled = errors.Is(err, btf.ErrNotSupported)
-		if err != nil && !btfDisabled {
+	if spec.hasBTF() {
+		handle, err := handles.btfHandle(spec.BTF)
+		if err != nil && !errors.Is(err, btf.ErrNotSupported) {
 			return nil, fmt.Errorf("load BTF: %w", err)
 		}
 
 		if handle != nil {
-			attr.btfFd = uint32(handle.FD())
-			attr.btfKeyTypeID = btf.MapKey(spec.BTF).ID()
-			attr.btfValueTypeID = btf.MapValue(spec.BTF).ID()
+			keyTypeID, err := spec.BTF.TypeID(spec.Key)
+			if err != nil {
+				return nil, err
+			}
+
+			valueTypeID, err := spec.BTF.TypeID(spec.Value)
+			if err != nil {
+				return nil, err
+			}
+
+			attr.BtfFd = uint32(handle.FD())
+			attr.BtfKeyTypeId = uint32(keyTypeID)
+			attr.BtfValueTypeId = uint32(valueTypeID)
 		}
 	}
 
-	fd, err := bpfMapCreate(&attr)
+	fd, err := sys.MapCreate(&attr)
 	if err != nil {
 		if errors.Is(err, unix.EPERM) {
-			return nil, fmt.Errorf("map create: RLIMIT_MEMLOCK may be too low: %w", err)
+			return nil, fmt.Errorf("map create: %w (MEMLOCK may be too low, consider rlimit.RemoveMemlock)", err)
 		}
-		if btfDisabled {
+		if !spec.hasBTF() {
 			return nil, fmt.Errorf("map create without BTF: %w", err)
 		}
+		if errors.Is(err, unix.EINVAL) && attr.MaxEntries == 0 {
+			return nil, fmt.Errorf("map create: %w (MaxEntries may be incorrectly set to zero)", err)
+		}
 		return nil, fmt.Errorf("map create: %w", err)
 	}
 	defer closeOnError(fd)
@@ -338,20 +468,12 @@ func createMap(spec *MapSpec, inner *internal.FD, opts MapOptions, btfs btfHandl
 		return nil, fmt.Errorf("map create: %w", err)
 	}
 
-	if err := m.populate(spec.Contents); err != nil {
-		return nil, fmt.Errorf("map create: can't set initial contents: %w", err)
-	}
-
-	if spec.Freeze {
-		if err := m.Freeze(); err != nil {
-			return nil, fmt.Errorf("can't freeze map: %w", err)
-		}
-	}
-
 	return m, nil
 }
 
-func newMap(fd *internal.FD, name string, typ MapType, keySize, valueSize, maxEntries, flags uint32) (*Map, error) {
+// newMap allocates and returns a new Map structure.
+// Sets the fullValueSize on per-CPU maps.
+func newMap(fd *sys.FD, name string, typ MapType, keySize, valueSize, maxEntries, flags uint32) (*Map, error) {
 	m := &Map{
 		name,
 		fd,
@@ -373,7 +495,7 @@ func newMap(fd *internal.FD, name string, typ MapType, keySize, valueSize, maxEn
 		return nil, err
 	}
 
-	m.fullValueSize = align(int(valueSize), 8) * possibleCPUs
+	m.fullValueSize = internal.Align(int(valueSize), 8) * possibleCPUs
 	return m, nil
 }
 
@@ -414,6 +536,12 @@ func (m *Map) Info() (*MapInfo, error) {
 	return newMapInfoFromFd(m.fd)
 }
 
+// MapLookupFlags controls the behaviour of the map lookup calls.
+type MapLookupFlags uint64
+
+// LookupLock look up the value of a spin-locked map.
+const LookupLock MapLookupFlags = 4
+
 // Lookup retrieves a value from a Map.
 //
 // Calls Close() on valueOut if it is of type **Map or **Program,
@@ -422,39 +550,58 @@ func (m *Map) Info() (*MapInfo, error) {
 // Returns an error if the key doesn't exist, see ErrKeyNotExist.
 func (m *Map) Lookup(key, valueOut interface{}) error {
 	valuePtr, valueBytes := makeBuffer(valueOut, m.fullValueSize)
-	if err := m.lookup(key, valuePtr); err != nil {
+	if err := m.lookup(key, valuePtr, 0); err != nil {
 		return err
 	}
 
 	return m.unmarshalValue(valueOut, valueBytes)
 }
 
-// LookupAndDelete retrieves and deletes a value from a Map.
+// LookupWithFlags retrieves a value from a Map with flags.
 //
-// Returns ErrKeyNotExist if the key doesn't exist.
-func (m *Map) LookupAndDelete(key, valueOut interface{}) error {
+// Passing LookupLock flag will look up the value of a spin-locked
+// map without returning the lock. This must be specified if the
+// elements contain a spinlock.
+//
+// Calls Close() on valueOut if it is of type **Map or **Program,
+// and *valueOut is not nil.
+//
+// Returns an error if the key doesn't exist, see ErrKeyNotExist.
+func (m *Map) LookupWithFlags(key, valueOut interface{}, flags MapLookupFlags) error {
 	valuePtr, valueBytes := makeBuffer(valueOut, m.fullValueSize)
-
-	keyPtr, err := m.marshalKey(key)
-	if err != nil {
-		return fmt.Errorf("can't marshal key: %w", err)
-	}
-
-	if err := bpfMapLookupAndDelete(m.fd, keyPtr, valuePtr); err != nil {
-		return fmt.Errorf("lookup and delete failed: %w", err)
+	if err := m.lookup(key, valuePtr, flags); err != nil {
+		return err
 	}
 
 	return m.unmarshalValue(valueOut, valueBytes)
 }
 
+// LookupAndDelete retrieves and deletes a value from a Map.
+//
+// Returns ErrKeyNotExist if the key doesn't exist.
+func (m *Map) LookupAndDelete(key, valueOut interface{}) error {
+	return m.lookupAndDelete(key, valueOut, 0)
+}
+
+// LookupAndDeleteWithFlags retrieves and deletes a value from a Map.
+//
+// Passing LookupLock flag will look up and delete the value of a spin-locked
+// map without returning the lock. This must be specified if the elements
+// contain a spinlock.
+//
+// Returns ErrKeyNotExist if the key doesn't exist.
+func (m *Map) LookupAndDeleteWithFlags(key, valueOut interface{}, flags MapLookupFlags) error {
+	return m.lookupAndDelete(key, valueOut, flags)
+}
+
 // LookupBytes gets a value from Map.
 //
 // Returns a nil value if a key doesn't exist.
 func (m *Map) LookupBytes(key interface{}) ([]byte, error) {
 	valueBytes := make([]byte, m.fullValueSize)
-	valuePtr := internal.NewSlicePointer(valueBytes)
+	valuePtr := sys.NewSlicePointer(valueBytes)
 
-	err := m.lookup(key, valuePtr)
+	err := m.lookup(key, valuePtr, 0)
 	if errors.Is(err, ErrKeyNotExist) {
 		return nil, nil
 	}
@@ -462,18 +609,47 @@ func (m *Map) LookupBytes(key interface{}) ([]byte, error) {
 	return valueBytes, err
 }
 
-func (m *Map) lookup(key interface{}, valueOut internal.Pointer) error {
+func (m *Map) lookup(key interface{}, valueOut sys.Pointer, flags MapLookupFlags) error {
 	keyPtr, err := m.marshalKey(key)
 	if err != nil {
 		return fmt.Errorf("can't marshal key: %w", err)
 	}
 
-	if err = bpfMapLookupElem(m.fd, keyPtr, valueOut); err != nil {
-		return fmt.Errorf("lookup failed: %w", err)
+	attr := sys.MapLookupElemAttr{
+		MapFd: m.fd.Uint(),
+		Key:   keyPtr,
+		Value: valueOut,
+		Flags: uint64(flags),
+	}
+
+	if err = sys.MapLookupElem(&attr); err != nil {
+		return fmt.Errorf("lookup: %w", wrapMapError(err))
 	}
 	return nil
 }
 
+func (m *Map) lookupAndDelete(key, valueOut interface{}, flags MapLookupFlags) error {
+	valuePtr, valueBytes := makeBuffer(valueOut, m.fullValueSize)
+
+	keyPtr, err := m.marshalKey(key)
+	if err != nil {
+		return fmt.Errorf("can't marshal key: %w", err)
+	}
+
+	attr := sys.MapLookupAndDeleteElemAttr{
+		MapFd: m.fd.Uint(),
+		Key:   keyPtr,
+		Value: valuePtr,
+		Flags: uint64(flags),
+	}
+
+	if err := sys.MapLookupAndDeleteElem(&attr); err != nil {
+		return fmt.Errorf("lookup and delete: %w", wrapMapError(err))
+	}
+
+	return m.unmarshalValue(valueOut, valueBytes)
+}
+
 // MapUpdateFlags controls the behaviour of the Map.Update call.
 //
 // The exact semantics depend on the specific MapType.
@@ -486,6 +662,8 @@ const (
 	UpdateNoExist MapUpdateFlags = 1 << (iota - 1)
 	// UpdateExist updates an existing element.
 	UpdateExist
+	// UpdateLock updates elements under bpf_spin_lock.
+	UpdateLock
 )
 
 // Put replaces or creates a value in map.
@@ -507,8 +685,15 @@ func (m *Map) Update(key, value interface{}, flags MapUpdateFlags) error {
 		return fmt.Errorf("can't marshal value: %w", err)
 	}
 
-	if err = bpfMapUpdateElem(m.fd, keyPtr, valuePtr, uint64(flags)); err != nil {
-		return fmt.Errorf("update failed: %w", err)
+	attr := sys.MapUpdateElemAttr{
+		MapFd: m.fd.Uint(),
+		Key:   keyPtr,
+		Value: valuePtr,
+		Flags: uint64(flags),
+	}
+
+	if err = sys.MapUpdateElem(&attr); err != nil {
+		return fmt.Errorf("update: %w", wrapMapError(err))
 	}
 
 	return nil
@@ -523,8 +708,13 @@ func (m *Map) Delete(key interface{}) error {
 		return fmt.Errorf("can't marshal key: %w", err)
 	}
 
-	if err = bpfMapDeleteElem(m.fd, keyPtr); err != nil {
-		return fmt.Errorf("delete failed: %w", err)
+	attr := sys.MapDeleteElemAttr{
+		MapFd: m.fd.Uint(),
+		Key:   keyPtr,
+	}
+
+	if err = sys.MapDeleteElem(&attr); err != nil {
+		return fmt.Errorf("delete: %w", wrapMapError(err))
 	}
 	return nil
 }
@@ -556,7 +746,7 @@ func (m *Map) NextKey(key, nextKeyOut interface{}) error {
 // Returns nil if there are no more keys.
 func (m *Map) NextKeyBytes(key interface{}) ([]byte, error) {
 	nextKey := make([]byte, m.keySize)
-	nextKeyPtr := internal.NewSlicePointer(nextKey)
+	nextKeyPtr := sys.NewSlicePointer(nextKey)
 
 	err := m.nextKey(key, nextKeyPtr)
 	if errors.Is(err, ErrKeyNotExist) {
@@ -566,9 +756,9 @@ func (m *Map) NextKeyBytes(key interface{}) ([]byte, error) {
 	return nextKey, err
 }
 
-func (m *Map) nextKey(key interface{}, nextKeyOut internal.Pointer) error {
+func (m *Map) nextKey(key interface{}, nextKeyOut sys.Pointer) error {
 	var (
-		keyPtr internal.Pointer
+		keyPtr sys.Pointer
 		err    error
 	)
 
@@ -579,12 +769,77 @@ func (m *Map) nextKey(key interface{}, nextKeyOut internal.Pointer) error {
 		}
 	}
 
-	if err = bpfMapGetNextKey(m.fd, keyPtr, nextKeyOut); err != nil {
-		return fmt.Errorf("next key failed: %w", err)
+	attr := sys.MapGetNextKeyAttr{
+		MapFd:   m.fd.Uint(),
+		Key:     keyPtr,
+		NextKey: nextKeyOut,
+	}
+
+	if err = sys.MapGetNextKey(&attr); err != nil {
+		// Kernels 4.4.131 and earlier return EFAULT instead of a pointer to the
+		// first map element when a nil key pointer is specified.
+		if key == nil && errors.Is(err, unix.EFAULT) {
+			var guessKey []byte
+			guessKey, err = m.guessNonExistentKey()
+			if err != nil {
+				return err
+			}
+
+			// Retry the syscall with a valid non-existing key.
+			attr.Key = sys.NewSlicePointer(guessKey)
+			if err = sys.MapGetNextKey(&attr); err == nil {
+				return nil
+			}
+		}
+
+		return fmt.Errorf("next key: %w", wrapMapError(err))
 	}
+
 	return nil
 }
 
+// guessNonExistentKey attempts to perform a map lookup that returns ENOENT.
+// This is necessary on kernels before 4.4.132, since those don't support
+// iterating maps from the start by providing an invalid key pointer.
+func (m *Map) guessNonExistentKey() ([]byte, error) {
+	// Provide an invalid value pointer to prevent a copy on the kernel side.
+	valuePtr := sys.NewPointer(unsafe.Pointer(^uintptr(0)))
+	randKey := make([]byte, int(m.keySize))
+
+	for i := 0; i < 4; i++ {
+		switch i {
+		// For hash maps, the 0 key is less likely to be occupied. They're often
+		// used for storing data related to pointers, and their access pattern is
+		// generally scattered across the keyspace.
+		case 0:
+		// An all-0xff key is guaranteed to be out of bounds of any array, since
+		// those have a fixed key size of 4 bytes. The only corner case being
+		// arrays with 2^32 max entries, but those are prohibitively expensive
+		// in many environments.
+		case 1:
+			for r := range randKey {
+				randKey[r] = 0xff
+			}
+		// Inspired by BCC, 0x55 is an alternating binary pattern (0101), so
+		// is unlikely to be taken.
+		case 2:
+			for r := range randKey {
+				randKey[r] = 0x55
+			}
+		// Last ditch effort, generate a random key.
+		case 3:
+			rand.New(rand.NewSource(time.Now().UnixNano())).Read(randKey)
+		}
+
+		err := m.lookup(randKey, valuePtr, 0)
+		if errors.Is(err, ErrKeyNotExist) {
+			return randKey, nil
+		}
+	}
+
+	return nil, errors.New("couldn't find non-existing key")
+}
+
 // BatchLookup looks up many elements in a map at once.
 //
 // "keysOut" and "valuesOut" must be of type slice, a pointer
@@ -596,7 +851,7 @@ func (m *Map) nextKey(key interface{}, nextKeyOut internal.Pointer) error {
 // the end of all possible results, even when partial results
 // are returned. It should be used to evaluate when lookup is "done".
 func (m *Map) BatchLookup(prevKey, nextKeyOut, keysOut, valuesOut interface{}, opts *BatchOptions) (int, error) {
-	return m.batchLookup(internal.BPF_MAP_LOOKUP_BATCH, prevKey, nextKeyOut, keysOut, valuesOut, opts)
+	return m.batchLookup(sys.BPF_MAP_LOOKUP_BATCH, prevKey, nextKeyOut, keysOut, valuesOut, opts)
 }
 
 // BatchLookupAndDelete looks up many elements in a map at once,
@@ -611,10 +866,10 @@ func (m *Map) BatchLookup(prevKey, nextKeyOut, keysOut, valuesOut interface{}, o
 // the end of all possible results, even when partial results
 // are returned. It should be used to evaluate when lookup is "done".
 func (m *Map) BatchLookupAndDelete(prevKey, nextKeyOut, keysOut, valuesOut interface{}, opts *BatchOptions) (int, error) {
-	return m.batchLookup(internal.BPF_MAP_LOOKUP_AND_DELETE_BATCH, prevKey, nextKeyOut, keysOut, valuesOut, opts)
+	return m.batchLookup(sys.BPF_MAP_LOOKUP_AND_DELETE_BATCH, prevKey, nextKeyOut, keysOut, valuesOut, opts)
 }
 
-func (m *Map) batchLookup(cmd internal.BPFCmd, startKey, nextKeyOut, keysOut, valuesOut interface{}, opts *BatchOptions) (int, error) {
+func (m *Map) batchLookup(cmd sys.Cmd, startKey, nextKeyOut, keysOut, valuesOut interface{}, opts *BatchOptions) (int, error) {
 	if err := haveBatchAPI(); err != nil {
 		return 0, err
 	}
@@ -634,29 +889,36 @@ func (m *Map) batchLookup(cmd internal.BPFCmd, startKey, nextKeyOut, keysOut, va
 		return 0, fmt.Errorf("keysOut and valuesOut must be the same length")
 	}
 	keyBuf := make([]byte, count*int(m.keySize))
-	keyPtr := internal.NewSlicePointer(keyBuf)
+	keyPtr := sys.NewSlicePointer(keyBuf)
 	valueBuf := make([]byte, count*int(m.fullValueSize))
-	valuePtr := internal.NewSlicePointer(valueBuf)
+	valuePtr := sys.NewSlicePointer(valueBuf)
+	nextPtr, nextBuf := makeBuffer(nextKeyOut, int(m.keySize))
 
-	var (
-		startPtr internal.Pointer
-		err      error
-		retErr   error
-	)
+	attr := sys.MapLookupBatchAttr{
+		MapFd:    m.fd.Uint(),
+		Keys:     keyPtr,
+		Values:   valuePtr,
+		Count:    uint32(count),
+		OutBatch: nextPtr,
+	}
+
+	if opts != nil {
+		attr.ElemFlags = opts.ElemFlags
+		attr.Flags = opts.Flags
+	}
+
+	var err error
 	if startKey != nil {
-		startPtr, err = marshalPtr(startKey, int(m.keySize))
+		attr.InBatch, err = marshalPtr(startKey, int(m.keySize))
 		if err != nil {
 			return 0, err
 		}
 	}
-	nextPtr, nextBuf := makeBuffer(nextKeyOut, int(m.keySize))
 
-	ct, err := bpfMapBatch(cmd, m.fd, startPtr, nextPtr, keyPtr, valuePtr, uint32(count), opts)
-	if err != nil {
-		if !errors.Is(err, ErrKeyNotExist) {
-			return 0, err
-		}
-		retErr = ErrKeyNotExist
+	_, sysErr := sys.BPF(cmd, unsafe.Pointer(&attr), unsafe.Sizeof(attr))
+	sysErr = wrapMapError(sysErr)
+	if sysErr != nil && !errors.Is(sysErr, unix.ENOENT) {
+		return 0, sysErr
 	}
 
 	err = m.unmarshalKey(nextKeyOut, nextBuf)
@@ -669,9 +931,10 @@ func (m *Map) batchLookup(cmd internal.BPFCmd, startKey, nextKeyOut, keysOut, va
 	}
 	err = unmarshalBytes(valuesOut, valueBuf)
 	if err != nil {
-		retErr = err
+		return 0, err
 	}
-	return int(ct), retErr
+
+	return int(attr.Count), sysErr
 }
 
 // BatchUpdate updates the map with multiple keys and values
@@ -695,7 +958,7 @@ func (m *Map) BatchUpdate(keys, values interface{}, opts *BatchOptions) (int, er
 	}
 	var (
 		count    = keysValue.Len()
-		valuePtr internal.Pointer
+		valuePtr sys.Pointer
 		err      error
 	)
 	if count != valuesValue.Len() {
@@ -709,9 +972,24 @@ func (m *Map) BatchUpdate(keys, values interface{}, opts *BatchOptions) (int, er
 	if err != nil {
 		return 0, err
 	}
-	var nilPtr internal.Pointer
-	ct, err := bpfMapBatch(internal.BPF_MAP_UPDATE_BATCH, m.fd, nilPtr, nilPtr, keyPtr, valuePtr, uint32(count), opts)
-	return int(ct), err
+
+	attr := sys.MapUpdateBatchAttr{
+		MapFd:  m.fd.Uint(),
+		Keys:   keyPtr,
+		Values: valuePtr,
+		Count:  uint32(count),
+	}
+	if opts != nil {
+		attr.ElemFlags = opts.ElemFlags
+		attr.Flags = opts.Flags
+	}
+
+	err = sys.MapUpdateBatch(&attr)
+	if err != nil {
+		return int(attr.Count), fmt.Errorf("batch update: %w", wrapMapError(err))
+	}
+
+	return int(attr.Count), nil
 }
 
 // BatchDelete batch deletes entries in the map by keys.
@@ -732,9 +1010,23 @@ func (m *Map) BatchDelete(keys interface{}, opts *BatchOptions) (int, error) {
 	if err != nil {
 		return 0, fmt.Errorf("cannot marshal keys: %v", err)
 	}
-	var nilPtr internal.Pointer
-	ct, err := bpfMapBatch(internal.BPF_MAP_DELETE_BATCH, m.fd, nilPtr, nilPtr, keyPtr, nilPtr, uint32(count), opts)
-	return int(ct), err
+
+	attr := sys.MapDeleteBatchAttr{
+		MapFd: m.fd.Uint(),
+		Keys:  keyPtr,
+		Count: uint32(count),
+	}
+
+	if opts != nil {
+		attr.ElemFlags = opts.ElemFlags
+		attr.Flags = opts.Flags
+	}
+
+	if err = sys.MapDeleteBatch(&attr); err != nil {
+		return int(attr.Count), fmt.Errorf("batch delete: %w", wrapMapError(err))
+	}
+
+	return int(attr.Count), nil
 }
 
 // Iterate traverses a map.
@@ -747,7 +1039,8 @@ func (m *Map) Iterate() *MapIterator {
 	return newMapIterator(m)
 }
 
-// Close removes a Map
+// Close the Map's underlying file descriptor, which could unload the
+// Map from the kernel if it is not pinned or in use by a loaded Program.
 func (m *Map) Close() error {
 	if m == nil {
 		// This makes it easier to clean up when iterating maps
@@ -762,14 +1055,7 @@ func (m *Map) Close() error {
 //
 // Calling this function is invalid after Close has been called.
 func (m *Map) FD() int {
-	fd, err := m.fd.Value()
-	if err != nil {
-		// Best effort: -1 is the number most likely to be an
-		// invalid file descriptor.
-		return -1
-	}
-
-	return int(fd)
+	return m.fd.Int()
 }
 
 // Clone creates a duplicate of the Map.
@@ -805,12 +1091,13 @@ func (m *Map) Clone() (*Map, error) {
 // Pin persists the map on the BPF virtual file system past the lifetime of
 // the process that created it .
 //
-// Calling Pin on a previously pinned map will override the path.
+// Calling Pin on a previously pinned map will overwrite the path, except when
+// the new path already exists. Re-pinning across filesystems is not supported.
 // You can Clone a map to pin it to a different path.
 //
 // This requires bpffs to be mounted above fileName. See https://docs.cilium.io/en/k8s-doc/admin/#admin-mount-bpffs
 func (m *Map) Pin(fileName string) error {
-	if err := pin(m.pinnedPath, fileName, m.fd); err != nil {
+	if err := internal.Pin(m.pinnedPath, fileName, m.fd); err != nil {
 		return err
 	}
 	m.pinnedPath = fileName
@@ -823,7 +1110,7 @@ func (m *Map) Pin(fileName string) error {
 //
 // Unpinning an unpinned Map returns nil.
 func (m *Map) Unpin() error {
-	if err := unpin(m.pinnedPath); err != nil {
+	if err := internal.Unpin(m.pinnedPath); err != nil {
 		return err
 	}
 	m.pinnedPath = ""
@@ -832,10 +1119,7 @@ func (m *Map) Unpin() error {
 
 // IsPinned returns true if the map has a non-empty pinned path.
 func (m *Map) IsPinned() bool {
-	if m.pinnedPath == "" {
-		return false
-	}
-	return true
+	return m.pinnedPath != ""
 }
 
 // Freeze prevents a map to be modified from user space.
@@ -846,28 +1130,41 @@ func (m *Map) Freeze() error {
 		return fmt.Errorf("can't freeze map: %w", err)
 	}
 
-	if err := bpfMapFreeze(m.fd); err != nil {
+	attr := sys.MapFreezeAttr{
+		MapFd: m.fd.Uint(),
+	}
+
+	if err := sys.MapFreeze(&attr); err != nil {
 		return fmt.Errorf("can't freeze map: %w", err)
 	}
 	return nil
 }
 
-func (m *Map) populate(contents []MapKV) error {
-	for _, kv := range contents {
+// finalize populates the Map according to the Contents specified
+// in spec and freezes the Map if requested by spec.
+func (m *Map) finalize(spec *MapSpec) error {
+	for _, kv := range spec.Contents {
 		if err := m.Put(kv.Key, kv.Value); err != nil {
-			return fmt.Errorf("key %v: %w", kv.Key, err)
+			return fmt.Errorf("putting value: key %v: %w", kv.Key, err)
+		}
+	}
+
+	if spec.Freeze {
+		if err := m.Freeze(); err != nil {
+			return fmt.Errorf("freezing map: %w", err)
 		}
 	}
+
 	return nil
 }
 
-func (m *Map) marshalKey(data interface{}) (internal.Pointer, error) {
+func (m *Map) marshalKey(data interface{}) (sys.Pointer, error) {
 	if data == nil {
 		if m.keySize == 0 {
 			// Queues have a key length of zero, so passing nil here is valid.
-			return internal.NewPointer(nil), nil
+			return sys.NewPointer(nil), nil
 		}
-		return internal.Pointer{}, errors.New("can't use nil as key of map")
+		return sys.Pointer{}, errors.New("can't use nil as key of map")
 	}
 
 	return marshalPtr(data, int(m.keySize))
@@ -882,7 +1179,7 @@ func (m *Map) unmarshalKey(data interface{}, buf []byte) error {
 	return unmarshalBytes(data, buf)
 }
 
-func (m *Map) marshalValue(data interface{}) (internal.Pointer, error) {
+func (m *Map) marshalValue(data interface{}) (sys.Pointer, error) {
 	if m.typ.hasPerCPUValue() {
 		return marshalPerCPUValue(data, int(m.valueSize))
 	}
@@ -895,13 +1192,13 @@ func (m *Map) marshalValue(data interface{}) (internal.Pointer, error) {
 	switch value := data.(type) {
 	case *Map:
 		if !m.typ.canStoreMap() {
-			return internal.Pointer{}, fmt.Errorf("can't store map in %s", m.typ)
+			return sys.Pointer{}, fmt.Errorf("can't store map in %s", m.typ)
 		}
 		buf, err = marshalMap(value, int(m.valueSize))
 
 	case *Program:
 		if !m.typ.canStoreProgram() {
-			return internal.Pointer{}, fmt.Errorf("can't store program in %s", m.typ)
+			return sys.Pointer{}, fmt.Errorf("can't store program in %s", m.typ)
 		}
 		buf, err = marshalProgram(value, int(m.valueSize))
 
@@ -910,10 +1207,10 @@ func (m *Map) marshalValue(data interface{}) (internal.Pointer, error) {
 	}
 
 	if err != nil {
-		return internal.Pointer{}, err
+		return sys.Pointer{}, err
 	}
 
-	return internal.NewSlicePointer(buf), nil
+	return sys.NewSlicePointer(buf), nil
 }
 
 func (m *Map) unmarshalValue(value interface{}, buf []byte) error {
@@ -937,7 +1234,9 @@ func (m *Map) unmarshalValue(value interface{}, buf []byte) error {
 			return err
 		}
 
-		(*value).Close()
+		// The caller might close the map externally, so ignore errors.
+		_ = (*value).Close()
+
 		*value = other
 		return nil
 
@@ -957,7 +1256,9 @@ func (m *Map) unmarshalValue(value interface{}, buf []byte) error {
 			return err
 		}
 
-		(*value).Close()
+		// The caller might close the program externally, so ignore errors.
+		_ = (*value).Close()
+
 		*value = other
 		return nil
 
@@ -971,9 +1272,12 @@ func (m *Map) unmarshalValue(value interface{}, buf []byte) error {
 	return unmarshalBytes(value, buf)
 }
 
-// LoadPinnedMap load a Map from a BPF file.
-func LoadPinnedMap(fileName string) (*Map, error) {
-	fd, err := internal.BPFObjGet(fileName)
+// LoadPinnedMap loads a Map from a BPF file.
+func LoadPinnedMap(fileName string, opts *LoadPinOptions) (*Map, error) {
+	fd, err := sys.ObjGet(&sys.ObjGetAttr{
+		Pathname:  sys.NewStringPointer(fileName),
+		FileFlags: opts.Marshal(),
+	})
 	if err != nil {
 		return nil, err
 	}
@@ -1002,70 +1306,11 @@ func marshalMap(m *Map, length int) ([]byte, error) {
 		return nil, fmt.Errorf("can't marshal map to %d bytes", length)
 	}
 
-	fd, err := m.fd.Value()
-	if err != nil {
-		return nil, err
-	}
-
 	buf := make([]byte, 4)
-	internal.NativeEndian.PutUint32(buf, fd)
+	internal.NativeEndian.PutUint32(buf, m.fd.Uint())
 	return buf, nil
 }
 
-func patchValue(value []byte, typ btf.Type, replacements map[string]interface{}) error {
-	replaced := make(map[string]bool)
-	replace := func(name string, offset, size int, replacement interface{}) error {
-		if offset+size > len(value) {
-			return fmt.Errorf("%s: offset %d(+%d) is out of bounds", name, offset, size)
-		}
-
-		buf, err := marshalBytes(replacement, size)
-		if err != nil {
-			return fmt.Errorf("marshal %s: %w", name, err)
-		}
-
-		copy(value[offset:offset+size], buf)
-		replaced[name] = true
-		return nil
-	}
-
-	switch parent := typ.(type) {
-	case *btf.Datasec:
-		for _, secinfo := range parent.Vars {
-			name := string(secinfo.Type.(*btf.Var).Name)
-			replacement, ok := replacements[name]
-			if !ok {
-				continue
-			}
-
-			err := replace(name, int(secinfo.Offset), int(secinfo.Size), replacement)
-			if err != nil {
-				return err
-			}
-		}
-
-	default:
-		return fmt.Errorf("patching %T is not supported", typ)
-	}
-
-	if len(replaced) == len(replacements) {
-		return nil
-	}
-
-	var missing []string
-	for name := range replacements {
-		if !replaced[name] {
-			missing = append(missing, name)
-		}
-	}
-
-	if len(missing) == 1 {
-		return fmt.Errorf("unknown field: %s", missing[0])
-	}
-
-	return fmt.Errorf("unknown fields: %s", strings.Join(missing, ","))
-}
-
 // MapIterator iterates a Map.
 //
 // See Map.Iterate.
@@ -1160,29 +1405,20 @@ func (mi *MapIterator) Err() error {
 //
 // Returns ErrNotExist, if there is no next eBPF map.
 func MapGetNextID(startID MapID) (MapID, error) {
-	id, err := objGetNextID(internal.BPF_MAP_GET_NEXT_ID, uint32(startID))
-	return MapID(id), err
+	attr := &sys.MapGetNextIdAttr{Id: uint32(startID)}
+	return MapID(attr.NextId), sys.MapGetNextId(attr)
 }
 
 // NewMapFromID returns the map for a given id.
 //
 // Returns ErrNotExist, if there is no eBPF map with the given id.
 func NewMapFromID(id MapID) (*Map, error) {
-	fd, err := bpfObjGetFDByID(internal.BPF_MAP_GET_FD_BY_ID, uint32(id))
+	fd, err := sys.MapGetFdById(&sys.MapGetFdByIdAttr{
+		Id: uint32(id),
+	})
 	if err != nil {
 		return nil, err
 	}
 
 	return newMapFromFD(fd)
 }
-
-// ID returns the systemwide unique ID of the map.
-//
-// Deprecated: use MapInfo.ID() instead.
-func (m *Map) ID() (MapID, error) {
-	info, err := bpfGetMapInfoByFD(m.fd)
-	if err != nil {
-		return MapID(0), err
-	}
-	return MapID(info.id), nil
-}