summaryrefslogtreecommitdiff
path: root/vendor/github.com/cilium/ebpf/link/perf_event.go
diff options
context:
space:
mode:
Diffstat (limited to 'vendor/github.com/cilium/ebpf/link/perf_event.go')
-rw-r--r--vendor/github.com/cilium/ebpf/link/perf_event.go394
1 files changed, 394 insertions, 0 deletions
diff --git a/vendor/github.com/cilium/ebpf/link/perf_event.go b/vendor/github.com/cilium/ebpf/link/perf_event.go
new file mode 100644
index 000000000..0e5bd4791
--- /dev/null
+++ b/vendor/github.com/cilium/ebpf/link/perf_event.go
@@ -0,0 +1,394 @@
+package link
+
+import (
+ "bytes"
+ "errors"
+ "fmt"
+ "os"
+ "path/filepath"
+ "runtime"
+ "strconv"
+ "strings"
+ "unsafe"
+
+ "github.com/cilium/ebpf"
+ "github.com/cilium/ebpf/asm"
+ "github.com/cilium/ebpf/internal"
+ "github.com/cilium/ebpf/internal/sys"
+ "github.com/cilium/ebpf/internal/unix"
+)
+
+// Getting the terminology right is usually the hardest part. For posterity and
+// for staying sane during implementation:
+//
+// - trace event: Representation of a kernel runtime hook. Filesystem entries
+// under <tracefs>/events. Can be tracepoints (static), kprobes or uprobes.
+// Can be instantiated into perf events (see below).
+// - tracepoint: A predetermined hook point in the kernel. Exposed as trace
+// events in (sub)directories under <tracefs>/events. Cannot be closed or
+// removed, they are static.
+// - k(ret)probe: Ephemeral trace events based on entry or exit points of
+// exported kernel symbols. kprobe-based (tracefs) trace events can be
+// created system-wide by writing to the <tracefs>/kprobe_events file, or
+// they can be scoped to the current process by creating PMU perf events.
+// - u(ret)probe: Ephemeral trace events based on user provides ELF binaries
+// and offsets. uprobe-based (tracefs) trace events can be
+// created system-wide by writing to the <tracefs>/uprobe_events file, or
+// they can be scoped to the current process by creating PMU perf events.
+// - perf event: An object instantiated based on an existing trace event or
+// kernel symbol. Referred to by fd in userspace.
+// Exactly one eBPF program can be attached to a perf event. Multiple perf
+// events can be created from a single trace event. Closing a perf event
+// stops any further invocations of the attached eBPF program.
+
+var (
+ tracefsPath = "/sys/kernel/debug/tracing"
+
+ errInvalidInput = errors.New("invalid input")
+)
+
+const (
+ perfAllThreads = -1
+)
+
+type perfEventType uint8
+
+const (
+ tracepointEvent perfEventType = iota
+ kprobeEvent
+ kretprobeEvent
+ uprobeEvent
+ uretprobeEvent
+)
+
+// A perfEvent represents a perf event kernel object. Exactly one eBPF program
+// can be attached to it. It is created based on a tracefs trace event or a
+// Performance Monitoring Unit (PMU).
+type perfEvent struct {
+ // The event type determines the types of programs that can be attached.
+ typ perfEventType
+
+ // Group and name of the tracepoint/kprobe/uprobe.
+ group string
+ name string
+
+ // PMU event ID read from sysfs. Valid IDs are non-zero.
+ pmuID uint64
+ // ID of the trace event read from tracefs. Valid IDs are non-zero.
+ tracefsID uint64
+
+ // User provided arbitrary value.
+ cookie uint64
+
+ // This is the perf event FD.
+ fd *sys.FD
+}
+
+func (pe *perfEvent) Close() error {
+ if err := pe.fd.Close(); err != nil {
+ return fmt.Errorf("closing perf event fd: %w", err)
+ }
+
+ switch pe.typ {
+ case kprobeEvent, kretprobeEvent:
+ // Clean up kprobe tracefs entry.
+ if pe.tracefsID != 0 {
+ return closeTraceFSProbeEvent(kprobeType, pe.group, pe.name)
+ }
+ case uprobeEvent, uretprobeEvent:
+ // Clean up uprobe tracefs entry.
+ if pe.tracefsID != 0 {
+ return closeTraceFSProbeEvent(uprobeType, pe.group, pe.name)
+ }
+ case tracepointEvent:
+ // Tracepoint trace events don't hold any extra resources.
+ return nil
+ }
+
+ return nil
+}
+
+// perfEventLink represents a bpf perf link.
+type perfEventLink struct {
+ RawLink
+ pe *perfEvent
+}
+
+func (pl *perfEventLink) isLink() {}
+
+// Pinning requires the underlying perf event FD to stay open.
+//
+// | PerfEvent FD | BpfLink FD | Works |
+// |--------------|------------|-------|
+// | Open | Open | Yes |
+// | Closed | Open | No |
+// | Open | Closed | No (Pin() -> EINVAL) |
+// | Closed | Closed | No (Pin() -> EINVAL) |
+//
+// There is currently no pretty way to recover the perf event FD
+// when loading a pinned link, so leave as not supported for now.
+func (pl *perfEventLink) Pin(string) error {
+ return fmt.Errorf("perf event link pin: %w", ErrNotSupported)
+}
+
+func (pl *perfEventLink) Unpin() error {
+ return fmt.Errorf("perf event link unpin: %w", ErrNotSupported)
+}
+
+func (pl *perfEventLink) Close() error {
+ if err := pl.pe.Close(); err != nil {
+ return fmt.Errorf("perf event link close: %w", err)
+ }
+ return pl.fd.Close()
+}
+
+func (pl *perfEventLink) Update(prog *ebpf.Program) error {
+ return fmt.Errorf("perf event link update: %w", ErrNotSupported)
+}
+
+// perfEventIoctl implements Link and handles the perf event lifecycle
+// via ioctl().
+type perfEventIoctl struct {
+ *perfEvent
+}
+
+func (pi *perfEventIoctl) isLink() {}
+
+// Since 4.15 (e87c6bc3852b "bpf: permit multiple bpf attachments for a single perf event"),
+// calling PERF_EVENT_IOC_SET_BPF appends the given program to a prog_array
+// owned by the perf event, which means multiple programs can be attached
+// simultaneously.
+//
+// Before 4.15, calling PERF_EVENT_IOC_SET_BPF more than once on a perf event
+// returns EEXIST.
+//
+// Detaching a program from a perf event is currently not possible, so a
+// program replacement mechanism cannot be implemented for perf events.
+func (pi *perfEventIoctl) Update(prog *ebpf.Program) error {
+ return fmt.Errorf("perf event ioctl update: %w", ErrNotSupported)
+}
+
+func (pi *perfEventIoctl) Pin(string) error {
+ return fmt.Errorf("perf event ioctl pin: %w", ErrNotSupported)
+}
+
+func (pi *perfEventIoctl) Unpin() error {
+ return fmt.Errorf("perf event ioctl unpin: %w", ErrNotSupported)
+}
+
+func (pi *perfEventIoctl) Info() (*Info, error) {
+ return nil, fmt.Errorf("perf event ioctl info: %w", ErrNotSupported)
+}
+
+// attach the given eBPF prog to the perf event stored in pe.
+// pe must contain a valid perf event fd.
+// prog's type must match the program type stored in pe.
+func attachPerfEvent(pe *perfEvent, prog *ebpf.Program) (Link, error) {
+ if prog == nil {
+ return nil, errors.New("cannot attach a nil program")
+ }
+ if prog.FD() < 0 {
+ return nil, fmt.Errorf("invalid program: %w", sys.ErrClosedFd)
+ }
+
+ switch pe.typ {
+ case kprobeEvent, kretprobeEvent, uprobeEvent, uretprobeEvent:
+ if t := prog.Type(); t != ebpf.Kprobe {
+ return nil, fmt.Errorf("invalid program type (expected %s): %s", ebpf.Kprobe, t)
+ }
+ case tracepointEvent:
+ if t := prog.Type(); t != ebpf.TracePoint {
+ return nil, fmt.Errorf("invalid program type (expected %s): %s", ebpf.TracePoint, t)
+ }
+ default:
+ return nil, fmt.Errorf("unknown perf event type: %d", pe.typ)
+ }
+
+ if err := haveBPFLinkPerfEvent(); err == nil {
+ return attachPerfEventLink(pe, prog)
+ }
+ return attachPerfEventIoctl(pe, prog)
+}
+
+func attachPerfEventIoctl(pe *perfEvent, prog *ebpf.Program) (*perfEventIoctl, error) {
+ if pe.cookie != 0 {
+ return nil, fmt.Errorf("cookies are not supported: %w", ErrNotSupported)
+ }
+
+ // Assign the eBPF program to the perf event.
+ err := unix.IoctlSetInt(pe.fd.Int(), unix.PERF_EVENT_IOC_SET_BPF, prog.FD())
+ if err != nil {
+ return nil, fmt.Errorf("setting perf event bpf program: %w", err)
+ }
+
+ // PERF_EVENT_IOC_ENABLE and _DISABLE ignore their given values.
+ if err := unix.IoctlSetInt(pe.fd.Int(), unix.PERF_EVENT_IOC_ENABLE, 0); err != nil {
+ return nil, fmt.Errorf("enable perf event: %s", err)
+ }
+
+ pi := &perfEventIoctl{pe}
+
+ // Close the perf event when its reference is lost to avoid leaking system resources.
+ runtime.SetFinalizer(pi, (*perfEventIoctl).Close)
+ return pi, nil
+}
+
+// Use the bpf api to attach the perf event (BPF_LINK_TYPE_PERF_EVENT, 5.15+).
+//
+// https://github.com/torvalds/linux/commit/b89fbfbb854c9afc3047e8273cc3a694650b802e
+func attachPerfEventLink(pe *perfEvent, prog *ebpf.Program) (*perfEventLink, error) {
+ fd, err := sys.LinkCreatePerfEvent(&sys.LinkCreatePerfEventAttr{
+ ProgFd: uint32(prog.FD()),
+ TargetFd: pe.fd.Uint(),
+ AttachType: sys.BPF_PERF_EVENT,
+ BpfCookie: pe.cookie,
+ })
+ if err != nil {
+ return nil, fmt.Errorf("cannot create bpf perf link: %v", err)
+ }
+
+ pl := &perfEventLink{RawLink{fd: fd}, pe}
+
+ // Close the perf event when its reference is lost to avoid leaking system resources.
+ runtime.SetFinalizer(pl, (*perfEventLink).Close)
+ return pl, nil
+}
+
+// unsafeStringPtr returns an unsafe.Pointer to a NUL-terminated copy of str.
+func unsafeStringPtr(str string) (unsafe.Pointer, error) {
+ p, err := unix.BytePtrFromString(str)
+ if err != nil {
+ return nil, err
+ }
+ return unsafe.Pointer(p), nil
+}
+
+// getTraceEventID reads a trace event's ID from tracefs given its group and name.
+// The kernel requires group and name to be alphanumeric or underscore.
+//
+// name automatically has its invalid symbols converted to underscores so the caller
+// can pass a raw symbol name, e.g. a kernel symbol containing dots.
+func getTraceEventID(group, name string) (uint64, error) {
+ name = sanitizeSymbol(name)
+ tid, err := uint64FromFile(tracefsPath, "events", group, name, "id")
+ if errors.Is(err, os.ErrNotExist) {
+ return 0, fmt.Errorf("trace event %s/%s: %w", group, name, os.ErrNotExist)
+ }
+ if err != nil {
+ return 0, fmt.Errorf("reading trace event ID of %s/%s: %w", group, name, err)
+ }
+
+ return tid, nil
+}
+
+// getPMUEventType reads a Performance Monitoring Unit's type (numeric identifier)
+// from /sys/bus/event_source/devices/<pmu>/type.
+//
+// Returns ErrNotSupported if the pmu type is not supported.
+func getPMUEventType(typ probeType) (uint64, error) {
+ et, err := uint64FromFile("/sys/bus/event_source/devices", typ.String(), "type")
+ if errors.Is(err, os.ErrNotExist) {
+ return 0, fmt.Errorf("pmu type %s: %w", typ, ErrNotSupported)
+ }
+ if err != nil {
+ return 0, fmt.Errorf("reading pmu type %s: %w", typ, err)
+ }
+
+ return et, nil
+}
+
+// openTracepointPerfEvent opens a tracepoint-type perf event. System-wide
+// [k,u]probes created by writing to <tracefs>/[k,u]probe_events are tracepoints
+// behind the scenes, and can be attached to using these perf events.
+func openTracepointPerfEvent(tid uint64, pid int) (*sys.FD, error) {
+ attr := unix.PerfEventAttr{
+ Type: unix.PERF_TYPE_TRACEPOINT,
+ Config: tid,
+ Sample_type: unix.PERF_SAMPLE_RAW,
+ Sample: 1,
+ Wakeup: 1,
+ }
+
+ fd, err := unix.PerfEventOpen(&attr, pid, 0, -1, unix.PERF_FLAG_FD_CLOEXEC)
+ if err != nil {
+ return nil, fmt.Errorf("opening tracepoint perf event: %w", err)
+ }
+
+ return sys.NewFD(fd)
+}
+
+// uint64FromFile reads a uint64 from a file. All elements of path are sanitized
+// and joined onto base. Returns error if base no longer prefixes the path after
+// joining all components.
+func uint64FromFile(base string, path ...string) (uint64, error) {
+ l := filepath.Join(path...)
+ p := filepath.Join(base, l)
+ if !strings.HasPrefix(p, base) {
+ return 0, fmt.Errorf("path '%s' attempts to escape base path '%s': %w", l, base, errInvalidInput)
+ }
+
+ data, err := os.ReadFile(p)
+ if err != nil {
+ return 0, fmt.Errorf("reading file %s: %w", p, err)
+ }
+
+ et := bytes.TrimSpace(data)
+ return strconv.ParseUint(string(et), 10, 64)
+}
+
+// Probe BPF perf link.
+//
+// https://elixir.bootlin.com/linux/v5.16.8/source/kernel/bpf/syscall.c#L4307
+// https://github.com/torvalds/linux/commit/b89fbfbb854c9afc3047e8273cc3a694650b802e
+var haveBPFLinkPerfEvent = internal.FeatureTest("bpf_link_perf_event", "5.15", func() error {
+ prog, err := ebpf.NewProgram(&ebpf.ProgramSpec{
+ Name: "probe_bpf_perf_link",
+ Type: ebpf.Kprobe,
+ Instructions: asm.Instructions{
+ asm.Mov.Imm(asm.R0, 0),
+ asm.Return(),
+ },
+ License: "MIT",
+ })
+ if err != nil {
+ return err
+ }
+ defer prog.Close()
+
+ _, err = sys.LinkCreatePerfEvent(&sys.LinkCreatePerfEventAttr{
+ ProgFd: uint32(prog.FD()),
+ AttachType: sys.BPF_PERF_EVENT,
+ })
+ if errors.Is(err, unix.EINVAL) {
+ return internal.ErrNotSupported
+ }
+ if errors.Is(err, unix.EBADF) {
+ return nil
+ }
+ return err
+})
+
+// isValidTraceID implements the equivalent of a regex match
+// against "^[a-zA-Z_][0-9a-zA-Z_]*$".
+//
+// Trace event groups, names and kernel symbols must adhere to this set
+// of characters. Non-empty, first character must not be a number, all
+// characters must be alphanumeric or underscore.
+func isValidTraceID(s string) bool {
+ if len(s) < 1 {
+ return false
+ }
+ for i, c := range []byte(s) {
+ switch {
+ case c >= 'a' && c <= 'z':
+ case c >= 'A' && c <= 'Z':
+ case c == '_':
+ case i > 0 && c >= '0' && c <= '9':
+
+ default:
+ return false
+ }
+ }
+
+ return true
+}