diff options
Diffstat (limited to 'vendor/github.com/cilium/ebpf/link/kprobe.go')
-rw-r--r-- | vendor/github.com/cilium/ebpf/link/kprobe.go | 568 |
1 files changed, 0 insertions, 568 deletions
diff --git a/vendor/github.com/cilium/ebpf/link/kprobe.go b/vendor/github.com/cilium/ebpf/link/kprobe.go deleted file mode 100644 index fdf622a0c..000000000 --- a/vendor/github.com/cilium/ebpf/link/kprobe.go +++ /dev/null @@ -1,568 +0,0 @@ -package link - -import ( - "bytes" - "crypto/rand" - "errors" - "fmt" - "os" - "path/filepath" - "runtime" - "strings" - "sync" - "syscall" - "unsafe" - - "github.com/cilium/ebpf" - "github.com/cilium/ebpf/internal/sys" - "github.com/cilium/ebpf/internal/unix" -) - -var ( - kprobeEventsPath = filepath.Join(tracefsPath, "kprobe_events") - - kprobeRetprobeBit = struct { - once sync.Once - value uint64 - err error - }{} -) - -type probeType uint8 - -type probeArgs struct { - symbol, group, path string - offset, refCtrOffset, cookie uint64 - pid int - ret bool -} - -// KprobeOptions defines additional parameters that will be used -// when loading Kprobes. -type KprobeOptions struct { - // Arbitrary value that can be fetched from an eBPF program - // via `bpf_get_attach_cookie()`. - // - // Needs kernel 5.15+. - Cookie uint64 - // Offset of the kprobe relative to the traced symbol. - // Can be used to insert kprobes at arbitrary offsets in kernel functions, - // e.g. in places where functions have been inlined. - Offset uint64 -} - -const ( - kprobeType probeType = iota - uprobeType -) - -func (pt probeType) String() string { - if pt == kprobeType { - return "kprobe" - } - return "uprobe" -} - -func (pt probeType) EventsPath() string { - if pt == kprobeType { - return kprobeEventsPath - } - return uprobeEventsPath -} - -func (pt probeType) PerfEventType(ret bool) perfEventType { - if pt == kprobeType { - if ret { - return kretprobeEvent - } - return kprobeEvent - } - if ret { - return uretprobeEvent - } - return uprobeEvent -} - -func (pt probeType) RetprobeBit() (uint64, error) { - if pt == kprobeType { - return kretprobeBit() - } - return uretprobeBit() -} - -// Kprobe attaches the given eBPF program to a perf event that fires when the -// given kernel symbol starts executing. See /proc/kallsyms for available -// symbols. For example, printk(): -// -// kp, err := Kprobe("printk", prog, nil) -// -// Losing the reference to the resulting Link (kp) will close the Kprobe -// and prevent further execution of prog. The Link must be Closed during -// program shutdown to avoid leaking system resources. -func Kprobe(symbol string, prog *ebpf.Program, opts *KprobeOptions) (Link, error) { - k, err := kprobe(symbol, prog, opts, false) - if err != nil { - return nil, err - } - - lnk, err := attachPerfEvent(k, prog) - if err != nil { - k.Close() - return nil, err - } - - return lnk, nil -} - -// Kretprobe attaches the given eBPF program to a perf event that fires right -// before the given kernel symbol exits, with the function stack left intact. -// See /proc/kallsyms for available symbols. For example, printk(): -// -// kp, err := Kretprobe("printk", prog, nil) -// -// Losing the reference to the resulting Link (kp) will close the Kretprobe -// and prevent further execution of prog. The Link must be Closed during -// program shutdown to avoid leaking system resources. -func Kretprobe(symbol string, prog *ebpf.Program, opts *KprobeOptions) (Link, error) { - k, err := kprobe(symbol, prog, opts, true) - if err != nil { - return nil, err - } - - lnk, err := attachPerfEvent(k, prog) - if err != nil { - k.Close() - return nil, err - } - - return lnk, nil -} - -// isValidKprobeSymbol implements the equivalent of a regex match -// against "^[a-zA-Z_][0-9a-zA-Z_.]*$". -func isValidKprobeSymbol(s string) bool { - if len(s) < 1 { - return false - } - - for i, c := range []byte(s) { - switch { - case c >= 'a' && c <= 'z': - case c >= 'A' && c <= 'Z': - case c == '_': - case i > 0 && c >= '0' && c <= '9': - - // Allow `.` in symbol name. GCC-compiled kernel may change symbol name - // to have a `.isra.$n` suffix, like `udp_send_skb.isra.52`. - // See: https://gcc.gnu.org/gcc-10/changes.html - case i > 0 && c == '.': - - default: - return false - } - } - - return true -} - -// kprobe opens a perf event on the given symbol and attaches prog to it. -// If ret is true, create a kretprobe. -func kprobe(symbol string, prog *ebpf.Program, opts *KprobeOptions, ret bool) (*perfEvent, error) { - if symbol == "" { - return nil, fmt.Errorf("symbol name cannot be empty: %w", errInvalidInput) - } - if prog == nil { - return nil, fmt.Errorf("prog cannot be nil: %w", errInvalidInput) - } - if !isValidKprobeSymbol(symbol) { - return nil, fmt.Errorf("symbol '%s' must be a valid symbol in /proc/kallsyms: %w", symbol, errInvalidInput) - } - if prog.Type() != ebpf.Kprobe { - return nil, fmt.Errorf("eBPF program type %s is not a Kprobe: %w", prog.Type(), errInvalidInput) - } - - args := probeArgs{ - pid: perfAllThreads, - symbol: symbol, - ret: ret, - } - - if opts != nil { - args.cookie = opts.Cookie - args.offset = opts.Offset - } - - // Use kprobe PMU if the kernel has it available. - tp, err := pmuKprobe(args) - if errors.Is(err, os.ErrNotExist) { - args.symbol = platformPrefix(symbol) - tp, err = pmuKprobe(args) - } - if err == nil { - return tp, nil - } - if err != nil && !errors.Is(err, ErrNotSupported) { - return nil, fmt.Errorf("creating perf_kprobe PMU: %w", err) - } - - // Use tracefs if kprobe PMU is missing. - args.symbol = symbol - tp, err = tracefsKprobe(args) - if errors.Is(err, os.ErrNotExist) { - args.symbol = platformPrefix(symbol) - tp, err = tracefsKprobe(args) - } - if err != nil { - return nil, fmt.Errorf("creating trace event '%s' in tracefs: %w", symbol, err) - } - - return tp, nil -} - -// pmuKprobe opens a perf event based on the kprobe PMU. -// Returns os.ErrNotExist if the given symbol does not exist in the kernel. -func pmuKprobe(args probeArgs) (*perfEvent, error) { - return pmuProbe(kprobeType, args) -} - -// pmuProbe opens a perf event based on a Performance Monitoring Unit. -// -// Requires at least a 4.17 kernel. -// e12f03d7031a "perf/core: Implement the 'perf_kprobe' PMU" -// 33ea4b24277b "perf/core: Implement the 'perf_uprobe' PMU" -// -// Returns ErrNotSupported if the kernel doesn't support perf_[k,u]probe PMU -func pmuProbe(typ probeType, args probeArgs) (*perfEvent, error) { - // Getting the PMU type will fail if the kernel doesn't support - // the perf_[k,u]probe PMU. - et, err := getPMUEventType(typ) - if err != nil { - return nil, err - } - - var config uint64 - if args.ret { - bit, err := typ.RetprobeBit() - if err != nil { - return nil, err - } - config |= 1 << bit - } - - var ( - attr unix.PerfEventAttr - sp unsafe.Pointer - ) - switch typ { - case kprobeType: - // Create a pointer to a NUL-terminated string for the kernel. - sp, err = unsafeStringPtr(args.symbol) - if err != nil { - return nil, err - } - - attr = unix.PerfEventAttr{ - // The minimum size required for PMU kprobes is PERF_ATTR_SIZE_VER1, - // since it added the config2 (Ext2) field. Use Ext2 as probe_offset. - Size: unix.PERF_ATTR_SIZE_VER1, - Type: uint32(et), // PMU event type read from sysfs - Ext1: uint64(uintptr(sp)), // Kernel symbol to trace - Ext2: args.offset, // Kernel symbol offset - Config: config, // Retprobe flag - } - case uprobeType: - sp, err = unsafeStringPtr(args.path) - if err != nil { - return nil, err - } - - if args.refCtrOffset != 0 { - config |= args.refCtrOffset << uprobeRefCtrOffsetShift - } - - attr = unix.PerfEventAttr{ - // The minimum size required for PMU uprobes is PERF_ATTR_SIZE_VER1, - // since it added the config2 (Ext2) field. The Size field controls the - // size of the internal buffer the kernel allocates for reading the - // perf_event_attr argument from userspace. - Size: unix.PERF_ATTR_SIZE_VER1, - Type: uint32(et), // PMU event type read from sysfs - Ext1: uint64(uintptr(sp)), // Uprobe path - Ext2: args.offset, // Uprobe offset - Config: config, // RefCtrOffset, Retprobe flag - } - } - - rawFd, err := unix.PerfEventOpen(&attr, args.pid, 0, -1, unix.PERF_FLAG_FD_CLOEXEC) - - // On some old kernels, kprobe PMU doesn't allow `.` in symbol names and - // return -EINVAL. Return ErrNotSupported to allow falling back to tracefs. - // https://github.com/torvalds/linux/blob/94710cac0ef4/kernel/trace/trace_kprobe.c#L340-L343 - if errors.Is(err, unix.EINVAL) && strings.Contains(args.symbol, ".") { - return nil, fmt.Errorf("symbol '%s+%#x': older kernels don't accept dots: %w", args.symbol, args.offset, ErrNotSupported) - } - // Since commit 97c753e62e6c, ENOENT is correctly returned instead of EINVAL - // when trying to create a kretprobe for a missing symbol. Make sure ENOENT - // is returned to the caller. - if errors.Is(err, os.ErrNotExist) || errors.Is(err, unix.EINVAL) { - return nil, fmt.Errorf("symbol '%s+%#x' not found: %w", args.symbol, args.offset, os.ErrNotExist) - } - // Since commit ab105a4fb894, -EILSEQ is returned when a kprobe sym+offset is resolved - // to an invalid insn boundary. - if errors.Is(err, syscall.EILSEQ) { - return nil, fmt.Errorf("symbol '%s+%#x' not found (bad insn boundary): %w", args.symbol, args.offset, os.ErrNotExist) - } - // Since at least commit cb9a19fe4aa51, ENOTSUPP is returned - // when attempting to set a uprobe on a trap instruction. - if errors.Is(err, unix.ENOTSUPP) { - return nil, fmt.Errorf("failed setting uprobe on offset %#x (possible trap insn): %w", args.offset, err) - } - if err != nil { - return nil, fmt.Errorf("opening perf event: %w", err) - } - - // Ensure the string pointer is not collected before PerfEventOpen returns. - runtime.KeepAlive(sp) - - fd, err := sys.NewFD(rawFd) - if err != nil { - return nil, err - } - - // Kernel has perf_[k,u]probe PMU available, initialize perf event. - return &perfEvent{ - typ: typ.PerfEventType(args.ret), - name: args.symbol, - pmuID: et, - cookie: args.cookie, - fd: fd, - }, nil -} - -// tracefsKprobe creates a Kprobe tracefs entry. -func tracefsKprobe(args probeArgs) (*perfEvent, error) { - return tracefsProbe(kprobeType, args) -} - -// tracefsProbe creates a trace event by writing an entry to <tracefs>/[k,u]probe_events. -// A new trace event group name is generated on every call to support creating -// multiple trace events for the same kernel or userspace symbol. -// Path and offset are only set in the case of uprobe(s) and are used to set -// the executable/library path on the filesystem and the offset where the probe is inserted. -// A perf event is then opened on the newly-created trace event and returned to the caller. -func tracefsProbe(typ probeType, args probeArgs) (_ *perfEvent, err error) { - // Generate a random string for each trace event we attempt to create. - // This value is used as the 'group' token in tracefs to allow creating - // multiple kprobe trace events with the same name. - group, err := randomGroup("ebpf") - if err != nil { - return nil, fmt.Errorf("randomizing group name: %w", err) - } - args.group = group - - // Before attempting to create a trace event through tracefs, - // check if an event with the same group and name already exists. - // Kernels 4.x and earlier don't return os.ErrExist on writing a duplicate - // entry, so we need to rely on reads for detecting uniqueness. - _, err = getTraceEventID(group, args.symbol) - if err == nil { - return nil, fmt.Errorf("trace event already exists: %s/%s", group, args.symbol) - } - if err != nil && !errors.Is(err, os.ErrNotExist) { - return nil, fmt.Errorf("checking trace event %s/%s: %w", group, args.symbol, err) - } - - // Create the [k,u]probe trace event using tracefs. - if err := createTraceFSProbeEvent(typ, args); err != nil { - return nil, fmt.Errorf("creating probe entry on tracefs: %w", err) - } - defer func() { - if err != nil { - // Make sure we clean up the created tracefs event when we return error. - // If a livepatch handler is already active on the symbol, the write to - // tracefs will succeed, a trace event will show up, but creating the - // perf event will fail with EBUSY. - _ = closeTraceFSProbeEvent(typ, args.group, args.symbol) - } - }() - - // Get the newly-created trace event's id. - tid, err := getTraceEventID(group, args.symbol) - if err != nil { - return nil, fmt.Errorf("getting trace event id: %w", err) - } - - // Kprobes are ephemeral tracepoints and share the same perf event type. - fd, err := openTracepointPerfEvent(tid, args.pid) - if err != nil { - return nil, err - } - - return &perfEvent{ - typ: typ.PerfEventType(args.ret), - group: group, - name: args.symbol, - tracefsID: tid, - cookie: args.cookie, - fd: fd, - }, nil -} - -// createTraceFSProbeEvent creates a new ephemeral trace event by writing to -// <tracefs>/[k,u]probe_events. Returns os.ErrNotExist if symbol is not a valid -// kernel symbol, or if it is not traceable with kprobes. Returns os.ErrExist -// if a probe with the same group and symbol already exists. -func createTraceFSProbeEvent(typ probeType, args probeArgs) error { - // Open the kprobe_events file in tracefs. - f, err := os.OpenFile(typ.EventsPath(), os.O_APPEND|os.O_WRONLY, 0666) - if err != nil { - return fmt.Errorf("error opening '%s': %w", typ.EventsPath(), err) - } - defer f.Close() - - var pe, token string - switch typ { - case kprobeType: - // The kprobe_events syntax is as follows (see Documentation/trace/kprobetrace.txt): - // p[:[GRP/]EVENT] [MOD:]SYM[+offs]|MEMADDR [FETCHARGS] : Set a probe - // r[MAXACTIVE][:[GRP/]EVENT] [MOD:]SYM[+0] [FETCHARGS] : Set a return probe - // -:[GRP/]EVENT : Clear a probe - // - // Some examples: - // r:ebpf_1234/r_my_kretprobe nf_conntrack_destroy - // p:ebpf_5678/p_my_kprobe __x64_sys_execve - // - // Leaving the kretprobe's MAXACTIVE set to 0 (or absent) will make the - // kernel default to NR_CPUS. This is desired in most eBPF cases since - // subsampling or rate limiting logic can be more accurately implemented in - // the eBPF program itself. - // See Documentation/kprobes.txt for more details. - token = kprobeToken(args) - pe = fmt.Sprintf("%s:%s/%s %s", probePrefix(args.ret), args.group, sanitizeSymbol(args.symbol), token) - case uprobeType: - // The uprobe_events syntax is as follows: - // p[:[GRP/]EVENT] PATH:OFFSET [FETCHARGS] : Set a probe - // r[:[GRP/]EVENT] PATH:OFFSET [FETCHARGS] : Set a return probe - // -:[GRP/]EVENT : Clear a probe - // - // Some examples: - // r:ebpf_1234/readline /bin/bash:0x12345 - // p:ebpf_5678/main_mySymbol /bin/mybin:0x12345(0x123) - // - // See Documentation/trace/uprobetracer.txt for more details. - token = uprobeToken(args) - pe = fmt.Sprintf("%s:%s/%s %s", probePrefix(args.ret), args.group, args.symbol, token) - } - _, err = f.WriteString(pe) - // Since commit 97c753e62e6c, ENOENT is correctly returned instead of EINVAL - // when trying to create a kretprobe for a missing symbol. Make sure ENOENT - // is returned to the caller. - // EINVAL is also returned on pre-5.2 kernels when the `SYM[+offs]` token - // is resolved to an invalid insn boundary. - if errors.Is(err, os.ErrNotExist) || errors.Is(err, unix.EINVAL) { - return fmt.Errorf("token %s: %w", token, os.ErrNotExist) - } - // Since commit ab105a4fb894, -EILSEQ is returned when a kprobe sym+offset is resolved - // to an invalid insn boundary. - if errors.Is(err, syscall.EILSEQ) { - return fmt.Errorf("token %s: bad insn boundary: %w", token, os.ErrNotExist) - } - // ERANGE is returned when the `SYM[+offs]` token is too big and cannot - // be resolved. - if errors.Is(err, syscall.ERANGE) { - return fmt.Errorf("token %s: offset too big: %w", token, os.ErrNotExist) - } - if err != nil { - return fmt.Errorf("writing '%s' to '%s': %w", pe, typ.EventsPath(), err) - } - - return nil -} - -// closeTraceFSProbeEvent removes the [k,u]probe with the given type, group and symbol -// from <tracefs>/[k,u]probe_events. -func closeTraceFSProbeEvent(typ probeType, group, symbol string) error { - f, err := os.OpenFile(typ.EventsPath(), os.O_APPEND|os.O_WRONLY, 0666) - if err != nil { - return fmt.Errorf("error opening %s: %w", typ.EventsPath(), err) - } - defer f.Close() - - // See [k,u]probe_events syntax above. The probe type does not need to be specified - // for removals. - pe := fmt.Sprintf("-:%s/%s", group, sanitizeSymbol(symbol)) - if _, err = f.WriteString(pe); err != nil { - return fmt.Errorf("writing '%s' to '%s': %w", pe, typ.EventsPath(), err) - } - - return nil -} - -// randomGroup generates a pseudorandom string for use as a tracefs group name. -// Returns an error when the output string would exceed 63 characters (kernel -// limitation), when rand.Read() fails or when prefix contains characters not -// allowed by isValidTraceID. -func randomGroup(prefix string) (string, error) { - if !isValidTraceID(prefix) { - return "", fmt.Errorf("prefix '%s' must be alphanumeric or underscore: %w", prefix, errInvalidInput) - } - - b := make([]byte, 8) - if _, err := rand.Read(b); err != nil { - return "", fmt.Errorf("reading random bytes: %w", err) - } - - group := fmt.Sprintf("%s_%x", prefix, b) - if len(group) > 63 { - return "", fmt.Errorf("group name '%s' cannot be longer than 63 characters: %w", group, errInvalidInput) - } - - return group, nil -} - -func probePrefix(ret bool) string { - if ret { - return "r" - } - return "p" -} - -// determineRetprobeBit reads a Performance Monitoring Unit's retprobe bit -// from /sys/bus/event_source/devices/<pmu>/format/retprobe. -func determineRetprobeBit(typ probeType) (uint64, error) { - p := filepath.Join("/sys/bus/event_source/devices/", typ.String(), "/format/retprobe") - - data, err := os.ReadFile(p) - if err != nil { - return 0, err - } - - var rp uint64 - n, err := fmt.Sscanf(string(bytes.TrimSpace(data)), "config:%d", &rp) - if err != nil { - return 0, fmt.Errorf("parse retprobe bit: %w", err) - } - if n != 1 { - return 0, fmt.Errorf("parse retprobe bit: expected 1 item, got %d", n) - } - - return rp, nil -} - -func kretprobeBit() (uint64, error) { - kprobeRetprobeBit.once.Do(func() { - kprobeRetprobeBit.value, kprobeRetprobeBit.err = determineRetprobeBit(kprobeType) - }) - return kprobeRetprobeBit.value, kprobeRetprobeBit.err -} - -// kprobeToken creates the SYM[+offs] token for the tracefs api. -func kprobeToken(args probeArgs) string { - po := args.symbol - - if args.offset != 0 { - po += fmt.Sprintf("+%#x", args.offset) - } - - return po -} |