summaryrefslogtreecommitdiff
path: root/compat/linux/procinfo.c
blob: bc2f9382a17f8f63a783680770a46358964e8f81 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
#include "cache.h"

#include "strbuf.h"
#include "strvec.h"
#include "trace2.h"

/*
 * We need more complex parsing in stat_parent_pid() and
 * parse_proc_stat() below than a dumb fscanf(). That's because while
 * the statcomm field is surrounded by parentheses, the process itself
 * is free to insert any arbitrary byte sequence its its name. That
 * can include newlines, spaces, closing parentheses etc.
 *
 * See do_task_stat() in fs/proc/array.c in linux.git, this is in
 * contrast with the escaped version of the name found in
 * /proc/%d/status.
 *
 * So instead of using fscanf() we'll read N bytes from it, look for
 * the first "(", and then the last ")", anything in-between is our
 * process name.
 *
 * How much N do we need? On Linux /proc/sys/kernel/pid_max is 2^15 by
 * default, but it can be raised set to values of up to 2^22. So
 * that's 7 digits for a PID. We have 2 PIDs in the first four fields
 * we're interested in, so 2 * 7 = 14.
 *
 * We then have 3 spaces between those four values, and we'd like to
 * get to the space between the 4th and the 5th (the "pgrp" field) to
 * make sure we read the entire "ppid" field. So that brings us up to
 * 14 + 3 + 1 = 18. Add the two parentheses around the "comm" value
 * and it's 20. The "state" value itself is then one character (now at
 * 21).
 *
 * Finally the maximum length of the "comm" name itself is 15
 * characters, e.g. a setting of "123456789abcdefg" will be truncated
 * to "123456789abcdef". See PR_SET_NAME in prctl(2). So all in all
 * we'd need to read 21 + 15 = 36 bytes.
 *
 * Let's just read 2^6 (64) instead for good measure. If PID_MAX ever
 * grows past 2^22 we'll be future-proof. We'll then anchor at the
 * last ")" we find to locate the parent PID.
 */
#define STAT_PARENT_PID_READ_N 64

static int parse_proc_stat(struct strbuf *sb, struct strbuf *name,
			    int *statppid)
{
	const char *comm_lhs = strchr(sb->buf, '(');
	const char *comm_rhs = strrchr(sb->buf, ')');
	const char *ppid_lhs, *ppid_rhs;
	char *p;
	pid_t ppid;

	if (!comm_lhs || !comm_rhs)
		goto bad_kernel;

	/*
	 * We're at the ")", that's followed by " X ", where X is a
	 * single "state" character. So advance by 4 bytes.
	 */
	ppid_lhs = comm_rhs + 4;

	/*
	 * Read until the space between the "ppid" and "pgrp" fields
	 * to make sure we're anchored after the untruncated "ppid"
	 * field..
	 */
	ppid_rhs = strchr(ppid_lhs, ' ');
	if (!ppid_rhs)
		goto bad_kernel;

	ppid = strtol(ppid_lhs, &p, 10);
	if (ppid_rhs == p) {
		const char *comm = comm_lhs + 1;
		size_t commlen = comm_rhs - comm;

		strbuf_add(name, comm, commlen);
		*statppid = ppid;

		return 0;
	}

bad_kernel:
	/*
	 * We were able to read our STAT_PARENT_PID_READ_N bytes from
	 * /proc/%d/stat, but the content is bad. Broken kernel?
	 * Should not happen, but handle it gracefully.
	 */
	return -1;
}

static int stat_parent_pid(pid_t pid, struct strbuf *name, int *statppid)
{
	struct strbuf procfs_path = STRBUF_INIT;
	struct strbuf sb = STRBUF_INIT;
	FILE *fp;
	int ret = -1;

	/* try to use procfs if it's present. */
	strbuf_addf(&procfs_path, "/proc/%d/stat", pid);
	fp = fopen(procfs_path.buf, "r");
	if (!fp)
		goto cleanup;

	/*
	 * We could be more strict here and assert that we read at
	 * least STAT_PARENT_PID_READ_N. My reading of procfs(5) is
	 * that on any modern kernel (at least since 2.6.0 released in
	 * 2003) even if all the mandatory numeric fields were zero'd
	 * out we'd get at least 100 bytes, but let's just check that
	 * we got anything at all and trust the parse_proc_stat()
	 * function to handle its "Bad Kernel?" error checking.
	 */
	if (!strbuf_fread(&sb, STAT_PARENT_PID_READ_N, fp))
		goto cleanup;
	if (parse_proc_stat(&sb, name, statppid) < 0)
		goto cleanup;

	ret = 0;
cleanup:
	if (fp)
		fclose(fp);
	strbuf_release(&procfs_path);
	strbuf_release(&sb);

	return ret;
}

static void push_ancestry_name(struct strvec *names, pid_t pid)
{
	struct strbuf name = STRBUF_INIT;
	int ppid;

	if (stat_parent_pid(pid, &name, &ppid) < 0)
		goto cleanup;

	strvec_push(names, name.buf);

	/*
	 * Both errors and reaching the end of the process chain are
	 * reported as fields of 0 by proc(5)
	 */
	if (ppid)
		push_ancestry_name(names, ppid);
cleanup:
	strbuf_release(&name);

	return;
}

void trace2_collect_process_info(enum trace2_process_info_reason reason)
{
	struct strvec names = STRVEC_INIT;

	if (!trace2_is_enabled())
		return;

	switch (reason) {
	case TRACE2_PROCESS_INFO_EXIT:
		/*
		 * The Windows version of this calls its
		 * get_peak_memory_info() here. We may want to insert
		 * similar process-end statistics here in the future.
		 */
		break;
	case TRACE2_PROCESS_INFO_STARTUP:
		push_ancestry_name(&names, getppid());

		if (names.nr)
			trace2_cmd_ancestry(names.v);
		strvec_clear(&names);
		break;
	}

	return;
}