Commit 64f0013c authored by Alexei Starovoitov's avatar Alexei Starovoitov
Browse files

Merge branch 'bpf_get_task_stack'

Song Liu says:

====================
This set introduces a new helper bpf_get_task_stack(). The primary use case
is to dump all /proc/*/stack to seq_file via bpf_iter__task.

A few different approaches have been explored and compared:

  1. A simple wrapper around stack_trace_save_tsk(), as v1 [1].

     This approach introduces new syntax, which is different to existing
     helper bpf_get_stack(). Therefore, this is not ideal.

  2. Extend get_perf_callchain() to support "task" as argument.

     This approach reuses most of bpf_get_stack(). However, extending
     get_perf_callchain() requires non-trivial changes to architecture
     specific code. Which is error prone.

  3. Current (v2) approach, leverages most of existing bpf_get_stack(), and
     uses stack_trace_save_tsk() to handle architecture specific logic.

[1] https://lore.kernel.org/netdev/20200623070802.2310018-1-songliubraving@fb.com/



Changes v4 => v5:
1. Rebase and work around git-am issue. (Alexei)
2. Update commit log for 4/4. (Yonghong)

Changes v3 => v4:
1. Simplify the selftests with bpf_iter.h. (Yonghong)
2. Add example output to commit log of 4/4. (Yonghong)

Changes v2 => v3:
1. Rebase on top of bpf-next. (Yonghong)
2. Sanitize get_callchain_entry(). (Peter)
3. Use has_callchain_buf for bpf_get_task_stack. (Andrii)
4. Other small clean up. (Yonghong, Andrii).

Changes v1 => v2:
1. Reuse most of bpf_get_stack() logic. (Andrii)
2. Fix unsigned long vs. u64 mismatch for 32-bit systems. (Yonghong)
3. Add %pB support in bpf_trace_printk(). (Daniel)
4. Fix buffer size to bytes.
====================

Signed-off-by: default avatarAlexei Starovoitov <ast@kernel.org>
parents bba1dc0b c7568114
Loading
Loading
Loading
Loading
+1 −0
Original line number Diff line number Diff line
@@ -1627,6 +1627,7 @@ extern const struct bpf_func_proto bpf_get_current_uid_gid_proto;
extern const struct bpf_func_proto bpf_get_current_comm_proto;
extern const struct bpf_func_proto bpf_get_stackid_proto;
extern const struct bpf_func_proto bpf_get_stack_proto;
extern const struct bpf_func_proto bpf_get_task_stack_proto;
extern const struct bpf_func_proto bpf_sock_map_update_proto;
extern const struct bpf_func_proto bpf_sock_hash_update_proto;
extern const struct bpf_func_proto bpf_get_current_cgroup_id_proto;
+2 −0
Original line number Diff line number Diff line
@@ -1244,6 +1244,8 @@ get_perf_callchain(struct pt_regs *regs, u32 init_nr, bool kernel, bool user,
extern struct perf_callchain_entry *perf_callchain(struct perf_event *event, struct pt_regs *regs);
extern int get_callchain_buffers(int max_stack);
extern void put_callchain_buffers(void);
extern struct perf_callchain_entry *get_callchain_entry(int *rctx);
extern void put_callchain_entry(int rctx);

extern int sysctl_perf_event_max_stack;
extern int sysctl_perf_event_max_contexts_per_stack;
+36 −1
Original line number Diff line number Diff line
@@ -3285,6 +3285,39 @@ union bpf_attr {
 *		Dynamically cast a *sk* pointer to a *udp6_sock* pointer.
 *	Return
 *		*sk* if casting is valid, or NULL otherwise.
 *
 * long bpf_get_task_stack(struct task_struct *task, void *buf, u32 size, u64 flags)
 *	Description
 *		Return a user or a kernel stack in bpf program provided buffer.
 *		To achieve this, the helper needs *task*, which is a valid
 *		pointer to struct task_struct. To store the stacktrace, the
 *		bpf program provides *buf* with	a nonnegative *size*.
 *
 *		The last argument, *flags*, holds the number of stack frames to
 *		skip (from 0 to 255), masked with
 *		**BPF_F_SKIP_FIELD_MASK**. The next bits can be used to set
 *		the following flags:
 *
 *		**BPF_F_USER_STACK**
 *			Collect a user space stack instead of a kernel stack.
 *		**BPF_F_USER_BUILD_ID**
 *			Collect buildid+offset instead of ips for user stack,
 *			only valid if **BPF_F_USER_STACK** is also specified.
 *
 *		**bpf_get_task_stack**\ () can collect up to
 *		**PERF_MAX_STACK_DEPTH** both kernel and user frames, subject
 *		to sufficient large buffer size. Note that
 *		this limit can be controlled with the **sysctl** program, and
 *		that it should be manually increased in order to profile long
 *		user stacks (such as stacks for Java programs). To do so, use:
 *
 *		::
 *
 *			# sysctl kernel.perf_event_max_stack=<new value>
 *	Return
 *		A non-negative value equal to or less than *size* on success,
 *		or a negative error in case of failure.
 *
 */
#define __BPF_FUNC_MAPPER(FN)		\
	FN(unspec),			\
@@ -3427,7 +3460,9 @@ union bpf_attr {
	FN(skc_to_tcp_sock),		\
	FN(skc_to_tcp_timewait_sock),	\
	FN(skc_to_tcp_request_sock),	\
	FN(skc_to_udp6_sock),
	FN(skc_to_udp6_sock),		\
	FN(get_task_stack),		\
	/* */

/* integer value in 'imm' field of BPF_CALL instruction selects which helper
 * function eBPF program intends to call
+73 −4
Original line number Diff line number Diff line
@@ -348,6 +348,40 @@ static void stack_map_get_build_id_offset(struct bpf_stack_build_id *id_offs,
	}
}

static struct perf_callchain_entry *
get_callchain_entry_for_task(struct task_struct *task, u32 init_nr)
{
	struct perf_callchain_entry *entry;
	int rctx;

	entry = get_callchain_entry(&rctx);

	if (!entry)
		return NULL;

	entry->nr = init_nr +
		stack_trace_save_tsk(task, (unsigned long *)(entry->ip + init_nr),
				     sysctl_perf_event_max_stack - init_nr, 0);

	/* stack_trace_save_tsk() works on unsigned long array, while
	 * perf_callchain_entry uses u64 array. For 32-bit systems, it is
	 * necessary to fix this mismatch.
	 */
	if (__BITS_PER_LONG != 64) {
		unsigned long *from = (unsigned long *) entry->ip;
		u64 *to = entry->ip;
		int i;

		/* copy data from the end to avoid using extra buffer */
		for (i = entry->nr - 1; i >= (int)init_nr; i--)
			to[i] = (u64)(from[i]);
	}

	put_callchain_entry(rctx);

	return entry;
}

BPF_CALL_3(bpf_get_stackid, struct pt_regs *, regs, struct bpf_map *, map,
	   u64, flags)
{
@@ -448,8 +482,8 @@ const struct bpf_func_proto bpf_get_stackid_proto = {
	.arg3_type	= ARG_ANYTHING,
};

BPF_CALL_4(bpf_get_stack, struct pt_regs *, regs, void *, buf, u32, size,
	   u64, flags)
static long __bpf_get_stack(struct pt_regs *regs, struct task_struct *task,
			    void *buf, u32 size, u64 flags)
{
	u32 init_nr, trace_nr, copy_len, elem_size, num_elem;
	bool user_build_id = flags & BPF_F_USER_BUILD_ID;
@@ -471,13 +505,22 @@ BPF_CALL_4(bpf_get_stack, struct pt_regs *, regs, void *, buf, u32, size,
	if (unlikely(size % elem_size))
		goto clear;

	/* cannot get valid user stack for task without user_mode regs */
	if (task && user && !user_mode(regs))
		goto err_fault;

	num_elem = size / elem_size;
	if (sysctl_perf_event_max_stack < num_elem)
		init_nr = 0;
	else
		init_nr = sysctl_perf_event_max_stack - num_elem;

	if (kernel && task)
		trace = get_callchain_entry_for_task(task, init_nr);
	else
		trace = get_perf_callchain(regs, init_nr, kernel, user,
				   sysctl_perf_event_max_stack, false, false);
					   sysctl_perf_event_max_stack,
					   false, false);
	if (unlikely(!trace))
		goto err_fault;

@@ -505,6 +548,12 @@ clear:
	return err;
}

BPF_CALL_4(bpf_get_stack, struct pt_regs *, regs, void *, buf, u32, size,
	   u64, flags)
{
	return __bpf_get_stack(regs, NULL, buf, size, flags);
}

const struct bpf_func_proto bpf_get_stack_proto = {
	.func		= bpf_get_stack,
	.gpl_only	= true,
@@ -515,6 +564,26 @@ const struct bpf_func_proto bpf_get_stack_proto = {
	.arg4_type	= ARG_ANYTHING,
};

BPF_CALL_4(bpf_get_task_stack, struct task_struct *, task, void *, buf,
	   u32, size, u64, flags)
{
	struct pt_regs *regs = task_pt_regs(task);

	return __bpf_get_stack(regs, task, buf, size, flags);
}

static int bpf_get_task_stack_btf_ids[5];
const struct bpf_func_proto bpf_get_task_stack_proto = {
	.func		= bpf_get_task_stack,
	.gpl_only	= false,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_BTF_ID,
	.arg2_type	= ARG_PTR_TO_UNINIT_MEM,
	.arg3_type	= ARG_CONST_SIZE_OR_ZERO,
	.arg4_type	= ARG_ANYTHING,
	.btf_id		= bpf_get_task_stack_btf_ids,
};

/* Called from eBPF program */
static void *stack_map_lookup_elem(struct bpf_map *map, void *key)
{
+3 −1
Original line number Diff line number Diff line
@@ -4864,7 +4864,9 @@ static int check_helper_call(struct bpf_verifier_env *env, int func_id, int insn
	if (err)
		return err;

	if (func_id == BPF_FUNC_get_stack && !env->prog->has_callchain_buf) {
	if ((func_id == BPF_FUNC_get_stack ||
	     func_id == BPF_FUNC_get_task_stack) &&
	    !env->prog->has_callchain_buf) {
		const char *err_str;

#ifdef CONFIG_PERF_EVENTS
Loading