Commit 10496f26 authored by Daniel Borkmann's avatar Daniel Borkmann
Browse files

Merge branch 'bpf-sleepable'



Alexei Starovoitov says:

====================
v2->v3:
- switched to minimal allowlist approach. Essentially that means that syscall
  entry, few btrfs allow_error_inject functions, should_fail_bio(), and two LSM
  hooks: file_mprotect and bprm_committed_creds are the only hooks that allow
  attaching of sleepable BPF programs. When comprehensive analysis of LSM hooks
  will be done this allowlist will be extended.
- added patch 1 that fixes prototypes of two mm functions to reliably work with
  error injection. It's also necessary for resolve_btfids tool to recognize
  these two funcs, but that's secondary.

v1->v2:
- split fmod_ret fix into separate patch
- added denylist

v1:
This patch set introduces the minimal viable support for sleepable bpf programs.
In this patch only fentry/fexit/fmod_ret and lsm progs can be sleepable.
Only array and pre-allocated hash and lru maps allowed.

Here is 'perf report' difference of sleepable vs non-sleepable:
   3.86%  bench     [k] __srcu_read_unlock
   3.22%  bench     [k] __srcu_read_lock
   0.92%  bench     [k] bpf_prog_740d4210cdcd99a3_bench_trigger_fentry_sleep
   0.50%  bench     [k] bpf_trampoline_10297
   0.26%  bench     [k] __bpf_prog_exit_sleepable
   0.21%  bench     [k] __bpf_prog_enter_sleepable
vs
   0.88%  bench     [k] bpf_prog_740d4210cdcd99a3_bench_trigger_fentry
   0.84%  bench     [k] bpf_trampoline_10297
   0.13%  bench     [k] __bpf_prog_enter
   0.12%  bench     [k] __bpf_prog_exit
vs
   0.79%  bench     [k] bpf_prog_740d4210cdcd99a3_bench_trigger_fentry_sleep
   0.72%  bench     [k] bpf_trampoline_10381
   0.31%  bench     [k] __bpf_prog_exit_sleepable
   0.29%  bench     [k] __bpf_prog_enter_sleepable

Sleepable vs non-sleepable program invocation overhead is only marginally higher
due to rcu_trace. srcu approach is much slower.
====================

Signed-off-by: default avatarDaniel Borkmann <daniel@iogearbox.net>
parents d557ea39 e68a1445
Loading
Loading
Loading
Loading
+21 −11
Original line number Diff line number Diff line
@@ -1379,10 +1379,15 @@ static int invoke_bpf_prog(const struct btf_func_model *m, u8 **pprog,
	u8 *prog = *pprog;
	int cnt = 0;

	if (p->aux->sleepable) {
		if (emit_call(&prog, __bpf_prog_enter_sleepable, prog))
			return -EINVAL;
	} else {
		if (emit_call(&prog, __bpf_prog_enter, prog))
			return -EINVAL;
		/* remember prog start time returned by __bpf_prog_enter */
		emit_mov_reg(&prog, true, BPF_REG_6, BPF_REG_0);
	}

	/* arg1: lea rdi, [rbp - stack_size] */
	EMIT4(0x48, 0x8D, 0x7D, -stack_size);
@@ -1402,6 +1407,10 @@ static int invoke_bpf_prog(const struct btf_func_model *m, u8 **pprog,
	if (mod_ret)
		emit_stx(&prog, BPF_DW, BPF_REG_FP, BPF_REG_0, -8);

	if (p->aux->sleepable) {
		if (emit_call(&prog, __bpf_prog_exit_sleepable, prog))
			return -EINVAL;
	} else {
		/* arg1: mov rdi, progs[i] */
		emit_mov_imm64(&prog, BPF_REG_1, (long) p >> 32,
			       (u32) (long) p);
@@ -1409,6 +1418,7 @@ static int invoke_bpf_prog(const struct btf_func_model *m, u8 **pprog,
		emit_mov_reg(&prog, true, BPF_REG_2, BPF_REG_6);
		if (emit_call(&prog, __bpf_prog_exit, prog))
			return -EINVAL;
	}

	*pprog = prog;
	return 0;
+4 −0
Original line number Diff line number Diff line
@@ -539,6 +539,8 @@ int arch_prepare_bpf_trampoline(void *image, void *image_end,
/* these two functions are called from generated trampoline */
u64 notrace __bpf_prog_enter(void);
void notrace __bpf_prog_exit(struct bpf_prog *prog, u64 start);
void notrace __bpf_prog_enter_sleepable(void);
void notrace __bpf_prog_exit_sleepable(void);

struct bpf_ksym {
	unsigned long		 start;
@@ -734,6 +736,7 @@ struct bpf_prog_aux {
	bool offload_requested;
	bool attach_btf_trace; /* true if attaching to BTF-enabled raw tp */
	bool func_proto_unreliable;
	bool sleepable;
	enum bpf_tramp_prog_type trampoline_prog_type;
	struct bpf_trampoline *trampoline;
	struct hlist_node tramp_hlist;
@@ -1781,6 +1784,7 @@ extern const struct bpf_func_proto bpf_skc_to_tcp_sock_proto;
extern const struct bpf_func_proto bpf_skc_to_tcp_timewait_sock_proto;
extern const struct bpf_func_proto bpf_skc_to_tcp_request_sock_proto;
extern const struct bpf_func_proto bpf_skc_to_udp6_sock_proto;
extern const struct bpf_func_proto bpf_copy_from_user_proto;

const struct bpf_func_proto *bpf_tracing_func_proto(
	enum bpf_func_id func_id, const struct bpf_prog *prog);
+16 −0
Original line number Diff line number Diff line
@@ -346,6 +346,14 @@ enum bpf_link_type {
/* The verifier internal test flag. Behavior is undefined */
#define BPF_F_TEST_STATE_FREQ	(1U << 3)

/* If BPF_F_SLEEPABLE is used in BPF_PROG_LOAD command, the verifier will
 * restrict map and helper usage for such programs. Sleepable BPF programs can
 * only be attached to hooks where kernel execution context allows sleeping.
 * Such programs are allowed to use helpers that may sleep like
 * bpf_copy_from_user().
 */
#define BPF_F_SLEEPABLE		(1U << 4)

/* When BPF ldimm64's insn[0].src_reg != 0 then this can have
 * two extensions:
 *
@@ -3561,6 +3569,13 @@ union bpf_attr {
 *		On success, the strictly positive length of the string,
 *		including the trailing NUL character. On error, a negative
 *		value.
 *
 * long bpf_copy_from_user(void *dst, u32 size, const void *user_ptr)
 * 	Description
 * 		Read *size* bytes from user space address *user_ptr* and store
 * 		the data in *dst*. This is a wrapper of copy_from_user().
 * 	Return
 * 		0 on success, or a negative error in case of failure.
 */
#define __BPF_FUNC_MAPPER(FN)		\
	FN(unspec),			\
@@ -3711,6 +3726,7 @@ union bpf_attr {
	FN(inode_storage_get),		\
	FN(inode_storage_delete),	\
	FN(d_path),			\
	FN(copy_from_user),		\
	/* */

/* integer value in 'imm' field of BPF_CALL instruction selects which helper
+1 −0
Original line number Diff line number Diff line
@@ -1691,6 +1691,7 @@ config BPF_SYSCALL
	bool "Enable bpf() system call"
	select BPF
	select IRQ_WORK
	select TASKS_TRACE_RCU
	default n
	help
	  Enable the bpf() system call that allows to manipulate eBPF
+1 −0
Original line number Diff line number Diff line
@@ -10,6 +10,7 @@
#include <linux/filter.h>
#include <linux/perf_event.h>
#include <uapi/linux/btf.h>
#include <linux/rcupdate_trace.h>

#include "map_in_map.h"

Loading