Commit 9ce60102 authored by Alexei Starovoitov's avatar Alexei Starovoitov
Browse files

Merge branch 'bpf_modify_ret'

KP Singh says:

====================
v3 -> v4:

* Fix a memory leak noticed by Daniel.

v2 -> v3:

* bpf_trampoline_update_progs -> bpf_trampoline_get_progs + const
  qualification.
* Typos in commit messages.
* Added Andrii's Acks.

v1 -> v2:

* Adressed Andrii's feedback.
* Fixed a bug that Alexei noticed about nop generation.
* Rebase.

This was brought up in the KRSI v4 discussion and found to be useful
both for security and tracing programs.

  https://lore.kernel.org/bpf/20200225193108.GB22391@chromium.org/



The modify_return programs are allowed for security hooks (with an
extra CAP_MAC_ADMIN check) and functions whitelisted for error
injection (ALLOW_ERROR_INJECTION).

The "security_" check is expected to be cleaned up with the KRSI patch
series.

Here is an example of how a fmod_ret program behaves:

int func_to_be_attached(int a, int b)
{  <--- do_fentry

do_fmod_ret:
   <update ret by calling fmod_ret>
   if (ret != 0)
        goto do_fexit;

original_function:

    <side_effects_happen_here>

}  <--- do_fexit

ALLOW_ERROR_INJECTION(func_to_be_attached, ERRNO)

The fmod_ret program attached to this function can be defined as:

SEC("fmod_ret/func_to_be_attached")
int BPF_PROG(func_name, int a, int b, int ret)
{
        // This will skip the original function logic.
        return -1;
}
====================

Signed-off-by: default avatarAlexei Starovoitov <ast@kernel.org>
parents cc6fa771 3d08b6f2
Loading
Loading
Loading
Loading
+205 −74
Original line number Diff line number Diff line
@@ -1361,13 +1361,12 @@ static void restore_regs(const struct btf_func_model *m, u8 **prog, int nr_args,
			 -(stack_size - i * 8));
}

static int invoke_bpf(const struct btf_func_model *m, u8 **pprog,
		      struct bpf_prog **progs, int prog_cnt, int stack_size)
static int invoke_bpf_prog(const struct btf_func_model *m, u8 **pprog,
			   struct bpf_prog *p, int stack_size, bool mod_ret)
{
	u8 *prog = *pprog;
	int cnt = 0, i;
	int cnt = 0;

	for (i = 0; i < prog_cnt; i++) {
	if (emit_call(&prog, __bpf_prog_enter, prog))
		return -EINVAL;
	/* remember prog start time returned by __bpf_prog_enter */
@@ -1376,26 +1375,151 @@ static int invoke_bpf(const struct btf_func_model *m, u8 **pprog,
	/* arg1: lea rdi, [rbp - stack_size] */
	EMIT4(0x48, 0x8D, 0x7D, -stack_size);
	/* arg2: progs[i]->insnsi for interpreter */
		if (!progs[i]->jited)
	if (!p->jited)
		emit_mov_imm64(&prog, BPF_REG_2,
				       (long) progs[i]->insnsi >> 32,
				       (u32) (long) progs[i]->insnsi);
			       (long) p->insnsi >> 32,
			       (u32) (long) p->insnsi);
	/* call JITed bpf program or interpreter */
		if (emit_call(&prog, progs[i]->bpf_func, prog))
	if (emit_call(&prog, p->bpf_func, prog))
		return -EINVAL;

	/* BPF_TRAMP_MODIFY_RETURN trampolines can modify the return
	 * of the previous call which is then passed on the stack to
	 * the next BPF program.
	 */
	if (mod_ret)
		emit_stx(&prog, BPF_DW, BPF_REG_FP, BPF_REG_0, -8);

	/* arg1: mov rdi, progs[i] */
		emit_mov_imm64(&prog, BPF_REG_1, (long) progs[i] >> 32,
			       (u32) (long) progs[i]);
	emit_mov_imm64(&prog, BPF_REG_1, (long) p >> 32,
		       (u32) (long) p);
	/* arg2: mov rsi, rbx <- start time in nsec */
	emit_mov_reg(&prog, true, BPF_REG_2, BPF_REG_6);
	if (emit_call(&prog, __bpf_prog_exit, prog))
		return -EINVAL;

	*pprog = prog;
	return 0;
}

static void emit_nops(u8 **pprog, unsigned int len)
{
	unsigned int i, noplen;
	u8 *prog = *pprog;
	int cnt = 0;

	while (len > 0) {
		noplen = len;

		if (noplen > ASM_NOP_MAX)
			noplen = ASM_NOP_MAX;

		for (i = 0; i < noplen; i++)
			EMIT1(ideal_nops[noplen][i]);
		len -= noplen;
	}

	*pprog = prog;
}

static void emit_align(u8 **pprog, u32 align)
{
	u8 *target, *prog = *pprog;

	target = PTR_ALIGN(prog, align);
	if (target != prog)
		emit_nops(&prog, target - prog);

	*pprog = prog;
}

static int emit_cond_near_jump(u8 **pprog, void *func, void *ip, u8 jmp_cond)
{
	u8 *prog = *pprog;
	int cnt = 0;
	s64 offset;

	offset = func - (ip + 2 + 4);
	if (!is_simm32(offset)) {
		pr_err("Target %p is out of range\n", func);
		return -EINVAL;
	}
	EMIT2_off32(0x0F, jmp_cond + 0x10, offset);
	*pprog = prog;
	return 0;
}

static int emit_mod_ret_check_imm8(u8 **pprog, int value)
{
	u8 *prog = *pprog;
	int cnt = 0;

	if (!is_imm8(value))
		return -EINVAL;

	if (value == 0)
		EMIT2(0x85, add_2reg(0xC0, BPF_REG_0, BPF_REG_0));
	else
		EMIT3(0x83, add_1reg(0xF8, BPF_REG_0), value);

	*pprog = prog;
	return 0;
}

static int invoke_bpf(const struct btf_func_model *m, u8 **pprog,
		      struct bpf_tramp_progs *tp, int stack_size)
{
	int i;
	u8 *prog = *pprog;

	for (i = 0; i < tp->nr_progs; i++) {
		if (invoke_bpf_prog(m, &prog, tp->progs[i], stack_size, false))
			return -EINVAL;
	}
	*pprog = prog;
	return 0;
}

static int invoke_bpf_mod_ret(const struct btf_func_model *m, u8 **pprog,
			      struct bpf_tramp_progs *tp, int stack_size,
			      u8 **branches)
{
	u8 *prog = *pprog;
	int i;

	/* The first fmod_ret program will receive a garbage return value.
	 * Set this to 0 to avoid confusing the program.
	 */
	emit_mov_imm32(&prog, false, BPF_REG_0, 0);
	emit_stx(&prog, BPF_DW, BPF_REG_FP, BPF_REG_0, -8);
	for (i = 0; i < tp->nr_progs; i++) {
		if (invoke_bpf_prog(m, &prog, tp->progs[i], stack_size, true))
			return -EINVAL;

		/* Generate a branch:
		 *
		 * if (ret !=  0)
		 *	goto do_fexit;
		 *
		 * If needed this can be extended to any integer value which can
		 * be passed by user-space when the program is loaded.
		 */
		if (emit_mod_ret_check_imm8(&prog, 0))
			return -EINVAL;

		/* Save the location of the branch and Generate 6 nops
		 * (4 bytes for an offset and 2 bytes for the jump) These nops
		 * are replaced with a conditional jump once do_fexit (i.e. the
		 * start of the fexit invocation) is finalized.
		 */
		branches[i] = prog;
		emit_nops(&prog, 4 + 2);
	}

	*pprog = prog;
	return 0;
}

/* Example:
 * __be16 eth_type_trans(struct sk_buff *skb, struct net_device *dev);
 * its 'struct btf_func_model' will be nr_args=2
@@ -1458,12 +1582,15 @@ static int invoke_bpf(const struct btf_func_model *m, u8 **pprog,
 */
int arch_prepare_bpf_trampoline(void *image, void *image_end,
				const struct btf_func_model *m, u32 flags,
				struct bpf_prog **fentry_progs, int fentry_cnt,
				struct bpf_prog **fexit_progs, int fexit_cnt,
				struct bpf_tramp_progs *tprogs,
				void *orig_call)
{
	int cnt = 0, nr_args = m->nr_args;
	int ret, i, cnt = 0, nr_args = m->nr_args;
	int stack_size = nr_args * 8;
	struct bpf_tramp_progs *fentry = &tprogs[BPF_TRAMP_FENTRY];
	struct bpf_tramp_progs *fexit = &tprogs[BPF_TRAMP_FEXIT];
	struct bpf_tramp_progs *fmod_ret = &tprogs[BPF_TRAMP_MODIFY_RETURN];
	u8 **branches = NULL;
	u8 *prog;

	/* x86-64 supports up to 6 arguments. 7+ can be added in the future */
@@ -1492,28 +1619,64 @@ int arch_prepare_bpf_trampoline(void *image, void *image_end,

	save_regs(m, &prog, nr_args, stack_size);

	if (fentry_cnt)
		if (invoke_bpf(m, &prog, fentry_progs, fentry_cnt, stack_size))
	if (fentry->nr_progs)
		if (invoke_bpf(m, &prog, fentry, stack_size))
			return -EINVAL;

	if (fmod_ret->nr_progs) {
		branches = kcalloc(fmod_ret->nr_progs, sizeof(u8 *),
				   GFP_KERNEL);
		if (!branches)
			return -ENOMEM;

		if (invoke_bpf_mod_ret(m, &prog, fmod_ret, stack_size,
				       branches)) {
			ret = -EINVAL;
			goto cleanup;
		}
	}

	if (flags & BPF_TRAMP_F_CALL_ORIG) {
		if (fentry_cnt)
		if (fentry->nr_progs || fmod_ret->nr_progs)
			restore_regs(m, &prog, nr_args, stack_size);

		/* call original function */
		if (emit_call(&prog, orig_call, prog))
			return -EINVAL;
		if (emit_call(&prog, orig_call, prog)) {
			ret = -EINVAL;
			goto cleanup;
		}
		/* remember return value in a stack for bpf prog to access */
		emit_stx(&prog, BPF_DW, BPF_REG_FP, BPF_REG_0, -8);
	}

	if (fexit_cnt)
		if (invoke_bpf(m, &prog, fexit_progs, fexit_cnt, stack_size))
			return -EINVAL;
	if (fmod_ret->nr_progs) {
		/* From Intel 64 and IA-32 Architectures Optimization
		 * Reference Manual, 3.4.1.4 Code Alignment, Assembly/Compiler
		 * Coding Rule 11: All branch targets should be 16-byte
		 * aligned.
		 */
		emit_align(&prog, 16);
		/* Update the branches saved in invoke_bpf_mod_ret with the
		 * aligned address of do_fexit.
		 */
		for (i = 0; i < fmod_ret->nr_progs; i++)
			emit_cond_near_jump(&branches[i], prog, branches[i],
					    X86_JNE);
	}

	if (fexit->nr_progs)
		if (invoke_bpf(m, &prog, fexit, stack_size)) {
			ret = -EINVAL;
			goto cleanup;
		}

	if (flags & BPF_TRAMP_F_RESTORE_REGS)
		restore_regs(m, &prog, nr_args, stack_size);

	/* This needs to be done regardless. If there were fmod_ret programs,
	 * the return value is only updated on the stack and still needs to be
	 * restored to R0.
	 */
	if (flags & BPF_TRAMP_F_CALL_ORIG)
		/* restore original return value back into RAX */
		emit_ldx(&prog, BPF_DW, BPF_REG_0, BPF_REG_FP, -8);
@@ -1525,45 +1688,15 @@ int arch_prepare_bpf_trampoline(void *image, void *image_end,
		EMIT4(0x48, 0x83, 0xC4, 8); /* add rsp, 8 */
	EMIT1(0xC3); /* ret */
	/* Make sure the trampoline generation logic doesn't overflow */
	if (WARN_ON_ONCE(prog > (u8 *)image_end - BPF_INSN_SAFETY))
		return -EFAULT;
	return prog - (u8 *)image;
	if (WARN_ON_ONCE(prog > (u8 *)image_end - BPF_INSN_SAFETY)) {
		ret = -EFAULT;
		goto cleanup;
	}
	ret = prog - (u8 *)image;

static int emit_cond_near_jump(u8 **pprog, void *func, void *ip, u8 jmp_cond)
{
	u8 *prog = *pprog;
	int cnt = 0;
	s64 offset;

	offset = func - (ip + 2 + 4);
	if (!is_simm32(offset)) {
		pr_err("Target %p is out of range\n", func);
		return -EINVAL;
	}
	EMIT2_off32(0x0F, jmp_cond + 0x10, offset);
	*pprog = prog;
	return 0;
}

static void emit_nops(u8 **pprog, unsigned int len)
{
	unsigned int i, noplen;
	u8 *prog = *pprog;
	int cnt = 0;

	while (len > 0) {
		noplen = len;

		if (noplen > ASM_NOP_MAX)
			noplen = ASM_NOP_MAX;

		for (i = 0; i < noplen; i++)
			EMIT1(ideal_nops[noplen][i]);
		len -= noplen;
	}

	*pprog = prog;
cleanup:
	kfree(branches);
	return ret;
}

static int emit_fallback_jump(u8 **pprog)
@@ -1588,7 +1721,7 @@ static int emit_fallback_jump(u8 **pprog)

static int emit_bpf_dispatcher(u8 **pprog, int a, int b, s64 *progs)
{
	u8 *jg_reloc, *jg_target, *prog = *pprog;
	u8 *jg_reloc, *prog = *pprog;
	int pivot, err, jg_bytes = 1, cnt = 0;
	s64 jg_offset;

@@ -1643,9 +1776,7 @@ static int emit_bpf_dispatcher(u8 **pprog, int a, int b, s64 *progs)
	 * Coding Rule 11: All branch targets should be 16-byte
	 * aligned.
	 */
	jg_target = PTR_ALIGN(prog, 16);
	if (jg_target != prog)
		emit_nops(&prog, jg_target - prog);
	emit_align(&prog, 16);
	jg_offset = prog - jg_reloc;
	emit_code(jg_reloc - jg_bytes, jg_offset, jg_bytes);

+22 −2
Original line number Diff line number Diff line
@@ -433,6 +433,16 @@ struct btf_func_model {
 */
#define BPF_TRAMP_F_SKIP_FRAME		BIT(2)

/* Each call __bpf_prog_enter + call bpf_func + call __bpf_prog_exit is ~50
 * bytes on x86.  Pick a number to fit into BPF_IMAGE_SIZE / 2
 */
#define BPF_MAX_TRAMP_PROGS 40

struct bpf_tramp_progs {
	struct bpf_prog *progs[BPF_MAX_TRAMP_PROGS];
	int nr_progs;
};

/* Different use cases for BPF trampoline:
 * 1. replace nop at the function entry (kprobe equivalent)
 *    flags = BPF_TRAMP_F_RESTORE_REGS
@@ -455,8 +465,7 @@ struct btf_func_model {
 */
int arch_prepare_bpf_trampoline(void *image, void *image_end,
				const struct btf_func_model *m, u32 flags,
				struct bpf_prog **fentry_progs, int fentry_cnt,
				struct bpf_prog **fexit_progs, int fexit_cnt,
				struct bpf_tramp_progs *tprogs,
				void *orig_call);
/* these two functions are called from generated trampoline */
u64 notrace __bpf_prog_enter(void);
@@ -465,6 +474,7 @@ void notrace __bpf_prog_exit(struct bpf_prog *prog, u64 start);
enum bpf_tramp_prog_type {
	BPF_TRAMP_FENTRY,
	BPF_TRAMP_FEXIT,
	BPF_TRAMP_MODIFY_RETURN,
	BPF_TRAMP_MAX,
	BPF_TRAMP_REPLACE, /* more than MAX */
};
@@ -1146,6 +1156,9 @@ int bpf_prog_test_run_xdp(struct bpf_prog *prog, const union bpf_attr *kattr,
			  union bpf_attr __user *uattr);
int bpf_prog_test_run_skb(struct bpf_prog *prog, const union bpf_attr *kattr,
			  union bpf_attr __user *uattr);
int bpf_prog_test_run_tracing(struct bpf_prog *prog,
			      const union bpf_attr *kattr,
			      union bpf_attr __user *uattr);
int bpf_prog_test_run_flow_dissector(struct bpf_prog *prog,
				     const union bpf_attr *kattr,
				     union bpf_attr __user *uattr);
@@ -1303,6 +1316,13 @@ static inline int bpf_prog_test_run_skb(struct bpf_prog *prog,
	return -ENOTSUPP;
}

static inline int bpf_prog_test_run_tracing(struct bpf_prog *prog,
					    const union bpf_attr *kattr,
					    union bpf_attr __user *uattr)
{
	return -ENOTSUPP;
}

static inline int bpf_prog_test_run_flow_dissector(struct bpf_prog *prog,
						   const union bpf_attr *kattr,
						   union bpf_attr __user *uattr)
+1 −0
Original line number Diff line number Diff line
@@ -210,6 +210,7 @@ enum bpf_attach_type {
	BPF_TRACE_RAW_TP,
	BPF_TRACE_FENTRY,
	BPF_TRACE_FEXIT,
	BPF_MODIFY_RETURN,
	__MAX_BPF_ATTACH_TYPE
};

+9 −1
Original line number Diff line number Diff line
@@ -320,6 +320,7 @@ static int bpf_struct_ops_map_update_elem(struct bpf_map *map, void *key,
	struct bpf_struct_ops_value *uvalue, *kvalue;
	const struct btf_member *member;
	const struct btf_type *t = st_ops->type;
	struct bpf_tramp_progs *tprogs = NULL;
	void *udata, *kdata;
	int prog_fd, err = 0;
	void *image;
@@ -343,6 +344,10 @@ static int bpf_struct_ops_map_update_elem(struct bpf_map *map, void *key,
	if (uvalue->state || refcount_read(&uvalue->refcnt))
		return -EINVAL;

	tprogs = kcalloc(BPF_TRAMP_MAX, sizeof(*tprogs), GFP_KERNEL);
	if (!tprogs)
		return -ENOMEM;

	uvalue = (struct bpf_struct_ops_value *)st_map->uvalue;
	kvalue = (struct bpf_struct_ops_value *)&st_map->kvalue;

@@ -425,10 +430,12 @@ static int bpf_struct_ops_map_update_elem(struct bpf_map *map, void *key,
			goto reset_unlock;
		}

		tprogs[BPF_TRAMP_FENTRY].progs[0] = prog;
		tprogs[BPF_TRAMP_FENTRY].nr_progs = 1;
		err = arch_prepare_bpf_trampoline(image,
						  st_map->image + PAGE_SIZE,
						  &st_ops->func_models[i], 0,
						  &prog, 1, NULL, 0, NULL);
						  tprogs, NULL);
		if (err < 0)
			goto reset_unlock;

@@ -469,6 +476,7 @@ reset_unlock:
	memset(uvalue, 0, map->value_size);
	memset(kvalue, 0, map->value_size);
unlock:
	kfree(tprogs);
	mutex_unlock(&st_map->lock);
	return err;
}
+20 −7
Original line number Diff line number Diff line
@@ -3710,13 +3710,26 @@ bool btf_ctx_access(int off, int size, enum bpf_access_type type,
		nr_args--;
	}

	if (prog->expected_attach_type == BPF_TRACE_FEXIT &&
	    arg == nr_args) {
	if (arg == nr_args) {
		if (prog->expected_attach_type == BPF_TRACE_FEXIT) {
			if (!t)
			/* Default prog with 5 args. 6th arg is retval. */
				return true;
		/* function return type */
			t = btf_type_by_id(btf, t->type);
		} else if (prog->expected_attach_type == BPF_MODIFY_RETURN) {
			/* For now the BPF_MODIFY_RETURN can only be attached to
			 * functions that return an int.
			 */
			if (!t)
				return false;

			t = btf_type_skip_modifiers(btf, t->type, NULL);
			if (!btf_type_is_int(t)) {
				bpf_log(log,
					"ret type %s not allowed for fmod_ret\n",
					btf_kind_str[BTF_INFO_KIND(t->info)]);
				return false;
			}
		}
	} else if (arg >= nr_args) {
		bpf_log(log, "func '%s' doesn't have %d-th argument\n",
			tname, arg + 1);
Loading