Commit e9f02a80 authored by Alexei Starovoitov

Merge branch 'trampoline-fixes'

Jiri Olsa says:

====================
hi,
sending 2 fixes that add kernel support for loading
trampoline programs in bcc/bpftrace and allow
unwinding through trampoline/dispatcher code.

Original rfc post [1].

Output of perf bench showing the speedup while running
klockstat.py on kprobes vs trampolines:

    Without:
            $ perf bench sched messaging -l 50000
            ...
                 Total time: 18.571 [sec]

    With current kprobe tracing:
            $ perf bench sched messaging -l 50000
            ...
                 Total time: 183.395 [sec]

    With kfunc tracing:
            $ perf bench sched messaging -l 50000
            ...
                 Total time: 39.773 [sec]

v4 changes:
  - rebased on latest bpf-next/master
  - removed image tree mutex and used trampoline_mutex instead
  - checked directly for string pointer in patch 1 [Alexei]
  - skipped helpers patches, as they are no longer needed [Alexei]

v3 changes:
  - added ack from John Fastabend for patch 1
  - moved is_bpf_image_address out of the is_bpf_text_address call [David]

v2 changes:
  - made the unwind work for the dispatcher as well
  - added test for allowed trampolines count
  - used raw tp pt_regs nest-arrays for trampoline helpers

thanks,
jirka

[1] https://lore.kernel.org/netdev/20191229143740.29143-1-jolsa@kernel.org/


====================

Signed-off-by: Alexei Starovoitov <ast@kernel.org>
parents 35b9211c d633d579
include/linux/bpf.h +11 −1
@@ -525,7 +525,6 @@ struct bpf_trampoline *bpf_trampoline_lookup(u64 key);
 int bpf_trampoline_link_prog(struct bpf_prog *prog);
 int bpf_trampoline_unlink_prog(struct bpf_prog *prog);
 void bpf_trampoline_put(struct bpf_trampoline *tr);
-void *bpf_jit_alloc_exec_page(void);
 #define BPF_DISPATCHER_INIT(name) {			\
 	.mutex = __MUTEX_INITIALIZER(name.mutex),	\
 	.func = &name##func,				\
@@ -557,6 +556,13 @@ void *bpf_jit_alloc_exec_page(void);
 #define BPF_DISPATCHER_PTR(name) (&name)
 void bpf_dispatcher_change_prog(struct bpf_dispatcher *d, struct bpf_prog *from,
 				struct bpf_prog *to);
+struct bpf_image {
+	struct latch_tree_node tnode;
+	unsigned char data[];
+};
+#define BPF_IMAGE_SIZE (PAGE_SIZE - sizeof(struct bpf_image))
+bool is_bpf_image_address(unsigned long address);
+void *bpf_image_alloc(void);
 #else
 static inline struct bpf_trampoline *bpf_trampoline_lookup(u64 key)
 {
@@ -578,6 +584,10 @@ static inline void bpf_trampoline_put(struct bpf_trampoline *tr) {}
 static inline void bpf_dispatcher_change_prog(struct bpf_dispatcher *d,
 					      struct bpf_prog *from,
 					      struct bpf_prog *to) {}
+static inline bool is_bpf_image_address(unsigned long address)
+{
+	return false;
+}
 #endif
 
 struct bpf_func_info_aux {
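
Note: struct bpf_image puts the latch-tree node at the start of the executable
page, so the usable code area shrinks from PAGE_SIZE to BPF_IMAGE_SIZE and the
node can be recovered from the data pointer alone. A minimal userspace sketch
of that layout and the container_of() round trip (type sizes and names are
stand-ins for illustration, not kernel source):

	#include <stddef.h>
	#include <stdio.h>

	/* stand-in for the kernel's latch tree node, layout only */
	struct latch_tree_node { void *node[4]; };

	struct bpf_image {
		struct latch_tree_node tnode;
		unsigned char data[];
	};

	#define IMAGE_PAGE_SIZE 4096 /* stand-in for PAGE_SIZE */
	#define BPF_IMAGE_SIZE (IMAGE_PAGE_SIZE - sizeof(struct bpf_image))

	#define container_of(ptr, type, member) \
		((type *)((char *)(ptr) - offsetof(type, member)))

	int main(void)
	{
		static unsigned char page[IMAGE_PAGE_SIZE];
		struct bpf_image *image = (struct bpf_image *)page;
		void *code = image->data; /* what bpf_image_alloc() hands out */

		/* bpf_trampoline_put() recovers the header the same way */
		struct bpf_image *back = container_of(code, struct bpf_image, data);

		printf("code bytes: %zu, header recovered: %d\n",
		       (size_t)BPF_IMAGE_SIZE, back == image);
		return 0;
	}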
kernel/bpf/btf.c +16 −0
@@ -3669,6 +3669,19 @@ struct btf *bpf_prog_get_target_btf(const struct bpf_prog *prog)
 	}
 }
 
+static bool is_string_ptr(struct btf *btf, const struct btf_type *t)
+{
+	/* t comes in already as a pointer */
+	t = btf_type_by_id(btf, t->type);
+
+	/* allow const */
+	if (BTF_INFO_KIND(t->info) == BTF_KIND_CONST)
+		t = btf_type_by_id(btf, t->type);
+
+	/* char, signed char, unsigned char */
+	return btf_type_is_int(t) && t->size == 1;
+}
+
 bool btf_ctx_access(int off, int size, enum bpf_access_type type,
 		    const struct bpf_prog *prog,
 		    struct bpf_insn_access_aux *info)
@@ -3735,6 +3748,9 @@ bool btf_ctx_access(int off, int size, enum bpf_access_type type,
 		 */
 		return true;
 
+	if (is_string_ptr(btf, t))
+		return true;
+
 	/* this is a pointer to another type */
 	info->reg_type = PTR_TO_BTF_ID;
 
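Note: is_string_ptr() makes btf_ctx_access() accept direct loads of
char-pointer arguments from the tracing context, which is what the
bcc/bpftrace-generated programs mentioned above rely on. A hypothetical
fentry program using such an argument (the traced function and the
libbpf-style scaffolding are illustrative, not part of this patch):

	#include <linux/bpf.h>
	#include <bpf/bpf_helpers.h>
	#include <bpf/bpf_tracing.h>

	char LICENSE[] SEC("license") = "GPL";

	SEC("fentry/do_sys_open")
	int BPF_PROG(trace_open, int dfd, const char *filename)
	{
		char buf[64];

		/* loading the const char * argument out of the raw
		 * context is the access is_string_ptr() now permits */
		bpf_probe_read_user_str(buf, sizeof(buf), filename);
		bpf_printk("open: %s", buf);
		return 0;
	}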
kernel/bpf/dispatcher.c +2 −2
@@ -113,7 +113,7 @@ static void bpf_dispatcher_update(struct bpf_dispatcher *d, int prev_num_progs)
 		noff = 0;
 	} else {
 		old = d->image + d->image_off;
-		noff = d->image_off ^ (PAGE_SIZE / 2);
+		noff = d->image_off ^ (BPF_IMAGE_SIZE / 2);
 	}
 
 	new = d->num_progs ? d->image + noff : NULL;
@@ -140,7 +140,7 @@ void bpf_dispatcher_change_prog(struct bpf_dispatcher *d, struct bpf_prog *from,
 
 	mutex_lock(&d->mutex);
 	if (!d->image) {
-		d->image = bpf_jit_alloc_exec_page();
+		d->image = bpf_image_alloc();
 		if (!d->image)
 			goto out;
 	}
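
Note: the image_off XOR flips between the two halves of the image, so each
update JITs the dispatcher into the half that is not currently live. A
standalone sketch of just that toggle arithmetic (2048 is an assumed value
standing in for BPF_IMAGE_SIZE / 2):

	#include <stdio.h>

	#define IMAGE_HALF 2048 /* stand-in for BPF_IMAGE_SIZE / 2 */

	int main(void)
	{
		unsigned int off = 0;

		for (int i = 0; i < 4; i++) {
			/* build in the idle half, then make it live */
			unsigned int noff = off ^ IMAGE_HALF; /* 0 <-> 2048 */
			printf("update %d: build at %u, was live at %u\n",
			       i, noff, off);
			off = noff;
		}
		return 0;
	}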
kernel/bpf/trampoline.c +72 −8
@@ -4,6 +4,7 @@
 #include <linux/bpf.h>
 #include <linux/filter.h>
 #include <linux/ftrace.h>
+#include <linux/rbtree_latch.h>
 
 /* dummy _ops. The verifier will operate on target program's ops. */
 const struct bpf_verifier_ops bpf_extension_verifier_ops = {
@@ -16,11 +17,12 @@ const struct bpf_prog_ops bpf_extension_prog_ops = {
 #define TRAMPOLINE_TABLE_SIZE (1 << TRAMPOLINE_HASH_BITS)
 
 static struct hlist_head trampoline_table[TRAMPOLINE_TABLE_SIZE];
+static struct latch_tree_root image_tree __cacheline_aligned;
 
-/* serializes access to trampoline_table */
+/* serializes access to trampoline_table and image_tree */
 static DEFINE_MUTEX(trampoline_mutex);
 
-void *bpf_jit_alloc_exec_page(void)
+static void *bpf_jit_alloc_exec_page(void)
 {
 	void *image;
 
@@ -36,6 +38,64 @@ void *bpf_jit_alloc_exec_page(void)
 	return image;
 }
 
+static __always_inline bool image_tree_less(struct latch_tree_node *a,
+				      struct latch_tree_node *b)
+{
+	struct bpf_image *ia = container_of(a, struct bpf_image, tnode);
+	struct bpf_image *ib = container_of(b, struct bpf_image, tnode);
+
+	return ia < ib;
+}
+
+static __always_inline int image_tree_comp(void *addr, struct latch_tree_node *n)
+{
+	void *image = container_of(n, struct bpf_image, tnode);
+
+	if (addr < image)
+		return -1;
+	if (addr >= image + PAGE_SIZE)
+		return 1;
+
+	return 0;
+}
+
+static const struct latch_tree_ops image_tree_ops = {
+	.less	= image_tree_less,
+	.comp	= image_tree_comp,
+};
+
+static void *__bpf_image_alloc(bool lock)
+{
+	struct bpf_image *image;
+
+	image = bpf_jit_alloc_exec_page();
+	if (!image)
+		return NULL;
+
+	if (lock)
+		mutex_lock(&trampoline_mutex);
+	latch_tree_insert(&image->tnode, &image_tree, &image_tree_ops);
+	if (lock)
+		mutex_unlock(&trampoline_mutex);
+	return image->data;
+}
+
+void *bpf_image_alloc(void)
+{
+	return __bpf_image_alloc(true);
+}
+
+bool is_bpf_image_address(unsigned long addr)
+{
+	bool ret;
+
+	rcu_read_lock();
+	ret = latch_tree_find((void *) addr, &image_tree, &image_tree_ops) != NULL;
+	rcu_read_unlock();
+
+	return ret;
+}
+
 struct bpf_trampoline *bpf_trampoline_lookup(u64 key)
 {
 	struct bpf_trampoline *tr;
@@ -56,7 +116,7 @@ struct bpf_trampoline *bpf_trampoline_lookup(u64 key)
 		goto out;
 
 	/* is_root was checked earlier. No need for bpf_jit_charge_modmem() */
-	image = bpf_jit_alloc_exec_page();
+	image = __bpf_image_alloc(false);
 	if (!image) {
 		kfree(tr);
 		tr = NULL;
@@ -131,14 +191,14 @@ static int register_fentry(struct bpf_trampoline *tr, void *new_addr)
 }
 
 /* Each call __bpf_prog_enter + call bpf_func + call __bpf_prog_exit is ~50
- * bytes on x86.  Pick a number to fit into PAGE_SIZE / 2
+ * bytes on x86.  Pick a number to fit into BPF_IMAGE_SIZE / 2
  */
 #define BPF_MAX_TRAMP_PROGS 40
 
 static int bpf_trampoline_update(struct bpf_trampoline *tr)
 {
-	void *old_image = tr->image + ((tr->selector + 1) & 1) * PAGE_SIZE/2;
-	void *new_image = tr->image + (tr->selector & 1) * PAGE_SIZE/2;
+	void *old_image = tr->image + ((tr->selector + 1) & 1) * BPF_IMAGE_SIZE/2;
+	void *new_image = tr->image + (tr->selector & 1) * BPF_IMAGE_SIZE/2;
 	struct bpf_prog *progs_to_run[BPF_MAX_TRAMP_PROGS];
 	int fentry_cnt = tr->progs_cnt[BPF_TRAMP_FENTRY];
 	int fexit_cnt = tr->progs_cnt[BPF_TRAMP_FEXIT];
@@ -174,7 +234,7 @@ static int bpf_trampoline_update(struct bpf_trampoline *tr)
 	 */
 	synchronize_rcu_tasks();
 
-	err = arch_prepare_bpf_trampoline(new_image, new_image + PAGE_SIZE / 2,
+	err = arch_prepare_bpf_trampoline(new_image, new_image + BPF_IMAGE_SIZE / 2,
 					  &tr->func.model, flags,
 					  fentry, fentry_cnt,
 					  fexit, fexit_cnt,
@@ -284,6 +344,8 @@ out:
 
 void bpf_trampoline_put(struct bpf_trampoline *tr)
 {
+	struct bpf_image *image;
+
 	if (!tr)
 		return;
 	mutex_lock(&trampoline_mutex);
@@ -294,9 +356,11 @@ void bpf_trampoline_put(struct bpf_trampoline *tr)
 		goto out;
 	if (WARN_ON_ONCE(!hlist_empty(&tr->progs_hlist[BPF_TRAMP_FEXIT])))
 		goto out;
+	image = container_of(tr->image, struct bpf_image, data);
+	latch_tree_erase(&image->tnode, &image_tree, &image_tree_ops);
 	/* wait for tasks to get out of trampoline before freeing it */
 	synchronize_rcu_tasks();
-	bpf_jit_free_exec(tr->image);
+	bpf_jit_free_exec(image);
 	hlist_del(&tr->hlist);
 	kfree(tr);
 out:
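
Note: image_tree_comp() matches any address inside the page, so a return
address found in the middle of generated trampoline code still resolves to
its image; that, plus the RCU-side latch_tree_find(), is what lets the
unwinder query the tree without taking trampoline_mutex. A standalone sketch
of just the range comparison (tree and locking omitted, addresses made up):

	#include <stdint.h>
	#include <stdio.h>

	#define IMAGE_PAGE 4096 /* stand-in for PAGE_SIZE */

	/* mirrors image_tree_comp(): -1 below, 1 above, 0 inside */
	static int image_comp(uintptr_t addr, uintptr_t image)
	{
		if (addr < image)
			return -1;
		if (addr >= image + IMAGE_PAGE)
			return 1;
		return 0;
	}

	int main(void)
	{
		uintptr_t image = 0x10000; /* pretend page start */

		printf("%d %d %d\n",
		       image_comp(image - 1, image),           /* below  */
		       image_comp(image + 100, image),         /* inside */
		       image_comp(image + IMAGE_PAGE, image)); /* above  */
		return 0;
	}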
kernel/extable.c +5 −2
@@ -131,8 +131,9 @@ int kernel_text_address(unsigned long addr)
 	 * triggers a stack trace, or a WARN() that happens during
 	 * coming back from idle, or cpu on or offlining.
 	 *
-	 * is_module_text_address() as well as the kprobe slots
-	 * and is_bpf_text_address() require RCU to be watching.
+	 * is_module_text_address() as well as the kprobe slots,
+	 * is_bpf_text_address() and is_bpf_image_address require
+	 * RCU to be watching.
 	 */
 	no_rcu = !rcu_is_watching();
 
@@ -148,6 +149,8 @@ int kernel_text_address(unsigned long addr)
 		goto out;
 	if (is_bpf_text_address(addr))
 		goto out;
+	if (is_bpf_image_address(addr))
+		goto out;
 	ret = 0;
 out:
 	if (no_rcu)