Commit 8f4605ac authored by Alexei Starovoitov
Browse files

Merge branch 'bpf_iter-fixes'



Yonghong Song says:

====================
Commit ae24345d ("bpf: Implement an interface to register
bpf_iter targets") and its subsequent commits in the same patch set
introduced the bpf iterator, a way to run a bpf program while
iterating over kernel data structures.

This patch set addresses some follow-up issues. One big change
is to allow a target to pass ctx arg register types to the verifier
for verification purposes. Please see the individual patches for details.

Changelogs:
  v1 -> v2:
    . add a "const" qualifier to struct bpf_iter_reg for
      bpf_iter_[un]reg_target, which results in
      additional "const" qualifiers in some other places
    . drop the patch which would issue WARN_ONCE if
      seq_ops->show() returns a positive value.
      If this does happen, code review should spot
      it, or the author knows what they are doing.
      In the future, we do want to implement a
      mechanism to find out all registered targets
      so we will be aware of new additions.
====================

Acked-by: Andrii Nakryiko <andriin@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
parents 0aa0372f 03421a92
Loading
Loading
Loading
Loading
+16 −6
Original line number Diff line number Diff line
@@ -643,6 +643,12 @@ struct bpf_jit_poke_descriptor {
	u16 reason;
};

/* reg_type info for ctx arguments.
 *
 * One entry describes a single ctx field: btf_ctx_access() compares
 * @offset against the offset of the ctx access and, on a match,
 * reports @reg_type as the register type of the loaded value.
 */
struct bpf_ctx_arg_aux {
	u32 offset;	/* byte offset of the argument within ctx */
	enum bpf_reg_type reg_type;	/* type reported for a load at @offset */
};

struct bpf_prog_aux {
	atomic64_t refcnt;
	u32 used_map_cnt;
@@ -654,12 +660,13 @@ struct bpf_prog_aux {
	u32 func_cnt; /* used by non-func prog as the number of func progs */
	u32 func_idx; /* 0 for non-func prog, the index in func array for func prog */
	u32 attach_btf_id; /* in-kernel BTF type id to attach to */
	u32 ctx_arg_info_size;
	const struct bpf_ctx_arg_aux *ctx_arg_info;
	struct bpf_prog *linked_prog;
	bool verifier_zext; /* Zero extensions has been inserted by verifier. */
	bool offload_requested;
	bool attach_btf_trace; /* true if attaching to BTF-enabled raw tp */
	bool func_proto_unreliable;
	bool btf_id_or_null_non0_off;
	enum bpf_tramp_prog_type trampoline_prog_type;
	struct bpf_trampoline *trampoline;
	struct hlist_node tramp_hlist;
@@ -1131,20 +1138,23 @@ struct bpf_link *bpf_link_get_from_fd(u32 ufd);
int bpf_obj_pin_user(u32 ufd, const char __user *pathname);
int bpf_obj_get_user(const char __user *pathname, int flags);

#define BPF_ITER_FUNC_PREFIX "__bpf_iter__"
#define BPF_ITER_FUNC_PREFIX "bpf_iter_"
#define DEFINE_BPF_ITER_FUNC(target, args...)			\
	extern int __bpf_iter__ ## target(args);		\
	int __init __bpf_iter__ ## target(args) { return 0; }
	extern int bpf_iter_ ## target(args);			\
	int __init bpf_iter_ ## target(args) { return 0; }

typedef int (*bpf_iter_init_seq_priv_t)(void *private_data);
typedef void (*bpf_iter_fini_seq_priv_t)(void *private_data);

/* Capacity of bpf_iter_reg.ctx_arg_info[] */
#define BPF_ITER_CTX_ARG_MAX 2
/* Registration record that a bpf_iter target hands to
 * bpf_iter_reg_target() / bpf_iter_unreg_target(). The iterator core
 * keeps the pointer rather than copying the fields, so the record
 * must stay valid for as long as the target is registered.
 */
struct bpf_iter_reg {
	/* name compared against the program's attach function name
	 * (past its prefix) to decide whether the target is supported
	 */
	const char *target;
	/* handed to __seq_open_private() when the seq_file is prepared */
	const struct seq_operations *seq_ops;
	/* optional; called on the target private data after the seq_file
	 * is opened, a non-zero return aborts the setup
	 */
	bpf_iter_init_seq_priv_t init_seq_private;
	/* optional; called on the target private data at release time */
	bpf_iter_fini_seq_priv_t fini_seq_private;
	/* size of the target private data appended to the common
	 * bpf_iter_priv_data header
	 */
	u32 seq_priv_size;
	/* number of ctx_arg_info[] entries in use; copied together with
	 * the array pointer into prog->aux for the verifier
	 */
	u32 ctx_arg_info_size;
	struct bpf_ctx_arg_aux ctx_arg_info[BPF_ITER_CTX_ARG_MAX];
};

struct bpf_iter_meta {
@@ -1153,8 +1163,8 @@ struct bpf_iter_meta {
	u64 seq_num;
};

int bpf_iter_reg_target(struct bpf_iter_reg *reg_info);
void bpf_iter_unreg_target(const char *target);
int bpf_iter_reg_target(const struct bpf_iter_reg *reg_info);
void bpf_iter_unreg_target(const struct bpf_iter_reg *reg_info);
bool bpf_iter_prog_supported(struct bpf_prog *prog);
int bpf_iter_link_attach(const union bpf_attr *attr, struct bpf_prog *prog);
int bpf_iter_new_fd(struct bpf_link *link);
+7 −0
Original line number Diff line number Diff line
@@ -540,6 +540,13 @@ static inline bool fib6_metric_locked(struct fib6_info *f6i, int metric)
	return !!(f6i->fib6_metrics->metrics[RTAX_LOCK - 1] & (1 << metric));
}

/* Only defined when IPv6 is built in (not modular) and the bpf
 * syscall is configured.
 * NOTE(review): presumably the ctx layout seen by bpf programs attached
 * to the ipv6_route iterator target - confirm against the code that
 * registers that target.
 */
#if IS_BUILTIN(CONFIG_IPV6) && defined(CONFIG_BPF_SYSCALL)
struct bpf_iter__ipv6_route {
	__bpf_md_ptr(struct bpf_iter_meta *, meta);
	__bpf_md_ptr(struct fib6_info *, rt);
};
#endif

#ifdef CONFIG_IPV6_MULTIPLE_TABLES
static inline bool fib6_has_custom_rules(const struct net *net)
{
+29 −20
Original line number Diff line number Diff line
@@ -8,11 +8,7 @@

struct bpf_iter_target_info {
	struct list_head list;
	const char *target;
	const struct seq_operations *seq_ops;
	bpf_iter_init_seq_priv_t init_seq_private;
	bpf_iter_fini_seq_priv_t fini_seq_private;
	u32 seq_priv_size;
	const struct bpf_iter_reg *reg_info;
	u32 btf_id;	/* cached value */
};

@@ -222,8 +218,8 @@ static int iter_release(struct inode *inode, struct file *file)
	iter_priv = container_of(seq->private, struct bpf_iter_priv_data,
				 target_private);

	if (iter_priv->tinfo->fini_seq_private)
		iter_priv->tinfo->fini_seq_private(seq->private);
	if (iter_priv->tinfo->reg_info->fini_seq_private)
		iter_priv->tinfo->reg_info->fini_seq_private(seq->private);

	bpf_prog_put(iter_priv->prog);
	seq->private = iter_priv;
@@ -238,7 +234,12 @@ const struct file_operations bpf_iter_fops = {
	.release	= iter_release,
};

int bpf_iter_reg_target(struct bpf_iter_reg *reg_info)
/* The argument reg_info will be cached in bpf_iter_target_info.
 * The common practice is to declare the target's reg_info as
 * a const static variable and pass it as an argument to
 * bpf_iter_reg_target().
 */
int bpf_iter_reg_target(const struct bpf_iter_reg *reg_info)
{
	struct bpf_iter_target_info *tinfo;

@@ -246,11 +247,7 @@ int bpf_iter_reg_target(struct bpf_iter_reg *reg_info)
	if (!tinfo)
		return -ENOMEM;

	tinfo->target = reg_info->target;
	tinfo->seq_ops = reg_info->seq_ops;
	tinfo->init_seq_private = reg_info->init_seq_private;
	tinfo->fini_seq_private = reg_info->fini_seq_private;
	tinfo->seq_priv_size = reg_info->seq_priv_size;
	tinfo->reg_info = reg_info;
	INIT_LIST_HEAD(&tinfo->list);

	mutex_lock(&targets_mutex);
@@ -260,14 +257,14 @@ int bpf_iter_reg_target(struct bpf_iter_reg *reg_info)
	return 0;
}

void bpf_iter_unreg_target(const char *target)
void bpf_iter_unreg_target(const struct bpf_iter_reg *reg_info)
{
	struct bpf_iter_target_info *tinfo;
	bool found = false;

	mutex_lock(&targets_mutex);
	list_for_each_entry(tinfo, &targets, list) {
		if (!strcmp(target, tinfo->target)) {
		if (reg_info == tinfo->reg_info) {
			list_del(&tinfo->list);
			kfree(tinfo);
			found = true;
@@ -303,7 +300,7 @@ bool bpf_iter_prog_supported(struct bpf_prog *prog)
			supported = true;
			break;
		}
		if (!strcmp(attach_fname + prefix_len, tinfo->target)) {
		if (!strcmp(attach_fname + prefix_len, tinfo->reg_info->target)) {
			cache_btf_id(tinfo, prog);
			supported = true;
			break;
@@ -311,6 +308,11 @@ bool bpf_iter_prog_supported(struct bpf_prog *prog)
	}
	mutex_unlock(&targets_mutex);

	if (supported) {
		prog->aux->ctx_arg_info_size = tinfo->reg_info->ctx_arg_info_size;
		prog->aux->ctx_arg_info = tinfo->reg_info->ctx_arg_info;
	}

	return supported;
}

@@ -431,15 +433,16 @@ static int prepare_seq_file(struct file *file, struct bpf_iter_link *link)

	tinfo = link->tinfo;
	total_priv_dsize = offsetof(struct bpf_iter_priv_data, target_private) +
			   tinfo->seq_priv_size;
	priv_data = __seq_open_private(file, tinfo->seq_ops, total_priv_dsize);
			   tinfo->reg_info->seq_priv_size;
	priv_data = __seq_open_private(file, tinfo->reg_info->seq_ops,
				       total_priv_dsize);
	if (!priv_data) {
		err = -ENOMEM;
		goto release_prog;
	}

	if (tinfo->init_seq_private) {
		err = tinfo->init_seq_private(priv_data->target_private);
	if (tinfo->reg_info->init_seq_private) {
		err = tinfo->reg_info->init_seq_private(priv_data->target_private);
		if (err)
			goto release_seq_file;
	}
@@ -526,5 +529,11 @@ int bpf_iter_run_prog(struct bpf_prog *prog, void *ctx)
	migrate_enable();
	rcu_read_unlock();

	/* bpf program can only return 0 or 1:
	 *  0 : okay
	 *  1 : retry the same object
	 * The bpf_iter_run_prog() return value
	 * will be seq_ops->show() return value.
	 */
	return ret == 0 ? 0 : -EAGAIN;
}
+10 −5
Original line number Diff line number Diff line
@@ -3694,7 +3694,7 @@ bool btf_ctx_access(int off, int size, enum bpf_access_type type,
	struct bpf_verifier_log *log = info->log;
	const struct btf_param *args;
	u32 nr_args, arg;
	int ret;
	int i, ret;

	if (off % 8) {
		bpf_log(log, "func '%s' offset %d is not multiple of 8\n",
@@ -3790,10 +3790,15 @@ bool btf_ctx_access(int off, int size, enum bpf_access_type type,
		return true;

	/* this is a pointer to another type */
	if (off != 0 && prog->aux->btf_id_or_null_non0_off)
		info->reg_type = PTR_TO_BTF_ID_OR_NULL;
	else
	info->reg_type = PTR_TO_BTF_ID;
	for (i = 0; i < prog->aux->ctx_arg_info_size; i++) {
		const struct bpf_ctx_arg_aux *ctx_arg_info = &prog->aux->ctx_arg_info[i];

		if (ctx_arg_info->offset == off) {
			info->reg_type = ctx_arg_info->reg_type;
			break;
		}
	}

	if (tgt_prog) {
		ret = btf_translate_to_vmlinux(log, btf, t, tgt_prog->type, arg);
+14 −9
Original line number Diff line number Diff line
@@ -81,17 +81,22 @@ static const struct seq_operations bpf_map_seq_ops = {
	.show	= bpf_map_seq_show,
};

static int __init bpf_map_iter_init(void)
{
	struct bpf_iter_reg reg_info = {
/* Registration record for the "bpf_map" iterator target, passed to
 * bpf_iter_reg_target() by bpf_map_iter_init().
 *
 * The single ctx_arg_info entry tells the verifier that a load of the
 * 'map' field of struct bpf_iter__bpf_map yields PTR_TO_BTF_ID_OR_NULL.
 * NOTE(review): presumably because 'map' may be NULL when the program
 * runs - confirm against the seq_ops callbacks.
 */
static const struct bpf_iter_reg bpf_map_reg_info = {
	.target			= "bpf_map",
	.seq_ops		= &bpf_map_seq_ops,
	.init_seq_private	= NULL,
	.fini_seq_private	= NULL,
	.seq_priv_size		= sizeof(struct bpf_iter_seq_map_info),
	.ctx_arg_info_size	= 1,
	.ctx_arg_info		= {
		{ offsetof(struct bpf_iter__bpf_map, map),
		  PTR_TO_BTF_ID_OR_NULL },
	},
};

	return bpf_iter_reg_target(&reg_info);
/* Register the "bpf_map" iterator target at late_initcall time.
 *
 * Return: whatever bpf_iter_reg_target() returns for bpf_map_reg_info.
 */
static int __init bpf_map_iter_init(void)
{
	return bpf_iter_reg_target(&bpf_map_reg_info);
}

late_initcall(bpf_map_iter_init);
Loading