Commit 5e7b3020 authored by Yonghong Song's avatar Yonghong Song Committed by Alexei Starovoitov
Browse files

bpf: Change uapi for bpf iterator map elements



Commit a5cbe05a ("bpf: Implement bpf iterator for
map elements") added bpf iterator support for
map elements. The map element bpf iterator requires
info to identify a particular map. In the above
commit, the attr->link_create.target_fd is used
to carry map_fd and an enum bpf_iter_link_info
is added to uapi to specify the target_fd actually
representing a map_fd:
    enum bpf_iter_link_info {
	BPF_ITER_LINK_UNSPEC = 0,
	BPF_ITER_LINK_MAP_FD = 1,

	MAX_BPF_ITER_LINK_INFO,
    };

This is an extensible approach as we can grow
enumerator for pid, cgroup_id, etc. and we can
unionize target_fd for pid, cgroup_id, etc.
But in the future, there are chances that
more complex customization may happen, e.g.,
for tasks, it could be filtered based on
both cgroup_id and user_id.

This patch changed the uapi to have fields
	__aligned_u64	iter_info;
	__u32		iter_info_len;
for additional iter_info for link_create.
The iter_info is defined as
	union bpf_iter_link_info {
		struct {
			__u32   map_fd;
		} map;
	};

So future extension for additional customization
will be easier. The bpf_iter_link_info will be
passed to target callback to validate and generic
bpf_iter framework does not need to deal it any
more.

Note that map_fd = 0 will be considered invalid
and -EBADF will be returned to user space.

Fixes: a5cbe05a ("bpf: Implement bpf iterator for map elements")
Signed-off-by: default avatarYonghong Song <yhs@fb.com>
Signed-off-by: default avatarAlexei Starovoitov <ast@kernel.org>
Acked-by: default avatarAndrii Nakryiko <andriin@fb.com>
Acked-by: default avatarJohn Fastabend <john.fastabend@gmail.com>
Link: https://lore.kernel.org/bpf/20200805055056.1457463-1-yhs@fb.com
parent 6bcaf41f
Loading
Loading
Loading
Loading
+6 −4
Original line number Diff line number Diff line
@@ -1214,15 +1214,17 @@ struct bpf_iter_aux_info {
	struct bpf_map *map;
};

typedef int (*bpf_iter_check_target_t)(struct bpf_prog *prog,
typedef int (*bpf_iter_attach_target_t)(struct bpf_prog *prog,
					union bpf_iter_link_info *linfo,
					struct bpf_iter_aux_info *aux);
typedef void (*bpf_iter_detach_target_t)(struct bpf_iter_aux_info *aux);

#define BPF_ITER_CTX_ARG_MAX 2
struct bpf_iter_reg {
	const char *target;
	bpf_iter_check_target_t check_target;
	bpf_iter_attach_target_t attach_target;
	bpf_iter_detach_target_t detach_target;
	u32 ctx_arg_info_size;
	enum bpf_iter_link_info req_linfo;
	struct bpf_ctx_arg_aux ctx_arg_info[BPF_ITER_CTX_ARG_MAX];
	const struct bpf_iter_seq_info *seq_info;
};
+8 −7
Original line number Diff line number Diff line
@@ -81,6 +81,12 @@ struct bpf_cgroup_storage_key {
	__u32	attach_type;		/* program attach type */
};

union bpf_iter_link_info {
	struct {
		__u32	map_fd;
	} map;
};

/* BPF syscall commands, see bpf(2) man-page for details. */
enum bpf_cmd {
	BPF_MAP_CREATE,
@@ -249,13 +255,6 @@ enum bpf_link_type {
	MAX_BPF_LINK_TYPE,
};

enum bpf_iter_link_info {
	BPF_ITER_LINK_UNSPEC = 0,
	BPF_ITER_LINK_MAP_FD = 1,

	MAX_BPF_ITER_LINK_INFO,
};

/* cgroup-bpf attach flags used in BPF_PROG_ATTACH command
 *
 * NONE(default): No further bpf programs allowed in the subtree.
@@ -623,6 +622,8 @@ union bpf_attr {
		};
		__u32		attach_type;	/* attach type */
		__u32		flags;		/* extra flags */
		__aligned_u64	iter_info;	/* extra bpf_iter_link_info */
		__u32		iter_info_len;	/* iter_info length */
	} link_create;

	struct { /* struct used by BPF_LINK_UPDATE command */
+29 −29
Original line number Diff line number Diff line
@@ -338,8 +338,8 @@ static void bpf_iter_link_release(struct bpf_link *link)
	struct bpf_iter_link *iter_link =
		container_of(link, struct bpf_iter_link, link);

	if (iter_link->aux.map)
		bpf_map_put_with_uref(iter_link->aux.map);
	if (iter_link->tinfo->reg_info->detach_target)
		iter_link->tinfo->reg_info->detach_target(&iter_link->aux);
}

static void bpf_iter_link_dealloc(struct bpf_link *link)
@@ -390,15 +390,35 @@ bool bpf_link_is_iter(struct bpf_link *link)

int bpf_iter_link_attach(const union bpf_attr *attr, struct bpf_prog *prog)
{
	union bpf_iter_link_info __user *ulinfo;
	struct bpf_link_primer link_primer;
	struct bpf_iter_target_info *tinfo;
	struct bpf_iter_aux_info aux = {};
	union bpf_iter_link_info linfo;
	struct bpf_iter_link *link;
	u32 prog_btf_id, target_fd;
	u32 prog_btf_id, linfo_len;
	bool existed = false;
	struct bpf_map *map;
	int err;

	if (attr->link_create.target_fd || attr->link_create.flags)
		return -EINVAL;

	memset(&linfo, 0, sizeof(union bpf_iter_link_info));

	ulinfo = u64_to_user_ptr(attr->link_create.iter_info);
	linfo_len = attr->link_create.iter_info_len;
	if (!ulinfo ^ !linfo_len)
		return -EINVAL;

	if (ulinfo) {
		err = bpf_check_uarg_tail_zero(ulinfo, sizeof(linfo),
					       linfo_len);
		if (err)
			return err;
		linfo_len = min_t(u32, linfo_len, sizeof(linfo));
		if (copy_from_user(&linfo, ulinfo, linfo_len))
			return -EFAULT;
	}

	prog_btf_id = prog->aux->attach_btf_id;
	mutex_lock(&targets_mutex);
	list_for_each_entry(tinfo, &targets, list) {
@@ -411,13 +431,6 @@ int bpf_iter_link_attach(const union bpf_attr *attr, struct bpf_prog *prog)
	if (!existed)
		return -ENOENT;

	/* Make sure user supplied flags are target expected. */
	target_fd = attr->link_create.target_fd;
	if (attr->link_create.flags != tinfo->reg_info->req_linfo)
		return -EINVAL;
	if (!attr->link_create.flags && target_fd)
		return -EINVAL;

	link = kzalloc(sizeof(*link), GFP_USER | __GFP_NOWARN);
	if (!link)
		return -ENOMEM;
@@ -431,28 +444,15 @@ int bpf_iter_link_attach(const union bpf_attr *attr, struct bpf_prog *prog)
		return err;
	}

	if (tinfo->reg_info->req_linfo == BPF_ITER_LINK_MAP_FD) {
		map = bpf_map_get_with_uref(target_fd);
		if (IS_ERR(map)) {
			err = PTR_ERR(map);
			goto cleanup_link;
		}

		aux.map = map;
		err = tinfo->reg_info->check_target(prog, &aux);
	if (tinfo->reg_info->attach_target) {
		err = tinfo->reg_info->attach_target(prog, &linfo, &link->aux);
		if (err) {
			bpf_map_put_with_uref(map);
			goto cleanup_link;
			bpf_link_cleanup(&link_primer);
			return err;
		}

		link->aux.map = map;
	}

	return bpf_link_settle(&link_primer);

cleanup_link:
	bpf_link_cleanup(&link_primer);
	return err;
}

static void init_seq_meta(struct bpf_iter_priv_data *priv_data,
+29 −8
Original line number Diff line number Diff line
@@ -98,12 +98,21 @@ static struct bpf_iter_reg bpf_map_reg_info = {
	.seq_info		= &bpf_map_seq_info,
};

static int bpf_iter_check_map(struct bpf_prog *prog,
static int bpf_iter_attach_map(struct bpf_prog *prog,
			       union bpf_iter_link_info *linfo,
			       struct bpf_iter_aux_info *aux)
{
	u32 key_acc_size, value_acc_size, key_size, value_size;
	struct bpf_map *map = aux->map;
	struct bpf_map *map;
	bool is_percpu = false;
	int err = -EINVAL;

	if (!linfo->map.map_fd)
		return -EBADF;

	map = bpf_map_get_with_uref(linfo->map.map_fd);
	if (IS_ERR(map))
		return PTR_ERR(map);

	if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH ||
	    map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH ||
@@ -112,7 +121,7 @@ static int bpf_iter_check_map(struct bpf_prog *prog,
	else if (map->map_type != BPF_MAP_TYPE_HASH &&
		 map->map_type != BPF_MAP_TYPE_LRU_HASH &&
		 map->map_type != BPF_MAP_TYPE_ARRAY)
		return -EINVAL;
		goto put_map;

	key_acc_size = prog->aux->max_rdonly_access;
	value_acc_size = prog->aux->max_rdwr_access;
@@ -122,10 +131,22 @@ static int bpf_iter_check_map(struct bpf_prog *prog,
	else
		value_size = round_up(map->value_size, 8) * num_possible_cpus();

	if (key_acc_size > key_size || value_acc_size > value_size)
		return -EACCES;
	if (key_acc_size > key_size || value_acc_size > value_size) {
		err = -EACCES;
		goto put_map;
	}

	aux->map = map;
	return 0;

put_map:
	bpf_map_put_with_uref(map);
	return err;
}

static void bpf_iter_detach_map(struct bpf_iter_aux_info *aux)
{
	bpf_map_put_with_uref(aux->map);
}

DEFINE_BPF_ITER_FUNC(bpf_map_elem, struct bpf_iter_meta *meta,
@@ -133,8 +154,8 @@ DEFINE_BPF_ITER_FUNC(bpf_map_elem, struct bpf_iter_meta *meta,

static const struct bpf_iter_reg bpf_map_elem_reg_info = {
	.target			= "bpf_map_elem",
	.check_target		= bpf_iter_check_map,
	.req_linfo		= BPF_ITER_LINK_MAP_FD,
	.attach_target		= bpf_iter_attach_map,
	.detach_target		= bpf_iter_detach_map,
	.ctx_arg_info_size	= 2,
	.ctx_arg_info		= {
		{ offsetof(struct bpf_iter__bpf_map_elem, key),
+1 −1
Original line number Diff line number Diff line
@@ -3883,7 +3883,7 @@ static int tracing_bpf_link_attach(const union bpf_attr *attr, struct bpf_prog *
	return -EINVAL;
}

#define BPF_LINK_CREATE_LAST_FIELD link_create.flags
#define BPF_LINK_CREATE_LAST_FIELD link_create.iter_info_len
static int link_create(union bpf_attr *attr)
{
	enum bpf_prog_type ptype;
Loading