Commit ac51d99b authored by Yonghong Song's avatar Yonghong Song Committed by Alexei Starovoitov
Browse files

bpf: Create anonymous bpf iterator



A new bpf command BPF_ITER_CREATE is added.

The anonymous bpf iterator is seq_file based.
The seq_file private data are referenced by targets.
The bpf_iter infrastructure allocates additional space
at seq_file->private, before the space used by targets,
to store some metadata, e.g.,
  prog:       prog to run
  session_id: a unique id for each opened seq_file
  seq_num:    how many times bpf programs are queried in this session
  done_stop:  an internal state to decide whether bpf program
              should be called in seq_ops->stop() or not

The seq_num will start from 0 for valid objects.
The bpf program may see the same seq_num more than once if
 - seq_file buffer overflow happens and the same object
   is retried by bpf_seq_read(), or
 - the bpf program explicitly requests a retry of the
   same object

Since modules are not supported for bpf_iter, all target
registration happens at __init time, so there is no
need to change bpf_iter_unreg_target(), as it is used
mostly in the error path of the init function, at which time
no bpf iterators have been created yet.

Signed-off-by: Yonghong Song <yhs@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Andrii Nakryiko <andriin@fb.com>
Link: https://lore.kernel.org/bpf/20200509175905.2475770-1-yhs@fb.com
parent fd4f12bc
Loading
Loading
Loading
Loading
+1 −0
Original line number Original line Diff line number Diff line
@@ -1144,6 +1144,7 @@ int bpf_iter_reg_target(struct bpf_iter_reg *reg_info);
void bpf_iter_unreg_target(const char *target);
void bpf_iter_unreg_target(const char *target);
bool bpf_iter_prog_supported(struct bpf_prog *prog);
bool bpf_iter_prog_supported(struct bpf_prog *prog);
int bpf_iter_link_attach(const union bpf_attr *attr, struct bpf_prog *prog);
int bpf_iter_link_attach(const union bpf_attr *attr, struct bpf_prog *prog);
int bpf_iter_new_fd(struct bpf_link *link);


int bpf_percpu_hash_copy(struct bpf_map *map, void *key, void *value);
int bpf_percpu_hash_copy(struct bpf_map *map, void *key, void *value);
int bpf_percpu_array_copy(struct bpf_map *map, void *key, void *value);
int bpf_percpu_array_copy(struct bpf_map *map, void *key, void *value);
+6 −0
Original line number Original line Diff line number Diff line
@@ -116,6 +116,7 @@ enum bpf_cmd {
	BPF_LINK_GET_FD_BY_ID,
	BPF_LINK_GET_FD_BY_ID,
	BPF_LINK_GET_NEXT_ID,
	BPF_LINK_GET_NEXT_ID,
	BPF_ENABLE_STATS,
	BPF_ENABLE_STATS,
	BPF_ITER_CREATE,
};
};


enum bpf_map_type {
enum bpf_map_type {
@@ -614,6 +615,11 @@ union bpf_attr {
		__u32		type;
		__u32		type;
	} enable_stats;
	} enable_stats;


	struct { /* struct used by BPF_ITER_CREATE command */
		__u32		link_fd;
		__u32		flags;
	} iter_create;

} __attribute__((aligned(8)));
} __attribute__((aligned(8)));


/* The description below is an attempt at providing documentation to eBPF
/* The description below is an attempt at providing documentation to eBPF
+129 −0
Original line number Original line Diff line number Diff line
@@ -2,6 +2,7 @@
/* Copyright (c) 2020 Facebook */
/* Copyright (c) 2020 Facebook */


#include <linux/fs.h>
#include <linux/fs.h>
#include <linux/anon_inodes.h>
#include <linux/filter.h>
#include <linux/filter.h>
#include <linux/bpf.h>
#include <linux/bpf.h>


@@ -20,12 +21,24 @@ struct bpf_iter_link {
	struct bpf_iter_target_info *tinfo;
	struct bpf_iter_target_info *tinfo;
};
};


/* Per-open bookkeeping stored in front of the target's own seq_file
 * private area. The whole object is allocated in one piece by
 * prepare_seq_file(); seq_file->private is then pointed at
 * target_private, and container_of() is used to get back to this header.
 */
struct bpf_iter_priv_data {
	/* target this iterator walks; supplies the seq_private callbacks */
	struct bpf_iter_target_info *tinfo;
	/* prog to run; a reference is held until iter_release() */
	struct bpf_prog *prog;
	/* unique id for each opened seq_file */
	u64 session_id;
	/* how many times bpf programs are queried in this session */
	u64 seq_num;
	/* internal state to decide whether the bpf program should be
	 * called in seq_ops->stop() or not
	 */
	bool done_stop;
	/* start of the target's private data (tinfo->seq_priv_size bytes) */
	u8 target_private[] __aligned(8);
};

static struct list_head targets = LIST_HEAD_INIT(targets);
static struct list_head targets = LIST_HEAD_INIT(targets);
static DEFINE_MUTEX(targets_mutex);
static DEFINE_MUTEX(targets_mutex);


/* protect bpf_iter_link changes */
/* protect bpf_iter_link changes */
static DEFINE_MUTEX(link_mutex);
static DEFINE_MUTEX(link_mutex);


/* Global counter, incremented on every opened seq_file; init_seq_meta()
 * stores the incremented value as that file's session_id.
 */
static atomic64_t session_id;

/* bpf_seq_read, a customized and simpler version for bpf iterator.
/* bpf_seq_read, a customized and simpler version for bpf iterator.
 * no_llseek is assumed for this file.
 * no_llseek is assumed for this file.
 * The following are differences from seq_read():
 * The following are differences from seq_read():
@@ -149,6 +162,33 @@ done:
	return copied;
	return copied;
}
}


/* ->release() handler of the anonymous bpf iterator file.
 *
 * Undoes what prepare_seq_file() set up: runs the target's optional
 * fini_seq_private() callback, drops the program reference and releases
 * the seq_file. Returns 0 when there is nothing to release, otherwise
 * the result of seq_release_private().
 */
static int iter_release(struct inode *inode, struct file *file)
{
	struct seq_file *m = file->private_data;
	struct bpf_iter_priv_data *priv;
	void *target_priv;

	/* The file is created with NULL private_data and prepare_seq_file()
	 * resets it to NULL on its error path, so there may be no seq_file.
	 */
	if (!m)
		return 0;

	/* m->private points at target_private; step back to the header. */
	target_priv = m->private;
	priv = container_of(target_priv, struct bpf_iter_priv_data,
			    target_private);

	if (priv->tinfo->fini_seq_private)
		priv->tinfo->fini_seq_private(target_priv);

	bpf_prog_put(priv->prog);

	/* Restore the pointer to the start of the allocation before the
	 * seq_file core releases the private data.
	 */
	m->private = priv;

	return seq_release_private(inode, file);
}

/* File operations of the anonymous "bpf_iter" file created by
 * bpf_iter_new_fd(): read-only via bpf_seq_read(), not seekable, and
 * torn down through iter_release().
 */
static const struct file_operations bpf_iter_fops = {
	.llseek		= no_llseek,
	.read		= bpf_seq_read,
	.release	= iter_release,
};

int bpf_iter_reg_target(struct bpf_iter_reg *reg_info)
int bpf_iter_reg_target(struct bpf_iter_reg *reg_info)
{
{
	struct bpf_iter_target_info *tinfo;
	struct bpf_iter_target_info *tinfo;
@@ -309,3 +349,92 @@ int bpf_iter_link_attach(const union bpf_attr *attr, struct bpf_prog *prog)


	return bpf_link_settle(&link_primer);
	return bpf_link_settle(&link_primer);
}
}

/* Fill in the bpf_iter header that precedes the target's private data.
 *
 * @priv_data: freshly allocated per-open private data
 * @tinfo:     target the iterator walks
 * @prog:      program to run; the caller already holds a reference
 *
 * Each call consumes one value of the global session_id counter.
 */
static void init_seq_meta(struct bpf_iter_priv_data *priv_data,
			  struct bpf_iter_target_info *tinfo,
			  struct bpf_prog *prog)
{
	/* per-session state starts from scratch */
	priv_data->done_stop = false;
	priv_data->seq_num = 0;
	priv_data->session_id = atomic64_inc_return(&session_id);

	/* what to iterate over and what to run on each object */
	priv_data->prog = prog;
	priv_data->tinfo = tinfo;
}

/* Turn @file into a seq_file backed by the target of @link.
 *
 * Takes a reference on the link's program, opens the seq_file with room
 * for both the bpf_iter header and the target's private data, lets the
 * target initialise its part, and finally points seq_file->private at
 * the target's area.
 *
 * Returns 0 on success. On failure returns -ENOMEM or the error from the
 * target's init_seq_private(); the program reference is dropped and
 * file->private_data is left NULL so iter_release() has nothing to undo.
 */
static int prepare_seq_file(struct file *file, struct bpf_iter_link *link)
{
	struct bpf_iter_target_info *target;
	struct bpf_iter_priv_data *meta;
	struct bpf_prog *iter_prog;
	struct seq_file *m;
	u32 priv_size;
	int ret;

	/* link_mutex protects bpf_iter_link changes: grab the program and
	 * pin it while holding the lock.
	 */
	mutex_lock(&link_mutex);
	iter_prog = link->link.prog;
	bpf_prog_inc(iter_prog);
	mutex_unlock(&link_mutex);

	target = link->tinfo;

	/* header first, target's private data right behind it */
	priv_size = offsetof(struct bpf_iter_priv_data, target_private);
	priv_size += target->seq_priv_size;

	meta = __seq_open_private(file, target->seq_ops, priv_size);
	if (!meta) {
		ret = -ENOMEM;
		goto put_prog;
	}

	if (target->init_seq_private) {
		ret = target->init_seq_private(meta->target_private);
		if (ret)
			goto close_seq;
	}

	init_seq_meta(meta, target, iter_prog);

	/* targets only ever see their own private area */
	m = file->private_data;
	m->private = meta->target_private;

	return 0;

close_seq:
	seq_release_private(file->f_inode, file);
	/* make the later iter_release() a no-op */
	file->private_data = NULL;
put_prog:
	bpf_prog_put(iter_prog);
	return ret;
}

int bpf_iter_new_fd(struct bpf_link *link)
{
	struct file *file;
	unsigned int flags;
	int err, fd;

	if (link->ops != &bpf_iter_link_lops)
		return -EINVAL;

	flags = O_RDONLY | O_CLOEXEC;
	fd = get_unused_fd_flags(flags);
	if (fd < 0)
		return fd;

	file = anon_inode_getfile("bpf_iter", &bpf_iter_fops, NULL, flags);
	if (IS_ERR(file)) {
		err = PTR_ERR(file);
		goto free_fd;
	}

	err = prepare_seq_file(file,
			       container_of(link, struct bpf_iter_link, link));
	if (err)
		goto free_file;

	fd_install(fd, file);
	return fd;

free_file:
	fput(file);
free_fd:
	put_unused_fd(fd);
	return err;
}
+26 −0
Original line number Original line Diff line number Diff line
@@ -3941,6 +3941,29 @@ static int bpf_enable_stats(union bpf_attr *attr)
	return -EINVAL;
	return -EINVAL;
}
}


#define BPF_ITER_CREATE_LAST_FIELD iter_create.flags

/* BPF_ITER_CREATE command handler.
 *
 * Looks up the link named by attr->iter_create.link_fd and asks
 * bpf_iter_new_fd() for an iterator file on it. No flags are accepted.
 *
 * Returns the new iterator fd, -EINVAL for a malformed attr or non-zero
 * flags, or a negative error from the link lookup / fd creation.
 */
static int bpf_iter_create(union bpf_attr *attr)
{
	struct bpf_link *link;
	int fd;

	if (CHECK_ATTR(BPF_ITER_CREATE) || attr->iter_create.flags)
		return -EINVAL;

	link = bpf_link_get_from_fd(attr->iter_create.link_fd);
	if (IS_ERR(link))
		return PTR_ERR(link);

	fd = bpf_iter_new_fd(link);

	/* the link reference was only needed for the lookup */
	bpf_link_put(link);

	return fd;
}

SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, size)
SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, size)
{
{
	union bpf_attr attr;
	union bpf_attr attr;
@@ -4068,6 +4091,9 @@ SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, siz
	case BPF_ENABLE_STATS:
	case BPF_ENABLE_STATS:
		err = bpf_enable_stats(&attr);
		err = bpf_enable_stats(&attr);
		break;
		break;
	case BPF_ITER_CREATE:
		err = bpf_iter_create(&attr);
		break;
	default:
	default:
		err = -EINVAL;
		err = -EINVAL;
		break;
		break;
+6 −0
Original line number Original line Diff line number Diff line
@@ -116,6 +116,7 @@ enum bpf_cmd {
	BPF_LINK_GET_FD_BY_ID,
	BPF_LINK_GET_FD_BY_ID,
	BPF_LINK_GET_NEXT_ID,
	BPF_LINK_GET_NEXT_ID,
	BPF_ENABLE_STATS,
	BPF_ENABLE_STATS,
	BPF_ITER_CREATE,
};
};


enum bpf_map_type {
enum bpf_map_type {
@@ -614,6 +615,11 @@ union bpf_attr {
		__u32		type;
		__u32		type;
	} enable_stats;
	} enable_stats;


	struct { /* struct used by BPF_ITER_CREATE command */
		__u32		link_fd;
		__u32		flags;
	} iter_create;

} __attribute__((aligned(8)));
} __attribute__((aligned(8)));


/* The description below is an attempt at providing documentation to eBPF
/* The description below is an attempt at providing documentation to eBPF