Commit d71fa5c9 authored by Alexei Starovoitov, committed by Daniel Borkmann
Browse files

bpf: Add kernel module with user mode driver that populates bpffs.



Add kernel module with user mode driver that populates bpffs with
BPF iterators.

$ mount bpffs /my/bpffs/ -t bpf
$ ls -la /my/bpffs/
total 4
drwxrwxrwt  2 root root    0 Jul  2 00:27 .
drwxr-xr-x 19 root root 4096 Jul  2 00:09 ..
-rw-------  1 root root    0 Jul  2 00:27 maps.debug
-rw-------  1 root root    0 Jul  2 00:27 progs.debug

The user mode driver will load BPF Type Formats, create BPF maps, populate BPF
maps, load two BPF programs, attach them to BPF iterators, and finally send two
bpf_link IDs back to the kernel.
The kernel will pin two bpf_links into newly mounted bpffs instance under
names "progs.debug" and "maps.debug". These two files become human readable.

$ cat /my/bpffs/progs.debug
  id name            attached
  11 dump_bpf_map    bpf_iter_bpf_map
  12 dump_bpf_prog   bpf_iter_bpf_prog
  27 test_pkt_access
  32 test_main       test_pkt_access test_pkt_access
  33 test_subprog1   test_pkt_access_subprog1 test_pkt_access
  34 test_subprog2   test_pkt_access_subprog2 test_pkt_access
  35 test_subprog3   test_pkt_access_subprog3 test_pkt_access
  36 new_get_skb_len get_skb_len test_pkt_access
  37 new_get_skb_ifindex get_skb_ifindex test_pkt_access
  38 new_get_constant get_constant test_pkt_access

The BPF program dump_bpf_prog() in iterators.bpf.c prints this data about
all BPF programs currently loaded in the system. This information is unstable
and will change from kernel to kernel, as the ".debug" suffix conveys.

Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Link: https://lore.kernel.org/bpf/20200819042759.51280-4-alexei.starovoitov@gmail.com
parent f0fdfefb
Loading
Loading
Loading
Loading
+2 −0
Original line number Diff line number Diff line
@@ -1710,6 +1710,8 @@ config BPF_JIT_DEFAULT_ON
	def_bool ARCH_WANT_DEFAULT_BPF_JIT || BPF_JIT_ALWAYS_ON
	depends on HAVE_EBPF_JIT && BPF_JIT

source "kernel/bpf/preload/Kconfig"

config USERFAULTFD
	bool "Enable userfaultfd() system call"
	depends on MMU
+1 −1
Original line number Diff line number Diff line
@@ -12,7 +12,7 @@ obj-y = fork.o exec_domain.o panic.o \
	    notifier.o ksysfs.o cred.o reboot.o \
	    async.o range.o smpboot.o ucount.o regset.o

obj-$(CONFIG_BPFILTER) += usermode_driver.o
obj-$(CONFIG_USERMODE_DRIVER) += usermode_driver.o
obj-$(CONFIG_MODULES) += kmod.o
obj-$(CONFIG_MULTIUSER) += groups.o

+1 −0
Original line number Diff line number Diff line
@@ -29,3 +29,4 @@ ifeq ($(CONFIG_BPF_JIT),y)
obj-$(CONFIG_BPF_SYSCALL) += bpf_struct_ops.o
obj-${CONFIG_BPF_LSM} += bpf_lsm.o
endif
obj-$(CONFIG_BPF_PRELOAD) += preload/
+113 −3
Original line number Diff line number Diff line
@@ -20,6 +20,7 @@
#include <linux/filter.h>
#include <linux/bpf.h>
#include <linux/bpf_trace.h>
#include "preload/bpf_preload.h"

enum bpf_type {
	BPF_TYPE_UNSPEC	= 0,
@@ -369,9 +370,10 @@ static struct dentry *
bpf_lookup(struct inode *dir, struct dentry *dentry, unsigned flags)
{
	/* Dots in names (e.g. "/sys/fs/bpf/foo.bar") are reserved for future
	 * extensions.
	 * extensions. That allows populate_bpffs() to create special files.
	 */
	if (strchr(dentry->d_name.name, '.'))
	if ((dir->i_mode & S_IALLUGO) &&
	    strchr(dentry->d_name.name, '.'))
		return ERR_PTR(-EPERM);

	return simple_lookup(dir, dentry, flags);
@@ -409,6 +411,27 @@ static const struct inode_operations bpf_dir_iops = {
	.unlink		= simple_unlink,
};

/* Pin an iterator link into bpffs from kernel context.
 *
 * Looks up @name under @parent while holding the parent inode lock and
 * creates a regular, owner-readable (S_IFREG | S_IRUSR) object for @link
 * using bpf_link_iops and bpf_iter_fops.
 *
 * Returns the lookup error if the name cannot be resolved, otherwise the
 * result of bpf_mkobj_ops().
 */
static int bpf_iter_link_pin_kernel(struct dentry *parent,
				    const char *name, struct bpf_link *link)
{
	struct dentry *child;
	int err;

	inode_lock(parent->d_inode);

	child = lookup_one_len(name, parent, strlen(name));
	if (IS_ERR(child)) {
		err = PTR_ERR(child);
		goto unlock;
	}

	err = bpf_mkobj_ops(child, S_IFREG | S_IRUSR, link, &bpf_link_iops,
			    &bpf_iter_fops);
	/* drop the reference obtained by the lookup above */
	dput(child);
unlock:
	inode_unlock(parent->d_inode);
	return err;
}

static int bpf_obj_do_pin(const char __user *pathname, void *raw,
			  enum bpf_type type)
{
@@ -638,6 +661,91 @@ static int bpf_parse_param(struct fs_context *fc, struct fs_parameter *param)
	return 0;
}

/* Ops used by bpffs to drive the bpf_preload user mode driver.
 * Starts out NULL; per the comment in bpf_preload_mod_get(), the bpf_preload
 * module's __init function populates it.
 * NOTE(review): exported GPL-only, presumably so that the module can set
 * this pointer - confirm against kernel/bpf/preload/.
 */
struct bpf_preload_ops *bpf_preload_ops;
EXPORT_SYMBOL_GPL(bpf_preload_ops);

/* Make sure the bpf_preload ops are available and take a reference on the
 * module that owns them.
 *
 * Returns true with the module reference held, false if the ops are still
 * missing after request_module() or if the reference could not be taken.
 */
static bool bpf_preload_mod_get(void)
{
	/* bpf_preload.ko may not have been loaded yet; ask for it now.
	 * In the built-in case the module's __init function will have
	 * populated bpf_preload_ops already.
	 */
	if (!bpf_preload_ops) {
		request_module("bpf_preload");
		if (!bpf_preload_ops)
			return false;
	}

	/* Hold a reference so the module cannot go away while the kernel
	 * is talking to it and to its UMD.
	 */
	if (try_module_get(bpf_preload_ops->owner))
		return true;

	pr_err("bpf_preload module get failed.\n");
	return false;
}

/* Drop the module reference taken by bpf_preload_mod_get().
 * Does nothing when bpf_preload_ops is NULL.
 */
static void bpf_preload_mod_put(void)
{
	if (!bpf_preload_ops)
		return;

	/* from here on the user can "rmmod bpf_preload" if necessary */
	module_put(bpf_preload_ops->owner);
}

/* Serializes the kernel's interactions with the bpf_preload UMD;
 * taken by populate_bpffs().
 */
static DEFINE_MUTEX(bpf_preload_lock);

/* Pin the preloaded BPF iterator links into the bpffs directory @parent.
 *
 * Under bpf_preload_lock: obtains the bpf_preload ops, asks preload() for
 * the link ids and names, resolves each id with bpf_link_by_id(), pins every
 * link under its name, and finally calls finish().
 *
 * Returns 0 on success, when the bpf_preload ops cannot be obtained, or when
 * bpf_preload_ops->info.tgid is already set; otherwise the first error from
 * preload(), bpf_link_by_id(), pinning or finish(). On error, links that
 * were looked up but not pinned are released with bpf_link_put().
 */
static int populate_bpffs(struct dentry *parent)
{
	struct bpf_preload_info objs[BPF_PRELOAD_LINKS] = {};
	struct bpf_link *links[BPF_PRELOAD_LINKS] = {};
	int err = 0, i;

	/* all kernel interactions with the bpf_preload UMD are serialized
	 * by this mutex
	 */
	mutex_lock(&bpf_preload_lock);

	/* load bpf_preload.ko if it wasn't built into vmlinux */
	if (!bpf_preload_mod_get())
		goto out;

	/* NOTE(review): a non-zero tgid presumably means the UMD has already
	 * been started - confirm; in that case there is nothing to preload.
	 */
	if (bpf_preload_ops->info.tgid)
		goto out_put;

	/* preload() will start UMD that will load BPF iterator programs */
	err = bpf_preload_ops->preload(objs);
	if (err)
		goto out_put;

	/* resolve every link id before pinning anything */
	for (i = 0; i < BPF_PRELOAD_LINKS; i++) {
		links[i] = bpf_link_by_id(objs[i].link_id);
		if (IS_ERR(links[i])) {
			err = PTR_ERR(links[i]);
			goto out_put;
		}
	}

	for (i = 0; i < BPF_PRELOAD_LINKS; i++) {
		err = bpf_iter_link_pin_kernel(parent, objs[i].link_name,
					       links[i]);
		if (err)
			goto out_put;
		/* a successfully pinned link stays pinned even if a later
		 * link fails to pin, so exclude it from the cleanup below
		 */
		links[i] = NULL;
	}

	/* finish() will tell UMD process to exit */
	err = bpf_preload_ops->finish();
out_put:
	bpf_preload_mod_put();
out:
	mutex_unlock(&bpf_preload_lock);
	if (err) {
		/* release whatever was looked up but never pinned */
		for (i = 0; i < BPF_PRELOAD_LINKS; i++)
			if (!IS_ERR_OR_NULL(links[i]))
				bpf_link_put(links[i]);
	}
	return err;
}

static int bpf_fill_super(struct super_block *sb, struct fs_context *fc)
{
	static const struct tree_descr bpf_rfiles[] = { { "" } };
@@ -654,8 +762,8 @@ static int bpf_fill_super(struct super_block *sb, struct fs_context *fc)
	inode = sb->s_root->d_inode;
	inode->i_op = &bpf_dir_iops;
	inode->i_mode &= ~S_IALLUGO;
	populate_bpffs(sb->s_root);
	inode->i_mode |= S_ISVTX | opts->mode;

	return 0;
}

@@ -705,6 +813,8 @@ static int __init bpf_init(void)
{
	int ret;

	mutex_init(&bpf_preload_lock);

	ret = sysfs_create_mount_point(fs_kobj, "bpf");
	if (ret)
		return ret;
+23 −0
Original line number Diff line number Diff line
# SPDX-License-Identifier: GPL-2.0-only
config USERMODE_DRIVER
	bool
	default n

menuconfig BPF_PRELOAD
	bool "Preload BPF file system with kernel specific program and map iterators"
	depends on BPF
	select USERMODE_DRIVER
	help
	  This builds a kernel module with several embedded BPF programs that
	  are pinned into the BPF FS mount point as human-readable files, which
	  are useful for debugging and introspection of BPF programs and maps.

if BPF_PRELOAD
config BPF_PRELOAD_UMD
	tristate "bpf_preload kernel module with user mode driver"
	depends on CC_CAN_LINK
	depends on m || CC_CAN_LINK_STATIC
	default m
	help
	  This builds the bpf_preload kernel module with an embedded user mode driver.
endif
Loading