Commit a43d0508 authored by Alexei Starovoitov's avatar Alexei Starovoitov
Browse files

Merge branch 'bpf-sysctl-hook'



Andrey Ignatov says:

====================
v2->v3:
- simplify C based selftests by relying on variable offset stack access.

v1->v2:
- add fs/proc/proc_sysctl.c mainteners to Cc:.

The patch set introduces new BPF hook for sysctl.

It adds new program type BPF_PROG_TYPE_CGROUP_SYSCTL and attach type
BPF_CGROUP_SYSCTL.

BPF_CGROUP_SYSCTL hook is placed before calling to sysctl's proc_handler so
that accesses (read/write) to sysctl can be controlled for specific cgroup
and either allowed or denied, or traced.

The hook has access to sysctl name, current sysctl value and (on write
only) to new sysctl value via corresponding helpers. New sysctl value can
be overridden by program. Both name and values (current/new) are
represented as strings same way they're visible in /proc/sys/. It is up to
program to parse these strings.

To help with parsing the most common kind of sysctl value, vector of
integers, two new helpers are provided: bpf_strtol and bpf_strtoul with
semantic similar to user space strtol(3) and strtoul(3).

The hook also provides bpf_sysctl context with two fields:
* @write indicates whether sysctl is being read (= 0) or written (= 1);
* @file_pos is sysctl file position to read from or write to, can be
  overridden.

The hook allows to make better isolation for containerized applications
that are run as root so that one container can't change a sysctl and affect
all other containers on a host, make changes to allowed sysctl in a safer
way and simplify sysctl tracing for cgroups.

Patch 1 is preliminary refactoring.
Patch 2 adds new program and attach types.
Patches 3-5 implement helpers to access sysctl name and value.
Patch 6 adds file_pos field to bpf_sysctl context.
Patch 7 updates UAPI in tools.
Patches 8-9 add support for the new hook to libbpf and corresponding test.
Patches 10-14 add selftests for the new hook.
Patch 15 adds support for new arg types to verifier: pointer to integer.
Patch 16 adds bpf_strto{l,ul} helpers to parse integers from sysctl value.
Patch 17 updates UAPI in tools.
Patch 18 updates bpf_helpers.h.
Patch 19 adds selftests for pointer to integer in verifier.
Patches 20-21 add selftests for bpf_strto{l,ul}, including integration
              C based test for sysctl value parsing.
====================

Signed-off-by: default avatarAlexei Starovoitov <ast@kernel.org>
parents e0a092eb 7568f4cb
Loading
Loading
Loading
Loading
+21 −4
Original line number Diff line number Diff line
@@ -13,6 +13,7 @@
#include <linux/namei.h>
#include <linux/mm.h>
#include <linux/module.h>
#include <linux/bpf-cgroup.h>
#include "internal.h"

static const struct dentry_operations proc_sys_dentry_operations;
@@ -569,8 +570,8 @@ static ssize_t proc_sys_call_handler(struct file *filp, void __user *buf,
	struct inode *inode = file_inode(filp);
	struct ctl_table_header *head = grab_header(inode);
	struct ctl_table *table = PROC_I(inode)->sysctl_entry;
	void *new_buf = NULL;
	ssize_t error;
	size_t res;

	if (IS_ERR(head))
		return PTR_ERR(head);
@@ -588,11 +589,27 @@ static ssize_t proc_sys_call_handler(struct file *filp, void __user *buf,
	if (!table->proc_handler)
		goto out;

	error = BPF_CGROUP_RUN_PROG_SYSCTL(head, table, write, buf, &count,
					   ppos, &new_buf);
	if (error)
		goto out;

	/* careful: calling conventions are nasty here */
	res = count;
	error = table->proc_handler(table, write, buf, &res, ppos);
	if (new_buf) {
		mm_segment_t old_fs;

		old_fs = get_fs();
		set_fs(KERNEL_DS);
		error = table->proc_handler(table, write, (void __user *)new_buf,
					    &count, ppos);
		set_fs(old_fs);
		kfree(new_buf);
	} else {
		error = table->proc_handler(table, write, buf, &count, ppos);
	}

	if (!error)
		error = res;
		error = count;
out:
	sysctl_head_finish(head);

+21 −0
Original line number Diff line number Diff line
@@ -17,6 +17,8 @@ struct bpf_map;
struct bpf_prog;
struct bpf_sock_ops_kern;
struct bpf_cgroup_storage;
struct ctl_table;
struct ctl_table_header;

#ifdef CONFIG_CGROUP_BPF

@@ -109,6 +111,12 @@ int __cgroup_bpf_run_filter_sock_ops(struct sock *sk,
int __cgroup_bpf_check_dev_permission(short dev_type, u32 major, u32 minor,
				      short access, enum bpf_attach_type type);

int __cgroup_bpf_run_filter_sysctl(struct ctl_table_header *head,
				   struct ctl_table *table, int write,
				   void __user *buf, size_t *pcount,
				   loff_t *ppos, void **new_buf,
				   enum bpf_attach_type type);

static inline enum bpf_cgroup_storage_type cgroup_storage_type(
	struct bpf_map *map)
{
@@ -253,6 +261,18 @@ int bpf_percpu_cgroup_storage_update(struct bpf_map *map, void *key,
									      \
	__ret;								      \
})


#define BPF_CGROUP_RUN_PROG_SYSCTL(head, table, write, buf, count, pos, nbuf)  \
({									       \
	int __ret = 0;							       \
	if (cgroup_bpf_enabled)						       \
		__ret = __cgroup_bpf_run_filter_sysctl(head, table, write,     \
						       buf, count, pos, nbuf,  \
						       BPF_CGROUP_SYSCTL);     \
	__ret;								       \
})

int cgroup_bpf_prog_attach(const union bpf_attr *attr,
			   enum bpf_prog_type ptype, struct bpf_prog *prog);
int cgroup_bpf_prog_detach(const union bpf_attr *attr,
@@ -321,6 +341,7 @@ static inline int bpf_percpu_cgroup_storage_update(struct bpf_map *map,
#define BPF_CGROUP_RUN_PROG_UDP6_SENDMSG_LOCK(sk, uaddr, t_ctx) ({ 0; })
#define BPF_CGROUP_RUN_PROG_SOCK_OPS(sock_ops) ({ 0; })
#define BPF_CGROUP_RUN_PROG_DEVICE_CGROUP(type,major,minor,access) ({ 0; })
#define BPF_CGROUP_RUN_PROG_SYSCTL(head,table,write,buf,count,pos,nbuf) ({ 0; })

#define for_each_cgroup_storage_type(stype) for (; false; )

+4 −0
Original line number Diff line number Diff line
@@ -202,6 +202,8 @@ enum bpf_arg_type {
	ARG_ANYTHING,		/* any (initialized) argument is ok */
	ARG_PTR_TO_SPIN_LOCK,	/* pointer to bpf_spin_lock */
	ARG_PTR_TO_SOCK_COMMON,	/* pointer to sock_common */
	ARG_PTR_TO_INT,		/* pointer to int */
	ARG_PTR_TO_LONG,	/* pointer to long */
};

/* type of values returned from helper functions */
@@ -987,6 +989,8 @@ extern const struct bpf_func_proto bpf_sk_redirect_map_proto;
extern const struct bpf_func_proto bpf_spin_lock_proto;
extern const struct bpf_func_proto bpf_spin_unlock_proto;
extern const struct bpf_func_proto bpf_get_local_storage_proto;
extern const struct bpf_func_proto bpf_strtol_proto;
extern const struct bpf_func_proto bpf_strtoul_proto;

/* Shared helpers among cBPF and eBPF. */
void bpf_user_rnd_init_once(void);
+1 −0
Original line number Diff line number Diff line
@@ -28,6 +28,7 @@ BPF_PROG_TYPE(BPF_PROG_TYPE_RAW_TRACEPOINT, raw_tracepoint)
#endif
#ifdef CONFIG_CGROUP_BPF
BPF_PROG_TYPE(BPF_PROG_TYPE_CGROUP_DEVICE, cg_dev)
BPF_PROG_TYPE(BPF_PROG_TYPE_CGROUP_SYSCTL, cg_sysctl)
#endif
#ifdef CONFIG_BPF_LIRC_MODE2
BPF_PROG_TYPE(BPF_PROG_TYPE_LIRC_MODE2, lirc_mode2)
+16 −0
Original line number Diff line number Diff line
@@ -33,6 +33,8 @@ struct bpf_prog_aux;
struct xdp_rxq_info;
struct xdp_buff;
struct sock_reuseport;
struct ctl_table;
struct ctl_table_header;

/* ArgX, context and stack frame pointer register positions. Note,
 * Arg1, Arg2, Arg3, etc are used as argument mappings of function
@@ -1177,4 +1179,18 @@ struct bpf_sock_ops_kern {
					 */
};

struct bpf_sysctl_kern {
	struct ctl_table_header *head;
	struct ctl_table *table;
	void *cur_val;
	size_t cur_len;
	void *new_val;
	size_t new_len;
	int new_updated;
	int write;
	loff_t *ppos;
	/* Temporary "register" for indirect stores to ppos. */
	u64 tmp_reg;
};

#endif /* __LINUX_FILTER_H__ */
Loading