Commit 99cacdc6 authored by Alexei Starovoitov's avatar Alexei Starovoitov
Browse files

Merge branch 'replace-cg_bpf-prog'



Andrey Ignatov says:

====================
v3->v4:
- use OPTS_VALID and OPTS_GET to handle bpf_prog_attach_opts.

v2->v3:
- rely on DECLARE_LIBBPF_OPTS from libbpf_common.h;
- separate "required" and "optional" arguments in bpf_prog_attach_xattr;
- convert test_cgroup_attach to prog_tests;
- move the new selftest to prog_tests/cgroup_attach_multi.

v1->v2:
- move DECLARE_LIBBPF_OPTS from libbpf.h to bpf.h (patch 4);
- switch new libbpf API to OPTS framework;
- switch selftest to libbpf OPTS framework.

This patch set adds support for replacing cgroup-bpf programs attached with
BPF_F_ALLOW_MULTI flag so that any program in a list can be updated to a new
version without service interruption and order of programs can be preserved.

Please see patch 3 for details on the use-case and API changes.

Other patches:
Patch 1 is preliminary refactoring of __cgroup_bpf_attach to simplify it.
Patch 2 is minor cleanup of hierarchy_allows_attach.
Patch 4 extends libbpf API to support new set of attach attributes.
Patch 5 converts test_cgroup_attach to prog_tests.
Patch 6 adds selftest coverage for the new API.
====================

Signed-off-by: default avatarAlexei Starovoitov <ast@kernel.org>
parents c92bbaa0 06ac0186
Loading
Loading
Loading
Loading
+3 −1
Original line number Diff line number Diff line
@@ -85,6 +85,7 @@ int cgroup_bpf_inherit(struct cgroup *cgrp);
void cgroup_bpf_offline(struct cgroup *cgrp);

int __cgroup_bpf_attach(struct cgroup *cgrp, struct bpf_prog *prog,
			struct bpf_prog *replace_prog,
			enum bpf_attach_type type, u32 flags);
int __cgroup_bpf_detach(struct cgroup *cgrp, struct bpf_prog *prog,
			enum bpf_attach_type type);
@@ -93,7 +94,8 @@ int __cgroup_bpf_query(struct cgroup *cgrp, const union bpf_attr *attr,

/* Wrapper for __cgroup_bpf_*() protected by cgroup_mutex */
int cgroup_bpf_attach(struct cgroup *cgrp, struct bpf_prog *prog,
		      enum bpf_attach_type type, u32 flags);
		      struct bpf_prog *replace_prog, enum bpf_attach_type type,
		      u32 flags);
int cgroup_bpf_detach(struct cgroup *cgrp, struct bpf_prog *prog,
		      enum bpf_attach_type type, u32 flags);
int cgroup_bpf_query(struct cgroup *cgrp, const union bpf_attr *attr,
+10 −0
Original line number Diff line number Diff line
@@ -231,6 +231,11 @@ enum bpf_attach_type {
 * When children program makes decision (like picking TCP CA or sock bind)
 * parent program has a chance to override it.
 *
 * With BPF_F_ALLOW_MULTI a new program is added to the end of the list of
 * programs for a cgroup. Though it's possible to replace an old program at
 * any position by also specifying BPF_F_REPLACE flag and position itself in
 * replace_bpf_fd attribute. Old program at this position will be released.
 *
 * A cgroup with MULTI or OVERRIDE flag allows any attach flags in sub-cgroups.
 * A cgroup with NONE doesn't allow any programs in sub-cgroups.
 * Ex1:
@@ -249,6 +254,7 @@ enum bpf_attach_type {
 */
#define BPF_F_ALLOW_OVERRIDE	(1U << 0)
#define BPF_F_ALLOW_MULTI	(1U << 1)
#define BPF_F_REPLACE		(1U << 2)

/* If BPF_F_STRICT_ALIGNMENT is used in BPF_PROG_LOAD command, the
 * verifier will perform strict alignment checking as if the kernel
@@ -442,6 +448,10 @@ union bpf_attr {
		__u32		attach_bpf_fd;	/* eBPF program to attach */
		__u32		attach_type;
		__u32		attach_flags;
		__u32		replace_bpf_fd;	/* previously attached eBPF
						 * program to replace if
						 * BPF_F_REPLACE is used
						 */
	};

	struct { /* anonymous struct used by BPF_PROG_TEST_RUN command */
+51 −46
Original line number Diff line number Diff line
@@ -103,8 +103,7 @@ static u32 prog_list_length(struct list_head *head)
 * if parent has overridable or multi-prog, allow attaching
 */
static bool hierarchy_allows_attach(struct cgroup *cgrp,
				    enum bpf_attach_type type,
				    u32 new_flags)
				    enum bpf_attach_type type)
{
	struct cgroup *p;

@@ -283,31 +282,34 @@ cleanup:
 *                         propagate the change to descendants
 * @cgrp: The cgroup which descendants to traverse
 * @prog: A program to attach
 * @replace_prog: Previously attached program to replace if BPF_F_REPLACE is set
 * @type: Type of attach operation
 * @flags: Option flags
 *
 * Must be called with cgroup_mutex held.
 */
int __cgroup_bpf_attach(struct cgroup *cgrp, struct bpf_prog *prog,
			struct bpf_prog *replace_prog,
			enum bpf_attach_type type, u32 flags)
{
	u32 saved_flags = (flags & (BPF_F_ALLOW_OVERRIDE | BPF_F_ALLOW_MULTI));
	struct list_head *progs = &cgrp->bpf.progs[type];
	struct bpf_prog *old_prog = NULL;
	struct bpf_cgroup_storage *storage[MAX_BPF_CGROUP_STORAGE_TYPE],
		*old_storage[MAX_BPF_CGROUP_STORAGE_TYPE] = {NULL};
	struct bpf_prog_list *pl, *replace_pl = NULL;
	enum bpf_cgroup_storage_type stype;
	struct bpf_prog_list *pl;
	bool pl_was_allocated;
	int err;

	if ((flags & BPF_F_ALLOW_OVERRIDE) && (flags & BPF_F_ALLOW_MULTI))
	if (((flags & BPF_F_ALLOW_OVERRIDE) && (flags & BPF_F_ALLOW_MULTI)) ||
	    ((flags & BPF_F_REPLACE) && !(flags & BPF_F_ALLOW_MULTI)))
		/* invalid combination */
		return -EINVAL;

	if (!hierarchy_allows_attach(cgrp, type, flags))
	if (!hierarchy_allows_attach(cgrp, type))
		return -EPERM;

	if (!list_empty(progs) && cgrp->bpf.flags[type] != flags)
	if (!list_empty(progs) && cgrp->bpf.flags[type] != saved_flags)
		/* Disallow attaching non-overridable on top
		 * of existing overridable in this cgroup.
		 * Disallow attaching multi-prog if overridable or none
@@ -317,63 +319,53 @@ int __cgroup_bpf_attach(struct cgroup *cgrp, struct bpf_prog *prog,
	if (prog_list_length(progs) >= BPF_CGROUP_MAX_PROGS)
		return -E2BIG;

	for_each_cgroup_storage_type(stype) {
		storage[stype] = bpf_cgroup_storage_alloc(prog, stype);
		if (IS_ERR(storage[stype])) {
			storage[stype] = NULL;
			for_each_cgroup_storage_type(stype)
				bpf_cgroup_storage_free(storage[stype]);
			return -ENOMEM;
		}
	}

	if (flags & BPF_F_ALLOW_MULTI) {
		list_for_each_entry(pl, progs, node) {
			if (pl->prog == prog) {
			if (pl->prog == prog)
				/* disallow attaching the same prog twice */
				for_each_cgroup_storage_type(stype)
					bpf_cgroup_storage_free(storage[stype]);
				return -EINVAL;
			if (pl->prog == replace_prog)
				replace_pl = pl;
		}
		if ((flags & BPF_F_REPLACE) && !replace_pl)
			/* prog to replace not found for cgroup */
			return -ENOENT;
	} else if (!list_empty(progs)) {
		replace_pl = list_first_entry(progs, typeof(*pl), node);
	}

		pl = kmalloc(sizeof(*pl), GFP_KERNEL);
		if (!pl) {
	for_each_cgroup_storage_type(stype) {
		storage[stype] = bpf_cgroup_storage_alloc(prog, stype);
		if (IS_ERR(storage[stype])) {
			storage[stype] = NULL;
			for_each_cgroup_storage_type(stype)
				bpf_cgroup_storage_free(storage[stype]);
			return -ENOMEM;
		}
	}

		pl_was_allocated = true;
		pl->prog = prog;
		for_each_cgroup_storage_type(stype)
			pl->storage[stype] = storage[stype];
		list_add_tail(&pl->node, progs);
	if (replace_pl) {
		pl = replace_pl;
		old_prog = pl->prog;
		for_each_cgroup_storage_type(stype) {
			old_storage[stype] = pl->storage[stype];
			bpf_cgroup_storage_unlink(old_storage[stype]);
		}
	} else {
		if (list_empty(progs)) {
		pl = kmalloc(sizeof(*pl), GFP_KERNEL);
		if (!pl) {
			for_each_cgroup_storage_type(stype)
				bpf_cgroup_storage_free(storage[stype]);
			return -ENOMEM;
		}
			pl_was_allocated = true;
		list_add_tail(&pl->node, progs);
		} else {
			pl = list_first_entry(progs, typeof(*pl), node);
			old_prog = pl->prog;
			for_each_cgroup_storage_type(stype) {
				old_storage[stype] = pl->storage[stype];
				bpf_cgroup_storage_unlink(old_storage[stype]);
			}
			pl_was_allocated = false;
	}

	pl->prog = prog;
	for_each_cgroup_storage_type(stype)
		pl->storage[stype] = storage[stype];
	}

	cgrp->bpf.flags[type] = flags;
	cgrp->bpf.flags[type] = saved_flags;

	err = update_effective_progs(cgrp, type);
	if (err)
@@ -401,7 +393,7 @@ cleanup:
		pl->storage[stype] = old_storage[stype];
		bpf_cgroup_storage_link(old_storage[stype], cgrp, type);
	}
	if (pl_was_allocated) {
	if (!replace_pl) {
		list_del(&pl->node);
		kfree(pl);
	}
@@ -539,6 +531,7 @@ int __cgroup_bpf_query(struct cgroup *cgrp, const union bpf_attr *attr,
int cgroup_bpf_prog_attach(const union bpf_attr *attr,
			   enum bpf_prog_type ptype, struct bpf_prog *prog)
{
	struct bpf_prog *replace_prog = NULL;
	struct cgroup *cgrp;
	int ret;

@@ -546,8 +539,20 @@ int cgroup_bpf_prog_attach(const union bpf_attr *attr,
	if (IS_ERR(cgrp))
		return PTR_ERR(cgrp);

	ret = cgroup_bpf_attach(cgrp, prog, attr->attach_type,
	if ((attr->attach_flags & BPF_F_ALLOW_MULTI) &&
	    (attr->attach_flags & BPF_F_REPLACE)) {
		replace_prog = bpf_prog_get_type(attr->replace_bpf_fd, ptype);
		if (IS_ERR(replace_prog)) {
			cgroup_put(cgrp);
			return PTR_ERR(replace_prog);
		}
	}

	ret = cgroup_bpf_attach(cgrp, prog, replace_prog, attr->attach_type,
				attr->attach_flags);

	if (replace_prog)
		bpf_prog_put(replace_prog);
	cgroup_put(cgrp);
	return ret;
}
+2 −2
Original line number Diff line number Diff line
@@ -2073,10 +2073,10 @@ static int bpf_prog_attach_check_attach_type(const struct bpf_prog *prog,
	}
}

#define BPF_PROG_ATTACH_LAST_FIELD attach_flags
#define BPF_PROG_ATTACH_LAST_FIELD replace_bpf_fd

#define BPF_F_ATTACH_MASK \
	(BPF_F_ALLOW_OVERRIDE | BPF_F_ALLOW_MULTI)
	(BPF_F_ALLOW_OVERRIDE | BPF_F_ALLOW_MULTI | BPF_F_REPLACE)

static int bpf_prog_attach(const union bpf_attr *attr)
{
+3 −2
Original line number Diff line number Diff line
@@ -6288,12 +6288,13 @@ void cgroup_sk_free(struct sock_cgroup_data *skcd)

#ifdef CONFIG_CGROUP_BPF
int cgroup_bpf_attach(struct cgroup *cgrp, struct bpf_prog *prog,
		      enum bpf_attach_type type, u32 flags)
		      struct bpf_prog *replace_prog, enum bpf_attach_type type,
		      u32 flags)
{
	int ret;

	mutex_lock(&cgroup_mutex);
	ret = __cgroup_bpf_attach(cgrp, prog, type, flags);
	ret = __cgroup_bpf_attach(cgrp, prog, replace_prog, type, flags);
	mutex_unlock(&cgroup_mutex);
	return ret;
}
Loading