Commit 695c1214 authored by Jakub Sitnicki's avatar Jakub Sitnicki Committed by Alexei Starovoitov
Browse files

bpf, netns: Keep attached programs in bpf_prog_array



Prepare for having multi-prog attachments for new netns attach types by
storing programs to run in a bpf_prog_array, which is well suited for
iterating over programs and running them in sequence.

After this change bpf(PROG_QUERY) may block to allocate memory in
bpf_prog_array_copy_to_user() for collected program IDs. This forces a
change in how we protect access to the attached program in the query
callback. Because bpf_prog_array_copy_to_user() can sleep, we switch from
an RCU read lock to holding a mutex that serializes updaters.

Because we allow only one BPF flow_dissector program to be attached to
netns at all times, the bpf_prog_array pointed by net->bpf.run_array is
always either detached (null) or one element long.

No functional changes intended.

Signed-off-by: default avatarJakub Sitnicki <jakub@cloudflare.com>
Signed-off-by: default avatarAlexei Starovoitov <ast@kernel.org>
Acked-by: default avatarMartin KaFai Lau <kafai@fb.com>
Acked-by: default avatarAndrii Nakryiko <andriin@fb.com>
Link: https://lore.kernel.org/bpf/20200625141357.910330-3-jakub@cloudflare.com
parent 3b701699
Loading
Loading
Loading
Loading
+4 −1
Original line number Diff line number Diff line
@@ -9,9 +9,12 @@
#include <linux/bpf-netns.h>

struct bpf_prog;
struct bpf_prog_array;

struct netns_bpf {
	struct bpf_prog __rcu *progs[MAX_NETNS_BPF_ATTACH_TYPE];
	/* Array of programs to run compiled from progs or links */
	struct bpf_prog_array __rcu *run_array[MAX_NETNS_BPF_ATTACH_TYPE];
	struct bpf_prog *progs[MAX_NETNS_BPF_ATTACH_TYPE];
	struct bpf_link *links[MAX_NETNS_BPF_ATTACH_TYPE];
};

+82 −38
Original line number Diff line number Diff line
@@ -33,6 +33,17 @@ static void __net_exit bpf_netns_link_auto_detach(struct bpf_link *link)
	net_link->net = NULL;
}

/* Must be called with netns_bpf_mutex held. */
static void netns_bpf_run_array_detach(struct net *net,
				       enum netns_bpf_attach_type type)
{
	struct bpf_prog_array *run_array;

	run_array = rcu_replace_pointer(net->bpf.run_array[type], NULL,
					lockdep_is_held(&netns_bpf_mutex));
	bpf_prog_array_free(run_array);
}

static void bpf_netns_link_release(struct bpf_link *link)
{
	struct bpf_netns_link *net_link =
@@ -54,8 +65,8 @@ static void bpf_netns_link_release(struct bpf_link *link)
	if (!net)
		goto out_unlock;

	netns_bpf_run_array_detach(net, type);
	net->bpf.links[type] = NULL;
	RCU_INIT_POINTER(net->bpf.progs[type], NULL);

out_unlock:
	mutex_unlock(&netns_bpf_mutex);
@@ -76,6 +87,7 @@ static int bpf_netns_link_update_prog(struct bpf_link *link,
	struct bpf_netns_link *net_link =
		container_of(link, struct bpf_netns_link, link);
	enum netns_bpf_attach_type type = net_link->netns_type;
	struct bpf_prog_array *run_array;
	struct net *net;
	int ret = 0;

@@ -93,8 +105,11 @@ static int bpf_netns_link_update_prog(struct bpf_link *link,
		goto out_unlock;
	}

	run_array = rcu_dereference_protected(net->bpf.run_array[type],
					      lockdep_is_held(&netns_bpf_mutex));
	WRITE_ONCE(run_array->items[0].prog, new_prog);

	old_prog = xchg(&link->prog, new_prog);
	rcu_assign_pointer(net->bpf.progs[type], new_prog);
	bpf_prog_put(old_prog);

out_unlock:
@@ -142,14 +157,38 @@ static const struct bpf_link_ops bpf_netns_link_ops = {
	.show_fdinfo = bpf_netns_link_show_fdinfo,
};

/* Must be called with netns_bpf_mutex held. */
static int __netns_bpf_prog_query(const union bpf_attr *attr,
				  union bpf_attr __user *uattr,
				  struct net *net,
				  enum netns_bpf_attach_type type)
{
	__u32 __user *prog_ids = u64_to_user_ptr(attr->query.prog_ids);
	struct bpf_prog_array *run_array;
	u32 prog_cnt = 0, flags = 0;

	run_array = rcu_dereference_protected(net->bpf.run_array[type],
					      lockdep_is_held(&netns_bpf_mutex));
	if (run_array)
		prog_cnt = bpf_prog_array_length(run_array);

	if (copy_to_user(&uattr->query.attach_flags, &flags, sizeof(flags)))
		return -EFAULT;
	if (copy_to_user(&uattr->query.prog_cnt, &prog_cnt, sizeof(prog_cnt)))
		return -EFAULT;
	if (!attr->query.prog_cnt || !prog_ids || !prog_cnt)
		return 0;

	return bpf_prog_array_copy_to_user(run_array, prog_ids,
					   attr->query.prog_cnt);
}

int netns_bpf_prog_query(const union bpf_attr *attr,
			 union bpf_attr __user *uattr)
{
	__u32 __user *prog_ids = u64_to_user_ptr(attr->query.prog_ids);
	u32 prog_id, prog_cnt = 0, flags = 0;
	enum netns_bpf_attach_type type;
	struct bpf_prog *attached;
	struct net *net;
	int ret;

	if (attr->query.query_flags)
		return -EINVAL;
@@ -162,32 +201,17 @@ int netns_bpf_prog_query(const union bpf_attr *attr,
	if (IS_ERR(net))
		return PTR_ERR(net);

	rcu_read_lock();
	attached = rcu_dereference(net->bpf.progs[type]);
	if (attached) {
		prog_cnt = 1;
		prog_id = attached->aux->id;
	}
	rcu_read_unlock();
	mutex_lock(&netns_bpf_mutex);
	ret = __netns_bpf_prog_query(attr, uattr, net, type);
	mutex_unlock(&netns_bpf_mutex);

	put_net(net);

	if (copy_to_user(&uattr->query.attach_flags, &flags, sizeof(flags)))
		return -EFAULT;
	if (copy_to_user(&uattr->query.prog_cnt, &prog_cnt, sizeof(prog_cnt)))
		return -EFAULT;

	if (!attr->query.prog_cnt || !prog_ids || !prog_cnt)
		return 0;

	if (copy_to_user(prog_ids, &prog_id, sizeof(u32)))
		return -EFAULT;

	return 0;
	return ret;
}

int netns_bpf_prog_attach(const union bpf_attr *attr, struct bpf_prog *prog)
{
	struct bpf_prog_array *run_array;
	enum netns_bpf_attach_type type;
	struct bpf_prog *attached;
	struct net *net;
@@ -217,14 +241,28 @@ int netns_bpf_prog_attach(const union bpf_attr *attr, struct bpf_prog *prog)
	if (ret)
		goto out_unlock;

	attached = rcu_dereference_protected(net->bpf.progs[type],
					     lockdep_is_held(&netns_bpf_mutex));
	attached = net->bpf.progs[type];
	if (attached == prog) {
		/* The same program cannot be attached twice */
		ret = -EINVAL;
		goto out_unlock;
	}
	rcu_assign_pointer(net->bpf.progs[type], prog);

	run_array = rcu_dereference_protected(net->bpf.run_array[type],
					      lockdep_is_held(&netns_bpf_mutex));
	if (run_array) {
		WRITE_ONCE(run_array->items[0].prog, prog);
	} else {
		run_array = bpf_prog_array_alloc(1, GFP_KERNEL);
		if (!run_array) {
			ret = -ENOMEM;
			goto out_unlock;
		}
		run_array->items[0].prog = prog;
		rcu_assign_pointer(net->bpf.run_array[type], run_array);
	}

	net->bpf.progs[type] = prog;
	if (attached)
		bpf_prog_put(attached);

@@ -244,11 +282,11 @@ static int __netns_bpf_prog_detach(struct net *net,
	if (net->bpf.links[type])
		return -EINVAL;

	attached = rcu_dereference_protected(net->bpf.progs[type],
					     lockdep_is_held(&netns_bpf_mutex));
	attached = net->bpf.progs[type];
	if (!attached)
		return -ENOENT;
	RCU_INIT_POINTER(net->bpf.progs[type], NULL);
	netns_bpf_run_array_detach(net, type);
	net->bpf.progs[type] = NULL;
	bpf_prog_put(attached);
	return 0;
}
@@ -272,7 +310,7 @@ int netns_bpf_prog_detach(const union bpf_attr *attr)
static int netns_bpf_link_attach(struct net *net, struct bpf_link *link,
				 enum netns_bpf_attach_type type)
{
	struct bpf_prog *prog;
	struct bpf_prog_array *run_array;
	int err;

	mutex_lock(&netns_bpf_mutex);
@@ -283,9 +321,7 @@ static int netns_bpf_link_attach(struct net *net, struct bpf_link *link,
		goto out_unlock;
	}
	/* Links are not compatible with attaching prog directly */
	prog = rcu_dereference_protected(net->bpf.progs[type],
					 lockdep_is_held(&netns_bpf_mutex));
	if (prog) {
	if (net->bpf.progs[type]) {
		err = -EEXIST;
		goto out_unlock;
	}
@@ -301,7 +337,14 @@ static int netns_bpf_link_attach(struct net *net, struct bpf_link *link,
	if (err)
		goto out_unlock;

	rcu_assign_pointer(net->bpf.progs[type], link->prog);
	run_array = bpf_prog_array_alloc(1, GFP_KERNEL);
	if (!run_array) {
		err = -ENOMEM;
		goto out_unlock;
	}
	run_array->items[0].prog = link->prog;
	rcu_assign_pointer(net->bpf.run_array[type], run_array);

	net->bpf.links[type] = link;

out_unlock:
@@ -368,11 +411,12 @@ static void __net_exit netns_bpf_pernet_pre_exit(struct net *net)

	mutex_lock(&netns_bpf_mutex);
	for (type = 0; type < MAX_NETNS_BPF_ATTACH_TYPE; type++) {
		netns_bpf_run_array_detach(net, type);
		link = net->bpf.links[type];
		if (link)
			bpf_netns_link_auto_detach(link);
		else
			__netns_bpf_prog_detach(net, type);
		else if (net->bpf.progs[type])
			bpf_prog_put(net->bpf.progs[type]);
	}
	mutex_unlock(&netns_bpf_mutex);
}
+10 −9
Original line number Diff line number Diff line
@@ -86,14 +86,14 @@ int flow_dissector_bpf_prog_attach_check(struct net *net,
		for_each_net(ns) {
			if (ns == &init_net)
				continue;
			if (rcu_access_pointer(ns->bpf.progs[type]))
			if (rcu_access_pointer(ns->bpf.run_array[type]))
				return -EEXIST;
		}
	} else {
		/* Make sure root flow dissector is not attached
		 * when attaching to the non-root namespace.
		 */
		if (rcu_access_pointer(init_net.bpf.progs[type]))
		if (rcu_access_pointer(init_net.bpf.run_array[type]))
			return -EEXIST;
	}

@@ -894,7 +894,6 @@ bool __skb_flow_dissect(const struct net *net,
	struct flow_dissector_key_addrs *key_addrs;
	struct flow_dissector_key_tags *key_tags;
	struct flow_dissector_key_vlan *key_vlan;
	struct bpf_prog *attached = NULL;
	enum flow_dissect_ret fdret;
	enum flow_dissector_key_id dissector_vlan = FLOW_DISSECTOR_KEY_MAX;
	bool mpls_el = false;
@@ -951,14 +950,14 @@ bool __skb_flow_dissect(const struct net *net,
	WARN_ON_ONCE(!net);
	if (net) {
		enum netns_bpf_attach_type type = NETNS_BPF_FLOW_DISSECTOR;
		struct bpf_prog_array *run_array;

		rcu_read_lock();
		attached = rcu_dereference(init_net.bpf.progs[type]);
		run_array = rcu_dereference(init_net.bpf.run_array[type]);
		if (!run_array)
			run_array = rcu_dereference(net->bpf.run_array[type]);

		if (!attached)
			attached = rcu_dereference(net->bpf.progs[type]);

		if (attached) {
		if (run_array) {
			struct bpf_flow_keys flow_keys;
			struct bpf_flow_dissector ctx = {
				.flow_keys = &flow_keys,
@@ -966,6 +965,7 @@ bool __skb_flow_dissect(const struct net *net,
				.data_end = data + hlen,
			};
			__be16 n_proto = proto;
			struct bpf_prog *prog;

			if (skb) {
				ctx.skb = skb;
@@ -976,7 +976,8 @@ bool __skb_flow_dissect(const struct net *net,
				n_proto = skb->protocol;
			}

			ret = bpf_flow_dissect(attached, &ctx, n_proto, nhoff,
			prog = READ_ONCE(run_array->items[0].prog);
			ret = bpf_flow_dissect(prog, &ctx, n_proto, nhoff,
					       hlen, flags);
			__skb_flow_bpf_to_target(&flow_keys, flow_dissector,
						 target_container);