Commit cf51abcd authored by Alexei Starovoitov's avatar Alexei Starovoitov
Browse files

Merge branch 'Link-based-attach-to-netns'

Jakub Sitnicki says:

====================
One of the pieces of feedback from recent review of BPF hooks for socket
lookup [0] was that new program types should use bpf_link-based
attachment.

This series introduces new bpf_link type for attaching to network
namespace. All link operations are supported. Errors returned from ops
follow cgroup example. Patch 4 description goes into error semantics.

The major change in v2 is a switch away from RCU to mutex-only
synchronization. Andrii pointed out that it is not needed, and it makes
sense to keep locking straightforward.

Also, there were a couple of bugs in update_prog and fill_info initial
implementation, one picked up by kbuild. Those are now fixed. Tests have
been extended to cover them. Full changelog below.

Series is organized as so:

Patches 1-3 prepare a space in struct net to keep state for attached BPF
programs, and massage the code in flow_dissector to make it attach type
agnostic, to finally move it under kernel/bpf/.

Patch 4, the most important one, introduces new bpf_link link type for
attaching to network namespace.

Patch 5 unifies the update error (ENOLINK) between BPF cgroup and netns.

Patches 6-8 make libbpf and bpftool aware of the new link type.

Patches 9-12 Add and extend tests to check that link low- and high-level
API for operating on links to netns works as intended.

Thanks to Alexei, Andrii, Lorenz, Marek, and Stanislav for feedback.

-jkbs

[0] https://lore.kernel.org/bpf/20200511185218.1422406-1-jakub@cloudflare.com/



Cc: Alexei Starovoitov <alexei.starovoitov@gmail.com>
Cc: Andrii Nakryiko <andrii.nakryiko@gmail.com>
Cc: Lorenz Bauer <lmb@cloudflare.com>
Cc: Marek Majkowski <marek@cloudflare.com>
Cc: Stanislav Fomichev <sdf@google.com>

v1 -> v2:

- Switch to mutex-only synchronization. Don't rely on RCU grace period
  guarantee when accessing struct net from link release / update /
  fill_info, and when accessing bpf_link from pernet pre_exit
  callback. (Andrii)
- Drop patch 1, no longer needed with mutex-only synchronization.
- Don't leak uninitialized variable contents from fill_info callback
  when link is in defunct state. (kbuild)
- Make fill_info treat the link as defunct (i.e. no attached netns) when
  struct net refcount is 0, but link has not been yet auto-detached.
- Add missing BPF_LINK_TYPE define in bpf_types.h for new link type.
- Fix link update_prog callback to update the prog that will run, and
  not just the link itself.
- Return EEXIST on prog attach when link already exists, and on link
  create when prog is already attached directly. (Andrii)
- Return EINVAL on prog detach when link is attached. (Andrii)
- Fold __netns_bpf_link_attach into its only caller. (Stanislav)
- Get rid of a wrapper around container_of() (Andrii)
- Use rcu_dereference_protected instead of rcu_access_pointer on
  update-side. (Stanislav)
- Make return-on-success from netns_bpf_link_create less
  confusing. (Andrii)
- Adapt bpf_link for cgroup to return ENOLINK when updating a defunct
  link. (Andrii, Alexei)
- Order new exported symbols in libbpf.map alphabetically (Andrii)
- Keep libbpf's "failed to attach link" warning message clear as to what
  we failed to attach to (cgroup vs netns). (Andrii)
- Extract helpers for printing link attach type. (bpftool, Andrii)
- Switch flow_dissector tests to BPF skeleton and extend them to
  exercise link-based flow dissector attachment. (Andrii)
- Harden flow dissector attachment tests with prog query checks after
  prog attach/detach, or link create/update/close.
- Extend flow dissector tests to cover fill_info for defunct links.
- Rebase onto recent bpf-next
====================

Signed-off-by: default avatarAlexei Starovoitov <ast@kernel.org>
parents febeb6df 06716e04
Loading
Loading
Loading
Loading
+64 −0
Original line number Diff line number Diff line
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _BPF_NETNS_H
#define _BPF_NETNS_H

#include <linux/mutex.h>
#include <uapi/linux/bpf.h>

enum netns_bpf_attach_type {
	NETNS_BPF_INVALID = -1,
	NETNS_BPF_FLOW_DISSECTOR = 0,
	MAX_NETNS_BPF_ATTACH_TYPE
};

static inline enum netns_bpf_attach_type
to_netns_bpf_attach_type(enum bpf_attach_type attach_type)
{
	switch (attach_type) {
	case BPF_FLOW_DISSECTOR:
		return NETNS_BPF_FLOW_DISSECTOR;
	default:
		return NETNS_BPF_INVALID;
	}
}

/* Protects updates to netns_bpf */
extern struct mutex netns_bpf_mutex;

union bpf_attr;
struct bpf_prog;

#ifdef CONFIG_NET
int netns_bpf_prog_query(const union bpf_attr *attr,
			 union bpf_attr __user *uattr);
int netns_bpf_prog_attach(const union bpf_attr *attr,
			  struct bpf_prog *prog);
int netns_bpf_prog_detach(const union bpf_attr *attr);
int netns_bpf_link_create(const union bpf_attr *attr,
			  struct bpf_prog *prog);
#else
static inline int netns_bpf_prog_query(const union bpf_attr *attr,
				       union bpf_attr __user *uattr)
{
	return -EOPNOTSUPP;
}

static inline int netns_bpf_prog_attach(const union bpf_attr *attr,
					struct bpf_prog *prog)
{
	return -EOPNOTSUPP;
}

static inline int netns_bpf_prog_detach(const union bpf_attr *attr)
{
	return -EOPNOTSUPP;
}

static inline int netns_bpf_link_create(const union bpf_attr *attr,
					struct bpf_prog *prog)
{
	return -EOPNOTSUPP;
}
#endif

#endif /* _BPF_NETNS_H */
+3 −0
Original line number Diff line number Diff line
@@ -126,3 +126,6 @@ BPF_LINK_TYPE(BPF_LINK_TYPE_TRACING, tracing)
BPF_LINK_TYPE(BPF_LINK_TYPE_CGROUP, cgroup)
#endif
BPF_LINK_TYPE(BPF_LINK_TYPE_ITER, iter)
#ifdef CONFIG_NET
BPF_LINK_TYPE(BPF_LINK_TYPE_NETNS, netns)
#endif
+0 −26
Original line number Diff line number Diff line
@@ -1283,32 +1283,6 @@ void skb_flow_dissector_init(struct flow_dissector *flow_dissector,
			     const struct flow_dissector_key *key,
			     unsigned int key_count);

#ifdef CONFIG_NET
int skb_flow_dissector_prog_query(const union bpf_attr *attr,
				  union bpf_attr __user *uattr);
int skb_flow_dissector_bpf_prog_attach(const union bpf_attr *attr,
				       struct bpf_prog *prog);

int skb_flow_dissector_bpf_prog_detach(const union bpf_attr *attr);
#else
static inline int skb_flow_dissector_prog_query(const union bpf_attr *attr,
						union bpf_attr __user *uattr)
{
	return -EOPNOTSUPP;
}

static inline int skb_flow_dissector_bpf_prog_attach(const union bpf_attr *attr,
						     struct bpf_prog *prog)
{
	return -EOPNOTSUPP;
}

static inline int skb_flow_dissector_bpf_prog_detach(const union bpf_attr *attr)
{
	return -EOPNOTSUPP;
}
#endif

struct bpf_flow_dissector;
bool bpf_flow_dissect(struct bpf_prog *prog, struct bpf_flow_dissector *ctx,
		      __be16 proto, int nhoff, int hlen, unsigned int flags);
+6 −0
Original line number Diff line number Diff line
@@ -8,6 +8,8 @@
#include <linux/string.h>
#include <uapi/linux/if_ether.h>

struct bpf_prog;
struct net;
struct sk_buff;

/**
@@ -369,4 +371,8 @@ flow_dissector_init_keys(struct flow_dissector_key_control *key_control,
	memset(key_basic, 0, sizeof(*key_basic));
}

#ifdef CONFIG_BPF_SYSCALL
int flow_dissector_bpf_prog_attach(struct net *net, struct bpf_prog *prog);
#endif /* CONFIG_BPF_SYSCALL */

#endif
+3 −1
Original line number Diff line number Diff line
@@ -33,6 +33,7 @@
#include <net/netns/mpls.h>
#include <net/netns/can.h>
#include <net/netns/xdp.h>
#include <net/netns/bpf.h>
#include <linux/ns_common.h>
#include <linux/idr.h>
#include <linux/skbuff.h>
@@ -162,7 +163,8 @@ struct net {
#endif
	struct net_generic __rcu	*gen;

	struct bpf_prog __rcu	*flow_dissector_prog;
	/* Used to store attached BPF programs */
	struct netns_bpf	bpf;

	/* Note : following structs are cache line aligned */
#ifdef CONFIG_XFRM
Loading