Commit 9f0ca0c1 authored by David S. Miller


Alexei Starovoitov says:

====================
pull-request: bpf-next 2020-02-28

The following pull-request contains BPF updates for your *net-next* tree.

We've added 41 non-merge commits during the last 7 day(s) which contain
a total of 49 files changed, 1383 insertions(+), 499 deletions(-).

The main changes are:

1) BPF and Real-Time nicely co-exist.

2) bpftool feature improvements.

3) Retrieve bpf_sk_storage via INET_DIAG.
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
parents 9a834f9b 812285fa
+34 −22
@@ -371,7 +371,7 @@ static struct sk_buff *page_to_skb(struct virtnet_info *vi,
 				   struct receive_queue *rq,
 				   struct page *page, unsigned int offset,
 				   unsigned int len, unsigned int truesize,
-				   bool hdr_valid)
+				   bool hdr_valid, unsigned int metasize)
 {
 	struct sk_buff *skb;
 	struct virtio_net_hdr_mrg_rxbuf *hdr;
@@ -393,6 +393,7 @@ static struct sk_buff *page_to_skb(struct virtnet_info *vi,
 	else
 		hdr_padded_len = sizeof(struct padded_vnet_hdr);
 
+	/* hdr_valid means no XDP, so we can copy the vnet header */
 	if (hdr_valid)
 		memcpy(hdr, p, hdr_len);
 
@@ -405,6 +406,11 @@ static struct sk_buff *page_to_skb(struct virtnet_info *vi,
 		copy = skb_tailroom(skb);
 	skb_put_data(skb, p, copy);
 
+	if (metasize) {
+		__skb_pull(skb, metasize);
+		skb_metadata_set(skb, metasize);
+	}
+
 	len -= copy;
 	offset += copy;
 
@@ -450,10 +456,6 @@ static int __virtnet_xdp_xmit_one(struct virtnet_info *vi,
 	struct virtio_net_hdr_mrg_rxbuf *hdr;
 	int err;
 
-	/* virtqueue want to use data area in-front of packet */
-	if (unlikely(xdpf->metasize > 0))
-		return -EOPNOTSUPP;
-
 	if (unlikely(xdpf->headroom < vi->hdr_len))
 		return -EOVERFLOW;
 
@@ -644,6 +646,7 @@ static struct sk_buff *receive_small(struct net_device *dev,
 	unsigned int delta = 0;
 	struct page *xdp_page;
 	int err;
+	unsigned int metasize = 0;
 
 	len -= vi->hdr_len;
 	stats->bytes += len;
@@ -683,8 +686,8 @@ static struct sk_buff *receive_small(struct net_device *dev,
 
 		xdp.data_hard_start = buf + VIRTNET_RX_PAD + vi->hdr_len;
 		xdp.data = xdp.data_hard_start + xdp_headroom;
-		xdp_set_data_meta_invalid(&xdp);
 		xdp.data_end = xdp.data + len;
+		xdp.data_meta = xdp.data;
 		xdp.rxq = &rq->xdp_rxq;
 		orig_data = xdp.data;
 		act = bpf_prog_run_xdp(xdp_prog, &xdp);
@@ -695,6 +698,7 @@ static struct sk_buff *receive_small(struct net_device *dev,
 			/* Recalculate length in case bpf program changed it */
 			delta = orig_data - xdp.data;
 			len = xdp.data_end - xdp.data;
+			metasize = xdp.data - xdp.data_meta;
 			break;
 		case XDP_TX:
 			stats->xdp_tx++;
@@ -735,10 +739,13 @@ static struct sk_buff *receive_small(struct net_device *dev,
 	}
 	skb_reserve(skb, headroom - delta);
 	skb_put(skb, len);
-	if (!delta) {
+	if (!xdp_prog) {
 		buf += header_offset;
 		memcpy(skb_vnet_hdr(skb), buf, vi->hdr_len);
-	} /* keep zeroed vnet hdr since packet was changed by bpf */
+	} /* keep zeroed vnet hdr since XDP is loaded */
+
+	if (metasize)
+		skb_metadata_set(skb, metasize);
 
 err:
 	return skb;
@@ -760,8 +767,8 @@ static struct sk_buff *receive_big(struct net_device *dev,
 				   struct virtnet_rq_stats *stats)
 {
 	struct page *page = buf;
-	struct sk_buff *skb = page_to_skb(vi, rq, page, 0, len,
-					  PAGE_SIZE, true);
+	struct sk_buff *skb =
+		page_to_skb(vi, rq, page, 0, len, PAGE_SIZE, true, 0);
 
 	stats->bytes += len - vi->hdr_len;
 	if (unlikely(!skb))
@@ -793,6 +800,7 @@ static struct sk_buff *receive_mergeable(struct net_device *dev,
 	unsigned int truesize;
 	unsigned int headroom = mergeable_ctx_to_headroom(ctx);
 	int err;
+	unsigned int metasize = 0;
 
 	head_skb = NULL;
 	stats->bytes += len - vi->hdr_len;
@@ -839,8 +847,8 @@ static struct sk_buff *receive_mergeable(struct net_device *dev,
 		data = page_address(xdp_page) + offset;
 		xdp.data_hard_start = data - VIRTIO_XDP_HEADROOM + vi->hdr_len;
 		xdp.data = data + vi->hdr_len;
-		xdp_set_data_meta_invalid(&xdp);
 		xdp.data_end = xdp.data + (len - vi->hdr_len);
+		xdp.data_meta = xdp.data;
 		xdp.rxq = &rq->xdp_rxq;
 
 		act = bpf_prog_run_xdp(xdp_prog, &xdp);
@@ -848,24 +856,27 @@ static struct sk_buff *receive_mergeable(struct net_device *dev,
 
 		switch (act) {
 		case XDP_PASS:
+			metasize = xdp.data - xdp.data_meta;
+
 			/* recalculate offset to account for any header
-			 * adjustments. Note other cases do not build an
-			 * skb and avoid using offset
+			 * adjustments and minus the metasize to copy the
+			 * metadata in page_to_skb(). Note other cases do not
+			 * build an skb and avoid using offset
 			 */
-			offset = xdp.data -
-					page_address(xdp_page) - vi->hdr_len;
+			offset = xdp.data - page_address(xdp_page) -
+				 vi->hdr_len - metasize;
 
-			/* recalculate len if xdp.data or xdp.data_end were
-			 * adjusted
+			/* recalculate len if xdp.data, xdp.data_end or
+			 * xdp.data_meta were adjusted
 			 */
-			len = xdp.data_end - xdp.data + vi->hdr_len;
+			len = xdp.data_end - xdp.data + vi->hdr_len + metasize;
 			/* We can only create skb based on xdp_page. */
 			if (unlikely(xdp_page != page)) {
 				rcu_read_unlock();
 				put_page(page);
-				head_skb = page_to_skb(vi, rq, xdp_page,
-						       offset, len,
-						       PAGE_SIZE, false);
+				head_skb = page_to_skb(vi, rq, xdp_page, offset,
						       len, PAGE_SIZE, false,
						       metasize);
 				return head_skb;
 			}
 			break;
@@ -921,7 +932,8 @@ static struct sk_buff *receive_mergeable(struct net_device *dev,
 		goto err_skb;
 	}
 
-	head_skb = page_to_skb(vi, rq, page, offset, len, truesize, !xdp_prog);
+	head_skb = page_to_skb(vi, rq, page, offset, len, truesize, !xdp_prog,
+			       metasize);
 	curr_skb = head_skb;
 
 	if (unlikely(!curr_skb))
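
The net effect of the virtio_net changes above: metadata that an XDP program reserves in front of the packet now survives into the sk_buff via skb_metadata_set() instead of being invalidated or rejected on transmit. A minimal sketch of such a program (not part of this commit; section and value names are illustrative):

	#include <linux/bpf.h>
	#include <bpf/bpf_helpers.h>

	SEC("xdp")
	int xdp_store_meta(struct xdp_md *ctx)
	{
		__u32 *meta;
		void *data;

		/* Grow the metadata area by four bytes in front of the packet. */
		if (bpf_xdp_adjust_meta(ctx, -(int)sizeof(*meta)))
			return XDP_PASS;

		meta = (void *)(long)ctx->data_meta;
		data = (void *)(long)ctx->data;
		/* Bounds check required by the verifier. */
		if ((void *)(meta + 1) > data)
			return XDP_PASS;

		*meta = 0xcafe;	/* e.g. a classification result for later stages */
		return XDP_PASS;
	}

	char _license[] SEC("license") = "GPL";

On XDP_PASS the driver now pulls that region off the head and records it with skb_metadata_set(), so a subsequent tc BPF program can read it back through the skb's data_meta pointer.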
+1 −1
@@ -36,7 +36,7 @@ struct bpf_cgroup_storage_map;
 
 struct bpf_storage_buffer {
 	struct rcu_head rcu;
-	char data[0];
+	char data[];
 };
 
 struct bpf_cgroup_storage {
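
The [0] → [] changes here and in the next file replace the old GCC zero-length-array extension with a C99 flexible array member, which lets the compiler diagnose misuse (e.g. sizeof on the array, or placing it anywhere but at the end of the struct). For background, the allocation idiom in standalone C looks like this (illustrative names, userspace analogue of struct bpf_storage_buffer):

	#include <stdlib.h>
	#include <string.h>

	struct storage_buffer {
		size_t len;
		char data[];		/* flexible array member, was data[0] */
	};

	static struct storage_buffer *storage_alloc(const void *src, size_t n)
	{
		/* sizeof(*buf) counts only the fixed header; the payload is
		 * allocated directly behind it.
		 */
		struct storage_buffer *buf = malloc(sizeof(*buf) + n);

		if (!buf)
			return NULL;
		buf->len = n;
		memcpy(buf->data, src, n);
		return buf;
	}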
+36 −5
@@ -859,7 +859,7 @@ struct bpf_prog_array_item {
 
 struct bpf_prog_array {
 	struct rcu_head rcu;
-	struct bpf_prog_array_item items[0];
+	struct bpf_prog_array_item items[];
 };
 
 struct bpf_prog_array *bpf_prog_array_alloc(u32 prog_cnt, gfp_t flags);
@@ -885,7 +885,7 @@ int bpf_prog_array_copy(struct bpf_prog_array *old_array,
 		struct bpf_prog *_prog;			\
 		struct bpf_prog_array *_array;		\
 		u32 _ret = 1;				\
-		preempt_disable();			\
+		migrate_disable();			\
 		rcu_read_lock();			\
 		_array = rcu_dereference(array);	\
 		if (unlikely(check_non_null && !_array))\
@@ -898,7 +898,7 @@ int bpf_prog_array_copy(struct bpf_prog_array *old_array,
 		}					\
 _out:							\
 		rcu_read_unlock();			\
-		preempt_enable();			\
+		migrate_enable();			\
 		_ret;					\
 	 })
 
@@ -932,7 +932,7 @@ _out: \
 		u32 ret;				\
 		u32 _ret = 1;				\
 		u32 _cn = 0;				\
-		preempt_disable();			\
+		migrate_disable();			\
 		rcu_read_lock();			\
 		_array = rcu_dereference(array);	\
 		_item = &_array->items[0];		\
@@ -944,7 +944,7 @@ _out: \
 			_item++;			\
 		}					\
 		rcu_read_unlock();			\
-		preempt_enable();			\
+		migrate_enable();			\
 		if (_ret)				\
 			_ret = (_cn ? NET_XMIT_CN : NET_XMIT_SUCCESS);	\
 		else					\
@@ -961,6 +961,36 @@ _out: \
 #ifdef CONFIG_BPF_SYSCALL
 DECLARE_PER_CPU(int, bpf_prog_active);
 
+/*
+ * Block execution of BPF programs attached to instrumentation (perf,
+ * kprobes, tracepoints) to prevent deadlocks on map operations as any of
+ * these events can happen inside a region which holds a map bucket lock
+ * and can deadlock on it.
+ *
+ * Use the preemption safe inc/dec variants on RT because migrate disable
+ * is preemptible on RT and preemption in the middle of the RMW operation
+ * might lead to inconsistent state. Use the raw variants for non RT
+ * kernels as migrate_disable() maps to preempt_disable() so the slightly
+ * more expensive save operation can be avoided.
+ */
+static inline void bpf_disable_instrumentation(void)
+{
+	migrate_disable();
+	if (IS_ENABLED(CONFIG_PREEMPT_RT))
+		this_cpu_inc(bpf_prog_active);
+	else
+		__this_cpu_inc(bpf_prog_active);
+}
+
+static inline void bpf_enable_instrumentation(void)
+{
+	if (IS_ENABLED(CONFIG_PREEMPT_RT))
+		this_cpu_dec(bpf_prog_active);
+	else
+		__this_cpu_dec(bpf_prog_active);
+	migrate_enable();
+}
+
 extern const struct file_operations bpf_map_fops;
 extern const struct file_operations bpf_prog_fops;
 
@@ -993,6 +1023,7 @@ void __bpf_free_used_maps(struct bpf_prog_aux *aux,
 void bpf_prog_free_id(struct bpf_prog *prog, bool do_idr_lock);
 void bpf_map_free_id(struct bpf_map *map, bool do_idr_lock);
 
+struct bpf_map *bpf_map_get(u32 ufd);
 struct bpf_map *bpf_map_get_with_uref(u32 ufd);
 struct bpf_map *__bpf_map_get(struct fd f);
 void bpf_map_inc(struct bpf_map *map);
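
The new helpers pair with the bpf_prog_active counter declared above: map paths bump it so that BPF programs attached to instrumentation back off instead of re-entering map code that holds a bucket lock. A sketch of the intended call pattern around a map update (illustrative wrapper, not a kernel function; the real call sites live in kernel/bpf/syscall.c and the trace paths):

	#include <linux/bpf.h>
	#include <linux/rcupdate.h>

	static int update_elem_fenced(struct bpf_map *map, void *key, void *value,
				      u64 flags)
	{
		int err;

		/* Fence out kprobe/perf/tracepoint BPF programs for the
		 * duration of the locked map operation.
		 */
		bpf_disable_instrumentation();
		rcu_read_lock();
		err = map->ops->map_update_elem(map, key, value, flags);
		rcu_read_unlock();
		bpf_enable_instrumentation();
		return err;
	}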
+29 −8
@@ -561,7 +561,7 @@ DECLARE_STATIC_KEY_FALSE(bpf_stats_enabled_key);
 
 #define __BPF_PROG_RUN(prog, ctx, dfunc)	({			\
 	u32 ret;							\
-	cant_sleep();							\
+	cant_migrate();							\
 	if (static_branch_unlikely(&bpf_stats_enabled_key)) {		\
 		struct bpf_prog_stats *stats;				\
 		u64 start = sched_clock();				\
@@ -576,8 +576,30 @@ DECLARE_STATIC_KEY_FALSE(bpf_stats_enabled_key);
 	}								\
 	ret; })
 
-#define BPF_PROG_RUN(prog, ctx) __BPF_PROG_RUN(prog, ctx,		\
-					       bpf_dispatcher_nopfunc)
+#define BPF_PROG_RUN(prog, ctx)						\
+	__BPF_PROG_RUN(prog, ctx, bpf_dispatcher_nopfunc)
+
+/*
+ * Use in preemptible and therefore migratable context to make sure that
+ * the execution of the BPF program runs on one CPU.
+ *
+ * This uses migrate_disable/enable() explicitly to document that the
+ * invocation of a BPF program does not require reentrancy protection
+ * against a BPF program which is invoked from a preempting task.
+ *
+ * For non RT enabled kernels migrate_disable/enable() maps to
+ * preempt_disable/enable(), i.e. it disables also preemption.
+ */
+static inline u32 bpf_prog_run_pin_on_cpu(const struct bpf_prog *prog,
+					  const void *ctx)
+{
+	u32 ret;
+
+	migrate_disable();
+	ret = __BPF_PROG_RUN(prog, ctx, bpf_dispatcher_nopfunc);
+	migrate_enable();
+	return ret;
+}
 
 #define BPF_SKB_CB_LEN QDISC_CB_PRIV_LEN
 
@@ -655,6 +677,7 @@ static inline u8 *bpf_skb_cb(struct sk_buff *skb)
 	return qdisc_skb_cb(skb)->data;
 }
 
+/* Must be invoked with migration disabled */
 static inline u32 __bpf_prog_run_save_cb(const struct bpf_prog *prog,
					 struct sk_buff *skb)
 {
@@ -680,9 +703,9 @@ static inline u32 bpf_prog_run_save_cb(const struct bpf_prog *prog,
 {
 	u32 res;
 
-	preempt_disable();
+	migrate_disable();
 	res = __bpf_prog_run_save_cb(prog, skb);
-	preempt_enable();
+	migrate_enable();
 	return res;
 }
 
@@ -695,9 +718,7 @@ static inline u32 bpf_prog_run_clear_cb(const struct bpf_prog *prog,
 	if (unlikely(prog->cb_access))
 		memset(cb_data, 0, BPF_SKB_CB_LEN);
 
-	preempt_disable();
-	res = BPF_PROG_RUN(prog, skb);
-	preempt_enable();
+	res = bpf_prog_run_pin_on_cpu(prog, skb);
 	return res;
 }
 
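
bpf_prog_run_pin_on_cpu() becomes the one-liner replacement for open-coded preempt_disable()/BPF_PROG_RUN()/preempt_enable() sequences, as the bpf_prog_run_clear_cb() conversion above shows. A caller in fully preemptible context would use it as follows (hypothetical wrapper, not a kernel function):

	#include <linux/filter.h>

	static u32 run_prog_preemptible(const struct bpf_prog *prog,
					const void *ctx)
	{
		/* On !RT this still disables preemption; on RT it only
		 * disables migration, which is all a program run that relies
		 * on per-CPU data needs.
		 */
		return bpf_prog_run_pin_on_cpu(prog, ctx);
	}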
+16 −11
@@ -15,11 +15,9 @@ struct netlink_callback;
 struct inet_diag_handler {
 	void		(*dump)(struct sk_buff *skb,
 				struct netlink_callback *cb,
-				const struct inet_diag_req_v2 *r,
-				struct nlattr *bc);
+				const struct inet_diag_req_v2 *r);
 
-	int		(*dump_one)(struct sk_buff *in_skb,
-				    const struct nlmsghdr *nlh,
+	int		(*dump_one)(struct netlink_callback *cb,
 				    const struct inet_diag_req_v2 *req);
 
 	void		(*idiag_get_info)(struct sock *sk,
@@ -40,18 +38,25 @@ struct inet_diag_handler {
 	__u16		idiag_info_size;
 };
 
+struct bpf_sk_storage_diag;
+struct inet_diag_dump_data {
+	struct nlattr *req_nlas[__INET_DIAG_REQ_MAX];
+#define inet_diag_nla_bc req_nlas[INET_DIAG_REQ_BYTECODE]
+#define inet_diag_nla_bpf_stgs req_nlas[INET_DIAG_REQ_SK_BPF_STORAGES]
+
+	struct bpf_sk_storage_diag *bpf_stg_diag;
+};
+
 struct inet_connection_sock;
 int inet_sk_diag_fill(struct sock *sk, struct inet_connection_sock *icsk,
-		      struct sk_buff *skb, const struct inet_diag_req_v2 *req,
-		      struct user_namespace *user_ns,
-		      u32 pid, u32 seq, u16 nlmsg_flags,
-		      const struct nlmsghdr *unlh, bool net_admin);
+		      struct sk_buff *skb, struct netlink_callback *cb,
+		      const struct inet_diag_req_v2 *req,
+		      u16 nlmsg_flags, bool net_admin);
 void inet_diag_dump_icsk(struct inet_hashinfo *h, struct sk_buff *skb,
 			 struct netlink_callback *cb,
-			 const struct inet_diag_req_v2 *r,
-			 struct nlattr *bc);
+			 const struct inet_diag_req_v2 *r);
 int inet_diag_dump_one_icsk(struct inet_hashinfo *hashinfo,
-			    struct sk_buff *in_skb, const struct nlmsghdr *nlh,
+			    struct netlink_callback *cb,
 			    const struct inet_diag_req_v2 *req);
 
 struct sock *inet_diag_find_one_icsk(struct net *net,
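
These handler changes thread a netlink_callback through the dump paths so per-request state, including the new bpf_stg_diag pointer, can travel with the dump. On the userspace side, a request opts in by nesting an INET_DIAG_REQ_SK_BPF_STORAGES attribute after the inet_diag_req_v2 header. A rough sketch of building such a request (SK_DIAG_BPF_STORAGE_REQ_MAP_FD is this series' UAPI name, to be verified against include/uapi/linux/sock_diag.h; the caller provides a zeroed, sufficiently large buffer and sends it over a NETLINK_SOCK_DIAG socket):

	#include <string.h>
	#include <sys/socket.h>
	#include <netinet/in.h>
	#include <linux/netlink.h>
	#include <linux/sock_diag.h>
	#include <linux/inet_diag.h>

	static int build_req(char *buf, int map_fd)
	{
		struct nlmsghdr *nlh = (struct nlmsghdr *)buf;
		struct inet_diag_req_v2 *req = NLMSG_DATA(nlh);
		struct nlattr *nest, *fd_attr;
		__u32 fd = map_fd;

		nlh->nlmsg_type = SOCK_DIAG_BY_FAMILY;
		nlh->nlmsg_flags = NLM_F_REQUEST | NLM_F_DUMP;

		req->sdiag_family = AF_INET;
		req->sdiag_protocol = IPPROTO_TCP;
		req->idiag_states = ~0U;	/* sockets in any TCP state */

		/* Nested INET_DIAG_REQ_SK_BPF_STORAGES attribute carrying the
		 * fd of the sk_storage map whose values should be dumped.
		 */
		nest = (struct nlattr *)(buf + NLMSG_SPACE(sizeof(*req)));
		nest->nla_type = NLA_F_NESTED | INET_DIAG_REQ_SK_BPF_STORAGES;

		fd_attr = (struct nlattr *)((char *)nest + NLA_HDRLEN);
		fd_attr->nla_type = SK_DIAG_BPF_STORAGE_REQ_MAP_FD;
		fd_attr->nla_len = NLA_HDRLEN + sizeof(fd);
		memcpy((char *)fd_attr + NLA_HDRLEN, &fd, sizeof(fd));

		nest->nla_len = NLA_HDRLEN + NLA_ALIGN(fd_attr->nla_len);
		nlh->nlmsg_len = NLMSG_SPACE(sizeof(*req)) + nest->nla_len;
		return nlh->nlmsg_len;
	}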