Commit 41f57cfd authored by David S. Miller


Alexei Starovoitov says:

====================
pull-request: bpf 2020-02-19

The following pull-request contains BPF updates for your *net* tree.

We've added 10 non-merge commits during the last 10 day(s) which contain
a total of 10 files changed, 93 insertions(+), 31 deletions(-).

The main changes are:

1) Batched BPF hashtab fixes from Brian and Yonghong.

2) Various selftests and libbpf fixes.
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
parents fca07a93 b9aff38d
+7 −9
@@ -1045,9 +1045,9 @@ union bpf_attr {
  * 		supports redirection to the egress interface, and accepts no
  * 		flag at all.
  *
- * 		The same effect can be attained with the more generic
- * 		**bpf_redirect_map**\ (), which requires specific maps to be
- * 		used but offers better performance.
+ * 		The same effect can also be attained with the more generic
+ * 		**bpf_redirect_map**\ (), which uses a BPF map to store the
+ * 		redirect target instead of providing it directly to the helper.
  * 	Return
  * 		For XDP, the helper returns **XDP_REDIRECT** on success or
  * 		**XDP_ABORTED** on error. For other program types, the values
@@ -1611,13 +1611,11 @@ union bpf_attr {
  * 		the caller. Any higher bits in the *flags* argument must be
  * 		unset.
  *
- * 		When used to redirect packets to net devices, this helper
- * 		provides a high performance increase over **bpf_redirect**\ ().
- * 		This is due to various implementation details of the underlying
- * 		mechanisms, one of which is the fact that **bpf_redirect_map**\
- * 		() tries to send packet as a "bulk" to the device.
+ * 		See also bpf_redirect(), which only supports redirecting to an
+ * 		ifindex, but doesn't require a map to do so.
  * 	Return
- * 		**XDP_REDIRECT** on success, or **XDP_ABORTED** on error.
+ * 		**XDP_REDIRECT** on success, or the value of the two lower bits
+ * 		of the **flags* argument on error.
  *
  * int bpf_sk_redirect_map(struct sk_buff *skb, struct bpf_map *map, u32 key, u64 flags)
  * 	Description
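The two hunks above reword the helper documentation to describe bpf_redirect_map() by its map argument rather than by performance claims. As a hedged usage sketch (not part of this commit; the map name tx_port and its sizing are illustrative assumptions), an XDP program redirecting through a devmap looks like this:

/* Minimal XDP redirect via a devmap; load with libbpf and populate
 * tx_port[0] with the egress ifindex from userspace.
 */
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>

struct {
	__uint(type, BPF_MAP_TYPE_DEVMAP);
	__uint(max_entries, 1);
	__type(key, __u32);
	__type(value, __u32);
} tx_port SEC(".maps");

SEC("xdp")
int xdp_redirect_map_prog(struct xdp_md *ctx)
{
	/* Per the new Return text: on error the helper returns the two
	 * lower bits of flags, so XDP_PASS makes a failed lookup fall
	 * through to the stack instead of dropping the packet.
	 */
	return bpf_redirect_map(&tx_port, 0, XDP_PASS);
}

char _license[] SEC("license") = "GPL";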
+3 −3
@@ -4142,7 +4142,7 @@ int btf_distill_func_proto(struct bpf_verifier_log *log,
  * EFAULT - verifier bug
  * 0 - 99% match. The last 1% is validated by the verifier.
  */
-int btf_check_func_type_match(struct bpf_verifier_log *log,
-			      struct btf *btf1, const struct btf_type *t1,
-			      struct btf *btf2, const struct btf_type *t2)
+static int btf_check_func_type_match(struct bpf_verifier_log *log,
+				     struct btf *btf1, const struct btf_type *t1,
+				     struct btf *btf2, const struct btf_type *t2)
 {
+53 −5
@@ -56,6 +56,7 @@ struct htab_elem {
 			union {
 				struct bpf_htab *htab;
 				struct pcpu_freelist_node fnode;
+				struct htab_elem *batch_flink;
 			};
 		};
 	};
@@ -126,6 +127,17 @@ free_elems:
 	bpf_map_area_free(htab->elems);
 }
 
+/* The LRU list has a lock (lru_lock). Each htab bucket has a lock
+ * (bucket_lock). If both locks need to be acquired together, the lock
+ * order is always lru_lock -> bucket_lock and this only happens in
+ * bpf_lru_list.c logic. For example, certain code path of
+ * bpf_lru_pop_free(), which is called by function prealloc_lru_pop(),
+ * will acquire lru_lock first followed by acquiring bucket_lock.
+ *
+ * In hashtab.c, to avoid deadlock, lock acquisition of
+ * bucket_lock followed by lru_lock is not allowed. In such cases,
+ * bucket_lock needs to be released first before acquiring lru_lock.
+ */
 static struct htab_elem *prealloc_lru_pop(struct bpf_htab *htab, void *key,
 					  u32 hash)
 {
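The comment added above pins the lock order in one direction only: lru_lock may be taken before bucket_lock, never the reverse. A hedged userspace illustration of the same discipline (plain pthreads, not the kernel code):

#include <pthread.h>

static pthread_mutex_t lru_lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_mutex_t bucket_lock = PTHREAD_MUTEX_INITIALIZER;

/* Allowed order, as in bpf_lru_pop_free(): lru_lock -> bucket_lock. */
static void lru_then_bucket(void)
{
	pthread_mutex_lock(&lru_lock);
	pthread_mutex_lock(&bucket_lock);
	/* ... */
	pthread_mutex_unlock(&bucket_lock);
	pthread_mutex_unlock(&lru_lock);
}

/* The forbidden order is avoided by dropping bucket_lock first; the
 * batch-lookup fix below does exactly this before bpf_lru_push_free(). */
static void bucket_then_lru_safe(void)
{
	pthread_mutex_lock(&bucket_lock);
	/* ... decide the element must go back to the LRU ... */
	pthread_mutex_unlock(&bucket_lock);

	pthread_mutex_lock(&lru_lock);
	/* ... */
	pthread_mutex_unlock(&lru_lock);
}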
@@ -1256,10 +1268,12 @@ __htab_map_lookup_and_delete_batch(struct bpf_map *map,
 	void __user *ukeys = u64_to_user_ptr(attr->batch.keys);
 	void *ubatch = u64_to_user_ptr(attr->batch.in_batch);
 	u32 batch, max_count, size, bucket_size;
+	struct htab_elem *node_to_free = NULL;
 	u64 elem_map_flags, map_flags;
 	struct hlist_nulls_head *head;
 	struct hlist_nulls_node *n;
-	unsigned long flags;
+	unsigned long flags = 0;
+	bool locked = false;
 	struct htab_elem *l;
 	struct bucket *b;
 	int ret = 0;
@@ -1319,15 +1333,25 @@ again_nocopy:
 	dst_val = values;
 	b = &htab->buckets[batch];
 	head = &b->head;
-	raw_spin_lock_irqsave(&b->lock, flags);
+	/* do not grab the lock unless need it (bucket_cnt > 0). */
+	if (locked)
+		raw_spin_lock_irqsave(&b->lock, flags);
 
 	bucket_cnt = 0;
 	hlist_nulls_for_each_entry_rcu(l, n, head, hash_node)
 		bucket_cnt++;
 
+	if (bucket_cnt && !locked) {
+		locked = true;
+		goto again_nocopy;
+	}
+
 	if (bucket_cnt > (max_count - total)) {
 		if (total == 0)
 			ret = -ENOSPC;
+		/* Note that since bucket_cnt > 0 here, it is implicit
+		 * that the locked was grabbed, so release it.
+		 */
 		raw_spin_unlock_irqrestore(&b->lock, flags);
 		rcu_read_unlock();
 		this_cpu_dec(bpf_prog_active);
@@ -1337,6 +1361,9 @@ again_nocopy:
 
 	if (bucket_cnt > bucket_size) {
 		bucket_size = bucket_cnt;
+		/* Note that since bucket_cnt > 0 here, it is implicit
+		 * that the locked was grabbed, so release it.
+		 */
 		raw_spin_unlock_irqrestore(&b->lock, flags);
 		rcu_read_unlock();
 		this_cpu_dec(bpf_prog_active);
@@ -1346,6 +1373,10 @@ again_nocopy:
 		goto alloc;
 	}
 
+	/* Next block is only safe to run if you have grabbed the lock */
+	if (!locked)
+		goto next_batch;
+
 	hlist_nulls_for_each_entry_safe(l, n, head, hash_node) {
 		memcpy(dst_key, l->key, key_size);
 
@@ -1370,16 +1401,33 @@ again_nocopy:
 		}
 		if (do_delete) {
 			hlist_nulls_del_rcu(&l->hash_node);
-			if (is_lru_map)
-				bpf_lru_push_free(&htab->lru, &l->lru_node);
-			else
+
+			/* bpf_lru_push_free() will acquire lru_lock, which
+			 * may cause deadlock. See comments in function
+			 * prealloc_lru_pop(). Let us do bpf_lru_push_free()
+			 * after releasing the bucket lock.
+			 */
+			if (is_lru_map) {
+				l->batch_flink = node_to_free;
+				node_to_free = l;
+			} else {
 				free_htab_elem(htab, l);
+			}
 		}
 		dst_key += key_size;
 		dst_val += value_size;
 	}
 
 	raw_spin_unlock_irqrestore(&b->lock, flags);
+	locked = false;
+
+	while (node_to_free) {
+		l = node_to_free;
+		node_to_free = node_to_free->batch_flink;
+		bpf_lru_push_free(&htab->lru, &l->lru_node);
+	}
+
+next_batch:
 	/* If we are not copying data, we can go to next bucket and avoid
 	 * unlocking the rcu.
 	 */
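The hunks above are the heart of the fix: while the bucket lock is held, LRU elements are chained onto node_to_free through the new batch_flink field instead of being handed to bpf_lru_push_free() (which takes lru_lock), and the chain is drained only after the bucket lock drops. A hedged, generic sketch of that defer-and-drain pattern (standalone C, not the kernel code):

#include <stddef.h>

struct elem {
	struct elem *batch_flink;	/* defer list, as in htab_elem */
	/* ... payload ... */
};

/* Called under the bucket lock: unlink and chain instead of freeing,
 * because freeing would take a lock that must not nest inside this one. */
static void defer_free(struct elem **node_to_free, struct elem *l)
{
	l->batch_flink = *node_to_free;
	*node_to_free = l;
}

/* Called after unlock: now it is safe to take the other lock per node. */
static void drain_deferred(struct elem **node_to_free,
			   void (*push_free)(struct elem *))
{
	while (*node_to_free) {
		struct elem *l = *node_to_free;

		*node_to_free = l->batch_flink;
		push_free(l);	/* bpf_lru_push_free() in the kernel */
	}
}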
+1 −1
@@ -321,7 +321,7 @@ int bpf_prog_offload_info_fill(struct bpf_prog_info *info,
 
 	ulen = info->jited_prog_len;
 	info->jited_prog_len = aux->offload->jited_len;
-	if (info->jited_prog_len & ulen) {
+	if (info->jited_prog_len && ulen) {
 		uinsns = u64_to_user_ptr(info->jited_prog_insns);
 		ulen = min_t(u32, info->jited_prog_len, ulen);
 		if (copy_to_user(uinsns, aux->offload->jited_image, ulen)) {
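The one-character offload.c fix replaces a bitwise AND with the intended logical AND: the copy should happen when both lengths are nonzero, but '&' is true only when the two values share set bits. A small standalone demonstration:

#include <stdio.h>

int main(void)
{
	unsigned int jited_prog_len = 4;	/* binary 100 */
	unsigned int ulen = 3;			/* binary 011 */

	/* Bitwise AND: no common bits, so 0 even though both lengths are
	 * nonzero -- the buggy test would wrongly skip the copy_to_user(). */
	printf("jited_prog_len & ulen  = %u\n", jited_prog_len & ulen);

	/* Logical AND: 1 whenever both operands are nonzero -- the intent. */
	printf("jited_prog_len && ulen = %d\n", jited_prog_len && ulen);
	return 0;
}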
+2 −0
@@ -217,6 +217,7 @@ static int xsk_rcv(struct xdp_sock *xs, struct xdp_buff *xdp)
 static void xsk_flush(struct xdp_sock *xs)
 {
 	xskq_prod_submit(xs->rx);
+	__xskq_cons_release(xs->umem->fq);
 	sock_def_readable(&xs->sk);
 }
 
@@ -304,6 +305,7 @@ void xsk_umem_consume_tx_done(struct xdp_umem *umem)
 
 	rcu_read_lock();
 	list_for_each_entry_rcu(xs, &umem->xsk_list, list) {
+		__xskq_cons_release(xs->tx);
 		xs->sk.sk_write_space(&xs->sk);
 	}
 	rcu_read_unlock();
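Both xsk.c hunks add a __xskq_cons_release() call so the consumer index visible to userspace is advanced once the kernel is done with fill- and tx-ring entries, letting the producer reuse those slots. A hedged sketch of the general pattern (a standalone single-producer/single-consumer ring in C11; names and layout are illustrative, not the xsk implementation):

#include <stdatomic.h>
#include <stdint.h>

struct ring {
	_Atomic uint32_t producer;	/* published by the producer */
	_Atomic uint32_t consumer;	/* published consumer index */
	uint32_t cached_cons;		/* consumer's private progress */
	uint32_t mask;			/* size - 1, size a power of two */
	uint64_t slots[64];
};

/* Consume against the private counter only; no store to shared state. */
static int ring_cons_peek(struct ring *r, uint64_t *val)
{
	uint32_t prod = atomic_load_explicit(&r->producer, memory_order_acquire);

	if (r->cached_cons == prod)
		return 0;	/* empty */
	*val = r->slots[r->cached_cons & r->mask];
	r->cached_cons++;
	return 1;
}

/* Publish progress so the producer can reuse slots; this is the step
 * the patch adds for the fill and tx queues. */
static void ring_cons_release(struct ring *r)
{
	atomic_store_explicit(&r->consumer, r->cached_cons,
			      memory_order_release);
}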