Commit 41f57cfd authored by David S. Miller


Alexei Starovoitov says:

====================
pull-request: bpf 2020-02-19

The following pull-request contains BPF updates for your *net* tree.

We've added 10 non-merge commits during the last 10 day(s) which contain
a total of 10 files changed, 93 insertions(+), 31 deletions(-).

The main changes are:

1) Batched BPF hashtab fixes from Brian and Yonghong.

2) Various selftests and libbpf fixes.
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
parents fca07a93 b9aff38d
+7 −9
@@ -1045,9 +1045,9 @@ union bpf_attr {
  * 		supports redirection to the egress interface, and accepts no
  * 		flag at all.
  *
- * 		The same effect can be attained with the more generic
- * 		**bpf_redirect_map**\ (), which requires specific maps to be
- * 		used but offers better performance.
+ * 		The same effect can also be attained with the more generic
+ * 		**bpf_redirect_map**\ (), which uses a BPF map to store the
+ * 		redirect target instead of providing it directly to the helper.
  * 	Return
  * 		For XDP, the helper returns **XDP_REDIRECT** on success or
  * 		**XDP_ABORTED** on error. For other program types, the values
@@ -1611,13 +1611,11 @@ union bpf_attr {
  * 		the caller. Any higher bits in the *flags* argument must be
  * 		unset.
  *
- * 		When used to redirect packets to net devices, this helper
- * 		provides a high performance increase over **bpf_redirect**\ ().
- * 		This is due to various implementation details of the underlying
- * 		mechanisms, one of which is the fact that **bpf_redirect_map**\
- * 		() tries to send packet as a "bulk" to the device.
+ * 		See also bpf_redirect(), which only supports redirecting to an
+ * 		ifindex, but doesn't require a map to do so.
  * 	Return
- * 		**XDP_REDIRECT** on success, or **XDP_ABORTED** on error.
+ * 		**XDP_REDIRECT** on success, or the value of the two lower bits
+ * 		of the **flags* argument on error.
  *
  * int bpf_sk_redirect_map(struct sk_buff *skb, struct bpf_map *map, u32 key, u64 flags)
  * 	Description
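The two hunks above reword the helper documentation to describe bpf_redirect_map() by its map argument rather than by performance claims. As a hedged usage sketch (not part of this commit; the map name tx_port and its sizing are illustrative assumptions), an XDP program redirecting through a devmap looks like this:

/* Minimal XDP redirect via a devmap; load with libbpf and populate
 * tx_port[0] with the egress ifindex from userspace.
 */
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>

struct {
	__uint(type, BPF_MAP_TYPE_DEVMAP);
	__uint(max_entries, 1);
	__type(key, __u32);
	__type(value, __u32);
} tx_port SEC(".maps");

SEC("xdp")
int xdp_redirect_map_prog(struct xdp_md *ctx)
{
	/* Per the new Return text: on error the helper returns the two
	 * lower bits of flags, so XDP_PASS makes a failed lookup fall
	 * through to the stack instead of dropping the packet.
	 */
	return bpf_redirect_map(&tx_port, 0, XDP_PASS);
}

char _license[] SEC("license") = "GPL";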
+3 −3
@@ -4142,7 +4142,7 @@ int btf_distill_func_proto(struct bpf_verifier_log *log,
  * EFAULT - verifier bug
  * 0 - 99% match. The last 1% is validated by the verifier.
  */
-int btf_check_func_type_match(struct bpf_verifier_log *log,
-			      struct btf *btf1, const struct btf_type *t1,
-			      struct btf *btf2, const struct btf_type *t2)
+static int btf_check_func_type_match(struct bpf_verifier_log *log,
+				     struct btf *btf1, const struct btf_type *t1,
+				     struct btf *btf2, const struct btf_type *t2)
 {
+53 −5
@@ -56,6 +56,7 @@ struct htab_elem {
 			union {
 				struct bpf_htab *htab;
 				struct pcpu_freelist_node fnode;
+				struct htab_elem *batch_flink;
 			};
 		};
 	};
@@ -126,6 +127,17 @@ free_elems:
 	bpf_map_area_free(htab->elems);
 }
 
+/* The LRU list has a lock (lru_lock). Each htab bucket has a lock
+ * (bucket_lock). If both locks need to be acquired together, the lock
+ * order is always lru_lock -> bucket_lock and this only happens in
+ * bpf_lru_list.c logic. For example, certain code path of
+ * bpf_lru_pop_free(), which is called by function prealloc_lru_pop(),
+ * will acquire lru_lock first followed by acquiring bucket_lock.
+ *
+ * In hashtab.c, to avoid deadlock, lock acquisition of
+ * bucket_lock followed by lru_lock is not allowed. In such cases,
+ * bucket_lock needs to be released first before acquiring lru_lock.
+ */
 static struct htab_elem *prealloc_lru_pop(struct bpf_htab *htab, void *key,
 					  u32 hash)
 {
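The comment added above pins the lock order in one direction only: lru_lock may be taken before bucket_lock, never the reverse. A hedged userspace illustration of the same discipline (plain pthreads, not the kernel code):

#include <pthread.h>

static pthread_mutex_t lru_lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_mutex_t bucket_lock = PTHREAD_MUTEX_INITIALIZER;

/* Allowed order, as in bpf_lru_pop_free(): lru_lock -> bucket_lock. */
static void lru_then_bucket(void)
{
	pthread_mutex_lock(&lru_lock);
	pthread_mutex_lock(&bucket_lock);
	/* ... */
	pthread_mutex_unlock(&bucket_lock);
	pthread_mutex_unlock(&lru_lock);
}

/* The forbidden order is avoided by dropping bucket_lock first; the
 * batch-lookup fix below does exactly this before bpf_lru_push_free(). */
static void bucket_then_lru_safe(void)
{
	pthread_mutex_lock(&bucket_lock);
	/* ... decide the element must go back to the LRU ... */
	pthread_mutex_unlock(&bucket_lock);

	pthread_mutex_lock(&lru_lock);
	/* ... */
	pthread_mutex_unlock(&lru_lock);
}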
@@ -1256,10 +1268,12 @@ __htab_map_lookup_and_delete_batch(struct bpf_map *map,
 	void __user *ukeys = u64_to_user_ptr(attr->batch.keys);
 	void *ubatch = u64_to_user_ptr(attr->batch.in_batch);
 	u32 batch, max_count, size, bucket_size;
+	struct htab_elem *node_to_free = NULL;
 	u64 elem_map_flags, map_flags;
 	struct hlist_nulls_head *head;
 	struct hlist_nulls_node *n;
-	unsigned long flags;
+	unsigned long flags = 0;
+	bool locked = false;
 	struct htab_elem *l;
 	struct bucket *b;
 	int ret = 0;
@@ -1319,15 +1333,25 @@ again_nocopy:
 	dst_val = values;
 	b = &htab->buckets[batch];
 	head = &b->head;
-	raw_spin_lock_irqsave(&b->lock, flags);
+	/* do not grab the lock unless need it (bucket_cnt > 0). */
+	if (locked)
+		raw_spin_lock_irqsave(&b->lock, flags);
 
 	bucket_cnt = 0;
 	hlist_nulls_for_each_entry_rcu(l, n, head, hash_node)
 		bucket_cnt++;
 
+	if (bucket_cnt && !locked) {
+		locked = true;
+		goto again_nocopy;
+	}
+
 	if (bucket_cnt > (max_count - total)) {
 		if (total == 0)
 			ret = -ENOSPC;
+		/* Note that since bucket_cnt > 0 here, it is implicit
+		 * that the locked was grabbed, so release it.
+		 */
 		raw_spin_unlock_irqrestore(&b->lock, flags);
 		rcu_read_unlock();
 		this_cpu_dec(bpf_prog_active);
@@ -1337,6 +1361,9 @@ again_nocopy:
 
 	if (bucket_cnt > bucket_size) {
 		bucket_size = bucket_cnt;
+		/* Note that since bucket_cnt > 0 here, it is implicit
+		 * that the locked was grabbed, so release it.
+		 */
 		raw_spin_unlock_irqrestore(&b->lock, flags);
 		rcu_read_unlock();
 		this_cpu_dec(bpf_prog_active);
@@ -1346,6 +1373,10 @@ again_nocopy:
 		goto alloc;
 	}
 
+	/* Next block is only safe to run if you have grabbed the lock */
+	if (!locked)
+		goto next_batch;
+
 	hlist_nulls_for_each_entry_safe(l, n, head, hash_node) {
 		memcpy(dst_key, l->key, key_size);
 
@@ -1370,16 +1401,33 @@ again_nocopy:
 		}
 		if (do_delete) {
 			hlist_nulls_del_rcu(&l->hash_node);
-			if (is_lru_map)
-				bpf_lru_push_free(&htab->lru, &l->lru_node);
-			else
+
+			/* bpf_lru_push_free() will acquire lru_lock, which
+			 * may cause deadlock. See comments in function
+			 * prealloc_lru_pop(). Let us do bpf_lru_push_free()
+			 * after releasing the bucket lock.
+			 */
+			if (is_lru_map) {
+				l->batch_flink = node_to_free;
+				node_to_free = l;
+			} else {
 				free_htab_elem(htab, l);
+			}
 		}
 		dst_key += key_size;
 		dst_val += value_size;
 	}
 
 	raw_spin_unlock_irqrestore(&b->lock, flags);
+	locked = false;
+
+	while (node_to_free) {
+		l = node_to_free;
+		node_to_free = node_to_free->batch_flink;
+		bpf_lru_push_free(&htab->lru, &l->lru_node);
+	}
+
+next_batch:
 	/* If we are not copying data, we can go to next bucket and avoid
 	 * unlocking the rcu.
 	 */
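The hunks above are the heart of the fix: while the bucket lock is held, LRU elements are chained onto node_to_free through the new batch_flink field instead of being handed to bpf_lru_push_free() (which takes lru_lock), and the chain is drained only after the bucket lock drops. A hedged, generic sketch of that defer-and-drain pattern (standalone C, not the kernel code):

#include <stddef.h>

struct elem {
	struct elem *batch_flink;	/* defer list, as in htab_elem */
	/* ... payload ... */
};

/* Called under the bucket lock: unlink and chain instead of freeing,
 * because freeing would take a lock that must not nest inside this one. */
static void defer_free(struct elem **node_to_free, struct elem *l)
{
	l->batch_flink = *node_to_free;
	*node_to_free = l;
}

/* Called after unlock: now it is safe to take the other lock per node. */
static void drain_deferred(struct elem **node_to_free,
			   void (*push_free)(struct elem *))
{
	while (*node_to_free) {
		struct elem *l = *node_to_free;

		*node_to_free = l->batch_flink;
		push_free(l);	/* bpf_lru_push_free() in the kernel */
	}
}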
+1 −1
@@ -321,7 +321,7 @@ int bpf_prog_offload_info_fill(struct bpf_prog_info *info,
 
 	ulen = info->jited_prog_len;
 	info->jited_prog_len = aux->offload->jited_len;
-	if (info->jited_prog_len & ulen) {
+	if (info->jited_prog_len && ulen) {
 		uinsns = u64_to_user_ptr(info->jited_prog_insns);
 		ulen = min_t(u32, info->jited_prog_len, ulen);
 		if (copy_to_user(uinsns, aux->offload->jited_image, ulen)) {
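The one-character offload.c fix replaces a bitwise AND with the intended logical AND: the copy should happen when both lengths are nonzero, but '&' is true only when the two values share set bits. A small standalone demonstration:

#include <stdio.h>

int main(void)
{
	unsigned int jited_prog_len = 4;	/* binary 100 */
	unsigned int ulen = 3;			/* binary 011 */

	/* Bitwise AND: no common bits, so 0 even though both lengths are
	 * nonzero -- the buggy test would wrongly skip the copy_to_user(). */
	printf("jited_prog_len & ulen  = %u\n", jited_prog_len & ulen);

	/* Logical AND: 1 whenever both operands are nonzero -- the intent. */
	printf("jited_prog_len && ulen = %d\n", jited_prog_len && ulen);
	return 0;
}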
+2 −0
@@ -217,6 +217,7 @@ static int xsk_rcv(struct xdp_sock *xs, struct xdp_buff *xdp)
 static void xsk_flush(struct xdp_sock *xs)
 {
 	xskq_prod_submit(xs->rx);
+	__xskq_cons_release(xs->umem->fq);
 	sock_def_readable(&xs->sk);
 }
 
@@ -304,6 +305,7 @@ void xsk_umem_consume_tx_done(struct xdp_umem *umem)
 
 	rcu_read_lock();
 	list_for_each_entry_rcu(xs, &umem->xsk_list, list) {
+		__xskq_cons_release(xs->tx);
 		xs->sk.sk_write_space(&xs->sk);
 	}
 	rcu_read_unlock();
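Both xsk.c hunks add a __xskq_cons_release() call so the consumer index visible to userspace is advanced once the kernel is done with fill- and tx-ring entries, letting the producer reuse those slots. A hedged sketch of the general pattern (a standalone single-producer/single-consumer ring in C11; names and layout are illustrative, not the xsk implementation):

#include <stdatomic.h>
#include <stdint.h>

struct ring {
	_Atomic uint32_t producer;	/* published by the producer */
	_Atomic uint32_t consumer;	/* published consumer index */
	uint32_t cached_cons;		/* consumer's private progress */
	uint32_t mask;			/* size - 1, size a power of two */
	uint64_t slots[64];
};

/* Consume against the private counter only; no store to shared state. */
static int ring_cons_peek(struct ring *r, uint64_t *val)
{
	uint32_t prod = atomic_load_explicit(&r->producer, memory_order_acquire);

	if (r->cached_cons == prod)
		return 0;	/* empty */
	*val = r->slots[r->cached_cons & r->mask];
	r->cached_cons++;
	return 1;
}

/* Publish progress so the producer can reuse slots; this is the step
 * the patch adds for the fill and tx queues. */
static void ring_cons_release(struct ring *r)
{
	atomic_store_explicit(&r->consumer, r->cached_cons,
			      memory_order_release);
}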