Commit 36cd0e69 authored by John Fastabend, committed by Daniel Borkmann
Browse files

bpf, sockmap: Ensure SO_RCVBUF memory is observed on ingress redirect



Fix sockmap sk_skb programs so that they observe sk_rcvbuf limits. This
allows users to tune SO_RCVBUF, and sockmap will honor the configured value.

We can refactor the if (charge) case out in later patches, but for now keep
this fix to the point.

Fixes: 51199405 ("bpf: skb_verdict, support SK_PASS on RX BPF path")
Suggested-by: Jakub Sitnicki <jakub@cloudflare.com>
Signed-off-by: John Fastabend <john.fastabend@gmail.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Reviewed-by: Jakub Sitnicki <jakub@cloudflare.com>
Link: https://lore.kernel.org/bpf/160556568657.73229.8404601585878439060.stgit@john-XPS-13-9370
parent c9c89dcd
Loading
Loading
Loading
Loading
+16 −4
Original line number Diff line number Diff line
@@ -170,10 +170,12 @@ static int sk_msg_free_elem(struct sock *sk, struct sk_msg *msg, u32 i,
	struct scatterlist *sge = sk_msg_elem(msg, i);
	u32 len = sge->length;

	/* When the skb owns the memory we free it from consume_skb path. */
	if (!msg->skb) {
		if (charge)
			sk_mem_uncharge(sk, len);
	if (!msg->skb)
		put_page(sg_page(sge));
	}
	memset(sge, 0, sizeof(*sge));
	return len;
}
@@ -403,6 +405,9 @@ static int sk_psock_skb_ingress(struct sk_psock *psock, struct sk_buff *skb)
	int copied = 0, num_sge;
	struct sk_msg *msg;

	if (atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf)
		return -EAGAIN;

	msg = kzalloc(sizeof(*msg), __GFP_NOWARN | GFP_ATOMIC);
	if (unlikely(!msg))
		return -EAGAIN;
@@ -418,7 +423,14 @@ static int sk_psock_skb_ingress(struct sk_psock *psock, struct sk_buff *skb)
		return num_sge;
	}

	sk_mem_charge(sk, skb->len);
	/* This will transition ownership of the data from the socket where
	 * the BPF program was run initiating the redirect to the socket
	 * we will eventually receive this data on. The data will be released
	 * via consume_skb() from __tcp_bpf_recvmsg() after it has been copied
	 * into user buffers.
	 */
	skb_set_owner_r(skb, sk);

	copied = skb->len;
	msg->sg.start = 0;
	msg->sg.size = copied;
+2 −1
Original line number Diff line number Diff line
@@ -45,6 +45,7 @@ int __tcp_bpf_recvmsg(struct sock *sk, struct sk_psock *psock,
			if (likely(!peek)) {
				sge->offset += copy;
				sge->length -= copy;
				if (!msg_rx->skb)
					sk_mem_uncharge(sk, copy);
				msg_rx->sg.size -= copy;