Commit 02c558b2, authored by John Fastabend and committed by Daniel Borkmann
Browse files

bpf: sockmap, support for msg_peek in sk_msg with redirect ingress



This adds support for the MSG_PEEK flag when redirecting to ingress
and receiving on the sk_msg psock queue. Previously the flag was
ignored, which could confuse applications that expected the flag to
work as it does on a normal socket receive.

Signed-off-by: John Fastabend <john.fastabend@gmail.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
parent 8734a162
Loading
Loading
Loading
Loading
+1 −1
Original line number Original line Diff line number Diff line
@@ -2089,7 +2089,7 @@ int tcp_bpf_sendmsg_redir(struct sock *sk, struct sk_msg *msg, u32 bytes,
int tcp_bpf_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
int tcp_bpf_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
		    int nonblock, int flags, int *addr_len);
		    int nonblock, int flags, int *addr_len);
int __tcp_bpf_recvmsg(struct sock *sk, struct sk_psock *psock,
int __tcp_bpf_recvmsg(struct sock *sk, struct sk_psock *psock,
		      struct msghdr *msg, int len);
		      struct msghdr *msg, int len, int flags);


/* Call BPF_SOCK_OPS program that returns an int. If the return value
/* Call BPF_SOCK_OPS program that returns an int. If the return value
 * is < 0, then the BPF op failed (for example if the loaded BPF
 * is < 0, then the BPF op failed (for example if the loaded BPF
+27 −15
Original line number Original line Diff line number Diff line
@@ -39,17 +39,19 @@ static int tcp_bpf_wait_data(struct sock *sk, struct sk_psock *psock,
}
}


int __tcp_bpf_recvmsg(struct sock *sk, struct sk_psock *psock,
int __tcp_bpf_recvmsg(struct sock *sk, struct sk_psock *psock,
		      struct msghdr *msg, int len)
		      struct msghdr *msg, int len, int flags)
{
{
	struct iov_iter *iter = &msg->msg_iter;
	struct iov_iter *iter = &msg->msg_iter;
	int peek = flags & MSG_PEEK;
	int i, ret, copied = 0;
	int i, ret, copied = 0;

	while (copied != len) {
		struct scatterlist *sge;
	struct sk_msg *msg_rx;
	struct sk_msg *msg_rx;


	msg_rx = list_first_entry_or_null(&psock->ingress_msg,
	msg_rx = list_first_entry_or_null(&psock->ingress_msg,
					  struct sk_msg, list);
					  struct sk_msg, list);

	while (copied != len) {
		struct scatterlist *sge;

		if (unlikely(!msg_rx))
		if (unlikely(!msg_rx))
			break;
			break;


@@ -70,22 +72,30 @@ int __tcp_bpf_recvmsg(struct sock *sk, struct sk_psock *psock,
			}
			}


			copied += copy;
			copied += copy;
			if (likely(!peek)) {
				sge->offset += copy;
				sge->offset += copy;
				sge->length -= copy;
				sge->length -= copy;
				sk_mem_uncharge(sk, copy);
				sk_mem_uncharge(sk, copy);
				msg_rx->sg.size -= copy;
				msg_rx->sg.size -= copy;

				if (!sge->length) {
				if (!sge->length) {
				i++;
					sk_msg_iter_var_next(i);
				if (i == MAX_SKB_FRAGS)
					i = 0;
					if (!msg_rx->skb)
					if (!msg_rx->skb)
						put_page(page);
						put_page(page);
				}
				}
			} else {
				sk_msg_iter_var_next(i);
			}


			if (copied == len)
			if (copied == len)
				break;
				break;
		} while (i != msg_rx->sg.end);
		} while (i != msg_rx->sg.end);


		if (unlikely(peek)) {
			msg_rx = list_next_entry(msg_rx, list);
			continue;
		}

		msg_rx->sg.start = i;
		msg_rx->sg.start = i;
		if (!sge->length && msg_rx->sg.start == msg_rx->sg.end) {
		if (!sge->length && msg_rx->sg.start == msg_rx->sg.end) {
			list_del(&msg_rx->list);
			list_del(&msg_rx->list);
@@ -93,6 +103,8 @@ int __tcp_bpf_recvmsg(struct sock *sk, struct sk_psock *psock,
				consume_skb(msg_rx->skb);
				consume_skb(msg_rx->skb);
			kfree(msg_rx);
			kfree(msg_rx);
		}
		}
		msg_rx = list_first_entry_or_null(&psock->ingress_msg,
						  struct sk_msg, list);
	}
	}


	return copied;
	return copied;
@@ -115,7 +127,7 @@ int tcp_bpf_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
		return tcp_recvmsg(sk, msg, len, nonblock, flags, addr_len);
		return tcp_recvmsg(sk, msg, len, nonblock, flags, addr_len);
	lock_sock(sk);
	lock_sock(sk);
msg_bytes_ready:
msg_bytes_ready:
	copied = __tcp_bpf_recvmsg(sk, psock, msg, len);
	copied = __tcp_bpf_recvmsg(sk, psock, msg, len, flags);
	if (!copied) {
	if (!copied) {
		int data, err = 0;
		int data, err = 0;
		long timeo;
		long timeo;
+2 −1
Original line number Original line Diff line number Diff line
@@ -1478,7 +1478,8 @@ int tls_sw_recvmsg(struct sock *sk,
		skb = tls_wait_data(sk, psock, flags, timeo, &err);
		skb = tls_wait_data(sk, psock, flags, timeo, &err);
		if (!skb) {
		if (!skb) {
			if (psock) {
			if (psock) {
				int ret = __tcp_bpf_recvmsg(sk, psock, msg, len);
				int ret = __tcp_bpf_recvmsg(sk, psock,
							    msg, len, flags);


				if (ret > 0) {
				if (ret > 0) {
					copied += ret;
					copied += ret;