Commit 1891c4a0, authored by Florian Westphal, committed by David S. Miller
Browse files

mptcp: add subflow write space signalling and mptcp_poll



Add new SEND_SPACE flag to indicate that a subflow has enough space to
accept more data for transmission.

It gets cleared at the end of mptcp_sendmsg() in case ssk has run
below the free watermark.

It is (re-)set from the subflow write-space callback.

This allows us to use msk->flags to determine the poll mask.

Co-developed-by: Peter Krystad <peter.krystad@linux.intel.com>
Signed-off-by: Peter Krystad <peter.krystad@linux.intel.com>
Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Christoph Paasch <cpaasch@apple.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
parent 648ef4b8
Loading
Loading
Loading
Loading
+53 −0
Original line number Diff line number Diff line
@@ -176,6 +176,23 @@ static int mptcp_sendmsg_frag(struct sock *sk, struct sock *ssk,
	return ret;
}

/* Update write-space state after transmitting on subflow @ssk.
 *
 * Nothing is done while @ssk is still writeable. Otherwise, provided @ssk
 * is attached to a struct socket, MPTCP_SEND_SPACE is cleared on @msk and
 * SOCK_NOSPACE is set on that socket.
 */
static void ssk_check_wmem(struct mptcp_sock *msk, struct sock *ssk)
{
	struct socket *ssock;

	if (likely(sk_stream_is_writeable(ssk)))
		return;

	/* sk_socket is sampled exactly once; it may be NULL */
	ssock = READ_ONCE(ssk->sk_socket);
	if (!ssock)
		return;

	clear_bit(MPTCP_SEND_SPACE, &msk->flags);
	/* order the SEND_SPACE clear before NOSPACE becomes visible */
	smp_mb__after_atomic();
	set_bit(SOCK_NOSPACE, &ssock->flags);
}

static int mptcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
{
	struct mptcp_sock *msk = mptcp_sk(sk);
@@ -219,6 +236,7 @@ static int mptcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
	if (copied > 0)
		ret = copied;

	ssk_check_wmem(msk, ssk);
	release_sock(ssk);
	release_sock(sk);
	return ret;
@@ -315,6 +333,7 @@ static int mptcp_init_sock(struct sock *sk)
	struct mptcp_sock *msk = mptcp_sk(sk);

	INIT_LIST_HEAD(&msk->conn_list);
	__set_bit(MPTCP_SEND_SPACE, &msk->flags);

	return 0;
}
@@ -576,6 +595,13 @@ static void mptcp_sock_graft(struct sock *sk, struct socket *parent)
	write_unlock_bh(&sk->sk_callback_lock);
}

/* .stream_memory_free callback for mptcp_prot.
 *
 * With @wake == 0 this always reports true. With a non-zero @wake the
 * answer is the current state of MPTCP_SEND_SPACE in the msk flags.
 */
static bool mptcp_memory_free(const struct sock *sk, int wake)
{
	struct mptcp_sock *msk = mptcp_sk(sk);

	if (!wake)
		return true;

	return test_bit(MPTCP_SEND_SPACE, &msk->flags);
}

static struct proto mptcp_prot = {
	.name		= "MPTCP",
	.owner		= THIS_MODULE,
@@ -591,6 +617,7 @@ static struct proto mptcp_prot = {
	.hash		= inet_hash,
	.unhash		= inet_unhash,
	.get_port	= mptcp_get_port,
	.stream_memory_free	= mptcp_memory_free,
	.obj_size	= sizeof(struct mptcp_sock),
	.no_autobind	= true,
};
@@ -767,8 +794,34 @@ unlock_fail:
/* poll() handler for MPTCP sockets.
 *
 * When __mptcp_nmpc_socket() yields a socket, the poll is delegated to
 * that socket's own ->poll() with the msk lock held and its mask is
 * returned unchanged. Otherwise the mask is assembled from the msk flags
 * (MPTCP_DATA_READY, MPTCP_SEND_SPACE), the writeable state of @sock->sk
 * and the RCV_SHUTDOWN bit of sk_shutdown.
 */
static __poll_t mptcp_poll(struct file *file, struct socket *sock,
			   struct poll_table_struct *wait)
{
	const struct mptcp_sock *msk;
	struct sock *sk = sock->sk;
	struct socket *nmpc_sock;
	__poll_t events = 0;

	msk = mptcp_sk(sk);
	lock_sock(sk);
	nmpc_sock = __mptcp_nmpc_socket(msk);
	if (nmpc_sock) {
		events = nmpc_sock->ops->poll(file, nmpc_sock, wait);
		goto unlock;
	}

	/* sock_poll_wait() is invoked with the msk lock dropped */
	release_sock(sk);
	sock_poll_wait(file, sock, wait);
	lock_sock(sk);

	/* readable: data has been flagged as ready on the msk */
	if (test_bit(MPTCP_DATA_READY, &msk->flags))
		events |= EPOLLIN | EPOLLRDNORM;

	/* writable: both the msk itself and the SEND_SPACE flag must agree */
	if (sk_stream_is_writeable(sk)) {
		if (test_bit(MPTCP_SEND_SPACE, &msk->flags))
			events |= EPOLLOUT | EPOLLWRNORM;
	}

	/* receive side shut down: report readable plus RDHUP */
	if (sk->sk_shutdown & RCV_SHUTDOWN)
		events |= EPOLLIN | EPOLLRDNORM | EPOLLRDHUP;

unlock:
	release_sock(sk);

	return events;
}

+1 −0
Original line number Diff line number Diff line
@@ -56,6 +56,7 @@

/* MPTCP socket flags
 *
 * These are bit *numbers* within msk->flags, not masks: they are passed to
 * set_bit()/clear_bit()/test_bit(), which take a bit index. Defining them
 * with BIT(n) would silently select bit (1 << n) instead of bit n.
 */
#define MPTCP_DATA_READY	0
#define MPTCP_SEND_SPACE	1

/* MPTCP connection sock */
struct mptcp_sock {
+3 −0
Original line number Diff line number Diff line
@@ -529,6 +529,9 @@ static void subflow_write_space(struct sock *sk)

	sk_stream_write_space(sk);
	if (parent && sk_stream_is_writeable(sk)) {
		set_bit(MPTCP_SEND_SPACE, &mptcp_sk(parent)->flags);
		smp_mb__after_atomic();
		/* set SEND_SPACE before sk_stream_write_space clears NOSPACE */
		sk_stream_write_space(parent);
	}
}