Commit 3b1d6210 authored by Paolo Abeni's avatar Paolo Abeni Committed by David S. Miller
Browse files

mptcp: implement and use MPTCP-level retransmission



On timeout event, schedule a work queue to do the retransmission.
Retransmission code closely resembles the sendmsg() implementation and
re-uses mptcp_sendmsg_frag, providing a dummy msghdr - for flags'
sake - and peeking the relevant dfrag from the rtx head.

Signed-off-by: default avatarPaolo Abeni <pabeni@redhat.com>
Signed-off-by: default avatarMat Martineau <mathew.j.martineau@linux.intel.com>
Signed-off-by: default avatarDavid S. Miller <davem@davemloft.net>
parent 3f8e0aae
Loading
Loading
Loading
Loading
+94 −4
Original line number Diff line number Diff line
@@ -283,6 +283,10 @@ static void mptcp_reset_timer(struct sock *sk)
void mptcp_data_acked(struct sock *sk)
{
	mptcp_reset_timer(sk);

	if (!sk_stream_is_writeable(sk) &&
	    schedule_work(&mptcp_sk(sk)->work))
		sock_hold(sk);
}

static void mptcp_stop_timer(struct sock *sk)
@@ -900,10 +904,13 @@ static void mptcp_retransmit_handler(struct sock *sk)
{
	struct mptcp_sock *msk = mptcp_sk(sk);

	if (atomic64_read(&msk->snd_una) == msk->write_seq)
	if (atomic64_read(&msk->snd_una) == msk->write_seq) {
		mptcp_stop_timer(sk);
	else
		mptcp_reset_timer(sk);
	} else {
		set_bit(MPTCP_WORK_RTX, &msk->flags);
		if (schedule_work(&msk->work))
			sock_hold(sk);
	}
}

static void mptcp_retransmit_timer(struct timer_list *t)
@@ -925,6 +932,37 @@ static void mptcp_retransmit_timer(struct timer_list *t)
	sock_put(sk);
}

/* Find an idle subflow.  Return NULL if there is unacked data at tcp
 * level.
 *
 * A backup subflow is returned only if that is the only kind available.
 */
static struct sock *mptcp_subflow_get_retrans(const struct mptcp_sock *msk)
{
	struct mptcp_subflow_context *subflow;
	struct sock *backup = NULL;

	sock_owned_by_me((const struct sock *)msk);

	mptcp_for_each_subflow(msk, subflow) {
		struct sock *ssk = mptcp_subflow_tcp_sock(subflow);

		/* still data outstanding at TCP level?  Don't retransmit. */
		if (!tcp_write_queue_empty(ssk))
			return NULL;

		if (subflow->backup) {
			if (!backup)
				backup = ssk;
			continue;
		}

		return ssk;
	}

	return backup;
}

/* subflow sockets can be either outgoing (connect) or incoming
 * (accept).
 *
@@ -958,11 +996,62 @@ static unsigned int mptcp_sync_mss(struct sock *sk, u32 pmtu)
static void mptcp_worker(struct work_struct *work)
{
	struct mptcp_sock *msk = container_of(work, struct mptcp_sock, work);
	struct sock *sk = &msk->sk.icsk_inet.sk;
	struct sock *ssk, *sk = &msk->sk.icsk_inet.sk;
	int orig_len, orig_offset, ret, mss_now = 0, size_goal = 0;
	struct mptcp_data_frag *dfrag;
	u64 orig_write_seq;
	size_t copied = 0;
	struct msghdr msg;
	long timeo = 0;

	lock_sock(sk);
	mptcp_clean_una(sk);
	__mptcp_flush_join_list(msk);
	__mptcp_move_skbs(msk);

	if (!test_and_clear_bit(MPTCP_WORK_RTX, &msk->flags))
		goto unlock;

	dfrag = mptcp_rtx_head(sk);
	if (!dfrag)
		goto unlock;

	ssk = mptcp_subflow_get_retrans(msk);
	if (!ssk)
		goto reset_unlock;

	lock_sock(ssk);

	msg.msg_flags = MSG_DONTWAIT;
	orig_len = dfrag->data_len;
	orig_offset = dfrag->offset;
	orig_write_seq = dfrag->data_seq;
	while (dfrag->data_len > 0) {
		ret = mptcp_sendmsg_frag(sk, ssk, &msg, dfrag, &timeo, &mss_now,
					 &size_goal);
		if (ret < 0)
			break;

		copied += ret;
		dfrag->data_len -= ret;
		dfrag->offset += ret;
	}
	if (copied)
		tcp_push(ssk, msg.msg_flags, mss_now, tcp_sk(ssk)->nonagle,
			 size_goal);

	dfrag->data_seq = orig_write_seq;
	dfrag->offset = orig_offset;
	dfrag->data_len = orig_len;

	mptcp_set_timeout(sk, ssk);
	release_sock(ssk);

reset_unlock:
	if (!mptcp_timer_pending(sk))
		mptcp_reset_timer(sk);

unlock:
	release_sock(sk);
	sock_put(sk);
}
@@ -1124,6 +1213,7 @@ static int mptcp_disconnect(struct sock *sk, int flags)
	lock_sock(sk);
	__mptcp_clear_xmit(sk);
	release_sock(sk);
	mptcp_cancel_work(sk);
	return tcp_disconnect(sk, flags);
}

+1 −0
Original line number Diff line number Diff line
@@ -88,6 +88,7 @@
/* MPTCP socket flags */
#define MPTCP_DATA_READY	0
#define MPTCP_SEND_SPACE	1
#define MPTCP_WORK_RTX		2

static inline __be32 mptcp_option(u8 subopt, u8 len, u8 nib, u8 field)
{