Commit b51f9b80 authored by Paolo Abeni's avatar Paolo Abeni Committed by David S. Miller
Browse files

mptcp: introduce MPTCP retransmission timer



The timer will be used to schedule retransmission. It's
frequency is based on the current subflow RTO estimation and
is reset on every una_seq update

The timer is clearer for good by __mptcp_clear_xmit()

Also clean MPTCP rtx queue before each transmission.

Signed-off-by: default avatarPaolo Abeni <pabeni@redhat.com>
Signed-off-by: default avatarMat Martineau <mathew.j.martineau@linux.intel.com>
Signed-off-by: default avatarDavid S. Miller <davem@davemloft.net>
parent 18b683bf
Loading
Loading
Loading
Loading
+3 −1
Original line number Diff line number Diff line
@@ -779,10 +779,12 @@ static void update_una(struct mptcp_sock *msk,
		snd_una = old_snd_una;
		old_snd_una = atomic64_cmpxchg(&msk->snd_una, snd_una,
					       new_snd_una);
		if (old_snd_una == snd_una)
		if (old_snd_una == snd_una) {
			mptcp_data_acked((struct sock *)msk);
			break;
		}
	}
}

static bool add_addr_hmac_valid(struct mptcp_sock *msk,
				struct mptcp_options_received *mp_opt)
+88 −1
Original line number Diff line number Diff line
@@ -251,6 +251,46 @@ static void __mptcp_flush_join_list(struct mptcp_sock *msk)
	spin_unlock_bh(&msk->join_list_lock);
}

static void mptcp_set_timeout(const struct sock *sk, const struct sock *ssk)
{
	long tout = ssk && inet_csk(ssk)->icsk_pending ?
				      inet_csk(ssk)->icsk_timeout - jiffies : 0;

	if (tout <= 0)
		tout = mptcp_sk(sk)->timer_ival;
	mptcp_sk(sk)->timer_ival = tout > 0 ? tout : TCP_RTO_MIN;
}

static bool mptcp_timer_pending(struct sock *sk)
{
	return timer_pending(&inet_csk(sk)->icsk_retransmit_timer);
}

static void mptcp_reset_timer(struct sock *sk)
{
	struct inet_connection_sock *icsk = inet_csk(sk);
	unsigned long tout;

	/* should never be called with mptcp level timer cleared */
	tout = READ_ONCE(mptcp_sk(sk)->timer_ival);
	if (WARN_ON_ONCE(!tout))
		tout = TCP_RTO_MIN;
	sk_reset_timer(sk, &icsk->icsk_retransmit_timer, jiffies + tout);
}

void mptcp_data_acked(struct sock *sk)
{
	mptcp_reset_timer(sk);
}

static void mptcp_stop_timer(struct sock *sk)
{
	struct inet_connection_sock *icsk = inet_csk(sk);

	sk_stop_timer(sk, &icsk->icsk_retransmit_timer);
	mptcp_sk(sk)->timer_ival = 0;
}

static bool mptcp_ext_cache_refill(struct mptcp_sock *msk)
{
	if (!msk->cached_ext)
@@ -596,10 +636,15 @@ fallback:
		copied += ret;
	}

	mptcp_set_timeout(sk, ssk);
	if (copied) {
		ret = copied;
		tcp_push(ssk, msg->msg_flags, mss_now, tcp_sk(ssk)->nonagle,
			 size_goal);

		/* start the timer, if it's not pending */
		if (!mptcp_timer_pending(sk))
			mptcp_reset_timer(sk);
	}

	ssk_check_wmem(msk, ssk);
@@ -787,6 +832,35 @@ out_err:
	return copied;
}

static void mptcp_retransmit_handler(struct sock *sk)
{
	struct mptcp_sock *msk = mptcp_sk(sk);

	if (atomic64_read(&msk->snd_una) == msk->write_seq)
		mptcp_stop_timer(sk);
	else
		mptcp_reset_timer(sk);
}

static void mptcp_retransmit_timer(struct timer_list *t)
{
	struct inet_connection_sock *icsk = from_timer(icsk, t,
						       icsk_retransmit_timer);
	struct sock *sk = &icsk->icsk_inet.sk;

	bh_lock_sock(sk);
	if (!sock_owned_by_user(sk)) {
		mptcp_retransmit_handler(sk);
	} else {
		/* delegate our work to tcp_release_cb() */
		if (!test_and_set_bit(TCP_WRITE_TIMER_DEFERRED,
				      &sk->sk_tsq_flags))
			sock_hold(sk);
	}
	bh_unlock_sock(sk);
	sock_put(sk);
}

/* subflow sockets can be either outgoing (connect) or incoming
 * (accept).
 *
@@ -846,6 +920,9 @@ static int __mptcp_init_sock(struct sock *sk)

	mptcp_pm_data_init(msk);

	/* re-use the csk retrans timer for MPTCP-level retrans */
	timer_setup(&msk->sk.icsk_retransmit_timer, mptcp_retransmit_timer, 0);

	return 0;
}

@@ -867,6 +944,8 @@ static void __mptcp_clear_xmit(struct sock *sk)
	struct mptcp_sock *msk = mptcp_sk(sk);
	struct mptcp_data_frag *dtmp, *dfrag;

	sk_stop_timer(sk, &msk->sk.icsk_retransmit_timer);

	list_for_each_entry_safe(dfrag, dtmp, &msk->rtx_queue, list)
		dfrag_clear(dfrag);
}
@@ -1155,7 +1234,8 @@ static int mptcp_getsockopt(struct sock *sk, int level, int optname,
	return -EOPNOTSUPP;
}

#define MPTCP_DEFERRED_ALL TCPF_DELACK_TIMER_DEFERRED
#define MPTCP_DEFERRED_ALL (TCPF_DELACK_TIMER_DEFERRED | \
			    TCPF_WRITE_TIMER_DEFERRED)

/* this is very alike tcp_release_cb() but we must handle differently a
 * different set of events
@@ -1171,6 +1251,8 @@ static void mptcp_release_cb(struct sock *sk)
		nflags = flags & ~MPTCP_DEFERRED_ALL;
	} while (cmpxchg(&sk->sk_tsq_flags, flags, nflags) != flags);

	sock_release_ownership(sk);

	if (flags & TCPF_DELACK_TIMER_DEFERRED) {
		struct mptcp_sock *msk = mptcp_sk(sk);
		struct sock *ssk;
@@ -1179,6 +1261,11 @@ static void mptcp_release_cb(struct sock *sk)
		if (!ssk || !schedule_work(&msk->work))
			__sock_put(sk);
	}

	if (flags & TCPF_WRITE_TIMER_DEFERRED) {
		mptcp_retransmit_handler(sk);
		__sock_put(sk);
	}
}

static int mptcp_get_port(struct sock *sk, unsigned short snum)
+2 −0
Original line number Diff line number Diff line
@@ -157,6 +157,7 @@ struct mptcp_sock {
	u64		write_seq;
	u64		ack_seq;
	atomic64_t	snd_una;
	unsigned long	timer_ival;
	u32		token;
	unsigned long	flags;
	bool		can_ack;
@@ -326,6 +327,7 @@ void mptcp_get_options(const struct sk_buff *skb,
void mptcp_finish_connect(struct sock *sk);
void mptcp_data_ready(struct sock *sk, struct sock *ssk);
bool mptcp_finish_join(struct sock *sk);
void mptcp_data_acked(struct sock *sk);

int mptcp_token_new_request(struct request_sock *req);
void mptcp_token_destroy_request(u32 token);