Commit 9e8ac63f authored by Jakub Kicinski
Browse files

Merge branch 'mptcp-more-miscellaneous-mptcp-fixes'

Mat Martineau says:

====================
mptcp: More miscellaneous MPTCP fixes

Here's another batch of fixup and enhancement patches that we have
collected in the MPTCP tree.

Patch 1 removes an unnecessary flag and related code.

Patch 2 fixes a bug encountered when closing fallback sockets.

Patches 3 and 4 choose a better transmit subflow, with a self test.

Patch 5 adjusts tracking of unaccepted subflows.

Patches 6-8 improve handling of long ADD_ADDR options, with a test.

Patch 9 more reliably tracks the MPTCP-level window shared with peers.

Patch 10 sends MPTCP-level acknowledgements more aggressively, so the
peer can send more data without extra delay.
====================

Link: https://lore.kernel.org/r/20201119194603.103158-1-mathew.j.martineau@linux.intel.com


Signed-off-by: Jakub Kicinski <kuba@kernel.org>
parents 3cd336c5 ea4ca586
Loading
Loading
Loading
Loading
+2 −1
Original line number Diff line number Diff line
@@ -88,7 +88,8 @@ bool mptcp_established_options(struct sock *sk, struct sk_buff *skb,
			       struct mptcp_out_options *opts);
void mptcp_incoming_options(struct sock *sk, struct sk_buff *skb);

void mptcp_write_options(__be32 *ptr, struct mptcp_out_options *opts);
void mptcp_write_options(__be32 *ptr, const struct tcp_sock *tp,
			 struct mptcp_out_options *opts);

/* move the skb extension ownership, with the assumption that 'to' is
 * newly allocated
+7 −4
Original line number Diff line number Diff line
@@ -445,11 +445,12 @@ struct tcp_out_options {
	struct mptcp_out_options mptcp;
};

static void mptcp_options_write(__be32 *ptr, struct tcp_out_options *opts)
static void mptcp_options_write(__be32 *ptr, const struct tcp_sock *tp,
				struct tcp_out_options *opts)
{
#if IS_ENABLED(CONFIG_MPTCP)
	if (unlikely(OPTION_MPTCP & opts->options))
		mptcp_write_options(ptr, &opts->mptcp);
		mptcp_write_options(ptr, tp, &opts->mptcp);
#endif
}

@@ -701,7 +702,7 @@ static void tcp_options_write(__be32 *ptr, struct tcp_sock *tp,

	smc_options_write(ptr, &options);

	mptcp_options_write(ptr, opts);
	mptcp_options_write(ptr, tp, opts);
}

static void smc_set_option(const struct tcp_sock *tp,
@@ -1346,7 +1347,6 @@ static int __tcp_transmit_skb(struct sock *sk, struct sk_buff *skb,
		}
	}

	tcp_options_write((__be32 *)(th + 1), tp, &opts);
	skb_shinfo(skb)->gso_type = sk->sk_gso_type;
	if (likely(!(tcb->tcp_flags & TCPHDR_SYN))) {
		th->window      = htons(tcp_select_window(sk));
@@ -1357,6 +1357,9 @@ static int __tcp_transmit_skb(struct sock *sk, struct sk_buff *skb,
		 */
		th->window	= htons(min(tp->rcv_wnd, 65535U));
	}

	tcp_options_write((__be32 *)(th + 1), tp, &opts);

#ifdef CONFIG_TCP_MD5SIG
	/* Calculate the MD5 hash, as we have all we need now */
	if (md5) {
+44 −4
Original line number Diff line number Diff line
@@ -242,7 +242,9 @@ static void mptcp_parse_option(const struct sk_buff *skb,

		mp_opt->add_addr = 1;
		mp_opt->addr_id = *ptr++;
		pr_debug("ADD_ADDR: id=%d, echo=%d", mp_opt->addr_id, mp_opt->echo);
		pr_debug("ADD_ADDR%s: id=%d, echo=%d",
			 (mp_opt->family == MPTCP_ADDR_IPVERSION_6) ? "6" : "",
			 mp_opt->addr_id, mp_opt->echo);
		if (mp_opt->family == MPTCP_ADDR_IPVERSION_4) {
			memcpy((u8 *)&mp_opt->addr.s_addr, (u8 *)ptr, 4);
			ptr += 4;
@@ -528,6 +530,7 @@ static bool mptcp_established_options_dss(struct sock *sk, struct sk_buff *skb,
		opts->ext_copy.ack64 = 0;
	}
	opts->ext_copy.use_ack = 1;
	WRITE_ONCE(msk->old_wspace, __mptcp_space((struct sock *)msk));

	/* Add kind/length/subtype/flag overhead if mapping is not populated */
	if (dss_size == 0)
@@ -573,17 +576,27 @@ static u64 add_addr6_generate_hmac(u64 key1, u64 key2, u8 addr_id,
}
#endif

static bool mptcp_established_options_add_addr(struct sock *sk,
static bool mptcp_established_options_add_addr(struct sock *sk, struct sk_buff *skb,
					       unsigned int *size,
					       unsigned int remaining,
					       struct mptcp_out_options *opts)
{
	struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
	struct mptcp_sock *msk = mptcp_sk(subflow->conn);
	bool drop_other_suboptions = false;
	unsigned int opt_size = *size;
	struct mptcp_addr_info saddr;
	bool echo;
	int len;

	if (mptcp_pm_should_add_signal_ipv6(msk) &&
	    skb && skb_is_tcp_pure_ack(skb)) {
		pr_debug("drop other suboptions");
		opts->suboptions = 0;
		remaining += opt_size;
		drop_other_suboptions = true;
	}

	if (!mptcp_pm_should_add_signal(msk) ||
	    !(mptcp_pm_add_addr_signal(msk, remaining, &saddr, &echo)))
		return false;
@@ -593,6 +606,8 @@ static bool mptcp_established_options_add_addr(struct sock *sk,
		return false;

	*size = len;
	if (drop_other_suboptions)
		*size -= opt_size;
	opts->addr_id = saddr.id;
	if (saddr.family == AF_INET) {
		opts->suboptions |= OPTION_MPTCP_ADD_ADDR;
@@ -678,7 +693,7 @@ bool mptcp_established_options(struct sock *sk, struct sk_buff *skb,

	*size += opt_size;
	remaining -= opt_size;
	if (mptcp_established_options_add_addr(sk, &opt_size, remaining, opts)) {
	if (mptcp_established_options_add_addr(sk, skb, &opt_size, remaining, opts)) {
		*size += opt_size;
		remaining -= opt_size;
		ret = true;
@@ -759,6 +774,11 @@ static bool check_fully_established(struct mptcp_sock *msk, struct sock *ssk,
		goto fully_established;
	}

	if (mp_opt->add_addr) {
		WRITE_ONCE(msk->fully_established, true);
		return true;
	}

	/* If the first established packet does not contain MP_CAPABLE + data
	 * then fallback to TCP. Fallback scenarios requires a reset for
	 * MP_JOIN subflows.
@@ -991,7 +1011,24 @@ void mptcp_incoming_options(struct sock *sk, struct sk_buff *skb)
	}
}

void mptcp_write_options(__be32 *ptr, struct mptcp_out_options *opts)
/* Track, at the MPTCP level, the receive-window edge announced to the peer.
 *
 * @tp: TCP socket of the subflow the options are being written for; it is
 *      cast to a struct sock to reach the subflow context and, through
 *      subflow->conn, the owning MPTCP socket.
 *
 * The candidate edge is msk->ack_seq + tp->rcv_wnd. msk->rcv_wnd_sent is
 * overwritten only when after64() reports the candidate as later than the
 * stored value (presumably a wrap-safe 64-bit comparison - confirm against
 * its definition), so the stored edge is never replaced by an earlier one.
 */
static void mptcp_set_rwin(const struct tcp_sock *tp)
{
	const struct mptcp_subflow_context *subflow;
	struct mptcp_sock *msk;
	u64 rwin_edge;

	subflow = mptcp_subflow_ctx((const struct sock *)tp);
	msk = mptcp_sk(subflow->conn);

	rwin_edge = READ_ONCE(msk->ack_seq) + tp->rcv_wnd;

	/* nothing to record unless the edge moved past the stored one */
	if (!after64(rwin_edge, READ_ONCE(msk->rcv_wnd_sent)))
		return;

	WRITE_ONCE(msk->rcv_wnd_sent, rwin_edge);
}

void mptcp_write_options(__be32 *ptr, const struct tcp_sock *tp,
			 struct mptcp_out_options *opts)
{
	if ((OPTION_MPTCP_MPC_SYN | OPTION_MPTCP_MPC_SYNACK |
	     OPTION_MPTCP_MPC_ACK) & opts->suboptions) {
@@ -1148,4 +1185,7 @@ mp_capable_done:
					   TCPOPT_NOP << 8 | TCPOPT_NOP, ptr);
		}
	}

	if (tp)
		mptcp_set_rwin(tp);
}
+23 −8
Original line number Diff line number Diff line
@@ -16,11 +16,17 @@ int mptcp_pm_announce_addr(struct mptcp_sock *msk,
			   const struct mptcp_addr_info *addr,
			   bool echo)
{
	u8 add_addr = READ_ONCE(msk->pm.add_addr_signal);

	pr_debug("msk=%p, local_id=%d", msk, addr->id);

	msk->pm.local = *addr;
	WRITE_ONCE(msk->pm.add_addr_echo, echo);
	WRITE_ONCE(msk->pm.add_addr_signal, true);
	add_addr |= BIT(MPTCP_ADD_ADDR_SIGNAL);
	if (echo)
		add_addr |= BIT(MPTCP_ADD_ADDR_ECHO);
	if (addr->family == AF_INET6)
		add_addr |= BIT(MPTCP_ADD_ADDR_IPV6);
	WRITE_ONCE(msk->pm.add_addr_signal, add_addr);
	return 0;
}

@@ -149,14 +155,24 @@ void mptcp_pm_add_addr_received(struct mptcp_sock *msk,

	spin_lock_bh(&pm->lock);

	if (!READ_ONCE(pm->accept_addr))
	if (!READ_ONCE(pm->accept_addr)) {
		mptcp_pm_announce_addr(msk, addr, true);
	else if (mptcp_pm_schedule_work(msk, MPTCP_PM_ADD_ADDR_RECEIVED))
		mptcp_pm_add_addr_send_ack(msk);
	} else if (mptcp_pm_schedule_work(msk, MPTCP_PM_ADD_ADDR_RECEIVED)) {
		pm->remote = *addr;
	}

	spin_unlock_bh(&pm->lock);
}

/* Ask the path manager worker to send an ack for a pending ADD_ADDR.
 *
 * The MPTCP_PM_ADD_ADDR_SEND_ACK work item is scheduled only while
 * mptcp_pm_should_add_signal_ipv6() reports true for @msk; otherwise this
 * is a no-op. The result of mptcp_pm_schedule_work() is not checked.
 */
void mptcp_pm_add_addr_send_ack(struct mptcp_sock *msk)
{
	if (mptcp_pm_should_add_signal_ipv6(msk))
		mptcp_pm_schedule_work(msk, MPTCP_PM_ADD_ADDR_SEND_ACK);
}

void mptcp_pm_rm_addr_received(struct mptcp_sock *msk, u8 rm_id)
{
	struct mptcp_pm_data *pm = &msk->pm;
@@ -182,13 +198,13 @@ bool mptcp_pm_add_addr_signal(struct mptcp_sock *msk, unsigned int remaining,
	if (!mptcp_pm_should_add_signal(msk))
		goto out_unlock;

	*echo = READ_ONCE(msk->pm.add_addr_echo);
	*echo = mptcp_pm_should_add_signal_echo(msk);

	if (remaining < mptcp_add_addr_len(msk->pm.local.family, *echo))
		goto out_unlock;

	*saddr = msk->pm.local;
	WRITE_ONCE(msk->pm.add_addr_signal, false);
	WRITE_ONCE(msk->pm.add_addr_signal, 0);
	ret = true;

out_unlock:
@@ -232,11 +248,10 @@ void mptcp_pm_data_init(struct mptcp_sock *msk)
	msk->pm.subflows = 0;
	msk->pm.rm_id = 0;
	WRITE_ONCE(msk->pm.work_pending, false);
	WRITE_ONCE(msk->pm.add_addr_signal, false);
	WRITE_ONCE(msk->pm.add_addr_signal, 0);
	WRITE_ONCE(msk->pm.rm_addr_signal, false);
	WRITE_ONCE(msk->pm.accept_addr, false);
	WRITE_ONCE(msk->pm.accept_subflow, false);
	WRITE_ONCE(msk->pm.add_addr_echo, false);
	msk->pm.status = 0;

	spin_lock_init(&msk->pm.lock);
+29 −0
Original line number Diff line number Diff line
@@ -228,6 +228,7 @@ static void mptcp_pm_add_timer(struct timer_list *timer)
	if (!mptcp_pm_should_add_signal(msk)) {
		pr_debug("retransmit ADD_ADDR id=%d", entry->addr.id);
		mptcp_pm_announce_addr(msk, &entry->addr, false);
		mptcp_pm_add_addr_send_ack(msk);
		entry->retrans_times++;
	}

@@ -328,6 +329,7 @@ static void mptcp_pm_create_subflow_or_signal_addr(struct mptcp_sock *msk)
			if (mptcp_pm_alloc_anno_list(msk, local)) {
				msk->pm.add_addr_signaled++;
				mptcp_pm_announce_addr(msk, &local->addr, false);
				mptcp_pm_nl_add_addr_send_ack(msk);
			}
		} else {
			/* pick failed, avoid further attempts later */
@@ -398,6 +400,33 @@ void mptcp_pm_nl_add_addr_received(struct mptcp_sock *msk)
	spin_lock_bh(&msk->pm.lock);

	mptcp_pm_announce_addr(msk, &remote, true);
	mptcp_pm_nl_add_addr_send_ack(msk);
}

/* Send an ack on the first subflow of @msk for a pending IPv6 ADD_ADDR.
 *
 * Does nothing unless mptcp_pm_should_add_signal_ipv6() reports true, or
 * when msk->conn_list is empty after the join list has been flushed.
 *
 * msk->pm.lock is released before the subflow socket is locked and
 * re-acquired once the ack has been sent, so this function is presumably
 * entered with msk->pm.lock held - confirm against the callers.
 */
void mptcp_pm_nl_add_addr_send_ack(struct mptcp_sock *msk)
{
	struct mptcp_subflow_context *first;
	struct sock *ssk;
	u8 signal;

	if (!mptcp_pm_should_add_signal_ipv6(msk))
		return;

	__mptcp_flush_join_list(msk);
	first = list_first_entry_or_null(&msk->conn_list, typeof(*first), node);
	if (!first)
		return;

	ssk = mptcp_subflow_tcp_sock(first);

	/* release the PM spinlock while the subflow socket is locked */
	spin_unlock_bh(&msk->pm.lock);
	pr_debug("send ack for add_addr6");
	lock_sock(ssk);
	tcp_send_ack(ssk);
	release_sock(ssk);
	spin_lock_bh(&msk->pm.lock);

	/* The flags are re-read because the lock was dropped above; only
	 * the IPv6 bit is cleared, every other bit is written back as is.
	 */
	signal = READ_ONCE(msk->pm.add_addr_signal);
	signal &= ~BIT(MPTCP_ADD_ADDR_IPV6);
	WRITE_ONCE(msk->pm.add_addr_signal, signal);
}

void mptcp_pm_nl_rm_addr_received(struct mptcp_sock *msk)
Loading