Commit 8c755953 authored by David S. Miller
Browse files

Merge branch 'mptcp-fix-incoming-options-parsing'



Paolo Abeni says:

====================
mptcp: fix incoming options parsing

This series addresses a serious issue in MPTCP option parsing.

This is bigger than the usual -net change, but I was unable to find a
working, sane, smaller fix.

The core change is in patch 2/5, which moves MPTCP options parsing out of
the TCP code and into the existing MPTCP hooks, and clears the MPTCP options
status on each processed packet.

Patch 1/5 is a needed prerequisite, and patches 3, 4 and 5 are smaller,
related fixes.

v1 -> v2:
 - cleaned-up patch 1/5
 - rebased on top of current -net
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
parents 30724ccb a77895db
Loading
Loading
Loading
Loading
+0 −51
Original line number Diff line number Diff line
@@ -78,47 +78,6 @@ struct tcp_sack_block {
#define TCP_SACK_SEEN     (1 << 0)   /*1 = peer is SACK capable, */
#define TCP_DSACK_SEEN    (1 << 2)   /*1 = DSACK was received from peer*/

#if IS_ENABLED(CONFIG_MPTCP)
/* Values decoded from the MPTCP option(s) carried by a received TCP segment.
 *
 * The mp_capable, mp_join, dss, add_addr and rm_addr bits are the per-packet
 * "option seen" status: they are reset before parsing and tested afterwards
 * to decide which of the payload fields below are meaningful.
 */
struct mptcp_options_received {
	/* key advertised by the peer; saved as the subflow/msk remote_key */
	u64	sndr_key;
	u64	rcvr_key;
	u64	data_ack;
	/* mapping data, copied into the skb MPTCP extension when use_map is set */
	u64	data_seq;
	u32	subflow_seq;
	u16	data_len;
	/* option status bits; family is compared against MPTCP_ADDR_IPVERSION_* */
	u16	mp_capable : 1,
		mp_join : 1,
		dss : 1,
		add_addr : 1,
		rm_addr : 1,
		family : 4,
		echo : 1,
		backup : 1;
	u32	token;
	/* nonce and thmac are saved on the subflow when an MP_JOIN reply arrives */
	u32	nonce;
	u64	thmac;
	u8	hmac[20];
	u8	join_id;
	/* DSS sub-flags: use_map/use_ack gate mapping and ack handling */
	u8	use_map:1,
		dsn64:1,
		data_fin:1,
		use_ack:1,
		ack64:1,
		mpc_map:1,
		__unused:2;
	/* announced address: id, address (v4 or v6, selected by family) and port */
	u8	addr_id;
	u8	rm_id;
	union {
		struct in_addr	addr;
#if IS_ENABLED(CONFIG_MPTCP_IPV6)
		struct in6_addr	addr6;
#endif
	};
	/* NOTE(review): presumably the ADD_ADDR HMAC checked before the
	 * address is accepted - confirm against add_addr_hmac_valid()
	 */
	u64	ahmac;
	u16	port;
};
#endif

struct tcp_options_received {
/*	PAWS/RTTM data	*/
	int	ts_recent_stamp;/* Time we stored ts_recent (for aging) */
@@ -136,9 +95,6 @@ struct tcp_options_received {
	u8	num_sacks;	/* Number of SACK blocks		*/
	u16	user_mss;	/* mss requested by user in ioctl	*/
	u16	mss_clamp;	/* Maximal mss, negotiated at connection setup */
#if IS_ENABLED(CONFIG_MPTCP)
	struct mptcp_options_received	mptcp;
#endif
};

static inline void tcp_clear_options(struct tcp_options_received *rx_opt)
@@ -148,13 +104,6 @@ static inline void tcp_clear_options(struct tcp_options_received *rx_opt)
#if IS_ENABLED(CONFIG_SMC)
	rx_opt->smc_ok = 0;
#endif
#if IS_ENABLED(CONFIG_MPTCP)
	rx_opt->mptcp.mp_capable = 0;
	rx_opt->mptcp.mp_join = 0;
	rx_opt->mptcp.add_addr = 0;
	rx_opt->mptcp.rm_addr = 0;
	rx_opt->mptcp.dss = 0;
#endif
}

/* This is the max number of SACKS that we'll generate and process. It's safe
+0 −3
Original line number Diff line number Diff line
@@ -68,11 +68,8 @@ static inline bool rsk_is_mptcp(const struct request_sock *req)
	return tcp_rsk(req)->is_mptcp;
}

void mptcp_parse_option(const struct sk_buff *skb, const unsigned char *ptr,
			int opsize, struct tcp_options_received *opt_rx);
bool mptcp_syn_options(struct sock *sk, const struct sk_buff *skb,
		       unsigned int *size, struct mptcp_out_options *opts);
void mptcp_rcv_synsent(struct sock *sk);
bool mptcp_synack_options(const struct request_sock *req, unsigned int *size,
			  struct mptcp_out_options *opts);
bool mptcp_established_options(struct sock *sk, struct sk_buff *skb,
+0 −7
Original line number Diff line number Diff line
@@ -3926,10 +3926,6 @@ void tcp_parse_options(const struct net *net,
				 */
				break;
#endif
			case TCPOPT_MPTCP:
				mptcp_parse_option(skb, ptr, opsize, opt_rx);
				break;

			case TCPOPT_FASTOPEN:
				tcp_parse_fastopen_option(
					opsize - TCPOLEN_FASTOPEN_BASE,
@@ -5990,9 +5986,6 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb,
		tcp_sync_mss(sk, icsk->icsk_pmtu_cookie);
		tcp_initialize_rcv_mss(sk);

		if (sk_is_mptcp(sk))
			mptcp_rcv_synsent(sk);

		/* Remember, tcp_poll() does not lock socket!
		 * Change state from SYN-SENT only after copied_seq
		 * is initialized. */
+41 −54
Original line number Diff line number Diff line
@@ -16,10 +16,10 @@ static bool mptcp_cap_flag_sha256(u8 flags)
	return (flags & MPTCP_CAP_FLAG_MASK) == MPTCP_CAP_HMAC_SHA256;
}

void mptcp_parse_option(const struct sk_buff *skb, const unsigned char *ptr,
			int opsize, struct tcp_options_received *opt_rx)
static void mptcp_parse_option(const struct sk_buff *skb,
			       const unsigned char *ptr, int opsize,
			       struct mptcp_options_received *mp_opt)
{
	struct mptcp_options_received *mp_opt = &opt_rx->mptcp;
	u8 subtype = *ptr >> 4;
	int expected_opsize;
	u8 version;
@@ -283,12 +283,20 @@ void mptcp_parse_option(const struct sk_buff *skb, const unsigned char *ptr,
}

void mptcp_get_options(const struct sk_buff *skb,
		       struct tcp_options_received *opt_rx)
		       struct mptcp_options_received *mp_opt)
{
	const unsigned char *ptr;
	const struct tcphdr *th = tcp_hdr(skb);
	int length = (th->doff * 4) - sizeof(struct tcphdr);
	const unsigned char *ptr;
	int length;

	/* initialize option status */
	mp_opt->mp_capable = 0;
	mp_opt->mp_join = 0;
	mp_opt->add_addr = 0;
	mp_opt->rm_addr = 0;
	mp_opt->dss = 0;

	length = (th->doff * 4) - sizeof(struct tcphdr);
	ptr = (const unsigned char *)(th + 1);

	while (length > 0) {
@@ -308,7 +316,7 @@ void mptcp_get_options(const struct sk_buff *skb,
			if (opsize > length)
				return;	/* don't parse partial options */
			if (opcode == TCPOPT_MPTCP)
				mptcp_parse_option(skb, ptr, opsize, opt_rx);
				mptcp_parse_option(skb, ptr, opsize, mp_opt);
			ptr += opsize - 2;
			length -= opsize;
		}
@@ -344,28 +352,6 @@ bool mptcp_syn_options(struct sock *sk, const struct sk_buff *skb,
	return false;
}

/* Record on the subflow context what the peer answered to our MPTCP SYN.
 *
 * Invoked from tcp_rcv_synsent_state_process() for MPTCP sockets, reading
 * the MPTCP options previously parsed into tp->rx_opt.mptcp.
 *
 * @sk: the subflow TCP socket that is in SYN-SENT processing
 */
void mptcp_rcv_synsent(struct sock *sk)
{
	struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
	struct tcp_sock *tp = tcp_sk(sk);

	if (subflow->request_mptcp && tp->rx_opt.mptcp.mp_capable) {
		/* we asked for MP_CAPABLE and the peer echoed it: keep its key */
		subflow->mp_capable = 1;
		subflow->can_ack = 1;
		subflow->remote_key = tp->rx_opt.mptcp.sndr_key;
		pr_debug("subflow=%p, remote_key=%llu", subflow,
			 subflow->remote_key);
	} else if (subflow->request_join && tp->rx_opt.mptcp.mp_join) {
		/* we asked for MP_JOIN and the peer replied: keep thmac and nonce */
		subflow->mp_join = 1;
		subflow->thmac = tp->rx_opt.mptcp.thmac;
		subflow->remote_nonce = tp->rx_opt.mptcp.nonce;
		pr_debug("subflow=%p, thmac=%llu, remote_nonce=%u", subflow,
			 subflow->thmac, subflow->remote_nonce);
	} else if (subflow->request_mptcp) {
		/* MP_CAPABLE was requested but not received: clear the MPTCP
		 * flag on the TCP socket (presumably falling back to plain TCP)
		 */
		tcp_sk(sk)->is_mptcp = 0;
	}
}

/* MP_JOIN client subflow must wait for 4th ack before sending any data:
 * TCP can't schedule delack timer before the subflow is fully established.
 * MPTCP uses the delack timer to do 3rd ack retransmissions
@@ -709,7 +695,7 @@ static bool check_fully_established(struct mptcp_sock *msk, struct sock *sk,
	if (TCP_SKB_CB(skb)->seq != subflow->ssn_offset + 1)
		return subflow->mp_capable;

	if (mp_opt->use_ack) {
	if (mp_opt->dss && mp_opt->use_ack) {
		/* subflows are fully established as soon as we get any
		 * additional ack.
		 */
@@ -717,8 +703,6 @@ static bool check_fully_established(struct mptcp_sock *msk, struct sock *sk,
		goto fully_established;
	}

	WARN_ON_ONCE(subflow->can_ack);

	/* If the first established packet does not contain MP_CAPABLE + data
	 * then fallback to TCP
	 */
@@ -728,6 +712,8 @@ static bool check_fully_established(struct mptcp_sock *msk, struct sock *sk,
		return false;
	}

	if (unlikely(!READ_ONCE(msk->pm.server_side)))
		pr_warn_once("bogus mpc option on established client sk");
	subflow->fully_established = 1;
	subflow->remote_key = mp_opt->sndr_key;
	subflow->can_ack = 1;
@@ -819,41 +805,41 @@ void mptcp_incoming_options(struct sock *sk, struct sk_buff *skb,
{
	struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
	struct mptcp_sock *msk = mptcp_sk(subflow->conn);
	struct mptcp_options_received *mp_opt;
	struct mptcp_options_received mp_opt;
	struct mptcp_ext *mpext;

	mp_opt = &opt_rx->mptcp;
	if (!check_fully_established(msk, sk, subflow, skb, mp_opt))
	mptcp_get_options(skb, &mp_opt);
	if (!check_fully_established(msk, sk, subflow, skb, &mp_opt))
		return;

	if (mp_opt->add_addr && add_addr_hmac_valid(msk, mp_opt)) {
	if (mp_opt.add_addr && add_addr_hmac_valid(msk, &mp_opt)) {
		struct mptcp_addr_info addr;

		addr.port = htons(mp_opt->port);
		addr.id = mp_opt->addr_id;
		if (mp_opt->family == MPTCP_ADDR_IPVERSION_4) {
		addr.port = htons(mp_opt.port);
		addr.id = mp_opt.addr_id;
		if (mp_opt.family == MPTCP_ADDR_IPVERSION_4) {
			addr.family = AF_INET;
			addr.addr = mp_opt->addr;
			addr.addr = mp_opt.addr;
		}
#if IS_ENABLED(CONFIG_MPTCP_IPV6)
		else if (mp_opt->family == MPTCP_ADDR_IPVERSION_6) {
		else if (mp_opt.family == MPTCP_ADDR_IPVERSION_6) {
			addr.family = AF_INET6;
			addr.addr6 = mp_opt->addr6;
			addr.addr6 = mp_opt.addr6;
		}
#endif
		if (!mp_opt->echo)
		if (!mp_opt.echo)
			mptcp_pm_add_addr_received(msk, &addr);
		mp_opt->add_addr = 0;
		mp_opt.add_addr = 0;
	}

	if (!mp_opt->dss)
	if (!mp_opt.dss)
		return;

	/* we can't wait for recvmsg() to update the ack_seq, otherwise
	 * monodirectional flows will stuck
	 */
	if (mp_opt->use_ack)
		update_una(msk, mp_opt);
	if (mp_opt.use_ack)
		update_una(msk, &mp_opt);

	mpext = skb_ext_add(skb, SKB_EXT_MPTCP);
	if (!mpext)
@@ -861,8 +847,8 @@ void mptcp_incoming_options(struct sock *sk, struct sk_buff *skb,

	memset(mpext, 0, sizeof(*mpext));

	if (mp_opt->use_map) {
		if (mp_opt->mpc_map) {
	if (mp_opt.use_map) {
		if (mp_opt.mpc_map) {
			/* this is an MP_CAPABLE carrying MPTCP data
			 * we know this map the first chunk of data
			 */
@@ -872,13 +858,14 @@ void mptcp_incoming_options(struct sock *sk, struct sk_buff *skb,
			mpext->subflow_seq = 1;
			mpext->dsn64 = 1;
			mpext->mpc_map = 1;
			mpext->data_fin = 0;
		} else {
			mpext->data_seq = mp_opt->data_seq;
			mpext->subflow_seq = mp_opt->subflow_seq;
			mpext->dsn64 = mp_opt->dsn64;
			mpext->data_fin = mp_opt->data_fin;
			mpext->data_seq = mp_opt.data_seq;
			mpext->subflow_seq = mp_opt.subflow_seq;
			mpext->dsn64 = mp_opt.dsn64;
			mpext->data_fin = mp_opt.data_fin;
		}
		mpext->data_len = mp_opt->data_len;
		mpext->data_len = mp_opt.data_len;
		mpext->use_map = 1;
	}
}
+3 −3
Original line number Diff line number Diff line
@@ -1334,7 +1334,7 @@ static struct ipv6_pinfo *mptcp_inet6_sk(const struct sock *sk)
#endif

struct sock *mptcp_sk_clone(const struct sock *sk,
			    const struct tcp_options_received *opt_rx,
			    const struct mptcp_options_received *mp_opt,
			    struct request_sock *req)
{
	struct mptcp_subflow_request_sock *subflow_req = mptcp_subflow_rsk(req);
@@ -1373,9 +1373,9 @@ struct sock *mptcp_sk_clone(const struct sock *sk,

	msk->write_seq = subflow_req->idsn + 1;
	atomic64_set(&msk->snd_una, msk->write_seq);
	if (opt_rx->mptcp.mp_capable) {
	if (mp_opt->mp_capable) {
		msk->can_ack = true;
		msk->remote_key = opt_rx->mptcp.sndr_key;
		msk->remote_key = mp_opt->sndr_key;
		mptcp_crypto_key_sha(msk->remote_key, NULL, &ack_seq);
		ack_seq++;
		msk->ack_seq = ack_seq;
Loading