Commit f296234c authored by Peter Krystad, committed by David S. Miller
Browse files

mptcp: Add handling of incoming MP_JOIN requests



Process the MP_JOIN option in a SYN packet with the same flow
as MP_CAPABLE but when the third ACK is received add the
subflow to the MPTCP socket subflow list instead of adding it to
the TCP socket accept queue.

The subflow is added at the end of the subflow list so it will not
interfere with the existing subflows operation and no data is
expected to be transmitted on it.

Co-developed-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Florian Westphal <fw@strlen.de>
Co-developed-by: Paolo Abeni <pabeni@redhat.com>
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
Signed-off-by: Peter Krystad <peter.krystad@linux.intel.com>
Signed-off-by: Mat Martineau <mathew.j.martineau@linux.intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
parent 1b1c7a0e
Loading
Loading
Loading
Loading
+7 −1
Original line number Diff line number Diff line
@@ -92,7 +92,13 @@ struct mptcp_options_received {
		add_addr : 1,
		rm_addr : 1,
		family : 4,
		echo : 1;
		echo : 1,
		backup : 1;
	u32	token;
	u32	nonce;
	u64	thmac;
	u8	hmac[20];
	u8	join_id;
	u8	use_map:1,
		dsn64:1,
		data_fin:1,
+11 −0
Original line number Diff line number Diff line
@@ -42,6 +42,10 @@ struct mptcp_out_options {
	u8 addr_id;
	u64 ahmac;
	u8 rm_id;
	u8 join_id;
	u8 backup;
	u32 nonce;
	u64 thmac;
	struct mptcp_ext ext_copy;
#endif
};
@@ -115,6 +119,8 @@ static inline bool mptcp_skb_can_collapse(const struct sk_buff *to,
				 skb_ext_find(from, SKB_EXT_MPTCP));
}

bool mptcp_sk_is_subflow(const struct sock *sk);

#else

static inline void mptcp_init(void)
@@ -181,6 +187,11 @@ static inline bool mptcp_skb_can_collapse(const struct sk_buff *to,
	return true;
}

/* Stub used when CONFIG_MPTCP is not enabled: always reports that @sk is
 * not an MPTCP subflow, so callers need no #ifdef of their own.
 */
static inline bool mptcp_sk_is_subflow(const struct sock *sk)
{
	return false;
}

#endif /* CONFIG_MPTCP */

#if IS_ENABLED(CONFIG_MPTCP_IPV6)
+6 −0
Original line number Diff line number Diff line
@@ -774,6 +774,12 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb,
	if (!child)
		goto listen_overflow;

	if (own_req && sk_is_mptcp(child) && mptcp_sk_is_subflow(child)) {
		reqsk_queue_removed(&inet_csk(sk)->icsk_accept_queue, req);
		inet_csk_reqsk_queue_drop_and_put(sk, req);
		return child;
	}

	sock_rps_save_rxhash(child, skb);
	tcp_synack_rtt_meas(child, req);
	*req_stolen = !own_req;
+95 −12
Original line number Diff line number Diff line
@@ -96,6 +96,38 @@ void mptcp_parse_option(const struct sk_buff *skb, const unsigned char *ptr,
			 mp_opt->rcvr_key, mp_opt->data_len);
		break;

	case MPTCPOPT_MP_JOIN:
		mp_opt->mp_join = 1;
		if (opsize == TCPOLEN_MPTCP_MPJ_SYN) {
			mp_opt->backup = *ptr++ & MPTCPOPT_BACKUP;
			mp_opt->join_id = *ptr++;
			mp_opt->token = get_unaligned_be32(ptr);
			ptr += 4;
			mp_opt->nonce = get_unaligned_be32(ptr);
			ptr += 4;
			pr_debug("MP_JOIN bkup=%u, id=%u, token=%u, nonce=%u",
				 mp_opt->backup, mp_opt->join_id,
				 mp_opt->token, mp_opt->nonce);
		} else if (opsize == TCPOLEN_MPTCP_MPJ_SYNACK) {
			mp_opt->backup = *ptr++ & MPTCPOPT_BACKUP;
			mp_opt->join_id = *ptr++;
			mp_opt->thmac = get_unaligned_be64(ptr);
			ptr += 8;
			mp_opt->nonce = get_unaligned_be32(ptr);
			ptr += 4;
			pr_debug("MP_JOIN bkup=%u, id=%u, thmac=%llu, nonce=%u",
				 mp_opt->backup, mp_opt->join_id,
				 mp_opt->thmac, mp_opt->nonce);
		} else if (opsize == TCPOLEN_MPTCP_MPJ_ACK) {
			ptr += 2;
			memcpy(mp_opt->hmac, ptr, MPTCPOPT_HMAC_LEN);
			pr_debug("MP_JOIN hmac");
		} else {
			pr_warn("MP_JOIN bad option size");
			mp_opt->mp_join = 0;
		}
		break;

	case MPTCPOPT_DSS:
		pr_debug("DSS");
		ptr++;
@@ -572,37 +604,80 @@ bool mptcp_synack_options(const struct request_sock *req, unsigned int *size,
		pr_debug("subflow_req=%p, local_key=%llu",
			 subflow_req, subflow_req->local_key);
		return true;
	} else if (subflow_req->mp_join) {
		opts->suboptions = OPTION_MPTCP_MPJ_SYNACK;
		opts->backup = subflow_req->backup;
		opts->join_id = subflow_req->local_id;
		opts->thmac = subflow_req->thmac;
		opts->nonce = subflow_req->local_nonce;
		pr_debug("req=%p, bkup=%u, id=%u, thmac=%llu, nonce=%u",
			 subflow_req, opts->backup, opts->join_id,
			 opts->thmac, opts->nonce);
		*size = TCPOLEN_MPTCP_MPJ_SYNACK;
		return true;
	}
	return false;
}

static bool check_fully_established(struct mptcp_subflow_context *subflow,
static bool check_fully_established(struct mptcp_sock *msk, struct sock *sk,
				    struct mptcp_subflow_context *subflow,
				    struct sk_buff *skb,
				    struct mptcp_options_received *mp_opt)
{
	/* here we can process OoO, in-window pkts, only in-sequence 4th ack
	 * are relevant
	 * will make the subflow fully established
	 */
	if (likely(subflow->fully_established ||
		   TCP_SKB_CB(skb)->seq != subflow->ssn_offset + 1))
		return true;
	if (likely(subflow->fully_established)) {
		/* on passive sockets, check for 3rd ack retransmission
		 * note that msk is always set by subflow_syn_recv_sock()
		 * for mp_join subflows
		 */
		if (TCP_SKB_CB(skb)->seq == subflow->ssn_offset + 1 &&
		    TCP_SKB_CB(skb)->end_seq == TCP_SKB_CB(skb)->seq &&
		    subflow->mp_join && mp_opt->mp_join &&
		    READ_ONCE(msk->pm.server_side))
			tcp_send_ack(sk);
		goto fully_established;
	}

	if (mp_opt->use_ack)
	/* we should process OoO packets before the first subflow is fully
	 * established, but not expected for MP_JOIN subflows
	 */
	if (TCP_SKB_CB(skb)->seq != subflow->ssn_offset + 1)
		return subflow->mp_capable;

	if (mp_opt->use_ack) {
		/* subflows are fully established as soon as we get any
		 * additional ack.
		 */
		subflow->fully_established = 1;
		goto fully_established;
	}

	if (subflow->can_ack)
		return true;
	WARN_ON_ONCE(subflow->can_ack);

	/* If the first established packet does not contain MP_CAPABLE + data
	 * then fallback to TCP
	 */
	if (!mp_opt->mp_capable) {
		subflow->mp_capable = 0;
		tcp_sk(mptcp_subflow_tcp_sock(subflow))->is_mptcp = 0;
		tcp_sk(sk)->is_mptcp = 0;
		return false;
	}

	subflow->fully_established = 1;
	subflow->remote_key = mp_opt->sndr_key;
	subflow->can_ack = 1;

fully_established:
	if (likely(subflow->pm_notified))
		return true;

	subflow->pm_notified = 1;
	if (subflow->mp_join)
		mptcp_pm_subflow_established(msk, subflow);
	else
		mptcp_pm_fully_established(msk);
	return true;
}

@@ -641,7 +716,7 @@ void mptcp_incoming_options(struct sock *sk, struct sk_buff *skb,
	struct mptcp_ext *mpext;

	mp_opt = &opt_rx->mptcp;
	if (!check_fully_established(subflow, skb, mp_opt))
	if (!check_fully_established(msk, sk, subflow, skb, mp_opt))
		return;

	if (mp_opt->add_addr && add_addr_hmac_valid(msk, mp_opt)) {
@@ -700,8 +775,6 @@ void mptcp_incoming_options(struct sock *sk, struct sk_buff *skb,
	}

	mpext->data_fin = mp_opt->data_fin;

	mptcp_pm_fully_established(msk);
}

void mptcp_write_options(__be32 *ptr, struct mptcp_out_options *opts)
@@ -787,6 +860,16 @@ mp_capable_done:
				      0, opts->rm_id);
	}

	if (OPTION_MPTCP_MPJ_SYNACK & opts->suboptions) {
		*ptr++ = mptcp_option(MPTCPOPT_MP_JOIN,
				      TCPOLEN_MPTCP_MPJ_SYNACK,
				      opts->backup, opts->join_id);
		put_unaligned_be64(opts->thmac, ptr);
		ptr += 2;
		put_unaligned_be32(opts->nonce, ptr);
		ptr += 1;
	}

	if (opts->ext_copy.use_ack || opts->ext_copy.use_map) {
		struct mptcp_ext *mpext = &opts->ext_copy;
		u8 len = TCPOLEN_MPTCP_DSS_BASE;
+79 −17
Original line number Diff line number Diff line
@@ -104,19 +104,6 @@ set_state:
	return ssock;
}

/* Return the TCP socket of the first subflow found while walking the
 * subflows of @msk, or NULL when the walk visits no subflow at all.
 *
 * The caller must own the msk socket; this is checked through
 * sock_owned_by_me().
 */
static struct sock *mptcp_subflow_get(const struct mptcp_sock *msk)
{
	struct mptcp_subflow_context *subflow;

	sock_owned_by_me((const struct sock *)msk);

	mptcp_for_each_subflow(msk, subflow) {
		struct sock *ssk = mptcp_subflow_tcp_sock(subflow);

		/* the very first entry is good enough, stop here */
		return ssk;
	}

	return NULL;
}

static void __mptcp_move_skb(struct mptcp_sock *msk, struct sock *ssk,
			     struct sk_buff *skb,
			     unsigned int offset, size_t copy_len)
@@ -391,6 +378,43 @@ out:
	return ret;
}

/* Pick the subflow socket to be used for transmission on @msk.
 *
 * Subflows are visited in list order:
 *  - as soon as one is found without free stream memory the search is
 *    aborted and NULL is returned; if that subflow has a struct socket
 *    attached, MPTCP_SEND_SPACE is cleared on the msk and SOCK_NOSPACE
 *    is set on that socket before returning;
 *  - the first subflow not flagged as backup is returned immediately;
 *  - otherwise the first backup subflow encountered is returned, or NULL
 *    if no subflow was visited.
 *
 * The caller must own the msk socket; this is checked through
 * sock_owned_by_me().
 */
static struct sock *mptcp_subflow_get_send(struct mptcp_sock *msk)
{
	struct mptcp_subflow_context *subflow;
	struct sock *first_backup = NULL;

	sock_owned_by_me((const struct sock *)msk);

	mptcp_for_each_subflow(msk, subflow) {
		struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
		struct socket *ssock;

		if (sk_stream_memory_free(ssk)) {
			/* a regular subflow with room wins right away */
			if (!subflow->backup)
				return ssk;

			/* only the first backup subflow is remembered */
			if (!first_backup)
				first_backup = ssk;
			continue;
		}

		/* no room on this subflow: give up on the whole search */
		ssock = ssk->sk_socket;
		if (ssock) {
			clear_bit(MPTCP_SEND_SPACE, &msk->flags);
			smp_mb__after_atomic();

			/* enables sk->write_space() callbacks */
			set_bit(SOCK_NOSPACE, &ssock->flags);
		}
		return NULL;
	}

	return first_backup;
}

static void ssk_check_wmem(struct mptcp_sock *msk, struct sock *ssk)
{
	struct socket *sock;
@@ -438,10 +462,17 @@ fallback:
		return ret >= 0 ? ret + copied : (copied ? copied : ret);
	}

	ssk = mptcp_subflow_get(msk);
	if (!ssk) {
		release_sock(sk);
		return -ENOTCONN;
	ssk = mptcp_subflow_get_send(msk);
	while (!sk_stream_memory_free(sk) || !ssk) {
		ret = sk_stream_wait_memory(sk, &timeo);
		if (ret)
			goto out;

		ssk = mptcp_subflow_get_send(msk);
		if (list_empty(&msk->conn_list)) {
			ret = -ENOTCONN;
			goto out;
		}
	}

	pr_debug("conn_list->subflow=%p", ssk);
@@ -1070,6 +1101,37 @@ static void mptcp_sock_graft(struct sock *sk, struct socket *parent)
	write_unlock_bh(&sk->sk_callback_lock);
}

/* Complete the join of subflow socket @sk to its parent MPTCP socket.
 *
 * Returns:
 *  - false if the parent MPTCP socket is not in TCP_ESTABLISHED state;
 *  - true, without further action, if the parent's path manager is not
 *    marked server_side;
 *  - otherwise the result of mptcp_pm_allow_new_subflow(), after grafting
 *    @sk onto the parent's struct socket when the parent has one and @sk
 *    has none yet.
 */
bool mptcp_finish_join(struct sock *sk)
{
	struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
	struct mptcp_sock *msk = mptcp_sk(subflow->conn);
	struct sock *parent = (void *)msk;
	struct socket *parent_sock;

	pr_debug("msk=%p, subflow=%p", msk, subflow);

	/* mptcp socket already closing? */
	if (inet_sk_state_load(parent) != TCP_ESTABLISHED)
		return false;

	/* pm.server_side is read with READ_ONCE() by the other reader of this
	 * field (check_fully_established()); use the same annotation here so
	 * that every lockless access to the field is marked consistently.
	 */
	if (!READ_ONCE(msk->pm.server_side))
		return true;

	/* passive connection, attach to msk socket */
	parent_sock = READ_ONCE(parent->sk_socket);
	if (parent_sock && !sk->sk_socket)
		mptcp_sock_graft(sk, parent_sock);

	return mptcp_pm_allow_new_subflow(msk);
}

bool mptcp_sk_is_subflow(const struct sock *sk)
{
	struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);

	return subflow->mp_join == 1;
}

static bool mptcp_memory_free(const struct sock *sk, int wake)
{
	struct mptcp_sock *msk = mptcp_sk(sk);
Loading