Commit a79c838f authored by David S. Miller's avatar David S. Miller
Browse files

Merge branch 'mptcp-simplify-mptcp_accept'



Paolo Abeni says:

====================
mptcp: simplify mptcp_accept()

Currently we allocate the MPTCP master socket at accept time.

The above makes mptcp_accept() quite complex, and requires checks is several
places for NULL MPTCP master socket.

These series simplify the MPTCP accept implementation, moving the master socket
allocation at syn-ack time, so that we drop unneeded checks with the follow-up
patch.

v1 -> v2:
- rebased on top of 2398e399
====================

Signed-off-by: default avatarDavid S. Miller <davem@davemloft.net>
parents 7a1d0e61 dc093db5
Loading
Loading
Loading
Loading
+2 −12
Original line number Diff line number Diff line
@@ -336,7 +336,6 @@ static bool mptcp_established_options_dss(struct sock *sk, struct sk_buff *skb,
	unsigned int ack_size;
	bool ret = false;
	bool can_ack;
	u64 ack_seq;
	u8 tcp_fin;

	if (skb) {
@@ -368,16 +367,7 @@ static bool mptcp_established_options_dss(struct sock *sk, struct sk_buff *skb,
	can_ack = true;
	opts->ext_copy.use_ack = 0;
	msk = mptcp_sk(subflow->conn);
	if (likely(msk && READ_ONCE(msk->can_ack))) {
		ack_seq = msk->ack_seq;
	} else if (subflow->can_ack) {
		mptcp_crypto_key_sha(subflow->remote_key, NULL, &ack_seq);
		ack_seq++;
	} else {
		can_ack = false;
	}

	if (unlikely(!can_ack)) {
	if (!READ_ONCE(msk->can_ack)) {
		*size = ALIGN(dss_size, 4);
		return ret;
	}
@@ -390,7 +380,7 @@ static bool mptcp_established_options_dss(struct sock *sk, struct sk_buff *skb,

	dss_size += ack_size;

	opts->ext_copy.data_ack = ack_seq;
	opts->ext_copy.data_ack = msk->ack_seq;
	opts->ext_copy.ack64 = 1;
	opts->ext_copy.use_ack = 1;

+45 −38
Original line number Diff line number Diff line
@@ -820,9 +820,12 @@ static struct ipv6_pinfo *mptcp_inet6_sk(const struct sock *sk)
}
#endif

static struct sock *mptcp_sk_clone_lock(const struct sock *sk)
struct sock *mptcp_sk_clone(const struct sock *sk, struct request_sock *req)
{
	struct mptcp_subflow_request_sock *subflow_req = mptcp_subflow_rsk(req);
	struct sock *nsk = sk_clone_lock(sk, GFP_ATOMIC);
	struct mptcp_sock *msk;
	u64 ack_seq;

	if (!nsk)
		return NULL;
@@ -832,6 +835,36 @@ static struct sock *mptcp_sk_clone_lock(const struct sock *sk)
		inet_sk(nsk)->pinet6 = mptcp_inet6_sk(nsk);
#endif

	__mptcp_init_sock(nsk);

	msk = mptcp_sk(nsk);
	msk->local_key = subflow_req->local_key;
	msk->token = subflow_req->token;
	msk->subflow = NULL;

	if (unlikely(mptcp_token_new_accept(subflow_req->token, nsk))) {
		bh_unlock_sock(nsk);

		/* we can't call into mptcp_close() here - possible BH context
		 * free the sock directly
		 */
		nsk->sk_prot->destroy(nsk);
		sk_free(nsk);
		return NULL;
	}

	msk->write_seq = subflow_req->idsn + 1;
	if (subflow_req->remote_key_valid) {
		msk->can_ack = true;
		msk->remote_key = subflow_req->remote_key;
		mptcp_crypto_key_sha(msk->remote_key, NULL, &ack_seq);
		ack_seq++;
		msk->ack_seq = ack_seq;
	}
	bh_unlock_sock(nsk);

	/* keep a single reference */
	__sock_put(nsk);
	return nsk;
}

@@ -859,40 +892,26 @@ static struct sock *mptcp_accept(struct sock *sk, int flags, int *err,
		struct mptcp_subflow_context *subflow;
		struct sock *new_mptcp_sock;
		struct sock *ssk = newsk;
		u64 ack_seq;

		subflow = mptcp_subflow_ctx(newsk);
		lock_sock(sk);
		new_mptcp_sock = subflow->conn;

		local_bh_disable();
		new_mptcp_sock = mptcp_sk_clone_lock(sk);
		if (!new_mptcp_sock) {
			*err = -ENOBUFS;
			local_bh_enable();
			release_sock(sk);
			mptcp_subflow_shutdown(newsk, SHUT_RDWR + 1, 0, 0);
			tcp_close(newsk, 0);
			return NULL;
		/* is_mptcp should be false if subflow->conn is missing, see
		 * subflow_syn_recv_sock()
		 */
		if (WARN_ON_ONCE(!new_mptcp_sock)) {
			tcp_sk(newsk)->is_mptcp = 0;
			return newsk;
		}

		__mptcp_init_sock(new_mptcp_sock);
		/* acquire the 2nd reference for the owning socket */
		sock_hold(new_mptcp_sock);

		local_bh_disable();
		bh_lock_sock(new_mptcp_sock);
		msk = mptcp_sk(new_mptcp_sock);
		msk->local_key = subflow->local_key;
		msk->token = subflow->token;
		msk->subflow = NULL;
		msk->first = newsk;

		mptcp_token_update_accept(newsk, new_mptcp_sock);

		msk->write_seq = subflow->idsn + 1;
		if (subflow->can_ack) {
			msk->can_ack = true;
			msk->remote_key = subflow->remote_key;
			mptcp_crypto_key_sha(msk->remote_key, NULL, &ack_seq);
			ack_seq++;
			msk->ack_seq = ack_seq;
		}
		newsk = new_mptcp_sock;
		mptcp_copy_inaddrs(newsk, ssk);
		list_add(&subflow->node, &msk->conn_list);
@@ -903,18 +922,6 @@ static struct sock *mptcp_accept(struct sock *sk, int flags, int *err,
		inet_sk_state_store(new_mptcp_sock, TCP_SYN_RECV);
		bh_unlock_sock(new_mptcp_sock);
		local_bh_enable();
		release_sock(sk);

		/* the subflow can already receive packet, avoid racing with
		 * the receive path and process the pending ones
		 */
		lock_sock(ssk);
		subflow->rel_write_seq = 1;
		subflow->tcp_sock = ssk;
		subflow->conn = new_mptcp_sock;
		if (unlikely(!skb_queue_empty(&ssk->sk_receive_queue)))
			mptcp_subflow_data_available(ssk);
		release_sock(ssk);
	}

	return newsk;
+2 −2
Original line number Diff line number Diff line
@@ -193,6 +193,7 @@ void mptcp_proto_init(void);
int mptcp_proto_v6_init(void);
#endif

struct sock *mptcp_sk_clone(const struct sock *sk, struct request_sock *req);
void mptcp_get_options(const struct sk_buff *skb,
		       struct tcp_options_received *opt_rx);

@@ -202,8 +203,7 @@ void mptcp_data_ready(struct sock *sk, struct sock *ssk);
int mptcp_token_new_request(struct request_sock *req);
void mptcp_token_destroy_request(u32 token);
int mptcp_token_new_connect(struct sock *sk);
int mptcp_token_new_accept(u32 token);
void mptcp_token_update_accept(struct sock *sk, struct sock *conn);
int mptcp_token_new_accept(u32 token, struct sock *conn);
void mptcp_token_destroy(u32 token);

void mptcp_crypto_key_sha(u64 key, u32 *token, u64 *idsn);
+28 −22
Original line number Diff line number Diff line
@@ -112,7 +112,7 @@ static void subflow_finish_connect(struct sock *sk, const struct sk_buff *skb)

	subflow->icsk_af_ops->sk_rx_dst_set(sk, skb);

	if (subflow->conn && !subflow->conn_finished) {
	if (!subflow->conn_finished) {
		pr_debug("subflow=%p, remote_key=%llu", mptcp_subflow_ctx(sk),
			 subflow->remote_key);
		mptcp_finish_connect(sk);
@@ -182,6 +182,7 @@ static struct sock *subflow_syn_recv_sock(const struct sock *sk,
	struct mptcp_subflow_context *listener = mptcp_subflow_ctx(sk);
	struct mptcp_subflow_request_sock *subflow_req;
	struct tcp_options_received opt_rx;
	struct sock *new_msk = NULL;
	struct sock *child;

	pr_debug("listener=%p, req=%p, conn=%p", listener, req, listener->conn);
@@ -197,7 +198,7 @@ static struct sock *subflow_syn_recv_sock(const struct sock *sk,
			 * out-of-order pkt, which will not carry the MP_CAPABLE
			 * opt even on mptcp enabled paths
			 */
			goto create_child;
			goto create_msk;
		}

		opt_rx.mptcp.mp_capable = 0;
@@ -207,7 +208,13 @@ static struct sock *subflow_syn_recv_sock(const struct sock *sk,
			subflow_req->remote_key_valid = 1;
		} else {
			subflow_req->mp_capable = 0;
			goto create_child;
		}

create_msk:
		new_msk = mptcp_sk_clone(listener->conn, req);
		if (!new_msk)
			subflow_req->mp_capable = 0;
	}

create_child:
@@ -221,22 +228,22 @@ create_child:
		 * handshake
		 */
		if (!ctx)
			return child;
			goto out;

		if (ctx->mp_capable) {
			if (mptcp_token_new_accept(ctx->token))
				goto close_child;
			/* new mpc subflow takes ownership of the newly
			 * created mptcp socket
			 */
			ctx->conn = new_msk;
			new_msk = NULL;
		}
	}

out:
	/* dispose of the left over mptcp master, if any */
	if (unlikely(new_msk))
		sock_put(new_msk);
	return child;

close_child:
	pr_debug("closing child socket");
	tcp_send_active_reset(child, GFP_ATOMIC);
	inet_csk_prepare_forced_close(child);
	tcp_done(child);
	return NULL;
}

static struct inet_connection_sock_af_ops subflow_specific;
@@ -432,9 +439,6 @@ static bool subflow_check_data_avail(struct sock *ssk)
	if (subflow->data_avail)
		return true;

	if (!subflow->conn)
		return false;

	msk = mptcp_sk(subflow->conn);
	for (;;) {
		u32 map_remaining;
@@ -554,10 +558,9 @@ static void subflow_data_ready(struct sock *sk)
	struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
	struct sock *parent = subflow->conn;

	if (!parent || !subflow->mp_capable) {
	if (!subflow->mp_capable) {
		subflow->tcp_data_ready(sk);

		if (parent)
		parent->sk_data_ready(parent);
		return;
	}
@@ -572,7 +575,7 @@ static void subflow_write_space(struct sock *sk)
	struct sock *parent = subflow->conn;

	sk_stream_write_space(sk);
	if (parent && sk_stream_is_writeable(sk)) {
	if (sk_stream_is_writeable(sk)) {
		set_bit(MPTCP_SEND_SPACE, &mptcp_sk(parent)->flags);
		smp_mb__after_atomic();
		/* set SEND_SPACE before sk_stream_write_space clears NOSPACE */
@@ -687,7 +690,7 @@ static bool subflow_is_done(const struct sock *sk)
static void subflow_state_change(struct sock *sk)
{
	struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
	struct sock *parent = READ_ONCE(subflow->conn);
	struct sock *parent = subflow->conn;

	__subflow_state_change(sk);

@@ -695,10 +698,10 @@ static void subflow_state_change(struct sock *sk)
	 * a fin packet carrying a DSS can be unnoticed if we don't trigger
	 * the data available machinery here.
	 */
	if (parent && subflow->mp_capable && mptcp_subflow_data_available(sk))
	if (subflow->mp_capable && mptcp_subflow_data_available(sk))
		mptcp_data_ready(parent, sk);

	if (parent && !(parent->sk_shutdown & RCV_SHUTDOWN) &&
	if (!(parent->sk_shutdown & RCV_SHUTDOWN) &&
	    !subflow->rx_eof && subflow_is_done(sk)) {
		subflow->rx_eof = 1;
		parent->sk_shutdown |= RCV_SHUTDOWN;
@@ -793,6 +796,9 @@ static void subflow_ulp_clone(const struct request_sock *req,
	new_ctx->tcp_data_ready = old_ctx->tcp_data_ready;
	new_ctx->tcp_state_change = old_ctx->tcp_state_change;
	new_ctx->tcp_write_space = old_ctx->tcp_write_space;
	new_ctx->rel_write_seq = 1;
	new_ctx->tcp_sock = newsk;

	new_ctx->mp_capable = 1;
	new_ctx->fourth_ack = subflow_req->remote_key_valid;
	new_ctx->can_ack = subflow_req->remote_key_valid;
+2 −29
Original line number Diff line number Diff line
@@ -128,45 +128,18 @@ int mptcp_token_new_connect(struct sock *sk)
 *
 * Called when a SYN packet creates a new logical connection, i.e.
 * is not a join request.
 *
 * We don't have an mptcp socket yet at that point.
 * This is paired with mptcp_token_update_accept, called on accept().
 */
int mptcp_token_new_accept(u32 token)
int mptcp_token_new_accept(u32 token, struct sock *conn)
{
	int err;

	spin_lock_bh(&token_tree_lock);
	err = radix_tree_insert(&token_tree, token, &token_used);
	err = radix_tree_insert(&token_tree, token, conn);
	spin_unlock_bh(&token_tree_lock);

	return err;
}

/**
 * mptcp_token_update_accept - update token to map to mptcp socket
 * @conn: the new struct mptcp_sock
 * @sk: the initial subflow for this mptcp socket
 *
 * Called when the first mptcp socket is created on accept to
 * refresh the dummy mapping (done to reserve the token) with
 * the mptcp_socket structure that wasn't allocated before.
 */
void mptcp_token_update_accept(struct sock *sk, struct sock *conn)
{
	struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
	void __rcu **slot;

	spin_lock_bh(&token_tree_lock);
	slot = radix_tree_lookup_slot(&token_tree, subflow->token);
	WARN_ON_ONCE(!slot);
	if (slot) {
		WARN_ON_ONCE(rcu_access_pointer(*slot) != &token_used);
		radix_tree_replace_slot(&token_tree, slot, conn);
	}
	spin_unlock_bh(&token_tree_lock);
}

/**
 * mptcp_token_destroy_request - remove mptcp connection/token
 * @token - token of mptcp connection to remove