Commit 9f120e76 authored by David S. Miller's avatar David S. Miller
Browse files

Merge branch 'mptcp-prereq'

Mat Martineau says:

====================
Multipath TCP: Prerequisites

v6 -> v7: Rename/move ULP clone helper to make inline-friendly (patch 5)

v5 -> v6: Fix BPF accessors for sk_type and sk_protocol (patch 2), fix
the width of an __unused bitfield (patch 6), and add some commit message
and comment text (patches 5 & 7).

v4 -> v5: Cover letter subject fix. No changes to commits.

v3 -> v4: Update coalesce/collapse of incoming MPTCP skbs (patch 7)

v2 -> v3: Ensure sk_type alignment in struct sock (patch 2)

v1 -> v2: sk_pacing_shift left as a regular struct member (patch 2), and
modified SACK space check based on recent -net fix (patch 9).

The MPTCP upstreaming community has been collaborating on an
upstreamable MPTCP implementation that complies with RFC 8684. A minimal
set of features to comply with the specification involves a sizeable set
of code changes, so David requested that we split this work in to
multiple, smaller patch sets to build up MPTCP infrastructure.

The minimal MPTCP feature set we are proposing for review in the v5.6
timeframe begins with these three parts:

Part 1 (this patch set): MPTCP prerequisites. Introduce some MPTCP
definitions, additional ULP and skb extension features, TCP option space
checking, and a few exported symbols.

Part 2: Single subflow implementation and self tests.

Part 3: Switch from MPTCP v0 (RFC 6824) to MPTCP v1 (new RFC 8684,
publication expected in the next few days).

Additional patches for multiple subflow support, path management, active
backup, and other features are in the pipeline for submission after
making progress with the above reviews.

Clone/fetch:
https://github.com/multipath-tcp/mptcp_net-next.git (tag: netdev-v7-part1)

Browse:
https://github.com/multipath-tcp/mptcp_net-next/tree/netdev-v7-part1

Thank you for your review. You can find us at mptcp@lists.01.org and
https://is.gd/mptcp_upstream


====================

Signed-off-by: default avatarDavid S. Miller <davem@davemloft.net>
parents 6b3acfc3 8b69a803
Loading
Loading
Loading
Loading
+10 −0
Original line number Diff line number Diff line
@@ -11573,6 +11573,16 @@ F: net/ipv6/calipso.c
F:	net/netfilter/xt_CONNSECMARK.c
F:	net/netfilter/xt_SECMARK.c
NETWORKING [MPTCP]
M:	Mat Martineau <mathew.j.martineau@linux.intel.com>
M:	Matthieu Baerts <matthieu.baerts@tessares.net>
L:	netdev@vger.kernel.org
L:	mptcp@lists.01.org
W:	https://github.com/multipath-tcp/mptcp_net-next/wiki
B:	https://github.com/multipath-tcp/mptcp_net-next/issues
S:	Maintained
F:	include/net/mptcp.h
NETWORKING [TCP]
M:	Eric Dumazet <edumazet@google.com>
L:	netdev@vger.kernel.org
+6 −0
Original line number Diff line number Diff line
@@ -4096,6 +4096,9 @@ enum skb_ext_id {
#endif
#if IS_ENABLED(CONFIG_NET_TC_SKB_EXT)
	TC_SKB_EXT,
#endif
#if IS_ENABLED(CONFIG_MPTCP)
	SKB_EXT_MPTCP,
#endif
	SKB_EXT_NUM, /* must be last */
};
@@ -4117,6 +4120,9 @@ struct skb_ext {
	char data[0] __aligned(8);
};

struct skb_ext *__skb_ext_alloc(void);
void *__skb_ext_set(struct sk_buff *skb, enum skb_ext_id id,
		    struct skb_ext *ext);
void *skb_ext_add(struct sk_buff *skb, enum skb_ext_id id);
void __skb_ext_del(struct sk_buff *skb, enum skb_ext_id id);
void __skb_ext_put(struct skb_ext *ext);

include/net/mptcp.h

0 → 100644
+85 −0
Original line number Diff line number Diff line
/* SPDX-License-Identifier: GPL-2.0 */
/*
 * Multipath TCP
 *
 * Copyright (c) 2017 - 2019, Intel Corporation.
 */

#ifndef __NET_MPTCP_H
#define __NET_MPTCP_H

#include <linux/skbuff.h>
#include <linux/types.h>

/* MPTCP sk_buff extension data */
struct mptcp_ext {
	u64		data_ack;
	u64		data_seq;
	u32		subflow_seq;
	u16		data_len;
	u8		use_map:1,
			dsn64:1,
			data_fin:1,
			use_ack:1,
			ack64:1,
			__unused:3;
	/* one byte hole */
};

#ifdef CONFIG_MPTCP

/* move the skb extension owership, with the assumption that 'to' is
 * newly allocated
 */
static inline void mptcp_skb_ext_move(struct sk_buff *to,
				      struct sk_buff *from)
{
	if (!skb_ext_exist(from, SKB_EXT_MPTCP))
		return;

	if (WARN_ON_ONCE(to->active_extensions))
		skb_ext_put(to);

	to->active_extensions = from->active_extensions;
	to->extensions = from->extensions;
	from->active_extensions = 0;
}

static inline bool mptcp_ext_matches(const struct mptcp_ext *to_ext,
				     const struct mptcp_ext *from_ext)
{
	/* MPTCP always clears the ext when adding it to the skb, so
	 * holes do not bother us here
	 */
	return !from_ext ||
	       (to_ext && from_ext &&
	        !memcmp(from_ext, to_ext, sizeof(struct mptcp_ext)));
}

/* check if skbs can be collapsed.
 * MPTCP collapse is allowed if neither @to or @from carry an mptcp data
 * mapping, or if the extension of @to is the same as @from.
 * Collapsing is not possible if @to lacks an extension, but @from carries one.
 */
static inline bool mptcp_skb_can_collapse(const struct sk_buff *to,
					  const struct sk_buff *from)
{
	return mptcp_ext_matches(skb_ext_find(to, SKB_EXT_MPTCP),
				 skb_ext_find(from, SKB_EXT_MPTCP));
}

#else

static inline void mptcp_skb_ext_move(struct sk_buff *to,
				      const struct sk_buff *from)
{
}

static inline bool mptcp_skb_can_collapse(const struct sk_buff *to,
					  const struct sk_buff *from)
{
	return true;
}

#endif /* CONFIG_MPTCP */
#endif /* __NET_MPTCP_H */
+6 −21
Original line number Diff line number Diff line
@@ -436,31 +436,15 @@ struct sock {
	 * Because of non atomicity rules, all
	 * changes are protected by socket lock.
	 */
	unsigned int		__sk_flags_offset[0];
#ifdef __BIG_ENDIAN_BITFIELD
#define SK_FL_PROTO_SHIFT  16
#define SK_FL_PROTO_MASK   0x00ff0000

#define SK_FL_TYPE_SHIFT   0
#define SK_FL_TYPE_MASK    0x0000ffff
#else
#define SK_FL_PROTO_SHIFT  8
#define SK_FL_PROTO_MASK   0x0000ff00

#define SK_FL_TYPE_SHIFT   16
#define SK_FL_TYPE_MASK    0xffff0000
#endif

	unsigned int		sk_padding : 1,
	u8			sk_padding : 1,
				sk_kern_sock : 1,
				sk_no_check_tx : 1,
				sk_no_check_rx : 1,
				sk_userlocks : 4,
				sk_protocol  : 8,
				sk_type      : 16;
#define SK_PROTOCOL_MAX U8_MAX
	u16			sk_gso_max_segs;
				sk_userlocks : 4;
	u8			sk_pacing_shift;
	u16			sk_type;
	u16			sk_protocol;
	u16			sk_gso_max_segs;
	unsigned long	        sk_lingertime;
	struct proto		*sk_prot_creator;
	rwlock_t		sk_callback_lock;
@@ -1480,6 +1464,7 @@ static inline void sk_wmem_free_skb(struct sock *sk, struct sk_buff *skb)
	sk_mem_uncharge(sk, skb->truesize);
	if (static_branch_unlikely(&tcp_tx_skb_cache_key) &&
	    !sk->sk_tx_skb_cache && !skb_cloned(skb)) {
		skb_ext_reset(skb);
		skb_zcopy_clear(skb, true);
		sk->sk_tx_skb_cache = skb;
		return;
+20 −0
Original line number Diff line number Diff line
@@ -39,6 +39,7 @@
#include <net/tcp_states.h>
#include <net/inet_ecn.h>
#include <net/dst.h>
#include <net/mptcp.h>

#include <linux/seq_file.h>
#include <linux/memcontrol.h>
@@ -182,6 +183,7 @@ void tcp_time_wait(struct sock *sk, int state, int timeo);
#define TCPOPT_SACK             5       /* SACK Block */
#define TCPOPT_TIMESTAMP	8	/* Better RTT estimations/PAWS */
#define TCPOPT_MD5SIG		19	/* MD5 Signature (RFC2385) */
#define TCPOPT_MPTCP		30	/* Multipath TCP (RFC6824) */
#define TCPOPT_FASTOPEN		34	/* Fast open (RFC7413) */
#define TCPOPT_EXP		254	/* Experimental */
/* Magic number to be after the option value for sharing TCP
@@ -328,6 +330,9 @@ int tcp_sendpage_locked(struct sock *sk, struct page *page, int offset,
			size_t size, int flags);
ssize_t do_tcp_sendpages(struct sock *sk, struct page *page, int offset,
		 size_t size, int flags);
int tcp_send_mss(struct sock *sk, int *size_goal, int flags);
void tcp_push(struct sock *sk, int flags, int mss_now, int nonagle,
	      int size_goal);
void tcp_release_cb(struct sock *sk);
void tcp_wfree(struct sk_buff *skb);
void tcp_write_timer_handler(struct sock *sk);
@@ -977,6 +982,13 @@ static inline bool tcp_skb_can_collapse_to(const struct sk_buff *skb)
	return likely(!TCP_SKB_CB(skb)->eor);
}

static inline bool tcp_skb_can_collapse(const struct sk_buff *to,
					const struct sk_buff *from)
{
	return likely(tcp_skb_can_collapse_to(to) &&
		      mptcp_skb_can_collapse(to, from));
}

/* Events passed to congestion control interface */
enum tcp_ca_event {
	CA_EVENT_TX_START,	/* first transmit when no packets in flight */
@@ -2002,6 +2014,11 @@ struct tcp_request_sock_ops {
			   enum tcp_synack_type synack_type);
};

extern const struct tcp_request_sock_ops tcp_request_sock_ipv4_ops;
#if IS_ENABLED(CONFIG_IPV6)
extern const struct tcp_request_sock_ops tcp_request_sock_ipv6_ops;
#endif

#ifdef CONFIG_SYN_COOKIES
static inline __u32 cookie_init_sequence(const struct tcp_request_sock_ops *ops,
					 const struct sock *sk, struct sk_buff *skb,
@@ -2153,6 +2170,9 @@ struct tcp_ulp_ops {
	/* diagnostic */
	int (*get_info)(const struct sock *sk, struct sk_buff *skb);
	size_t (*get_info_size)(const struct sock *sk);
	/* clone ulp */
	void (*clone)(const struct request_sock *req, struct sock *newsk,
		      const gfp_t priority);

	char		name[TCP_ULP_NAME_MAX];
	struct module	*owner;
Loading