Commit 2c47a65b authored by David S. Miller
Browse files

Merge branch 'tcp-implement-SACK-compression'



Eric Dumazet says:

====================
tcp: implement SACK compression

When TCP receives an out-of-order packet, it immediately sends
a SACK packet, generating network load but also forcing the
receiver to send 1-MSS pathological packets, increasing its
RTX queue length/depth, and thus processing time.

Wifi networks suffer from this aggressive behavior, but generally
speaking, all these SACK packets add fuel to the fire when networks
are under congestion.

This patch series adds SACK compression, but the infrastructure
could be leveraged to also compress ACK in the future.

v2: Addressed Neal feedback.
    Added two sysctls to allow fine tuning, or even disabling the feature.

v3: take rtt = min(srtt, rcv_rtt) as Yuchung suggested, because rcv_rtt
    can be over estimated for RPC (or sender limited)
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
parents 64a2658b 9c21d2fc
Loading
Loading
Loading
Loading
+13 −0
Original line number Diff line number Diff line
@@ -525,6 +525,19 @@ tcp_rmem - vector of 3 INTEGERs: min, default, max
tcp_sack - BOOLEAN
	Enable select acknowledgments (SACKS).

tcp_comp_sack_delay_ns - LONG INTEGER
	TCP tries to reduce the number of SACKs sent, using a timer
	based on 5% of SRTT, capped by this sysctl, in nanoseconds.
	The default is 1ms, based on TSO autosizing period.

	Default : 1,000,000 ns (1 ms)

tcp_comp_sack_nr - INTEGER
	Max number of SACKs that can be compressed.
	Using 0 disables SACK compression.

	Default : 44

tcp_slow_start_after_idle - BOOLEAN
	If set, provide RFC2861 behavior and time out the congestion
	window after an idle period.  An idle period is defined at
+2 −0
Original line number Diff line number Diff line
@@ -218,6 +218,7 @@ struct tcp_sock {
		   reord:1;	 /* reordering detected */
	} rack;
	u16	advmss;		/* Advertised MSS			*/
	u8	compressed_ack;
	u32	chrono_start;	/* Start time in jiffies of a TCP chrono */
	u32	chrono_stat[3];	/* Time in jiffies for chrono_stat stats */
	u8	chrono_type:2,	/* current chronograph type */
@@ -297,6 +298,7 @@ struct tcp_sock {
	u32	sacked_out;	/* SACK'd packets			*/

	struct hrtimer	pacing_timer;
	struct hrtimer	compressed_ack_timer;

	/* from STCP, retrans queue hinting */
	struct sk_buff* lost_skb_hint;
+2 −0
Original line number Diff line number Diff line
@@ -160,6 +160,8 @@ struct netns_ipv4 {
	int sysctl_tcp_pacing_ca_ratio;
	int sysctl_tcp_wmem[3];
	int sysctl_tcp_rmem[3];
	int sysctl_tcp_comp_sack_nr;
	unsigned long sysctl_tcp_comp_sack_delay_ns;
	struct inet_timewait_death_row tcp_death_row;
	int sysctl_max_syn_backlog;
	int sysctl_tcp_fastopen;
+4 −1
Original line number Diff line number Diff line
@@ -559,7 +559,10 @@ void tcp_init_xmit_timers(struct sock *);
/*
 * Stop every transmit-side timer attached to a TCP socket.
 *
 * Both hrtimers (pacing and compressed-ACK) are cancelled with
 * hrtimer_try_to_cancel(); a return value of 1 is treated as "a pending
 * timer was removed", and in that case one socket reference is released
 * with __sock_put() (presumably the reference taken when the timer was
 * armed -- confirm against the arming sites). The remaining connection
 * timers are then cleared through inet_csk_clear_xmit_timers().
 *
 * Exactly one reference is dropped per successfully cancelled hrtimer;
 * no put is performed when the cancel did not return 1.
 */
static inline void tcp_clear_xmit_timers(struct sock *sk)
{
	if (hrtimer_try_to_cancel(&tcp_sk(sk)->pacing_timer) == 1)
		__sock_put(sk);

	if (hrtimer_try_to_cancel(&tcp_sk(sk)->compressed_ack_timer) == 1)
		__sock_put(sk);

	inet_csk_clear_xmit_timers(sk);
}
+1 −0
Original line number Diff line number Diff line
@@ -278,6 +278,7 @@ enum
	LINUX_MIB_TCPMTUPSUCCESS,		/* TCPMTUPSuccess */
	LINUX_MIB_TCPDELIVERED,			/* TCPDelivered */
	LINUX_MIB_TCPDELIVEREDCE,		/* TCPDeliveredCE */
	LINUX_MIB_TCPACKCOMPRESSED,		/* TCPAckCompressed */
	__LINUX_MIB_MAX
};

Loading