Commit 1b2e7884 authored by David S. Miller
Browse files

Merge branch 'tcp-sack-compression-changes'



Eric Dumazet says:

====================
tcp: sack compression changes

Patch series refines SACK compression.

We had issues with missing SACK when TCP option space is tight.

Uses hrtimer slack to improve performance.
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
parents 3857c776 a70437cc
Loading
Loading
Loading
Loading
+8 −0
Original line number Diff line number Diff line
@@ -651,6 +651,14 @@ tcp_comp_sack_delay_ns - LONG INTEGER

	Default : 1,000,000 ns (1 ms)

tcp_comp_sack_slack_ns - LONG INTEGER
	This sysctl controls the slack used when arming the
	timer used by SACK compression. This gives extra time
	for small RTT flows, and reduces system overhead by allowing
	opportunistic reduction of timer interrupts.

	Default : 100,000 ns (100 us)

tcp_comp_sack_nr - INTEGER
	Max number of SACK that can be compressed.
	Using 0 disables SACK compression.
+1 −0
Original line number Diff line number Diff line
@@ -268,6 +268,7 @@ struct tcp_sock {
	} rack;
	u16	advmss;		/* Advertised MSS			*/
	u8	compressed_ack;
	u8	dup_ack_counter;
	u32	chrono_start;	/* Start time in jiffies of a TCP chrono */
	u32	chrono_stat[3];	/* Time in jiffies for chrono_stat stats */
	u8	chrono_type:2,	/* current chronograph type */
+1 −0
Original line number Diff line number Diff line
@@ -173,6 +173,7 @@ struct netns_ipv4 {
	int sysctl_tcp_rmem[3];
	int sysctl_tcp_comp_sack_nr;
	unsigned long sysctl_tcp_comp_sack_delay_ns;
	unsigned long sysctl_tcp_comp_sack_slack_ns;
	struct inet_timewait_death_row tcp_death_row;
	int sysctl_max_syn_backlog;
	int sysctl_tcp_fastopen;
+7 −0
Original line number Diff line number Diff line
@@ -1329,6 +1329,13 @@ static struct ctl_table ipv4_net_table[] = {
		.mode		= 0644,
		.proc_handler	= proc_doulongvec_minmax,
	},
	{
		.procname	= "tcp_comp_sack_slack_ns",
		.data		= &init_net.ipv4.sysctl_tcp_comp_sack_slack_ns,
		.maxlen		= sizeof(unsigned long),
		.mode		= 0644,
		.proc_handler	= proc_doulongvec_minmax,
	},
	{
		.procname	= "tcp_comp_sack_nr",
		.data		= &init_net.ipv4.sysctl_tcp_comp_sack_nr,
+40 −11
Original line number Diff line number Diff line
@@ -4327,6 +4327,33 @@ static void tcp_sack_maybe_coalesce(struct tcp_sock *tp)
	}
}

/* Flush pending SACK compression state for @sk.
 *
 * When at least one ACK is currently being held back (tp->compressed_ack
 * is non-zero), stop the compression timer, account for the ACKs that were
 * saved, clear the counter and send a single ACK right away.
 * Does nothing when no ACK is being held back.
 */
static void tcp_sack_compress_send_ack(struct sock *sk)
{
	struct tcp_sock *tp = tcp_sk(sk);
	int acks_saved;

	if (tp->compressed_ack == 0)
		return;

	/* A return value of 1 means a queued timer was cancelled before it
	 * could run; release the socket reference that was taken for it.
	 */
	if (hrtimer_try_to_cancel(&tp->compressed_ack_timer) == 1)
		__sock_put(sk);

	/* One ACK is sent below after all, so subtract it from the number
	 * of compressed ACKs to keep LINUX_MIB_TCPACKCOMPRESSED accurate.
	 */
	acks_saved = tp->compressed_ack - 1;
	NET_ADD_STATS(sock_net(sk), LINUX_MIB_TCPACKCOMPRESSED, acks_saved);

	tp->compressed_ack = 0;
	tcp_send_ack(sk);
}

/* Reasonable number of SACK blocks expected to fit in a TCP SACK option.
 * The max is 4, but this becomes 3 if TCP timestamps are there.
 * Given that SACK packets might be lost, be conservative and use 2.
 *
 * tcp_sack_new_ofo_skb() compares the index of the SACK block being
 * updated or added against this value and, when the index is not below
 * it, calls tcp_sack_compress_send_ack().
 */
#define TCP_SACK_BLOCKS_EXPECTED 2

static void tcp_sack_new_ofo_skb(struct sock *sk, u32 seq, u32 end_seq)
{
	struct tcp_sock *tp = tcp_sk(sk);
@@ -4339,6 +4366,8 @@ static void tcp_sack_new_ofo_skb(struct sock *sk, u32 seq, u32 end_seq)

	for (this_sack = 0; this_sack < cur_sacks; this_sack++, sp++) {
		if (tcp_sack_extend(sp, seq, end_seq)) {
			if (this_sack >= TCP_SACK_BLOCKS_EXPECTED)
				tcp_sack_compress_send_ack(sk);
			/* Rotate this_sack to the first one. */
			for (; this_sack > 0; this_sack--, sp--)
				swap(*sp, *(sp - 1));
@@ -4348,6 +4377,9 @@ static void tcp_sack_new_ofo_skb(struct sock *sk, u32 seq, u32 end_seq)
		}
	}

	if (this_sack >= TCP_SACK_BLOCKS_EXPECTED)
		tcp_sack_compress_send_ack(sk);

	/* Could not find an adjacent existing SACK, build a new one,
	 * put it at the front, and shift everyone else down.  We
	 * always know there is at least one SACK present already here.
@@ -4355,8 +4387,6 @@ static void tcp_sack_new_ofo_skb(struct sock *sk, u32 seq, u32 end_seq)
	 * If the sack array is full, forget about the last one.
	 */
	if (this_sack >= TCP_NUM_SACKS) {
		if (tp->compressed_ack > TCP_FASTRETRANS_THRESH)
			tcp_send_ack(sk);
		this_sack--;
		tp->rx_opt.num_sacks--;
		sp--;
@@ -5275,15 +5305,13 @@ send_now:

	if (tp->compressed_ack_rcv_nxt != tp->rcv_nxt) {
		tp->compressed_ack_rcv_nxt = tp->rcv_nxt;
		if (tp->compressed_ack > TCP_FASTRETRANS_THRESH)
			NET_ADD_STATS(sock_net(sk), LINUX_MIB_TCPACKCOMPRESSED,
				      tp->compressed_ack - TCP_FASTRETRANS_THRESH);
		tp->compressed_ack = 0;
		tp->dup_ack_counter = 0;
	}

	if (++tp->compressed_ack <= TCP_FASTRETRANS_THRESH)
	if (tp->dup_ack_counter < TCP_FASTRETRANS_THRESH) {
		tp->dup_ack_counter++;
		goto send_now;

	}
	tp->compressed_ack++;
	if (hrtimer_is_queued(&tp->compressed_ack_timer))
		return;

@@ -5296,7 +5324,8 @@ send_now:
	delay = min_t(unsigned long, sock_net(sk)->ipv4.sysctl_tcp_comp_sack_delay_ns,
		      rtt * (NSEC_PER_USEC >> 3)/20);
	sock_hold(sk);
	hrtimer_start(&tp->compressed_ack_timer, ns_to_ktime(delay),
	hrtimer_start_range_ns(&tp->compressed_ack_timer, ns_to_ktime(delay),
			       sock_net(sk)->ipv4.sysctl_tcp_comp_sack_slack_ns,
			       HRTIMER_MODE_REL_PINNED_SOFT);
}

Loading