Commit 3f233809 authored by David S. Miller's avatar David S. Miller
Browse files

Merge branch 'tcp-address-KCSAN-reports-in-tcp_poll-part-I'



Eric Dumazet says:

====================
tcp: address KCSAN reports in tcp_poll() (part I)

This all started with a KCSAN report (included
in "tcp: annotate tp->rcv_nxt lockless reads" changelog)

tcp_poll() runs in a lockless way. This means that about
all accesses of tcp socket fields done in tcp_poll() context
need annotations otherwise KCSAN will complain about data-races.

While doing this detective work, I found a more serious bug,
addressed by the first patch ("tcp: add rcu protection around
tp->fastopen_rsk").
====================

Signed-off-by: default avatarDavid S. Miller <davem@davemloft.net>
parents 8caf8a91 ab4e846a
Loading
Loading
Loading
Loading
+3 −3
Original line number Diff line number Diff line
@@ -393,7 +393,7 @@ struct tcp_sock {
	/* fastopen_rsk points to request_sock that resulted in this big
	 * socket. Used to retransmit SYNACKs etc.
	 */
	struct request_sock *fastopen_rsk;
	struct request_sock __rcu *fastopen_rsk;
	u32	*saved_syn;
};

@@ -447,8 +447,8 @@ static inline struct tcp_timewait_sock *tcp_twsk(const struct sock *sk)

static inline bool tcp_passive_fastopen(const struct sock *sk)
{
	return (sk->sk_state == TCP_SYN_RECV &&
		tcp_sk(sk)->fastopen_rsk != NULL);
	return sk->sk_state == TCP_SYN_RECV &&
	       rcu_access_pointer(tcp_sk(sk)->fastopen_rsk) != NULL;
}

static inline void fastopen_queue_tune(struct sock *sk, int backlog)
+19 −10
Original line number Diff line number Diff line
@@ -878,12 +878,17 @@ static inline bool sk_acceptq_is_full(const struct sock *sk)
 */
static inline int sk_stream_min_wspace(const struct sock *sk)
{
	return sk->sk_wmem_queued >> 1;
	return READ_ONCE(sk->sk_wmem_queued) >> 1;
}

static inline int sk_stream_wspace(const struct sock *sk)
{
	return sk->sk_sndbuf - sk->sk_wmem_queued;
	return READ_ONCE(sk->sk_sndbuf) - READ_ONCE(sk->sk_wmem_queued);
}

static inline void sk_wmem_queued_add(struct sock *sk, int val)
{
	WRITE_ONCE(sk->sk_wmem_queued, sk->sk_wmem_queued + val);
}

void sk_stream_write_space(struct sock *sk);
@@ -1207,7 +1212,7 @@ static inline void sk_refcnt_debug_release(const struct sock *sk)

static inline bool __sk_stream_memory_free(const struct sock *sk, int wake)
{
	if (sk->sk_wmem_queued >= sk->sk_sndbuf)
	if (READ_ONCE(sk->sk_wmem_queued) >= READ_ONCE(sk->sk_sndbuf))
		return false;

	return sk->sk_prot->stream_memory_free ?
@@ -1467,7 +1472,7 @@ DECLARE_STATIC_KEY_FALSE(tcp_tx_skb_cache_key);
static inline void sk_wmem_free_skb(struct sock *sk, struct sk_buff *skb)
{
	sock_set_flag(sk, SOCK_QUEUE_SHRUNK);
	sk->sk_wmem_queued -= skb->truesize;
	sk_wmem_queued_add(sk, -skb->truesize);
	sk_mem_uncharge(sk, skb->truesize);
	if (static_branch_unlikely(&tcp_tx_skb_cache_key) &&
	    !sk->sk_tx_skb_cache && !skb_cloned(skb)) {
@@ -2014,7 +2019,7 @@ static inline int skb_copy_to_page_nocache(struct sock *sk, struct iov_iter *fro
	skb->len	     += copy;
	skb->data_len	     += copy;
	skb->truesize	     += copy;
	sk->sk_wmem_queued   += copy;
	sk_wmem_queued_add(sk, copy);
	sk_mem_charge(sk, copy);
	return 0;
}
@@ -2220,10 +2225,14 @@ static inline void sk_wake_async(const struct sock *sk, int how, int band)

static inline void sk_stream_moderate_sndbuf(struct sock *sk)
{
	if (!(sk->sk_userlocks & SOCK_SNDBUF_LOCK)) {
		sk->sk_sndbuf = min(sk->sk_sndbuf, sk->sk_wmem_queued >> 1);
		sk->sk_sndbuf = max_t(u32, sk->sk_sndbuf, SOCK_MIN_SNDBUF);
	}
	u32 val;

	if (sk->sk_userlocks & SOCK_SNDBUF_LOCK)
		return;

	val = min(sk->sk_sndbuf, sk->sk_wmem_queued >> 1);

	WRITE_ONCE(sk->sk_sndbuf, max_t(u32, val, SOCK_MIN_SNDBUF));
}

struct sk_buff *sk_stream_alloc_skb(struct sock *sk, int size, gfp_t gfp,
@@ -2251,7 +2260,7 @@ bool sk_page_frag_refill(struct sock *sk, struct page_frag *pfrag);
 */
static inline bool sock_writeable(const struct sock *sk)
{
	return refcount_read(&sk->sk_wmem_alloc) < (sk->sk_sndbuf >> 1);
	return refcount_read(&sk->sk_wmem_alloc) < (READ_ONCE(sk->sk_sndbuf) >> 1);
}

static inline gfp_t gfp_any(void)
+4 −3
Original line number Diff line number Diff line
@@ -1380,14 +1380,14 @@ static inline int tcp_win_from_space(const struct sock *sk, int space)
/* Note: caller must be prepared to deal with negative returns */
static inline int tcp_space(const struct sock *sk)
{
	return tcp_win_from_space(sk, sk->sk_rcvbuf -
	return tcp_win_from_space(sk, READ_ONCE(sk->sk_rcvbuf) -
				  READ_ONCE(sk->sk_backlog.len) -
				  atomic_read(&sk->sk_rmem_alloc));
}

static inline int tcp_full_space(const struct sock *sk)
{
	return tcp_win_from_space(sk, sk->sk_rcvbuf);
	return tcp_win_from_space(sk, READ_ONCE(sk->sk_rcvbuf));
}

extern void tcp_openreq_init_rwin(struct request_sock *req,
@@ -1917,7 +1917,8 @@ static inline u32 tcp_notsent_lowat(const struct tcp_sock *tp)
static inline bool tcp_stream_memory_free(const struct sock *sk, int wake)
{
	const struct tcp_sock *tp = tcp_sk(sk);
	u32 notsent_bytes = tp->write_seq - tp->snd_nxt;
	u32 notsent_bytes = READ_ONCE(tp->write_seq) -
			    READ_ONCE(tp->snd_nxt);

	return (notsent_bytes << wake) < tcp_notsent_lowat(tp);
}
+2 −2
Original line number Diff line number Diff line
@@ -82,7 +82,7 @@ TRACE_EVENT(sock_rcvqueue_full,
	TP_fast_assign(
		__entry->rmem_alloc = atomic_read(&sk->sk_rmem_alloc);
		__entry->truesize   = skb->truesize;
		__entry->sk_rcvbuf  = sk->sk_rcvbuf;
		__entry->sk_rcvbuf  = READ_ONCE(sk->sk_rcvbuf);
	),

	TP_printk("rmem_alloc=%d truesize=%u sk_rcvbuf=%d",
@@ -115,7 +115,7 @@ TRACE_EVENT(sock_exceed_buf_limit,
		__entry->rmem_alloc = atomic_read(&sk->sk_rmem_alloc);
		__entry->sysctl_wmem = sk_get_wmem0(sk, prot);
		__entry->wmem_alloc = refcount_read(&sk->sk_wmem_alloc);
		__entry->wmem_queued = sk->sk_wmem_queued;
		__entry->wmem_queued = READ_ONCE(sk->sk_wmem_queued);
		__entry->kind = kind;
	),

+1 −1
Original line number Diff line number Diff line
@@ -640,7 +640,7 @@ int __zerocopy_sg_from_iter(struct sock *sk, struct sk_buff *skb,
		skb->len += copied;
		skb->truesize += truesize;
		if (sk && sk->sk_type == SOCK_STREAM) {
			sk->sk_wmem_queued += truesize;
			sk_wmem_queued_add(sk, truesize);
			sk_mem_charge(sk, truesize);
		} else {
			refcount_add(truesize, &skb->sk->sk_wmem_alloc);
Loading