Commit b6c6712a authored by Eric Dumazet's avatar Eric Dumazet Committed by David S. Miller
Browse files

net: sk_dst_cache RCUification



With latest CONFIG_PROVE_RCU stuff, I felt more comfortable to make this
work.

sk->sk_dst_cache is currently protected by a rwlock (sk_dst_lock)

This rwlock is readlocked for a very small amount of time, and dst
entries are already freed after RCU grace period. This calls for RCU
again :)

This patch converts sk_dst_lock to a spinlock, and use RCU for readers.

__sk_dst_get() is supposed to be called with rcu_read_lock() or if
socket locked by user, so use appropriate rcu_dereference_check()
condition (rcu_read_lock_held() || sock_owned_by_user(sk))

This patch avoids two atomic ops per tx packet on UDP connected sockets,
for example, and permits sk_dst_lock to be much less dirtied.

Signed-off-by: default avatarEric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: default avatarDavid S. Miller <davem@davemloft.net>
parent 7a161ea9
Loading
Loading
Loading
Loading
+0 −15
Original line number Diff line number Diff line
@@ -225,21 +225,6 @@ static inline void dst_confirm(struct dst_entry *dst)
		neigh_confirm(dst->neighbour);
}

static inline void dst_negative_advice(struct dst_entry **dst_p,
				       struct sock *sk)
{
	struct dst_entry * dst = *dst_p;
	if (dst && dst->ops->negative_advice) {
		*dst_p = dst->ops->negative_advice(dst);

		if (dst != *dst_p) {
			extern void sk_reset_txq(struct sock *sk);

			sk_reset_txq(sk);
		}
	}
}

static inline void dst_link_failure(struct sk_buff *skb)
{
	struct dst_entry *dst = skb_dst(skb);
+2 −2
Original line number Diff line number Diff line
@@ -152,9 +152,9 @@ static inline void __ip6_dst_store(struct sock *sk, struct dst_entry *dst,
static inline void ip6_dst_store(struct sock *sk, struct dst_entry *dst,
				 struct in6_addr *daddr, struct in6_addr *saddr)
{
	write_lock(&sk->sk_dst_lock);
	spin_lock(&sk->sk_dst_lock);
	__ip6_dst_store(sk, dst, daddr, saddr);
	write_unlock(&sk->sk_dst_lock);
	spin_unlock(&sk->sk_dst_lock);
}

static inline int ipv6_unicast_destination(struct sk_buff *skb)
+30 −17
Original line number Diff line number Diff line
@@ -262,7 +262,7 @@ struct sock {
#ifdef CONFIG_XFRM
	struct xfrm_policy	*sk_policy[2];
#endif
	rwlock_t		sk_dst_lock;
	spinlock_t		sk_dst_lock;
	atomic_t		sk_rmem_alloc;
	atomic_t		sk_wmem_alloc;
	atomic_t		sk_omem_alloc;
@@ -1192,7 +1192,8 @@ extern unsigned long sock_i_ino(struct sock *sk);
static inline struct dst_entry *
__sk_dst_get(struct sock *sk)
{
	return sk->sk_dst_cache;
	return rcu_dereference_check(sk->sk_dst_cache, rcu_read_lock_held() ||
						       sock_owned_by_user(sk));
}

static inline struct dst_entry *
@@ -1200,50 +1201,62 @@ sk_dst_get(struct sock *sk)
{
	struct dst_entry *dst;

	read_lock(&sk->sk_dst_lock);
	dst = sk->sk_dst_cache;
	rcu_read_lock();
	dst = rcu_dereference(sk->sk_dst_cache);
	if (dst)
		dst_hold(dst);
	read_unlock(&sk->sk_dst_lock);
	rcu_read_unlock();
	return dst;
}

extern void sk_reset_txq(struct sock *sk);

static inline void dst_negative_advice(struct sock *sk)
{
	struct dst_entry *ndst, *dst = __sk_dst_get(sk);

	if (dst && dst->ops->negative_advice) {
		ndst = dst->ops->negative_advice(dst);

		if (ndst != dst) {
			rcu_assign_pointer(sk->sk_dst_cache, ndst);
			sk_reset_txq(sk);
		}
	}
}

static inline void
__sk_dst_set(struct sock *sk, struct dst_entry *dst)
{
	struct dst_entry *old_dst;

	sk_tx_queue_clear(sk);
	old_dst = sk->sk_dst_cache;
	sk->sk_dst_cache = dst;
	old_dst = rcu_dereference_check(sk->sk_dst_cache,
					lockdep_is_held(&sk->sk_dst_lock));
	rcu_assign_pointer(sk->sk_dst_cache, dst);
	dst_release(old_dst);
}

static inline void
sk_dst_set(struct sock *sk, struct dst_entry *dst)
{
	write_lock(&sk->sk_dst_lock);
	spin_lock(&sk->sk_dst_lock);
	__sk_dst_set(sk, dst);
	write_unlock(&sk->sk_dst_lock);
	spin_unlock(&sk->sk_dst_lock);
}

static inline void
__sk_dst_reset(struct sock *sk)
{
	struct dst_entry *old_dst;

	sk_tx_queue_clear(sk);
	old_dst = sk->sk_dst_cache;
	sk->sk_dst_cache = NULL;
	dst_release(old_dst);
	__sk_dst_set(sk, NULL);
}

static inline void
sk_dst_reset(struct sock *sk)
{
	write_lock(&sk->sk_dst_lock);
	spin_lock(&sk->sk_dst_lock);
	__sk_dst_reset(sk);
	write_unlock(&sk->sk_dst_lock);
	spin_unlock(&sk->sk_dst_lock);
}

extern struct dst_entry *__sk_dst_check(struct sock *sk, u32 cookie);
+1 −1
Original line number Diff line number Diff line
@@ -2015,7 +2015,7 @@ static struct netdev_queue *dev_pick_tx(struct net_device *dev,
			if (dev->real_num_tx_queues > 1)
				queue_index = skb_tx_hash(dev, skb);

			if (sk && sk->sk_dst_cache)
			if (sk && rcu_dereference_check(sk->sk_dst_cache, 1))
				sk_tx_queue_set(sk, queue_index);
		}
	}
+4 −4
Original line number Diff line number Diff line
@@ -364,11 +364,11 @@ EXPORT_SYMBOL(sk_reset_txq);

struct dst_entry *__sk_dst_check(struct sock *sk, u32 cookie)
{
	struct dst_entry *dst = sk->sk_dst_cache;
	struct dst_entry *dst = __sk_dst_get(sk);

	if (dst && dst->obsolete && dst->ops->check(dst, cookie) == NULL) {
		sk_tx_queue_clear(sk);
		sk->sk_dst_cache = NULL;
		rcu_assign_pointer(sk->sk_dst_cache, NULL);
		dst_release(dst);
		return NULL;
	}
@@ -1157,7 +1157,7 @@ struct sock *sk_clone(const struct sock *sk, const gfp_t priority)
		skb_queue_head_init(&newsk->sk_async_wait_queue);
#endif

		rwlock_init(&newsk->sk_dst_lock);
		spin_lock_init(&newsk->sk_dst_lock);
		rwlock_init(&newsk->sk_callback_lock);
		lockdep_set_class_and_name(&newsk->sk_callback_lock,
				af_callback_keys + newsk->sk_family,
@@ -1898,7 +1898,7 @@ void sock_init_data(struct socket *sock, struct sock *sk)
	} else
		sk->sk_sleep	=	NULL;

	rwlock_init(&sk->sk_dst_lock);
	spin_lock_init(&sk->sk_dst_lock);
	rwlock_init(&sk->sk_callback_lock);
	lockdep_set_class_and_name(&sk->sk_callback_lock,
			af_callback_keys + sk->sk_family,
Loading