Commit 81c0b3d7 authored by Paul E. McKenney

rcu/nocb: Avoid ->nocb_lock capture by corresponding CPU

A given rcu_data structure's ->nocb_lock can be acquired very frequently
by the corresponding CPU and occasionally by the corresponding no-CBs
grace-period and callbacks kthreads.  In particular, these two kthreads
will have frequent gaps between ->nocb_lock acquisitions that are roughly
a grace period in duration.  This means that any excessive ->nocb_lock
contention will be due to the CPU's acquisitions, and this in turn
enables a very naive contention-avoidance strategy to be quite effective.

This commit therefore modifies rcu_nocb_lock() to first
attempt a raw_spin_trylock(), and to atomically increment a
separate ->nocb_lock_contended across a raw_spin_lock().  This new
->nocb_lock_contended field is checked in __call_rcu_nocb_wake() when
interrupts are enabled, with a spin-wait for contending acquisitions
to complete, thus allowing the kthreads a chance to acquire the lock.
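
For illustration only (not part of the patch): a minimal userspace sketch of the same trylock-then-flag-contention pattern, with a pthread spinlock and C11 atomics standing in for raw_spinlock_t and atomic_t. All names below are hypothetical; the actual kernel code is in the hunks that follow.

#include <pthread.h>
#include <stdatomic.h>
#include <sched.h>

/* Stand-ins for the two rcu_data fields involved (names are illustrative). */
struct nocb_demo {
	pthread_spinlock_t lock;	/* plays the role of ->nocb_lock */
	atomic_int contended;		/* plays the role of ->nocb_lock_contended */
};

static void demo_init(struct nocb_demo *d)
{
	pthread_spin_init(&d->lock, PTHREAD_PROCESS_PRIVATE);
	atomic_init(&d->contended, 0);
}

/*
 * Acquire the lock, trying the fast path first.  If the trylock fails,
 * advertise the contention for the whole time spent waiting on the lock.
 * (The kernel version additionally needs smp_mb__{after,before}_atomic()
 * because atomic_inc()/atomic_dec() are unordered; C11 seq_cst atomics
 * already provide that ordering here.)
 */
static void demo_lock(struct nocb_demo *d)
{
	if (pthread_spin_trylock(&d->lock) == 0)
		return;				/* uncontended fast path */
	atomic_fetch_add(&d->contended, 1);	/* flag contention... */
	pthread_spin_lock(&d->lock);		/* ...before blocking on the lock */
	atomic_fetch_sub(&d->contended, 1);	/* withdraw the flag once held */
}

/*
 * Throttle for the heavy lock user: after dropping the lock, spin until
 * nobody else is still flagged as waiting, giving occasional acquirers
 * a chance to get in.
 */
static void demo_wait_contended(struct nocb_demo *d)
{
	while (atomic_load(&d->contended))
		sched_yield();			/* userspace stand-in for cpu_relax() */
}

The ordering is what makes the nonzero counter meaningful: a waiter bumps the counter before blocking on the lock and drops it only after acquiring, so a nonzero value reliably means some other acquisition is still in flight.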

Signed-off-by: Paul E. McKenney <paulmck@linux.ibm.com>
parent 7f36ef82
+17 −1
@@ -197,6 +197,7 @@ struct rcu_data {
	struct swait_queue_head nocb_cb_wq; /* For nocb kthreads to sleep on. */
	struct task_struct *nocb_gp_kthread;
	raw_spinlock_t nocb_lock;	/* Guard following pair of fields. */
	atomic_t nocb_lock_contended;	/* Contention experienced. */
	int nocb_defer_wakeup;		/* Defer wakeup of nocb_kthread. */
	struct timer_list nocb_timer;	/* Enforce finite deferral. */

@@ -430,7 +431,22 @@ static void rcu_nocb_unlock_irqrestore(struct rcu_data *rdp,
				       unsigned long flags);
#ifdef CONFIG_RCU_NOCB_CPU
static void __init rcu_organize_nocb_kthreads(void);
#endif /* #ifdef CONFIG_RCU_NOCB_CPU */
#define rcu_nocb_lock_irqsave(rdp, flags)				\
do {									\
	if (!rcu_segcblist_is_offloaded(&(rdp)->cblist)) {		\
		local_irq_save(flags);					\
	} else if (!raw_spin_trylock_irqsave(&(rdp)->nocb_lock, (flags))) {\
		atomic_inc(&(rdp)->nocb_lock_contended);		\
		smp_mb__after_atomic(); /* atomic_inc() before lock. */	\
		raw_spin_lock_irqsave(&(rdp)->nocb_lock, (flags));	\
		smp_mb__before_atomic(); /* atomic_dec() after lock. */	\
		atomic_dec(&(rdp)->nocb_lock_contended);		\
	}								\
} while (0)
#else /* #ifdef CONFIG_RCU_NOCB_CPU */
#define rcu_nocb_lock_irqsave(rdp, flags) local_irq_save(flags)
#endif /* #else #ifdef CONFIG_RCU_NOCB_CPU */

static void rcu_bind_gp_kthread(void);
static bool rcu_nohz_full_cpu(void);
static void rcu_dynticks_task_enter(void);
+45 −23
@@ -1498,14 +1498,36 @@ early_param("rcu_nocb_poll", parse_rcu_nocb_poll);

/*
 * Acquire the specified rcu_data structure's ->nocb_lock, but only
 * if it corresponds to a no-CBs CPU.
 * if it corresponds to a no-CBs CPU.  If the lock isn't immediately
 * available, increment ->nocb_lock_contended to flag the contention.
 */
static void rcu_nocb_lock(struct rcu_data *rdp)
{
	if (rcu_segcblist_is_offloaded(&rdp->cblist)) {
	lockdep_assert_irqs_disabled();
	if (!rcu_segcblist_is_offloaded(&rdp->cblist) ||
	    raw_spin_trylock(&rdp->nocb_lock))
		return;
	atomic_inc(&rdp->nocb_lock_contended);
	smp_mb__after_atomic(); /* atomic_inc() before lock. */
	raw_spin_lock(&rdp->nocb_lock);
	smp_mb__before_atomic(); /* atomic_dec() after lock. */
	atomic_dec(&rdp->nocb_lock_contended);
}

/*
 * Spinwait until the specified rcu_data structure's ->nocb_lock is
 * not contended.  Please note that this is extremely special-purpose,
 * relying on the fact that at most two kthreads and one CPU contend for
 * this lock, and also that the two kthreads are guaranteed to have frequent
 * grace-period-duration time intervals between successive acquisitions
 * of the lock.  This allows us to use an extremely simple throttling
 * mechanism, and further to apply it only to the CPU doing floods of
 * call_rcu() invocations.  Don't try this at home!
 */
static void rcu_nocb_wait_contended(struct rcu_data *rdp)
{
	while (atomic_read(&rdp->nocb_lock_contended))
		cpu_relax();
}

/*
@@ -1575,19 +1597,19 @@ static void wake_nocb_gp(struct rcu_data *rdp, bool force,

	lockdep_assert_held(&rdp->nocb_lock);
	if (!READ_ONCE(rdp_gp->nocb_gp_kthread)) {
		raw_spin_unlock_irqrestore(&rdp->nocb_lock, flags);
		rcu_nocb_unlock_irqrestore(rdp, flags);
		return;
	}
	if (READ_ONCE(rdp_gp->nocb_gp_sleep) || force) {
		del_timer(&rdp->nocb_timer);
		raw_spin_unlock_irqrestore(&rdp->nocb_lock, flags);
		rcu_nocb_unlock_irqrestore(rdp, flags);
		smp_mb(); /* enqueue before ->nocb_gp_sleep. */
		raw_spin_lock_irqsave(&rdp_gp->nocb_lock, flags);
		rcu_nocb_lock_irqsave(rdp_gp, flags);
		WRITE_ONCE(rdp_gp->nocb_gp_sleep, false);
		raw_spin_unlock_irqrestore(&rdp_gp->nocb_lock, flags);
		rcu_nocb_unlock_irqrestore(rdp_gp, flags);
		wake_up_process(rdp_gp->nocb_gp_kthread);
	} else {
		raw_spin_unlock_irqrestore(&rdp->nocb_lock, flags);
		rcu_nocb_unlock_irqrestore(rdp, flags);
	}
}

@@ -1646,23 +1668,23 @@ static void __call_rcu_nocb_wake(struct rcu_data *rdp, bool was_alldone,
		if (!rdp->nocb_cb_sleep &&
		    rcu_segcblist_ready_cbs(&rdp->cblist)) {
			// Already going full tilt, so don't try to rewake.
			rcu_nocb_unlock_irqrestore(rdp, flags);
		} else if (rcu_segcblist_pend_cbs(&rdp->cblist) &&
			   raw_spin_trylock_rcu_node(rdp->mynode)) {
			rcu_advance_cbs_nowake(rdp->mynode, rdp);
			raw_spin_unlock_rcu_node(rdp->mynode);
			rcu_nocb_unlock_irqrestore(rdp, flags);
		} else {
			wake_nocb_gp_defer(rdp, RCU_NOCB_WAKE_FORCE,
					   TPS("WakeOvfIsDeferred"));
			rcu_nocb_unlock_irqrestore(rdp, flags);
		}
		rcu_nocb_unlock_irqrestore(rdp, flags);
	} else {
		trace_rcu_nocb_wake(rcu_state.name, rdp->cpu, TPS("WakeNot"));
		rcu_nocb_unlock_irqrestore(rdp, flags);
	}
	if (!irqs_disabled_flags(flags))
	if (!irqs_disabled_flags(flags)) {
		lockdep_assert_irqs_enabled();
		rcu_nocb_wait_contended(rdp);
	}
	return;
}

@@ -1692,7 +1714,7 @@ static void nocb_gp_wait(struct rcu_data *my_rdp)
		if (rcu_segcblist_empty(&rdp->cblist))
			continue; /* No callbacks here, try next. */
		rnp = rdp->mynode;
		raw_spin_lock_irqsave(&rdp->nocb_lock, flags);
		rcu_nocb_lock_irqsave(rdp, flags);
		WRITE_ONCE(my_rdp->nocb_defer_wakeup, RCU_NOCB_WAKE_NOT);
		del_timer(&my_rdp->nocb_timer);
		raw_spin_lock_rcu_node(rnp); /* irqs already disabled. */
@@ -1712,7 +1734,7 @@ static void nocb_gp_wait(struct rcu_data *my_rdp)
		} else {
			needwake = false;
		}
		raw_spin_unlock_irqrestore(&rdp->nocb_lock, flags);
		rcu_nocb_unlock_irqrestore(rdp, flags);
		if (needwake) {
			swake_up_one(&rdp->nocb_cb_wq);
			gotcbs = true;
@@ -1741,9 +1763,9 @@ static void nocb_gp_wait(struct rcu_data *my_rdp)
		trace_rcu_this_gp(rnp, my_rdp, wait_gp_seq, TPS("EndWait"));
	}
	if (!rcu_nocb_poll) {
		raw_spin_lock_irqsave(&my_rdp->nocb_lock, flags);
		rcu_nocb_lock_irqsave(my_rdp, flags);
		WRITE_ONCE(my_rdp->nocb_gp_sleep, true);
		raw_spin_unlock_irqrestore(&my_rdp->nocb_lock, flags);
		rcu_nocb_unlock_irqrestore(my_rdp, flags);
	}
	WARN_ON(signal_pending(current));
}
@@ -1784,12 +1806,12 @@ static void nocb_cb_wait(struct rcu_data *rdp)
	rcu_do_batch(rdp);
	local_bh_enable();
	lockdep_assert_irqs_enabled();
	raw_spin_lock_irqsave(&rdp->nocb_lock, flags);
	rcu_nocb_lock_irqsave(rdp, flags);
	raw_spin_lock_rcu_node(rnp); /* irqs already disabled. */
	needwake_gp = rcu_advance_cbs(rdp->mynode, rdp);
	raw_spin_unlock_rcu_node(rnp); /* irqs remain disabled. */
	if (rcu_segcblist_ready_cbs(&rdp->cblist)) {
		raw_spin_unlock_irqrestore(&rdp->nocb_lock, flags);
		rcu_nocb_unlock_irqrestore(rdp, flags);
		if (needwake_gp)
			rcu_gp_kthread_wake();
		return;
@@ -1797,7 +1819,7 @@ static void nocb_cb_wait(struct rcu_data *rdp)

	trace_rcu_nocb_wake(rcu_state.name, rdp->cpu, TPS("CBSleep"));
	WRITE_ONCE(rdp->nocb_cb_sleep, true);
	raw_spin_unlock_irqrestore(&rdp->nocb_lock, flags);
	rcu_nocb_unlock_irqrestore(rdp, flags);
	if (needwake_gp)
		rcu_gp_kthread_wake();
	swait_event_interruptible_exclusive(rdp->nocb_cb_wq,
@@ -1839,9 +1861,9 @@ static void do_nocb_deferred_wakeup_common(struct rcu_data *rdp)
	unsigned long flags;
	int ndw;

	raw_spin_lock_irqsave(&rdp->nocb_lock, flags);
	rcu_nocb_lock_irqsave(rdp, flags);
	if (!rcu_nocb_need_deferred_wakeup(rdp)) {
		raw_spin_unlock_irqrestore(&rdp->nocb_lock, flags);
		rcu_nocb_unlock_irqrestore(rdp, flags);
		return;
	}
	ndw = READ_ONCE(rdp->nocb_defer_wakeup);