Commit 5021b918 authored by Linus Torvalds
Browse files

Merge branch 'timers-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull timer fix from Ingo Molnar:
 "Fix a broadcast-timer handling race that can result in spuriously and
  indefinitely delayed hrtimers and even RCU stalls if the system is
  otherwise quiet"

* 'timers-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  tick: broadcast-hrtimer: Fix a race in bc_set_next
parents 714366f8 b9023b91
Loading
Loading
Loading
Loading
+29 −33
Original line number Diff line number Diff line
@@ -42,39 +42,39 @@ static int bc_shutdown(struct clock_event_device *evt)
 */
static int bc_set_next(ktime_t expires, struct clock_event_device *bc)
{
	int bc_moved;
	/*
	 * We try to cancel the timer first. If the callback is in
	 * flight on some other cpu then we let it handle it. If we
	 * were able to cancel the timer nothing can rearm it as we
	 * own broadcast_lock.
	 * This is called either from enter/exit idle code or from the
	 * broadcast handler. In all cases tick_broadcast_lock is held.
	 *
	 * However we can also be called from the event handler of
	 * ce_broadcast_hrtimer itself when it expires. We cannot
	 * restart the timer because we are in the callback, but we
	 * can set the expiry time and let the callback return
	 * HRTIMER_RESTART.
	 * hrtimer_cancel() cannot be called here, either from the
	 * broadcast handler or from the enter/exit idle code. The idle
	 * code can run into the problem described in bc_shutdown() and the
	 * broadcast handler cannot wait for itself to complete for obvious
	 * reasons.
	 *
	 * Since we are in the idle loop at this point and because
	 * hrtimer_{start/cancel} functions call into tracing,
	 * calls to these functions must be bound within RCU_NONIDLE.
	 * Each caller tries to arm the hrtimer on its own CPU, but if the
	 * hrtimer callback function is currently running, then
	 * hrtimer_start() cannot move it and the timer stays on the CPU on
	 * which it is assigned at the moment.
	 *
	 * As this can be called from idle code, the hrtimer_start()
	 * invocation has to be wrapped with RCU_NONIDLE() as
	 * hrtimer_start() can call into tracing.
	 */
	RCU_NONIDLE(
		{
			bc_moved = hrtimer_try_to_cancel(&bctimer) >= 0;
			if (bc_moved) {
				hrtimer_start(&bctimer, expires,
					      HRTIMER_MODE_ABS_PINNED_HARD);
			}
		}
	);

	if (bc_moved) {
		/* Bind the "device" to the cpu */
		bc->bound_on = smp_processor_id();
	} else if (bc->bound_on == smp_processor_id()) {
		hrtimer_set_expires(&bctimer, expires);
	}
	RCU_NONIDLE( {
		hrtimer_start(&bctimer, expires, HRTIMER_MODE_ABS_PINNED_HARD);
		/*
		 * The core tick broadcast mode expects bc->bound_on to be set
		 * correctly to prevent a CPU which has the broadcast hrtimer
		 * armed from going deep idle.
		 *
		 * As tick_broadcast_lock is held, nothing can change the cpu
		 * base which was just established in hrtimer_start() above. So
		 * the below access is safe even without holding the hrtimer
		 * base lock.
		 */
		bc->bound_on = bctimer.base->cpu_base->cpu;
	} );
	return 0;
}

@@ -100,10 +100,6 @@ static enum hrtimer_restart bc_handler(struct hrtimer *t)
{
	ce_broadcast_hrtimer.event_handler(&ce_broadcast_hrtimer);

	if (clockevent_state_oneshot(&ce_broadcast_hrtimer))
		if (ce_broadcast_hrtimer.next_event != KTIME_MAX)
			return HRTIMER_RESTART;

	return HRTIMER_NORESTART;
}