Commit 3eeec385 authored by Thomas Gleixner
Browse files

x86/entry: Provide idtentry_entry/exit_cond_rcu()



After a lengthy discussion [1] it turned out that RCU does not need a full
rcu_irq_enter/exit() when RCU is already watching. All it needs if
NOHZ_FULL is active is to check whether the tick needs to be restarted.

This makes it possible to avoid a separate variant for the pagefault handler,
which cannot invoke rcu_irq_enter() on a kernel pagefault that might sleep.

The cond_rcu argument is only temporary and will be removed once the
existing users of idtentry_enter/exit() have been cleaned up. After that
the code can be significantly simplified.

[ mingo: Simplified the control flow ]

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Acked-by: "Paul E. McKenney" <paulmck@kernel.org>
Acked-by: Andy Lutomirski <luto@kernel.org>
Link: [1] https://lkml.kernel.org/r/20200515235125.628629605@linutronix.de
Link: https://lore.kernel.org/r/20200521202117.181397835@linutronix.de
parent 2ab70319
Loading
Loading
Loading
Loading
+64 −15
Original line number Diff line number Diff line
@@ -512,8 +512,10 @@ SYSCALL_DEFINE0(ni_syscall)
}

/**
 * idtentry_enter - Handle state tracking on idtentry
 * idtentry_enter_cond_rcu - Handle state tracking on idtentry with conditional
 *			     RCU handling
 * @regs:	Pointer to pt_regs of interrupted context
 * @cond_rcu:	Invoke rcu_irq_enter() only if RCU is not watching
 *
 * Invokes:
 *  - lockdep irqflag state tracking as low level ASM entry disabled
@@ -521,40 +523,84 @@ SYSCALL_DEFINE0(ni_syscall)
 *
 *  - Context tracking if the exception hit user mode.
 *
 *  - RCU notification if the exception hit kernel mode.
 *
 *  - The hardirq tracer to keep the state consistent as low level ASM
 *    entry disabled interrupts.
 *
 * For kernel mode entries RCU handling is done conditionally. If RCU is
 * watching then the only RCU requirement is to check whether the tick has
 * to be restarted. If RCU is not watching then rcu_irq_enter() has to be
 * invoked on entry and rcu_irq_exit() on exit.
 *
 * Avoiding the rcu_irq_enter/exit() calls is an optimization but also
 * solves the problem of kernel mode pagefaults which can schedule, which
 * is not possible after invoking rcu_irq_enter() without undoing it.
 *
 * For user mode entries enter_from_user_mode() must be invoked to
 * establish the proper context for NOHZ_FULL. Otherwise scheduling on exit
 * would not be possible.
 *
 * Returns: True if RCU has been adjusted on a kernel entry
 *	    False otherwise
 *
 * The return value must be fed into the rcu_exit argument of
 * idtentry_exit_cond_rcu().
 */
void noinstr idtentry_enter(struct pt_regs *regs)
bool noinstr idtentry_enter_cond_rcu(struct pt_regs *regs, bool cond_rcu)
{
	if (user_mode(regs)) {
		enter_from_user_mode();
	} else {
		return false;
	}

	if (!cond_rcu || !__rcu_is_watching()) {
		/*
		 * If RCU is not watching then the same careful
		 * sequence vs. lockdep and tracing is required
		 * as in enter_from_user_mode().
		 *
		 * This only happens for IRQs that hit the idle
		 * loop, i.e. if idle is not using MWAIT.
		 */
		lockdep_hardirqs_off(CALLER_ADDR0);
		rcu_irq_enter();
		instrumentation_begin();
		trace_hardirqs_off_prepare();
		instrumentation_end();

		return true;
	}

	/*
	 * If RCU is watching then RCU only wants to check
	 * whether it needs to restart the tick in NOHZ
	 * mode.
	 */
	instrumentation_begin();
	rcu_irq_enter_check_tick();
	/* Use the combo lockdep/tracing function */
	trace_hardirqs_off();
	instrumentation_end();

	return false;
}

/**
 * idtentry_exit - Common code to handle return from exceptions
 * idtentry_exit_cond_rcu - Handle return from exception with conditional RCU
 *			    handling
 * @regs:	Pointer to pt_regs (exception entry regs)
 * @rcu_exit:	Invoke rcu_irq_exit() if true
 *
 * Depending on the return target (kernel/user) this runs the necessary
 * preemption and work checks if possible and required and returns to
 * preemption and work checks if possible and reguired and returns to
 * the caller with interrupts disabled and no further work pending.
 *
 * This is the last action before returning to the low level ASM code which
 * just needs to return to the appropriate context.
 *
 * Invoked by all exception/interrupt IDTENTRY handlers which are not
 * returning through the paranoid exit path (all except NMI, #DF and the IST
 * variants of #MC and #DB) and are therefore on the thread stack.
 * Counterpart to idtentry_enter_cond_rcu(). The return value of the entry
 * function must be fed into the @rcu_exit argument.
 */
void noinstr idtentry_exit(struct pt_regs *regs)
void noinstr idtentry_exit_cond_rcu(struct pt_regs *regs, bool rcu_exit)
{
	lockdep_assert_irqs_disabled();

@@ -580,6 +626,7 @@ void noinstr idtentry_exit(struct pt_regs *regs)
				if (IS_ENABLED(CONFIG_DEBUG_ENTRY))
					WARN_ON_ONCE(!on_thread_stack());
				instrumentation_begin();
				if (rcu_exit)
					rcu_irq_exit_preempt();
				if (need_resched())
					preempt_schedule_irq();
@@ -602,10 +649,12 @@ void noinstr idtentry_exit(struct pt_regs *regs)
		trace_hardirqs_on_prepare();
		lockdep_hardirqs_on_prepare(CALLER_ADDR0);
		instrumentation_end();
		if (rcu_exit)
			rcu_irq_exit();
		lockdep_hardirqs_on(CALLER_ADDR0);
	} else {
		/* IRQ flags state is correct already. Just tell RCU */
		/* IRQ flags state is correct already. Just tell RCU. */
		if (rcu_exit)
			rcu_irq_exit();
	}
}
+12 −2
Original line number Diff line number Diff line
@@ -7,8 +7,18 @@

#ifndef __ASSEMBLY__

void idtentry_enter(struct pt_regs *regs);
void idtentry_exit(struct pt_regs *regs);
bool idtentry_enter_cond_rcu(struct pt_regs *regs, bool cond_rcu);
void idtentry_exit_cond_rcu(struct pt_regs *regs, bool rcu_exit);

/**
 * idtentry_enter - Wrapper around idtentry_enter_cond_rcu() with @cond_rcu off
 * @regs:	Pointer to pt_regs, forwarded unchanged
 *
 * Forwards to idtentry_enter_cond_rcu() with @cond_rcu == false and drops
 * the bool it returns.
 */
static __always_inline void idtentry_enter(struct pt_regs *regs)
{
	const bool cond_rcu = false;

	(void)idtentry_enter_cond_rcu(regs, cond_rcu);
}

/**
 * idtentry_exit - Wrapper around idtentry_exit_cond_rcu() with @rcu_exit set
 * @regs:	Pointer to pt_regs, forwarded unchanged
 *
 * Forwards to idtentry_exit_cond_rcu() with @rcu_exit == true.
 */
static __always_inline void idtentry_exit(struct pt_regs *regs)
{
	const bool rcu_exit = true;

	idtentry_exit_cond_rcu(regs, rcu_exit);
}

/**
 * DECLARE_IDTENTRY - Declare functions for simple IDT entry points