Commit de269129 authored by Mahesh Salgaonkar's avatar Mahesh Salgaonkar Committed by Michael Ellerman
Browse files

powerpc/hmi: Fix kernel hang when TB is in error state.



On TOD/TB errors the timebase register stops (freezes) until HMI error
recovery gets TOD/TB back into a running state. On successful recovery, TB
starts running again and udelay(), which relies on the TB value, continues
to function properly. But when HMI fails to recover from TOD/TB errors, the
TB register stays frozen. With TB not running, the __delay() function
keeps looping and never returns. If __delay() is called while in the panic
path, the system hangs and never reboots after the panic.

Signed-off-by: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
parent 0acb5f64
Loading
Loading
Loading
Loading
+10 −0
Original line number Diff line number Diff line
@@ -209,6 +209,7 @@
#define OPAL_SENSOR_GROUP_ENABLE		163
#define OPAL_PCI_GET_PBCQ_TUNNEL_BAR		164
#define OPAL_PCI_SET_PBCQ_TUNNEL_BAR		165
#define OPAL_HANDLE_HMI2			166
#define OPAL_NX_COPROC_INIT			167
#define OPAL_XIVE_GET_VP_STATE			170
/*
 * Equals the highest token number listed above (OPAL_XIVE_GET_VP_STATE).
 * NOTE(review): presumably this has to be raised whenever a token with a
 * larger number is added -- confirm against the users of OPAL_LAST.
 */
#define OPAL_LAST				170
@@ -635,6 +636,15 @@ struct OpalHMIEvent {
	} u;
};

/*
 * Flag bits for the out_flags value of OPAL_HANDLE_HMI2.
 * Each enumerator is a single-bit mask in a 64-bit word.
 */
enum {
	/* Timebase has been resynced */
	OPAL_HMI_FLAGS_TB_RESYNC	= 1ULL << 0,
	/* DEC lost, needs to be reprogrammed */
	OPAL_HMI_FLAGS_DEC_LOST		= 1ULL << 1,
	/* HDEC lost, needs to be reprogrammed */
	OPAL_HMI_FLAGS_HDEC_LOST	= 1ULL << 2,
	/* TOD/TB recovery failed. */
	OPAL_HMI_FLAGS_TOD_TB_FAIL	= 1ULL << 3,
	/* An event has been created */
	OPAL_HMI_FLAGS_NEW_EVENT	= 1ULL << 63,
};

enum {
	OPAL_P7IOC_DIAG_TYPE_NONE	= 0,
	OPAL_P7IOC_DIAG_TYPE_RGC	= 1,
+2 −0
Original line number Diff line number Diff line
@@ -203,6 +203,7 @@ int64_t opal_set_param(uint64_t token, uint32_t param_id, uint64_t buffer,
int64_t opal_sensor_read(uint32_t sensor_hndl, int token, __be32 *sensor_data);
int64_t opal_sensor_read_u64(u32 sensor_hndl, int token, __be64 *sensor_data);
int64_t opal_handle_hmi(void);
/*
 * Like opal_handle_hmi(), but takes a pointer to a big-endian 64-bit
 * out_flags word.
 * NOTE(review): presumably out_flags receives the OPAL_HMI_FLAGS_* bits and
 * needs an endian conversion before they are tested -- confirm in the caller.
 */
int64_t opal_handle_hmi2(__be64 *out_flags);
int64_t opal_register_dump_region(uint32_t id, uint64_t start, uint64_t end);
int64_t opal_unregister_dump_region(uint32_t id);
int64_t opal_slw_set_reg(uint64_t cpu_pir, uint64_t sprn, uint64_t val);
@@ -359,6 +360,7 @@ int opal_power_control_init(void);
extern int opal_machine_check(struct pt_regs *regs);
extern bool opal_mce_check_early_recovery(struct pt_regs *regs);
extern int opal_hmi_exception_early(struct pt_regs *regs);
/*
 * NOTE(review): presumably the early HMI handler that goes through
 * opal_handle_hmi2(); its definition is not visible here -- confirm.
 */
extern int opal_hmi_exception_early2(struct pt_regs *regs);
extern int opal_handle_hmi_exception(struct pt_regs *regs);

extern void opal_shutdown(void);
+2 −0
Original line number Diff line number Diff line
@@ -36,6 +36,8 @@ extern unsigned long ppc_proc_freq;
extern unsigned long ppc_tb_freq;
#define DEFAULT_TB_FREQ		125000000UL

/*
 * True when the timebase (TB) is in an error state and no longer ticking
 * because HMI recovery failed. __delay() checks this and returns at once
 * instead of spinning on a frozen TB.
 */
extern bool tb_invalid;

struct div_result {
	u64 result_high;
	u64 result_low;
+9 −0
Original line number Diff line number Diff line
@@ -150,6 +150,8 @@ EXPORT_SYMBOL_GPL(ppc_proc_freq);
unsigned long ppc_tb_freq;
EXPORT_SYMBOL_GPL(ppc_tb_freq);

/*
 * Flags a timebase that is stuck in an error state (HMI recovery failed).
 * Zero-initialized, i.e. false, by default; __delay() skips its TB-based
 * wait loop while this is true.
 * NOTE(review): the code that sets it is not visible in this hunk.
 */
bool tb_invalid;

#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
/*
 * Factor for converting from cputime_t (timebase ticks) to
@@ -459,6 +461,13 @@ void __delay(unsigned long loops)
				diff += 1000000000;
			spin_cpu_relax();
		} while (diff < loops);
	} else if (tb_invalid) {
		/*
		 * TB is in error state and isn't ticking anymore.
		 * HMI handler was unable to recover from TB error.
		 * Return immediately, so that kernel won't get stuck here.
		 */
		spin_cpu_relax();
	} else {
		start = get_tbl();
		while (get_tbl() - start < loops)
+1 −0
Original line number Diff line number Diff line
@@ -220,6 +220,7 @@ OPAL_CALL(opal_sensor_read, OPAL_SENSOR_READ);
OPAL_CALL(opal_get_param,			OPAL_GET_PARAM);
OPAL_CALL(opal_set_param,			OPAL_SET_PARAM);
OPAL_CALL(opal_handle_hmi,			OPAL_HANDLE_HMI);
/* Pairs opal_handle_hmi2 with the OPAL_HANDLE_HMI2 token. */
OPAL_CALL(opal_handle_hmi2,			OPAL_HANDLE_HMI2);
OPAL_CALL(opal_config_cpu_idle_state,		OPAL_CONFIG_CPU_IDLE_STATE);
OPAL_CALL(opal_slw_set_reg,			OPAL_SLW_SET_REG);
OPAL_CALL(opal_register_dump_region,		OPAL_REGISTER_DUMP_REGION);
Loading