Commit 86bbbeba authored by Linus Torvalds's avatar Linus Torvalds
Browse files

Merge branch 'ras-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull x86 RAS updates from Ingo Molnar:
 "The main changes in this cycle were:

   - AMD MCE support/decoding improvements (Yazen Ghannam)

   - general MCE header cleanups and reorganization (Borislav Petkov)"

* 'ras-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  Revert "x86/mce/AMD: Collect error info even if valid bits are not set"
  x86/MCE: Cleanup and complete struct mce fields definitions
  x86/mce/AMD: Carve out SMCA get_block_address() code
  x86/mce/AMD: Get address from already initialized block
  x86/mce/AMD, EDAC/mce_amd: Enumerate Reserved SMCA bank type
  x86/mce/AMD: Pass the bank number to smca_get_bank_type()
  x86/mce/AMD: Collect error info even if valid bits are not set
  x86/mce: Issue the 'mcelog --ascii' message only on !AMD
  x86/mce: Convert 'struct mca_config' bools to a bitfield
  x86/mce: Put private structures and definitions into the internal header
parents 486adcea e2efacb6
Loading
Loading
Loading
Loading
+1 −52
Original line number Diff line number Diff line
@@ -138,58 +138,6 @@ struct mce_log_buffer {
	struct mce entry[MCE_LOG_LEN];
};

struct mca_config {
	bool dont_log_ce;
	bool cmci_disabled;
	bool lmce_disabled;
	bool ignore_ce;
	bool disabled;
	bool ser;
	bool recovery;
	bool bios_cmci_threshold;
	u8 banks;
	s8 bootlog;
	int tolerant;
	int monarch_timeout;
	int panic_timeout;
	u32 rip_msr;
};

struct mce_vendor_flags {
	/*
	 * Indicates that overflow conditions are not fatal, when set.
	 */
	__u64 overflow_recov	: 1,

	/*
	 * (AMD) SUCCOR stands for S/W UnCorrectable error COntainment and
	 * Recovery. It indicates support for data poisoning in HW and deferred
	 * error interrupts.
	 */
	      succor		: 1,

	/*
	 * (AMD) SMCA: This bit indicates support for Scalable MCA which expands
	 * the register space for each MCA bank and also increases number of
	 * banks. Also, to accommodate the new banks and registers, the MCA
	 * register space is moved to a new MSR range.
	 */
	      smca		: 1,

	      __reserved_0	: 61;
};

struct mca_msr_regs {
	u32 (*ctl)	(int bank);
	u32 (*status)	(int bank);
	u32 (*addr)	(int bank);
	u32 (*misc)	(int bank);
};

extern struct mce_vendor_flags mce_flags;

extern struct mca_msr_regs msr_ops;

enum mce_notifier_prios {
	MCE_PRIO_FIRST		= INT_MAX,
	MCE_PRIO_SRAO		= INT_MAX - 1,
@@ -346,6 +294,7 @@ enum smca_bank_types {
	SMCA_IF,	/* Instruction Fetch */
	SMCA_L2_CACHE,	/* L2 Cache */
	SMCA_DE,	/* Decoder Unit */
	SMCA_RESERVED,	/* Reserved */
	SMCA_EX,	/* Execution Unit */
	SMCA_FP,	/* Floating Point */
	SMCA_L3_CACHE,	/* L3 Cache */
+28 −24
Original line number Diff line number Diff line
@@ -5,27 +5,31 @@
#include <linux/types.h>
#include <linux/ioctl.h>

/* Fields are zero when not available */
/*
 * Fields are zero when not available. Also, this struct is shared with
 * userspace mcelog and thus must keep existing fields at current offsets.
 * Only add new fields to the end of the structure
 */
struct mce {
	__u64 status;
	__u64 misc;
	__u64 addr;
	__u64 mcgstatus;
	__u64 ip;
	__u64 tsc;	/* cpu time stamp counter */
	__u64 time;	/* wall time_t when error was detected */
	__u8  cpuvendor;	/* cpu vendor as encoded in system.h */
	__u8  inject_flags;	/* software inject flags */
	__u8  severity;
	__u64 status;		/* Bank's MCi_STATUS MSR */
	__u64 misc;		/* Bank's MCi_MISC MSR */
	__u64 addr;		/* Bank's MCi_ADDR MSR */
	__u64 mcgstatus;	/* Machine Check Global Status MSR */
	__u64 ip;		/* Instruction Pointer when the error happened */
	__u64 tsc;		/* CPU time stamp counter */
	__u64 time;		/* Wall time_t when error was detected */
	__u8  cpuvendor;	/* Kernel's X86_VENDOR enum */
	__u8  inject_flags;	/* Software inject flags */
	__u8  severity;		/* Error severity */
	__u8  pad;
	__u32 cpuid;		/* CPUID 1 EAX */
	__u8  cs;		/* code segment */
	__u8  bank;	/* machine check bank */
	__u8  cpu;	/* cpu number; obsolete; use extcpu now */
	__u8  finished;   /* entry is valid */
	__u32 extcpu;	/* linux cpu number that detected the error */
	__u8  cs;		/* Code segment */
	__u8  bank;		/* Machine check bank reporting the error */
	__u8  cpu;		/* CPU number; obsoleted by extcpu */
	__u8  finished;		/* Entry is valid */
	__u32 extcpu;		/* Linux CPU number that detected the error */
	__u32 socketid;		/* CPU socket ID */
	__u32 apicid;	/* CPU initial apic ID */
	__u32 apicid;		/* CPU initial APIC ID */
	__u64 mcgcap;		/* MCGCAP MSR: machine check capabilities of CPU */
	__u64 synd;		/* MCA_SYND MSR: only valid on SMCA systems */
	__u64 ipid;		/* MCA_IPID MSR: only valid on SMCA systems */
+57 −2
Original line number Diff line number Diff line
@@ -113,8 +113,6 @@ static inline void mce_register_injector_chain(struct notifier_block *nb) { }
static inline void mce_unregister_injector_chain(struct notifier_block *nb)	{ }
#endif

extern struct mca_config mca_cfg;

#ifndef CONFIG_X86_64
/*
 * On 32-bit systems it would be difficult to safely unmap a poison page
@@ -130,4 +128,61 @@ static inline void mce_unmap_kpfn(unsigned long pfn) {}
#define mce_unmap_kpfn mce_unmap_kpfn
#endif

struct mca_config {
	bool dont_log_ce;
	bool cmci_disabled;
	bool ignore_ce;

	__u64 lmce_disabled		: 1,
	      disabled			: 1,
	      ser			: 1,
	      recovery			: 1,
	      bios_cmci_threshold	: 1,
	      __reserved		: 59;

	u8 banks;
	s8 bootlog;
	int tolerant;
	int monarch_timeout;
	int panic_timeout;
	u32 rip_msr;
};

extern struct mca_config mca_cfg;

struct mce_vendor_flags {
	/*
	 * Indicates that overflow conditions are not fatal, when set.
	 */
	__u64 overflow_recov	: 1,

	/*
	 * (AMD) SUCCOR stands for S/W UnCorrectable error COntainment and
	 * Recovery. It indicates support for data poisoning in HW and deferred
	 * error interrupts.
	 */
	      succor		: 1,

	/*
	 * (AMD) SMCA: This bit indicates support for Scalable MCA which expands
	 * the register space for each MCA bank and also increases number of
	 * banks. Also, to accommodate the new banks and registers, the MCA
	 * register space is moved to a new MSR range.
	 */
	      smca		: 1,

	      __reserved_0	: 61;
};

extern struct mce_vendor_flags mce_flags;

struct mca_msr_regs {
	u32 (*ctl)	(int bank);
	u32 (*status)	(int bank);
	u32 (*addr)	(int bank);
	u32 (*misc)	(int bank);
};

extern struct mca_msr_regs msr_ops;

#endif /* __X86_MCE_INTERNAL_H__ */
+11 −9
Original line number Diff line number Diff line
@@ -273,6 +273,8 @@ static void __print_mce(struct mce *m)
static void print_mce(struct mce *m)
{
	__print_mce(m);

	if (m->cpuvendor != X86_VENDOR_AMD)
		pr_emerg_ratelimited(HW_ERR "Run the above through 'mcelog --ascii'\n");
}

@@ -1516,7 +1518,7 @@ static int __mcheck_cpu_cap_init(void)
		mca_cfg.rip_msr = MSR_IA32_MCG_EIP;

	if (cap & MCG_SER_P)
		mca_cfg.ser = true;
		mca_cfg.ser = 1;

	return 0;
}
@@ -1824,12 +1826,12 @@ void mcheck_cpu_init(struct cpuinfo_x86 *c)
		return;

	if (__mcheck_cpu_cap_init() < 0 || __mcheck_cpu_apply_quirks(c) < 0) {
		mca_cfg.disabled = true;
		mca_cfg.disabled = 1;
		return;
	}

	if (mce_gen_pool_init()) {
		mca_cfg.disabled = true;
		mca_cfg.disabled = 1;
		pr_emerg("Couldn't allocate MCE records pool!\n");
		return;
	}
@@ -1907,11 +1909,11 @@ static int __init mcheck_enable(char *str)
	if (*str == '=')
		str++;
	if (!strcmp(str, "off"))
		cfg->disabled = true;
		cfg->disabled = 1;
	else if (!strcmp(str, "no_cmci"))
		cfg->cmci_disabled = true;
	else if (!strcmp(str, "no_lmce"))
		cfg->lmce_disabled = true;
		cfg->lmce_disabled = 1;
	else if (!strcmp(str, "dont_log_ce"))
		cfg->dont_log_ce = true;
	else if (!strcmp(str, "ignore_ce"))
@@ -1919,9 +1921,9 @@ static int __init mcheck_enable(char *str)
	else if (!strcmp(str, "bootlog") || !strcmp(str, "nobootlog"))
		cfg->bootlog = (str[0] == 'b');
	else if (!strcmp(str, "bios_cmci_threshold"))
		cfg->bios_cmci_threshold = true;
		cfg->bios_cmci_threshold = 1;
	else if (!strcmp(str, "recovery"))
		cfg->recovery = true;
		cfg->recovery = 1;
	else if (isdigit(str[0])) {
		if (get_option(&str, &cfg->tolerant) == 2)
			get_option(&str, &(cfg->monarch_timeout));
@@ -2403,7 +2405,7 @@ device_initcall_sync(mcheck_init_device);
 */
static int __init mcheck_disable(char *str)
{
	mca_cfg.disabled = true;
	mca_cfg.disabled = 1;
	return 1;
}
__setup("nomce", mcheck_disable);
+52 −26
Original line number Diff line number Diff line
@@ -82,6 +82,7 @@ static struct smca_bank_name smca_names[] = {
	[SMCA_IF]	= { "insn_fetch",	"Instruction Fetch Unit" },
	[SMCA_L2_CACHE]	= { "l2_cache",		"L2 Cache" },
	[SMCA_DE]	= { "decode_unit",	"Decode Unit" },
	[SMCA_RESERVED]	= { "reserved",		"Reserved" },
	[SMCA_EX]	= { "execution_unit",	"Execution Unit" },
	[SMCA_FP]	= { "floating_point",	"Floating Point Unit" },
	[SMCA_L3_CACHE]	= { "l3_cache",		"L3 Cache" },
@@ -110,14 +111,14 @@ const char *smca_get_long_name(enum smca_bank_types t)
}
EXPORT_SYMBOL_GPL(smca_get_long_name);

static enum smca_bank_types smca_get_bank_type(struct mce *m)
static enum smca_bank_types smca_get_bank_type(unsigned int bank)
{
	struct smca_bank *b;

	if (m->bank >= N_SMCA_BANK_TYPES)
	if (bank >= MAX_NR_BANKS)
		return N_SMCA_BANK_TYPES;

	b = &smca_banks[m->bank];
	b = &smca_banks[bank];
	if (!b->hwid)
		return N_SMCA_BANK_TYPES;

@@ -127,6 +128,9 @@ static enum smca_bank_types smca_get_bank_type(struct mce *m)
static struct smca_hwid smca_hwid_mcatypes[] = {
	/* { bank_type, hwid_mcatype, xec_bitmap } */

	/* Reserved type */
	{ SMCA_RESERVED, HWID_MCATYPE(0x00, 0x0), 0x0 },

	/* ZN Core (HWID=0xB0) MCA types */
	{ SMCA_LS,	 HWID_MCATYPE(0xB0, 0x0), 0x1FFFEF },
	{ SMCA_IF,	 HWID_MCATYPE(0xB0, 0x1), 0x3FFF },
@@ -427,22 +431,22 @@ static void deferred_error_interrupt_enable(struct cpuinfo_x86 *c)
	wrmsr(MSR_CU_DEF_ERR, low, high);
}

static u32 get_block_address(unsigned int cpu, u32 current_addr, u32 low, u32 high,
			     unsigned int bank, unsigned int block)
static u32 smca_get_block_address(unsigned int cpu, unsigned int bank,
				  unsigned int block)
{
	u32 addr = 0, offset = 0;
	u32 low, high;
	u32 addr = 0;

	if (smca_get_bank_type(bank) == SMCA_RESERVED)
		return addr;

	if (!block)
		return MSR_AMD64_SMCA_MCx_MISC(bank);

	if (mce_flags.smca) {
		if (!block) {
			addr = MSR_AMD64_SMCA_MCx_MISC(bank);
		} else {
	/*
			 * For SMCA enabled processors, BLKPTR field of the
			 * first MISC register (MCx_MISC0) indicates presence of
			 * additional MISC register set (MISC1-4).
	 * For SMCA enabled processors, BLKPTR field of the first MISC register
	 * (MCx_MISC0) indicates presence of additional MISC regs set (MISC1-4).
	 */
			u32 low, high;

	if (rdmsr_safe_on_cpu(cpu, MSR_AMD64_SMCA_MCx_CONFIG(bank), &low, &high))
		return addr;

@@ -451,11 +455,34 @@ static u32 get_block_address(unsigned int cpu, u32 current_addr, u32 low, u32 hi

	if (!rdmsr_safe_on_cpu(cpu, MSR_AMD64_SMCA_MCx_MISC(bank), &low, &high) &&
	    (low & MASK_BLKPTR_LO))
				addr = MSR_AMD64_SMCA_MCx_MISCy(bank, block - 1);
		return MSR_AMD64_SMCA_MCx_MISCy(bank, block - 1);

	return addr;
}

static u32 get_block_address(unsigned int cpu, u32 current_addr, u32 low, u32 high,
			     unsigned int bank, unsigned int block)
{
	u32 addr = 0, offset = 0;

	if ((bank >= mca_cfg.banks) || (block >= NR_BLOCKS))
		return addr;

	/* Get address from already initialized block. */
	if (per_cpu(threshold_banks, cpu)) {
		struct threshold_bank *bankp = per_cpu(threshold_banks, cpu)[bank];

		if (bankp && bankp->blocks) {
			struct threshold_block *blockp = &bankp->blocks[block];

			if (blockp)
				return blockp->address;
		}
	}

	if (mce_flags.smca)
		return smca_get_block_address(cpu, bank, block);

	/* Fall back to method we used for older processors: */
	switch (block) {
	case 0:
@@ -760,7 +787,7 @@ bool amd_mce_is_memory_error(struct mce *m)
	u8 xec = (m->status >> 16) & 0x1f;

	if (mce_flags.smca)
		return smca_get_bank_type(m) == SMCA_UMC && xec == 0x0;
		return smca_get_bank_type(m->bank) == SMCA_UMC && xec == 0x0;

	return m->bank == 4 && xec == 0x8;
}
@@ -1063,7 +1090,7 @@ static struct kobj_type threshold_ktype = {

static const char *get_name(unsigned int bank, struct threshold_block *b)
{
	unsigned int bank_type;
	enum smca_bank_types bank_type;

	if (!mce_flags.smca) {
		if (b && bank == 4)
@@ -1072,11 +1099,10 @@ static const char *get_name(unsigned int bank, struct threshold_block *b)
		return th_names[bank];
	}

	if (!smca_banks[bank].hwid)
	bank_type = smca_get_bank_type(bank);
	if (bank_type >= N_SMCA_BANK_TYPES)
		return NULL;

	bank_type = smca_banks[bank].hwid->bank_type;

	if (b && bank_type == SMCA_UMC) {
		if (b->block < ARRAY_SIZE(smca_umc_block_names))
			return smca_umc_block_names[b->block];
Loading