Commit 147de147 authored by Chen, Gong's avatar Chen, Gong Committed by Tony Luck
Browse files

ACPI, APEI, CPER: Add UEFI 2.4 support for memory error



In latest UEFI spec(by now it is 2.4) memory error definition
for CPER (UEFI 2.4 Appendix N Common Platform Error Record)
adds some new fields. These fields help people to locate
memory error to an actual DIMM location.

Original-author: Tony Luck <tony.luck@intel.com>
Signed-off-by: default avatarChen, Gong <gong.chen@linux.intel.com>
Reviewed-by: default avatarBorislav Petkov <bp@suse.de>
Reviewed-by: default avatarMauro Carvalho Chehab <m.chehab@samsung.com>
Acked-by: default avatarNaveen N. Rao <naveen.n.rao@linux.vnet.ibm.com>
Signed-off-by: default avatarTony Luck <tony.luck@intel.com>
parent dd6dad42
Loading
Loading
Loading
Loading
+1 −2
Original line number Diff line number Diff line
@@ -42,8 +42,7 @@ void apei_mce_report_mem_error(int corrected, struct cper_sec_mem_err *mem_err)
	struct mce m;

	/* Only corrected MC is reported */
	if (!corrected || !(mem_err->validation_bits &
				CPER_MEM_VALID_PHYSICAL_ADDRESS))
	if (!corrected || !(mem_err->validation_bits & CPER_MEM_VALID_PA))
		return;

	mce_setup(&m);
+4 −3
Original line number Diff line number Diff line
@@ -8,7 +8,7 @@
 * various tables, such as ERST, BERT and HEST etc.
 *
 * For more information about CPER, please refer to Appendix N of UEFI
 * Specification version 2.3.
 * Specification version 2.4.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License version
@@ -191,16 +191,17 @@ static const char *cper_mem_err_type_strs[] = {
	"memory sparing",
	"scrub corrected error",
	"scrub uncorrected error",
	"physical memory map-out event",
};

static void cper_print_mem(const char *pfx, const struct cper_sec_mem_err *mem)
{
	if (mem->validation_bits & CPER_MEM_VALID_ERROR_STATUS)
		printk("%s""error_status: 0x%016llx\n", pfx, mem->error_status);
	if (mem->validation_bits & CPER_MEM_VALID_PHYSICAL_ADDRESS)
	if (mem->validation_bits & CPER_MEM_VALID_PA)
		printk("%s""physical_address: 0x%016llx\n",
		       pfx, mem->physical_addr);
	if (mem->validation_bits & CPER_MEM_VALID_PHYSICAL_ADDRESS_MASK)
	if (mem->validation_bits & CPER_MEM_VALID_PA_MASK)
		printk("%s""physical_address_mask: 0x%016llx\n",
		       pfx, mem->physical_addr_mask);
	if (mem->validation_bits & CPER_MEM_VALID_NODE)
+2 −2
Original line number Diff line number Diff line
@@ -419,7 +419,7 @@ static void ghes_handle_memory_failure(struct acpi_generic_data *gdata, int sev)

	if (sec_sev == GHES_SEV_CORRECTED &&
	    (gdata->flags & CPER_SEC_ERROR_THRESHOLD_EXCEEDED) &&
	    (mem_err->validation_bits & CPER_MEM_VALID_PHYSICAL_ADDRESS)) {
	    (mem_err->validation_bits & CPER_MEM_VALID_PA)) {
		pfn = mem_err->physical_addr >> PAGE_SHIFT;
		if (pfn_valid(pfn))
			memory_failure_queue(pfn, 0, MF_SOFT_OFFLINE);
@@ -430,7 +430,7 @@ static void ghes_handle_memory_failure(struct acpi_generic_data *gdata, int sev)
	}
	if (sev == GHES_SEV_RECOVERABLE &&
	    sec_sev == GHES_SEV_RECOVERABLE &&
	    mem_err->validation_bits & CPER_MEM_VALID_PHYSICAL_ADDRESS) {
	    mem_err->validation_bits & CPER_MEM_VALID_PA) {
		pfn = mem_err->physical_addr >> PAGE_SHIFT;
		memory_failure_queue(pfn, 0, 0);
	}
+2 −3
Original line number Diff line number Diff line
@@ -297,15 +297,14 @@ void ghes_edac_report_mem_error(struct ghes *ghes, int sev,
	}

	/* Error address */
	if (mem_err->validation_bits & CPER_MEM_VALID_PHYSICAL_ADDRESS) {
	if (mem_err->validation_bits & CPER_MEM_VALID_PA) {
		e->page_frame_number = mem_err->physical_addr >> PAGE_SHIFT;
		e->offset_in_page = mem_err->physical_addr & ~PAGE_MASK;
	}

	/* Error grain */
	if (mem_err->validation_bits & CPER_MEM_VALID_PHYSICAL_ADDRESS_MASK) {
	if (mem_err->validation_bits & CPER_MEM_VALID_PA_MASK)
		e->grain = ~(mem_err->physical_addr_mask & ~PAGE_MASK);
	}

	/* Memory error location, mapped on e->location */
	p = e->location;
+9 −2
Original line number Diff line number Diff line
@@ -218,8 +218,8 @@ enum {
#define CPER_PROC_VALID_IP			0x1000

#define CPER_MEM_VALID_ERROR_STATUS		0x0001
#define CPER_MEM_VALID_PHYSICAL_ADDRESS		0x0002
#define CPER_MEM_VALID_PHYSICAL_ADDRESS_MASK	0x0004
#define CPER_MEM_VALID_PA			0x0002
#define CPER_MEM_VALID_PA_MASK			0x0004
#define CPER_MEM_VALID_NODE			0x0008
#define CPER_MEM_VALID_CARD			0x0010
#define CPER_MEM_VALID_MODULE			0x0020
@@ -232,6 +232,9 @@ enum {
#define CPER_MEM_VALID_RESPONDER_ID		0x1000
#define CPER_MEM_VALID_TARGET_ID		0x2000
#define CPER_MEM_VALID_ERROR_TYPE		0x4000
#define CPER_MEM_VALID_RANK_NUMBER		0x8000
#define CPER_MEM_VALID_CARD_HANDLE		0x10000
#define CPER_MEM_VALID_MODULE_HANDLE		0x20000

#define CPER_PCIE_VALID_PORT_TYPE		0x0001
#define CPER_PCIE_VALID_VERSION			0x0002
@@ -347,6 +350,10 @@ struct cper_sec_mem_err {
	__u64	responder_id;
	__u64	target_id;
	__u8	error_type;
	__u8	reserved;
	__u16	rank;
	__u16	mem_array_handle;	/* card handle in UEFI 2.4 */
	__u16	mem_dev_handle;		/* module handle in UEFI 2.4 */
};

struct cper_sec_pcie {