Commit f8851cb2 authored by Linus Torvalds's avatar Linus Torvalds
Browse files

Merge tag 'edac_updates_for_5.9' of git://git.kernel.org/pub/scm/linux/kernel/git/ras/ras

Pull EDAC updates from Tony Luck:
 "Boris is on vacation and asked me to send you the EDAC changes"

* tag 'edac_updates_for_5.9' of git://git.kernel.org/pub/scm/linux/kernel/git/ras/ras:
  EDAC: Fix reference count leaks
  EDAC: Remove edac_get_dimm_by_index()
  EDAC/ghes: Scan the system once on driver init
  EDAC/ghes: Remove unused members of struct ghes_edac_pvt, rename it to ghes_pvt
  EDAC/ghes: Setup DIMM label from DMI and use it in error reports
  EDAC, {skx,i10nm}: Use CPU stepping macro to pass configurations
  EDAC/mc: Call edac_inc_ue_error() before panic
  EDAC, pnd2: Set MCE_PRIO_EDAC priority for pnd2_mce_dec notifier
parents d4db4e55 0f959e19
Loading
Loading
Loading
Loading
+1 −0
Original line number Diff line number Diff line
@@ -275,6 +275,7 @@ int edac_device_register_sysfs_main_kobj(struct edac_device_ctl_info *edac_dev)

	/* Error exit stack */
err_kobj_reg:
	kobject_put(&edac_dev->kobj);
	module_put(edac_dev->owner);

err_out:
+2 −2
Original line number Diff line number Diff line
@@ -950,6 +950,8 @@ static void edac_ue_error(struct edac_raw_error_desc *e)
			e->other_detail);
	}

	edac_inc_ue_error(e);

	if (edac_mc_get_panic_on_ue()) {
		panic("UE %s%son %s (%s page:0x%lx offset:0x%lx grain:%ld%s%s)\n",
			e->msg,
@@ -959,8 +961,6 @@ static void edac_ue_error(struct edac_raw_error_desc *e)
			*e->other_detail ? " - " : "",
			e->other_detail);
	}

	edac_inc_ue_error(e);
}

static void edac_inc_csrow(struct edac_raw_error_desc *e, int row, int chan)
+1 −1
Original line number Diff line number Diff line
@@ -386,7 +386,7 @@ static int edac_pci_main_kobj_setup(void)

	/* Error unwind stack */
kobject_init_and_add_fail:
	kfree(edac_pci_top_main_kobj);
	kobject_put(edac_pci_top_main_kobj);

kzalloc_fail:
	module_put(THIS_MODULE);
+193 −130
Original line number Diff line number Diff line
@@ -15,9 +15,7 @@
#include "edac_module.h"
#include <ras/ras_event.h>

struct ghes_edac_pvt {
	struct list_head list;
	struct ghes *ghes;
struct ghes_pvt {
	struct mem_ctl_info *mci;

	/* Buffers for the error handling routine */
@@ -32,7 +30,16 @@ static refcount_t ghes_refcount = REFCOUNT_INIT(0);
 * also provides the necessary (implicit) memory barrier for the SMP
 * case to make the pointer visible on another CPU.
 */
static struct ghes_edac_pvt *ghes_pvt;
static struct ghes_pvt *ghes_pvt;

/*
 * This driver's representation of the system hardware, as collected
 * from DMI.
 */
static struct ghes_hw_desc {
	/* DIMM count; ghes_edac_register() forces it to 1 when DMI reports none */
	int num_dimms;
	/* Array filled by enumerate_dimms(); freed and NULLed in ghes_edac_register() */
	struct dimm_info *dimms;
} ghes_hw;

/* GHES registration mutex */
static DEFINE_MUTEX(ghes_reg_mutex);
@@ -74,44 +81,35 @@ struct memdev_dmi_entry {
	u16 conf_mem_clk_speed;
} __attribute__((__packed__));

struct ghes_edac_dimm_fill {
	struct mem_ctl_info *mci;
	unsigned int count;
};

/*
 * DMI walk callback: bump the int counter that @arg points to once for
 * every DMI_ENTRY_MEM_DEVICE record; all other record types are ignored.
 */
static void ghes_edac_count_dimms(const struct dmi_header *dh, void *arg)
{
	int *counter = arg;

	if (dh->type != DMI_ENTRY_MEM_DEVICE)
		return;

	++*counter;
}

static int get_dimm_smbios_index(struct mem_ctl_info *mci, u16 handle)
static struct dimm_info *find_dimm_by_handle(struct mem_ctl_info *mci, u16 handle)
{
	struct dimm_info *dimm;

	mci_for_each_dimm(mci, dimm) {
		if (dimm->smbios_handle == handle)
			return dimm->idx;
			return dimm;
	}

	return -1;
	return NULL;
}

static void ghes_edac_dmidecode(const struct dmi_header *dh, void *arg)
static void dimm_setup_label(struct dimm_info *dimm, u16 handle)
{
	struct ghes_edac_dimm_fill *dimm_fill = arg;
	struct mem_ctl_info *mci = dimm_fill->mci;
	const char *bank = NULL, *device = NULL;

	if (dh->type == DMI_ENTRY_MEM_DEVICE) {
		struct memdev_dmi_entry *entry = (struct memdev_dmi_entry *)dh;
		struct dimm_info *dimm = edac_get_dimm(mci, dimm_fill->count, 0, 0);
	dmi_memdev_name(handle, &bank, &device);

	/* both strings must be non-zero */
	if (bank && *bank && device && *device)
		snprintf(dimm->label, sizeof(dimm->label), "%s %s", bank, device);
}

static void assign_dmi_dimm_info(struct dimm_info *dimm, struct memdev_dmi_entry *entry)
{
	u16 rdr_mask = BIT(7) | BIT(13);

	if (entry->size == 0xffff) {
			pr_info("Can't get DIMM%i size\n",
				dimm_fill->count);
		pr_info("Can't get DIMM%i size\n", dimm->idx);
		dimm->nr_pages = MiB_TO_PAGES(32);/* Unknown */
	} else if (entry->size == 0x7fff) {
		dimm->nr_pages = MiB_TO_PAGES(entry->extended_size);
@@ -179,13 +177,11 @@ static void ghes_edac_dmidecode(const struct dmi_header *dh, void *arg)
	dimm->dtype = DEV_UNKNOWN;
	dimm->grain = 128;		/* Likely, worse case */

		/*
		 * FIXME: It shouldn't be hard to also fill the DIMM labels
		 */
	dimm_setup_label(dimm, entry->handle);

	if (dimm->nr_pages) {
		edac_dbg(1, "DIMM%i: %s size = %d MB%s\n",
				dimm_fill->count, edac_mem_types[dimm->mtype],
			dimm->idx, edac_mem_types[dimm->mtype],
			PAGES_TO_MiB(dimm->nr_pages),
			(dimm->edac_mode != EDAC_NONE) ? "(ECC)" : "");
		edac_dbg(2, "\ttype %d, detail 0x%02x, width %d(total %d)\n",
@@ -194,16 +190,56 @@ static void ghes_edac_dmidecode(const struct dmi_header *dh, void *arg)
	}

	dimm->smbios_handle = entry->handle;
}

		dimm_fill->count++;
/*
 * dmi_walk() callback: append one entry to the ghes_hw_desc passed in @arg
 * for every DMI memory device record.
 *
 * @dh:  DMI record being visited; anything other than DMI_ENTRY_MEM_DEVICE
 *       is ignored.
 * @arg: the struct ghes_hw_desc being filled in.
 *
 * The dimms array is grown in steps of 16 entries. If growing it fails, a
 * one-time warning is emitted and this record is dropped; the entries
 * collected so far stay intact.
 */
static void enumerate_dimms(const struct dmi_header *dh, void *arg)
{
	struct memdev_dmi_entry *entry = (struct memdev_dmi_entry *)dh;
	struct ghes_hw_desc *hw = (struct ghes_hw_desc *)arg;
	struct dimm_info *d;

	if (dh->type != DMI_ENTRY_MEM_DEVICE)
		return;

	/* Enlarge the array by another 16 entries whenever it is full (or empty) */
	if (!(hw->num_dimms % 16)) {
		struct dimm_info *new;

		new = krealloc(hw->dimms, (hw->num_dimms + 16) * sizeof(struct dimm_info),
			       GFP_KERNEL);
		if (!new) {
			WARN_ON_ONCE(1);
			return;
		}

		hw->dimms = new;
	}

	d = &hw->dimms[hw->num_dimms];

	/*
	 * The memory handed back by krealloc() is not zeroed. Clear the new
	 * slot so that fields which are only set conditionally -- notably
	 * ->label, which dimm_setup_label() leaves untouched when DMI has no
	 * bank/device strings -- do not contain garbage when they are read
	 * back later (ghes_edac_register() does strlen() on ->label).
	 */
	memset(d, 0, sizeof(*d));
	d->idx = hw->num_dimms;

	assign_dmi_dimm_info(d, entry);

	hw->num_dimms++;
}

/*
 * Walk the DMI tables with enumerate_dimms() to populate ghes_hw.
 * A function-local flag turns every call after the first into a no-op.
 */
static void ghes_scan_system(void)
{
	static bool already_scanned;

	if (!already_scanned) {
		dmi_walk(enumerate_dimms, &ghes_hw);
		already_scanned = true;
	}
}

void ghes_edac_report_mem_error(int sev, struct cper_sec_mem_err *mem_err)
{
	struct edac_raw_error_desc *e;
	struct mem_ctl_info *mci;
	struct ghes_edac_pvt *pvt;
	struct ghes_pvt *pvt;
	unsigned long flags;
	char *p;

@@ -228,7 +264,6 @@ void ghes_edac_report_mem_error(int sev, struct cper_sec_mem_err *mem_err)
	memset(e, 0, sizeof (*e));
	e->error_count = 1;
	e->grain = 1;
	strcpy(e->label, "unknown label");
	e->msg = pvt->msg;
	e->other_detail = pvt->other_detail;
	e->top_layer = -1;
@@ -345,7 +380,7 @@ void ghes_edac_report_mem_error(int sev, struct cper_sec_mem_err *mem_err)
		p += sprintf(p, "bit_pos:%d ", mem_err->bit_pos);
	if (mem_err->validation_bits & CPER_MEM_VALID_MODULE_HANDLE) {
		const char *bank = NULL, *device = NULL;
		int index = -1;
		struct dimm_info *dimm;

		dmi_memdev_name(mem_err->mem_dev_handle, &bank, &device);
		if (bank != NULL && device != NULL)
@@ -354,13 +389,18 @@ void ghes_edac_report_mem_error(int sev, struct cper_sec_mem_err *mem_err)
			p += sprintf(p, "DIMM DMI handle: 0x%.4x ",
				     mem_err->mem_dev_handle);

		index = get_dimm_smbios_index(mci, mem_err->mem_dev_handle);
		if (index >= 0)
			e->top_layer = index;
		dimm = find_dimm_by_handle(mci, mem_err->mem_dev_handle);
		if (dimm) {
			e->top_layer = dimm->idx;
			strcpy(e->label, dimm->label);
		}
	}
	if (p > e->location)
		*(p - 1) = '\0';

	if (!*e->label)
		strcpy(e->label, "unknown memory");

	/* All other fields are mapped on e->other_detail */
	p = pvt->other_detail;
	p += snprintf(p, sizeof(pvt->other_detail),
@@ -455,13 +495,12 @@ static struct acpi_platform_list plat_list[] = {
int ghes_edac_register(struct ghes *ghes, struct device *dev)
{
	bool fake = false;
	int rc = 0, num_dimm = 0;
	struct mem_ctl_info *mci;
	struct ghes_edac_pvt *pvt;
	struct ghes_pvt *pvt;
	struct edac_mc_layer layers[1];
	struct ghes_edac_dimm_fill dimm_fill;
	unsigned long flags;
	int idx = -1;
	int rc = 0;

	if (IS_ENABLED(CONFIG_X86)) {
		/* Check if safe to enable on this system */
@@ -481,20 +520,19 @@ int ghes_edac_register(struct ghes *ghes, struct device *dev)
	if (refcount_inc_not_zero(&ghes_refcount))
		goto unlock;

	/* Get the number of DIMMs */
	dmi_walk(ghes_edac_count_dimms, &num_dimm);
	ghes_scan_system();

	/* Check if we've got a bogus BIOS */
	if (num_dimm == 0) {
	if (!ghes_hw.num_dimms) {
		fake = true;
		num_dimm = 1;
		ghes_hw.num_dimms = 1;
	}

	layers[0].type = EDAC_MC_LAYER_ALL_MEM;
	layers[0].size = num_dimm;
	layers[0].size = ghes_hw.num_dimms;
	layers[0].is_virt_csrow = true;

	mci = edac_mc_alloc(0, ARRAY_SIZE(layers), layers, sizeof(struct ghes_edac_pvt));
	mci = edac_mc_alloc(0, ARRAY_SIZE(layers), layers, sizeof(struct ghes_pvt));
	if (!mci) {
		pr_info("Can't allocate memory for EDAC data\n");
		rc = -ENOMEM;
@@ -502,7 +540,6 @@ int ghes_edac_register(struct ghes *ghes, struct device *dev)
	}

	pvt		= mci->pvt_info;
	pvt->ghes	= ghes;
	pvt->mci	= mci;

	mci->pdev = dev;
@@ -523,13 +560,34 @@ int ghes_edac_register(struct ghes *ghes, struct device *dev)
		pr_info("So, the end result of using this driver varies from vendor to vendor.\n");
		pr_info("If you find incorrect reports, please contact your hardware vendor\n");
		pr_info("to correct its BIOS.\n");
		pr_info("This system has %d DIMM sockets.\n", num_dimm);
		pr_info("This system has %d DIMM sockets.\n", ghes_hw.num_dimms);
	}

	if (!fake) {
		dimm_fill.count = 0;
		dimm_fill.mci = mci;
		dmi_walk(ghes_edac_dmidecode, &dimm_fill);
		struct dimm_info *src, *dst;
		int i = 0;

		mci_for_each_dimm(mci, dst) {
			src = &ghes_hw.dimms[i];

			dst->idx	   = src->idx;
			dst->smbios_handle = src->smbios_handle;
			dst->nr_pages	   = src->nr_pages;
			dst->mtype	   = src->mtype;
			dst->edac_mode	   = src->edac_mode;
			dst->dtype	   = src->dtype;
			dst->grain	   = src->grain;

			/*
			 * If no src->label, preserve default label assigned
			 * from EDAC core.
			 */
			if (strlen(src->label))
				memcpy(dst->label, src->label, sizeof(src->label));

			i++;
		}

	} else {
		struct dimm_info *dimm = edac_get_dimm(mci, 0, 0, 0);

@@ -542,7 +600,7 @@ int ghes_edac_register(struct ghes *ghes, struct device *dev)

	rc = edac_mc_add_mc(mci);
	if (rc < 0) {
		pr_info("Can't register at EDAC core\n");
		pr_info("Can't register with the EDAC core\n");
		edac_mc_free(mci);
		rc = -ENODEV;
		goto unlock;
@@ -556,6 +614,11 @@ int ghes_edac_register(struct ghes *ghes, struct device *dev)
	refcount_set(&ghes_refcount, 1);

unlock:

	/* Not needed anymore */
	kfree(ghes_hw.dimms);
	ghes_hw.dimms = NULL;

	mutex_unlock(&ghes_reg_mutex);

	return rc;
+5 −7
Original line number Diff line number Diff line
@@ -135,9 +135,11 @@ static struct res_config i10nm_cfg1 = {
};

static const struct x86_cpu_id i10nm_cpuids[] = {
	X86_MATCH_INTEL_FAM6_MODEL(ATOM_TREMONT_D,	&i10nm_cfg0),
	X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_X,		&i10nm_cfg0),
	X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_D,		&i10nm_cfg1),
	X86_MATCH_INTEL_FAM6_MODEL_STEPPINGS(ATOM_TREMONT_D,	X86_STEPPINGS(0x0, 0x3), &i10nm_cfg0),
	X86_MATCH_INTEL_FAM6_MODEL_STEPPINGS(ATOM_TREMONT_D,	X86_STEPPINGS(0x4, 0xf), &i10nm_cfg1),
	X86_MATCH_INTEL_FAM6_MODEL_STEPPINGS(ICELAKE_X,		X86_STEPPINGS(0x0, 0x3), &i10nm_cfg0),
	X86_MATCH_INTEL_FAM6_MODEL_STEPPINGS(ICELAKE_X,		X86_STEPPINGS(0x4, 0xf), &i10nm_cfg1),
	X86_MATCH_INTEL_FAM6_MODEL_STEPPINGS(ICELAKE_D,		X86_STEPPINGS(0x0, 0xf), &i10nm_cfg1),
	{}
};
MODULE_DEVICE_TABLE(x86cpu, i10nm_cpuids);
@@ -264,10 +266,6 @@ static int __init i10nm_init(void)

	cfg = (struct res_config *)id->driver_data;

	/* Newer steppings have different offset for ATOM_TREMONT_D/ICELAKE_X */
	if (boot_cpu_data.x86_stepping >= 4)
		cfg->busno_cfg_offset = 0xd0;

	rc = skx_get_hi_lo(0x09a2, off, &tolm, &tohm);
	if (rc)
		return rc;
Loading