Commit 9c91e6a5 authored by Linus Torvalds's avatar Linus Torvalds
Browse files
Pull EDAC updates from Borislav Petkov:
 "A lot of changes this time around, details below.

  From the next cycle onwards, we'll switch the EDAC tree to topic
  branches (instead of a single edac-for-next branch), which should make
  handling the changes more flexible, hopefully. We'll see.

  Summary:

   - Rework error logging functions to accept a count of errors
     parameter (Hanna Hawa)

   - Part one of substantial EDAC core + ghes_edac driver cleanup
     (Robert Richter)

   - Print additional useful logging information in skx_* (Tony Luck)

   - Improve amd64_edac hw detection + cleanups (Yazen Ghannam)

   - Misc cleanups, fixes and code improvements"

* tag 'edac_for_5.5' of git://git.kernel.org/pub/scm/linux/kernel/git/ras/ras: (35 commits)
  EDAC/altera: Use the Altera System Manager driver
  EDAC/altera: Cleanup the ECC Manager
  EDAC/altera: Use fast register IO for S10 IRQs
  EDAC/ghes: Do not warn when incrementing refcount on 0
  EDAC/Documentation: Describe CPER module definition and DIMM ranks
  EDAC: Unify the mc_event tracepoint call
  EDAC/ghes: Remove intermediate buffer pvt->detail_location
  EDAC/ghes: Fix grain calculation
  EDAC/ghes: Use standard kernel macros for page calculations
  EDAC: Remove misleading comment in struct edac_raw_error_desc
  EDAC/mc: Reduce indentation level in edac_mc_handle_error()
  EDAC/mc: Remove needless zero string termination
  EDAC/mc: Do not BUG_ON() in edac_mc_alloc()
  EDAC: Introduce an mci_for_each_dimm() iterator
  EDAC: Remove EDAC_DIMM_OFF() macro
  EDAC: Replace EDAC_DIMM_PTR() macro with edac_get_dimm() function
  EDAC/amd64: Get rid of the ECC disabled long message
  EDAC/ghes: Fix locking and memory barrier issues
  EDAC/amd64: Check for memory before fully initializing an instance
  EDAC/amd64: Use cached data when checking for ECC
  ...
parents 752272f1 5781823f
Loading
Loading
Loading
Loading
+19 −12
Original line number Diff line number Diff line
@@ -330,9 +330,12 @@ There can be multiple csrows and multiple channels.

.. [#f4] Nowadays, the term DIMM (Dual In-line Memory Module) is widely
  used to refer to a memory module, although there are other memory
  packaging alternatives, like SO-DIMM, SIMM, etc. Along this document,
  and inside the EDAC system, the term "dimm" is used for all memory
  modules, even when they use a different kind of packaging.
  packaging alternatives, like SO-DIMM, SIMM, etc. The UEFI
  specification (Version 2.7) defines a memory module in the Common
  Platform Error Record (CPER) section to be an SMBIOS Memory Device
  (Type 17). Along this document, and inside the EDAC subsystem, the term
  "dimm" is used for all memory modules, even when they use a
  different kind of packaging.

Memory controllers allow for several csrows, with 8 csrows being a
typical value. Yet, the actual number of csrows depends on the layout of
@@ -349,12 +352,14 @@ controllers. The following example will assume 2 channels:
	|            |  ``ch0``  |  ``ch1``  |
	+============+===========+===========+
	| ``csrow0`` |  DIMM_A0  |  DIMM_B0  |
	+------------+           |           |
	| ``csrow1`` |           |           |
	|            |   rank0   |   rank0   |
	+------------+     -     |     -     |
	| ``csrow1`` |   rank1   |   rank1   |
	+------------+-----------+-----------+
	| ``csrow2`` |  DIMM_A1  | DIMM_B1   |
	+------------+           |           |
	| ``csrow3`` |           |           |
	|            |   rank0   |   rank0   |
	+------------+     -     |     -     |
	| ``csrow3`` |   rank1   |   rank1   |
	+------------+-----------+-----------+

In the above example, there are 4 physical slots on the motherboard
@@ -374,11 +379,13 @@ which the memory DIMM is placed. Thus, when 1 DIMM is placed in each
Channel, the csrows cross both DIMMs.

Memory DIMMs come single or dual "ranked". A rank is a populated csrow.
Thus, 2 single ranked DIMMs, placed in slots DIMM_A0 and DIMM_B0 above
will have just one csrow (csrow0). csrow1 will be empty. On the other
hand, when 2 dual ranked DIMMs are similarly placed, then both csrow0
and csrow1 will be populated. The pattern repeats itself for csrow2 and
csrow3.
In the example above 2 dual ranked DIMMs are similarly placed. Thus,
both csrow0 and csrow1 are populated. On the other hand, when 2 single
ranked DIMMs are placed in slots DIMM_A0 and DIMM_B0, then they will
have just one csrow (csrow0) and csrow1 will be empty. The pattern
repeats itself for csrow2 and csrow3. Also note that some memory
controllers don't have any logic to identify the memory module, see
``rankX`` directories below.

The representation of the above is reflected in the directory
tree in EDAC's sysfs interface. Starting in directory
+9 −143
Original line number Diff line number Diff line
@@ -14,6 +14,7 @@
#include <linux/interrupt.h>
#include <linux/irqchip/chained_irq.h>
#include <linux/kernel.h>
#include <linux/mfd/altera-sysmgr.h>
#include <linux/mfd/syscon.h>
#include <linux/notifier.h>
#include <linux/of_address.h>
@@ -275,7 +276,6 @@ release:
	return ret;
}

static int socfpga_is_a10(void);
static int altr_sdram_probe(struct platform_device *pdev)
{
	const struct of_device_id *id;
@@ -399,7 +399,7 @@ static int altr_sdram_probe(struct platform_device *pdev)
		goto err;

	/* Only the Arria10 has separate IRQs */
	if (socfpga_is_a10()) {
	if (of_machine_is_compatible("altr,socfpga-arria10")) {
		/* Arria10 specific initialization */
		res = a10_init(mc_vbase);
		if (res < 0)
@@ -502,68 +502,6 @@ module_platform_driver(altr_sdram_edac_driver);

#endif	/* CONFIG_EDAC_ALTERA_SDRAM */

/**************** Stratix 10 EDAC Memory Controller Functions ************/

/**
 * s10_protected_reg_write - Write to a protected SMC register.
 * @context: Register-bank base offset, smuggled through the regmap
 *	     context pointer (cast back to an integer offset here).
 * @reg: Address of register
 * @val: Value to write
 * Return: INTEL_SIP_SMC_STATUS_OK (0) on success
 *	   INTEL_SIP_SMC_REG_ERROR on error
 *	   INTEL_SIP_SMC_RETURN_UNKNOWN_FUNCTION if not supported
 */
static int s10_protected_reg_write(void *context, unsigned int reg,
				   unsigned int val)
{
	struct arm_smccc_res result;
	unsigned long offset = (unsigned long)context;

	/* Protected register: the write must go through the secure monitor. */
	arm_smccc_smc(INTEL_SIP_SMC_REG_WRITE, offset + reg, val, 0, 0,
		      0, 0, 0, &result);

	return (int)result.a0;
}

/**
 * s10_protected_reg_read - Read the status of a protected SMC register.
 * @context: Register-bank base offset, smuggled through the regmap
 *	     context pointer (cast back to an integer offset here).
 * @reg: Address of register
 * @val: Value read
 * Return: INTEL_SIP_SMC_STATUS_OK (0) on success
 *	   INTEL_SIP_SMC_REG_ERROR on error
 *	   INTEL_SIP_SMC_RETURN_UNKNOWN_FUNCTION if not supported
 */
static int s10_protected_reg_read(void *context, unsigned int reg,
				  unsigned int *val)
{
	struct arm_smccc_res result;
	unsigned long offset = (unsigned long)context;

	/* Protected register: the read must go through the secure monitor. */
	arm_smccc_smc(INTEL_SIP_SMC_REG_READ, offset + reg, 0, 0, 0,
		      0, 0, 0, &result);

	/* a1 carries the register value; a0 carries the SMC status code. */
	*val = (unsigned int)result.a1;

	return (int)result.a0;
}

/*
 * regmap configuration for the Stratix10 SDRAM controller. These registers
 * are not accessed via plain MMIO; reads and writes are routed through the
 * SMC-based s10_protected_reg_read/write helpers above, one register per
 * call (hence use_single_read/use_single_write).
 */
static const struct regmap_config s10_sdram_regmap_cfg = {
	.name = "s10_ddr",
	.reg_bits = 32,
	.reg_stride = 4,
	.val_bits = 32,
	.max_register = 0xffd12228,	/* highest valid register address */
	.reg_read = s10_protected_reg_read,
	.reg_write = s10_protected_reg_write,
	.use_single_read = true,
	.use_single_write = true,
};

/************** </Stratix10 EDAC Memory Controller Functions> ***********/

/************************* EDAC Parent Probe *************************/

static const struct of_device_id altr_edac_device_of_match[];
@@ -1008,16 +946,6 @@ static int __maybe_unused altr_init_memory_port(void __iomem *ioaddr, int port)
	return ret;
}

/* Non-zero when the device tree identifies an Arria10 SoCFPGA machine. */
static int socfpga_is_a10(void)
{
	int compatible = of_machine_is_compatible("altr,socfpga-arria10");

	return compatible;
}

/* Non-zero when the device tree identifies a Stratix10 SoCFPGA machine. */
static int socfpga_is_s10(void)
{
	int compatible = of_machine_is_compatible("altr,socfpga-stratix10");

	return compatible;
}

static __init int __maybe_unused
altr_init_a10_ecc_block(struct device_node *np, u32 irq_mask,
			u32 ecc_ctrl_en_mask, bool dual_port)
@@ -1033,34 +961,10 @@ altr_init_a10_ecc_block(struct device_node *np, u32 irq_mask,
	/* Get the ECC Manager - parent of the device EDACs */
	np_eccmgr = of_get_parent(np);

	if (socfpga_is_a10()) {
		ecc_mgr_map = syscon_regmap_lookup_by_phandle(np_eccmgr,
	ecc_mgr_map =
		altr_sysmgr_regmap_lookup_by_phandle(np_eccmgr,
						     "altr,sysmgr-syscon");
	} else {
		struct device_node *sysmgr_np;
		struct resource res;
		uintptr_t base;

		sysmgr_np = of_parse_phandle(np_eccmgr,
					     "altr,sysmgr-syscon", 0);
		if (!sysmgr_np) {
			edac_printk(KERN_ERR, EDAC_DEVICE,
				    "Unable to find altr,sysmgr-syscon\n");
			return -ENODEV;
		}

		if (of_address_to_resource(sysmgr_np, 0, &res)) {
			of_node_put(sysmgr_np);
			return -ENOMEM;
		}

		/* Need physical address for SMCC call */
		base = res.start;

		ecc_mgr_map = regmap_init(NULL, NULL, (void *)base,
					  &s10_sdram_regmap_cfg);
		of_node_put(sysmgr_np);
	}
	of_node_put(np_eccmgr);
	if (IS_ERR(ecc_mgr_map)) {
		edac_printk(KERN_ERR, EDAC_DEVICE,
@@ -1125,9 +1029,6 @@ static int __init __maybe_unused altr_init_a10_ecc_device_type(char *compat)
	int irq;
	struct device_node *child, *np;

	if (!socfpga_is_a10() && !socfpga_is_s10())
		return -ENODEV;

	np = of_find_compatible_node(NULL, NULL,
				     "altr,socfpga-a10-ecc-manager");
	if (!np) {
@@ -2178,33 +2079,9 @@ static int altr_edac_a10_probe(struct platform_device *pdev)
	platform_set_drvdata(pdev, edac);
	INIT_LIST_HEAD(&edac->a10_ecc_devices);

	if (socfpga_is_a10()) {
	edac->ecc_mgr_map =
			syscon_regmap_lookup_by_phandle(pdev->dev.of_node,
		altr_sysmgr_regmap_lookup_by_phandle(pdev->dev.of_node,
						     "altr,sysmgr-syscon");
	} else {
		struct device_node *sysmgr_np;
		struct resource res;
		uintptr_t base;

		sysmgr_np = of_parse_phandle(pdev->dev.of_node,
					     "altr,sysmgr-syscon", 0);
		if (!sysmgr_np) {
			edac_printk(KERN_ERR, EDAC_DEVICE,
				    "Unable to find altr,sysmgr-syscon\n");
			return -ENODEV;
		}

		if (of_address_to_resource(sysmgr_np, 0, &res))
			return -ENOMEM;

		/* Need physical address for SMCC call */
		base = res.start;

		edac->ecc_mgr_map = devm_regmap_init(&pdev->dev, NULL,
						     (void *)base,
						     &s10_sdram_regmap_cfg);
	}

	if (IS_ERR(edac->ecc_mgr_map)) {
		edac_printk(KERN_ERR, EDAC_DEVICE,
@@ -2270,18 +2147,7 @@ static int altr_edac_a10_probe(struct platform_device *pdev)
		if (!of_device_is_available(child))
			continue;

		if (of_device_is_compatible(child, "altr,socfpga-a10-l2-ecc") || 
		    of_device_is_compatible(child, "altr,socfpga-a10-ocram-ecc") ||
		    of_device_is_compatible(child, "altr,socfpga-eth-mac-ecc") ||
		    of_device_is_compatible(child, "altr,socfpga-nand-ecc") ||
		    of_device_is_compatible(child, "altr,socfpga-dma-ecc") ||
		    of_device_is_compatible(child, "altr,socfpga-usb-ecc") ||
		    of_device_is_compatible(child, "altr,socfpga-qspi-ecc") ||
#ifdef CONFIG_EDAC_ALTERA_SDRAM
		    of_device_is_compatible(child, "altr,sdram-edac-s10") ||
#endif
		    of_device_is_compatible(child, "altr,socfpga-sdmmc-ecc"))

		if (of_match_node(altr_edac_a10_device_of_match, child))
			altr_edac_a10_device_add(edac, child);

#ifdef CONFIG_EDAC_ALTERA_SDRAM
+102 −115
Original line number Diff line number Diff line
@@ -16,12 +16,11 @@ module_param(ecc_enable_override, int, 0644);

static struct msr __percpu *msrs;

static struct amd64_family_type *fam_type;

/* Per-node stuff */
static struct ecc_settings **ecc_stngs;

/* Number of Unified Memory Controllers */
static u8 num_umcs;

/*
 * Valid scrub rates for the K8 hardware memory scrubber. We map the scrubbing
 * bandwidth to a valid bit pattern. The 'set' operation finds the 'matching-
@@ -454,7 +453,7 @@ static void get_cs_base_and_mask(struct amd64_pvt *pvt, int csrow, u8 dct,
	for (i = 0; i < pvt->csels[dct].m_cnt; i++)

#define for_each_umc(i) \
	for (i = 0; i < num_umcs; i++)
	for (i = 0; i < fam_type->max_mcs; i++)

/*
 * @input_addr is an InputAddr associated with the node given by mci. Return the
@@ -2224,6 +2223,7 @@ static struct amd64_family_type family_types[] = {
		.ctl_name = "K8",
		.f1_id = PCI_DEVICE_ID_AMD_K8_NB_ADDRMAP,
		.f2_id = PCI_DEVICE_ID_AMD_K8_NB_MEMCTL,
		.max_mcs = 2,
		.ops = {
			.early_channel_count	= k8_early_channel_count,
			.map_sysaddr_to_csrow	= k8_map_sysaddr_to_csrow,
@@ -2234,6 +2234,7 @@ static struct amd64_family_type family_types[] = {
		.ctl_name = "F10h",
		.f1_id = PCI_DEVICE_ID_AMD_10H_NB_MAP,
		.f2_id = PCI_DEVICE_ID_AMD_10H_NB_DRAM,
		.max_mcs = 2,
		.ops = {
			.early_channel_count	= f1x_early_channel_count,
			.map_sysaddr_to_csrow	= f1x_map_sysaddr_to_csrow,
@@ -2244,6 +2245,7 @@ static struct amd64_family_type family_types[] = {
		.ctl_name = "F15h",
		.f1_id = PCI_DEVICE_ID_AMD_15H_NB_F1,
		.f2_id = PCI_DEVICE_ID_AMD_15H_NB_F2,
		.max_mcs = 2,
		.ops = {
			.early_channel_count	= f1x_early_channel_count,
			.map_sysaddr_to_csrow	= f1x_map_sysaddr_to_csrow,
@@ -2254,6 +2256,7 @@ static struct amd64_family_type family_types[] = {
		.ctl_name = "F15h_M30h",
		.f1_id = PCI_DEVICE_ID_AMD_15H_M30H_NB_F1,
		.f2_id = PCI_DEVICE_ID_AMD_15H_M30H_NB_F2,
		.max_mcs = 2,
		.ops = {
			.early_channel_count	= f1x_early_channel_count,
			.map_sysaddr_to_csrow	= f1x_map_sysaddr_to_csrow,
@@ -2264,6 +2267,7 @@ static struct amd64_family_type family_types[] = {
		.ctl_name = "F15h_M60h",
		.f1_id = PCI_DEVICE_ID_AMD_15H_M60H_NB_F1,
		.f2_id = PCI_DEVICE_ID_AMD_15H_M60H_NB_F2,
		.max_mcs = 2,
		.ops = {
			.early_channel_count	= f1x_early_channel_count,
			.map_sysaddr_to_csrow	= f1x_map_sysaddr_to_csrow,
@@ -2274,6 +2278,7 @@ static struct amd64_family_type family_types[] = {
		.ctl_name = "F16h",
		.f1_id = PCI_DEVICE_ID_AMD_16H_NB_F1,
		.f2_id = PCI_DEVICE_ID_AMD_16H_NB_F2,
		.max_mcs = 2,
		.ops = {
			.early_channel_count	= f1x_early_channel_count,
			.map_sysaddr_to_csrow	= f1x_map_sysaddr_to_csrow,
@@ -2284,6 +2289,7 @@ static struct amd64_family_type family_types[] = {
		.ctl_name = "F16h_M30h",
		.f1_id = PCI_DEVICE_ID_AMD_16H_M30H_NB_F1,
		.f2_id = PCI_DEVICE_ID_AMD_16H_M30H_NB_F2,
		.max_mcs = 2,
		.ops = {
			.early_channel_count	= f1x_early_channel_count,
			.map_sysaddr_to_csrow	= f1x_map_sysaddr_to_csrow,
@@ -2294,6 +2300,7 @@ static struct amd64_family_type family_types[] = {
		.ctl_name = "F17h",
		.f0_id = PCI_DEVICE_ID_AMD_17H_DF_F0,
		.f6_id = PCI_DEVICE_ID_AMD_17H_DF_F6,
		.max_mcs = 2,
		.ops = {
			.early_channel_count	= f17_early_channel_count,
			.dbam_to_cs		= f17_addr_mask_to_cs_size,
@@ -2303,6 +2310,7 @@ static struct amd64_family_type family_types[] = {
		.ctl_name = "F17h_M10h",
		.f0_id = PCI_DEVICE_ID_AMD_17H_M10H_DF_F0,
		.f6_id = PCI_DEVICE_ID_AMD_17H_M10H_DF_F6,
		.max_mcs = 2,
		.ops = {
			.early_channel_count	= f17_early_channel_count,
			.dbam_to_cs		= f17_addr_mask_to_cs_size,
@@ -2312,6 +2320,7 @@ static struct amd64_family_type family_types[] = {
		.ctl_name = "F17h_M30h",
		.f0_id = PCI_DEVICE_ID_AMD_17H_M30H_DF_F0,
		.f6_id = PCI_DEVICE_ID_AMD_17H_M30H_DF_F6,
		.max_mcs = 8,
		.ops = {
			.early_channel_count	= f17_early_channel_count,
			.dbam_to_cs		= f17_addr_mask_to_cs_size,
@@ -2321,6 +2330,7 @@ static struct amd64_family_type family_types[] = {
		.ctl_name = "F17h_M70h",
		.f0_id = PCI_DEVICE_ID_AMD_17H_M70H_DF_F0,
		.f6_id = PCI_DEVICE_ID_AMD_17H_M70H_DF_F6,
		.max_mcs = 2,
		.ops = {
			.early_channel_count	= f17_early_channel_count,
			.dbam_to_cs		= f17_addr_mask_to_cs_size,
@@ -2838,8 +2848,6 @@ skip:
	edac_dbg(1, "  DIMM type: %s\n", edac_mem_types[pvt->dram_type]);

	determine_ecc_sym_sz(pvt);

	dump_misc_regs(pvt);
}

/*
@@ -2936,6 +2944,7 @@ static int init_csrows_df(struct mem_ctl_info *mci)
			dimm->mtype = pvt->dram_type;
			dimm->edac_mode = edac_mode;
			dimm->dtype = dev_type;
			dimm->grain = 64;
		}
	}

@@ -3012,6 +3021,7 @@ static int init_csrows(struct mem_ctl_info *mci)
			dimm = csrow->channels[j]->dimm;
			dimm->mtype = pvt->dram_type;
			dimm->edac_mode = edac_mode;
			dimm->grain = 64;
		}
	}

@@ -3178,43 +3188,27 @@ static void restore_ecc_error_reporting(struct ecc_settings *s, u16 nid,
		amd64_warn("Error restoring NB MCGCTL settings!\n");
}

/*
 * EDAC requires that the BIOS have ECC enabled before
 * taking over the processing of ECC errors. A command line
 * option allows to force-enable hardware ECC later in
 * enable_ecc_error_reporting().
 */
static const char *ecc_msg =
	"ECC disabled in the BIOS or no ECC capability, module will not load.\n"
	" Either enable ECC checking or force module loading by setting "
	"'ecc_enable_override'.\n"
	" (Note that use of the override may cause unknown side effects.)\n";

static bool ecc_enabled(struct pci_dev *F3, u16 nid)
static bool ecc_enabled(struct amd64_pvt *pvt)
{
	u16 nid = pvt->mc_node_id;
	bool nb_mce_en = false;
	u8 ecc_en = 0, i;
	u32 value;

	if (boot_cpu_data.x86 >= 0x17) {
		u8 umc_en_mask = 0, ecc_en_mask = 0;
		struct amd64_umc *umc;

		for_each_umc(i) {
			u32 base = get_umc_base(i);
			umc = &pvt->umc[i];

			/* Only check enabled UMCs. */
			if (amd_smn_read(nid, base + UMCCH_SDP_CTRL, &value))
				continue;

			if (!(value & UMC_SDP_INIT))
			if (!(umc->sdp_ctrl & UMC_SDP_INIT))
				continue;

			umc_en_mask |= BIT(i);

			if (amd_smn_read(nid, base + UMCCH_UMC_CAP_HI, &value))
				continue;

			if (value & UMC_ECC_ENABLED)
			if (umc->umc_cap_hi & UMC_ECC_ENABLED)
				ecc_en_mask |= BIT(i);
		}

@@ -3227,7 +3221,7 @@ static bool ecc_enabled(struct pci_dev *F3, u16 nid)
		/* Assume UMC MCA banks are enabled. */
		nb_mce_en = true;
	} else {
		amd64_read_pci_cfg(F3, NBCFG, &value);
		amd64_read_pci_cfg(pvt->F3, NBCFG, &value);

		ecc_en = !!(value & NBCFG_ECC_ENABLE);

@@ -3240,10 +3234,9 @@ static bool ecc_enabled(struct pci_dev *F3, u16 nid)
	amd64_info("Node %d: DRAM ECC %s.\n",
		   nid, (ecc_en ? "enabled" : "disabled"));

	if (!ecc_en || !nb_mce_en) {
		amd64_info("%s", ecc_msg);
	if (!ecc_en || !nb_mce_en)
		return false;
	}
	else
		return true;
}

@@ -3278,8 +3271,7 @@ f17h_determine_edac_ctl_cap(struct mem_ctl_info *mci, struct amd64_pvt *pvt)
	}
}

static void setup_mci_misc_attrs(struct mem_ctl_info *mci,
				 struct amd64_family_type *fam)
static void setup_mci_misc_attrs(struct mem_ctl_info *mci)
{
	struct amd64_pvt *pvt = mci->pvt_info;

@@ -3298,7 +3290,7 @@ static void setup_mci_misc_attrs(struct mem_ctl_info *mci,

	mci->edac_cap		= determine_edac_cap(pvt);
	mci->mod_name		= EDAC_MOD_STR;
	mci->ctl_name		= fam->ctl_name;
	mci->ctl_name		= fam_type->ctl_name;
	mci->dev_name		= pci_name(pvt->F3);
	mci->ctl_page_to_phys	= NULL;

@@ -3312,8 +3304,6 @@ static void setup_mci_misc_attrs(struct mem_ctl_info *mci,
 */
static struct amd64_family_type *per_family_init(struct amd64_pvt *pvt)
{
	struct amd64_family_type *fam_type = NULL;

	pvt->ext_model  = boot_cpu_data.x86_model >> 4;
	pvt->stepping	= boot_cpu_data.x86_stepping;
	pvt->model	= boot_cpu_data.x86_model;
@@ -3401,51 +3391,15 @@ static const struct attribute_group *amd64_edac_attr_groups[] = {
	NULL
};

/* Set the number of Unified Memory Controllers in the system. */
static void compute_num_umcs(void)
{
	u8 model = boot_cpu_data.x86_model;

	if (boot_cpu_data.x86 < 0x17)
		return;

	if (model >= 0x30 && model <= 0x3f)
		num_umcs = 8;
	else
		num_umcs = 2;

	edac_dbg(1, "Number of UMCs: %x", num_umcs);
}

static int init_one_instance(unsigned int nid)
static int hw_info_get(struct amd64_pvt *pvt)
{
	struct pci_dev *F3 = node_to_amd_nb(nid)->misc;
	struct amd64_family_type *fam_type = NULL;
	struct mem_ctl_info *mci = NULL;
	struct edac_mc_layer layers[2];
	struct amd64_pvt *pvt = NULL;
	u16 pci_id1, pci_id2;
	int err = 0, ret;

	ret = -ENOMEM;
	pvt = kzalloc(sizeof(struct amd64_pvt), GFP_KERNEL);
	if (!pvt)
		goto err_ret;

	pvt->mc_node_id	= nid;
	pvt->F3 = F3;

	ret = -EINVAL;
	fam_type = per_family_init(pvt);
	if (!fam_type)
		goto err_free;
	int ret = -EINVAL;

	if (pvt->fam >= 0x17) {
		pvt->umc = kcalloc(num_umcs, sizeof(struct amd64_umc), GFP_KERNEL);
		if (!pvt->umc) {
			ret = -ENOMEM;
			goto err_free;
		}
		pvt->umc = kcalloc(fam_type->max_mcs, sizeof(struct amd64_umc), GFP_KERNEL);
		if (!pvt->umc)
			return -ENOMEM;

		pci_id1 = fam_type->f0_id;
		pci_id2 = fam_type->f6_id;
@@ -3454,21 +3408,37 @@ static int init_one_instance(unsigned int nid)
		pci_id2 = fam_type->f2_id;
	}

	err = reserve_mc_sibling_devs(pvt, pci_id1, pci_id2);
	if (err)
		goto err_post_init;
	ret = reserve_mc_sibling_devs(pvt, pci_id1, pci_id2);
	if (ret)
		return ret;

	read_mc_regs(pvt);

	return 0;
}

static void hw_info_put(struct amd64_pvt *pvt)
{
	if (pvt->F0 || pvt->F1)
		free_mc_sibling_devs(pvt);

	kfree(pvt->umc);
}

static int init_one_instance(struct amd64_pvt *pvt)
{
	struct mem_ctl_info *mci = NULL;
	struct edac_mc_layer layers[2];
	int ret = -EINVAL;

	/*
	 * We need to determine how many memory channels there are. Then use
	 * that information for calculating the size of the dynamic instance
	 * tables in the 'mci' structure.
	 */
	ret = -EINVAL;
	pvt->channel_count = pvt->ops->early_channel_count(pvt);
	if (pvt->channel_count < 0)
		goto err_siblings;
		return ret;

	ret = -ENOMEM;
	layers[0].type = EDAC_MC_LAYER_CHIP_SELECT;
@@ -3480,24 +3450,18 @@ static int init_one_instance(unsigned int nid)
	 * Always allocate two channels since we can have setups with DIMMs on
	 * only one channel. Also, this simplifies handling later for the price
	 * of a couple of KBs tops.
	 *
	 * On Fam17h+, the number of controllers may be greater than two. So set
	 * the size equal to the maximum number of UMCs.
	 */
	if (pvt->fam >= 0x17)
		layers[1].size = num_umcs;
	else
		layers[1].size = 2;
	layers[1].size = fam_type->max_mcs;
	layers[1].is_virt_csrow = false;

	mci = edac_mc_alloc(nid, ARRAY_SIZE(layers), layers, 0);
	mci = edac_mc_alloc(pvt->mc_node_id, ARRAY_SIZE(layers), layers, 0);
	if (!mci)
		goto err_siblings;
		return ret;

	mci->pvt_info = pvt;
	mci->pdev = &pvt->F3->dev;

	setup_mci_misc_attrs(mci, fam_type);
	setup_mci_misc_attrs(mci);

	if (init_csrows(mci))
		mci->edac_cap = EDAC_FLAG_NONE;
@@ -3505,31 +3469,30 @@ static int init_one_instance(unsigned int nid)
	ret = -ENODEV;
	if (edac_mc_add_mc_with_groups(mci, amd64_edac_attr_groups)) {
		edac_dbg(1, "failed edac_mc_add_mc()\n");
		goto err_add_mc;
		edac_mc_free(mci);
		return ret;
	}

	return 0;
}

err_add_mc:
	edac_mc_free(mci);

err_siblings:
	free_mc_sibling_devs(pvt);

err_post_init:
	if (pvt->fam >= 0x17)
		kfree(pvt->umc);
static bool instance_has_memory(struct amd64_pvt *pvt)
{
	bool cs_enabled = false;
	int cs = 0, dct = 0;

err_free:
	kfree(pvt);
	for (dct = 0; dct < fam_type->max_mcs; dct++) {
		for_each_chip_select(cs, dct, pvt)
			cs_enabled |= csrow_enabled(cs, dct, pvt);
	}

err_ret:
	return ret;
	return cs_enabled;
}

static int probe_one_instance(unsigned int nid)
{
	struct pci_dev *F3 = node_to_amd_nb(nid)->misc;
	struct amd64_pvt *pvt = NULL;
	struct ecc_settings *s;
	int ret;

@@ -3540,8 +3503,29 @@ static int probe_one_instance(unsigned int nid)

	ecc_stngs[nid] = s;

	if (!ecc_enabled(F3, nid)) {
	pvt = kzalloc(sizeof(struct amd64_pvt), GFP_KERNEL);
	if (!pvt)
		goto err_settings;

	pvt->mc_node_id	= nid;
	pvt->F3 = F3;

	fam_type = per_family_init(pvt);
	if (!fam_type)
		goto err_enable;

	ret = hw_info_get(pvt);
	if (ret < 0)
		goto err_enable;

	ret = 0;
	if (!instance_has_memory(pvt)) {
		amd64_info("Node %d: No DIMMs detected.\n", nid);
		goto err_enable;
	}

	if (!ecc_enabled(pvt)) {
		ret = -ENODEV;

		if (!ecc_enable_override)
			goto err_enable;
@@ -3556,7 +3540,7 @@ static int probe_one_instance(unsigned int nid)
			goto err_enable;
	}

	ret = init_one_instance(nid);
	ret = init_one_instance(pvt);
	if (ret < 0) {
		amd64_err("Error probing instance: %d\n", nid);

@@ -3566,9 +3550,15 @@ static int probe_one_instance(unsigned int nid)
		goto err_enable;
	}

	dump_misc_regs(pvt);

	return ret;

err_enable:
	hw_info_put(pvt);
	kfree(pvt);

err_settings:
	kfree(s);
	ecc_stngs[nid] = NULL;

@@ -3595,14 +3585,13 @@ static void remove_one_instance(unsigned int nid)

	restore_ecc_error_reporting(s, nid, F3);

	free_mc_sibling_devs(pvt);

	kfree(ecc_stngs[nid]);
	ecc_stngs[nid] = NULL;

	/* Free the EDAC CORE resources */
	mci->pvt_info = NULL;

	hw_info_put(pvt);
	kfree(pvt);
	edac_mc_free(mci);
}
@@ -3668,8 +3657,6 @@ static int __init amd64_edac_init(void)
	if (!msrs)
		goto err_free;

	compute_num_umcs();

	for (i = 0; i < amd_nb_num(); i++) {
		err = probe_one_instance(i);
		if (err) {
+2 −0
Original line number Diff line number Diff line
@@ -479,6 +479,8 @@ struct low_ops {
struct amd64_family_type {
	const char *ctl_name;
	u16 f0_id, f1_id, f2_id, f6_id;
	/* Maximum number of memory controllers per die/node. */
	u8 max_mcs;
	struct low_ops ops;
};

+1 −6
Original line number Diff line number Diff line
@@ -281,16 +281,11 @@ static int aspeed_probe(struct platform_device *pdev)
	struct device *dev = &pdev->dev;
	struct edac_mc_layer layers[2];
	struct mem_ctl_info *mci;
	struct resource *res;
	void __iomem *regs;
	u32 reg04;
	int rc;

	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
	if (!res)
		return -ENOENT;

	regs = devm_ioremap_resource(dev, res);
	regs = devm_platform_ioremap_resource(pdev, 0);
	if (IS_ERR(regs))
		return PTR_ERR(regs);

Loading