Commit 88474cca authored by Guchun Chen, committed by Alex Deucher
Browse files

drm/amdgpu: update ras capability's query based on mem ecc configuration



The RAS support capability needs to be updated based on which
memory ECC types are enabled. Also remove the redundant memory ECC
check in the gmc module for vega20 and arcturus.

v2: check HBM ECC enablement and set ras mask accordingly.
v3: avoid invoking the atomfirmware interface twice for the same query.

Suggested-by: Hawking Zhang <Hawking.Zhang@amd.com>
Signed-off-by: Guchun Chen <guchun.chen@amd.com>
Reviewed-by: Hawking Zhang <Hawking.Zhang@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
parent 6397ec58
Loading
Loading
Loading
Loading
+18 −6
Original line number Diff line number Diff line
@@ -1765,15 +1765,27 @@ static void amdgpu_ras_check_supported(struct amdgpu_device *adev,
	*hw_supported = 0;
	*supported = 0;

	if (amdgpu_sriov_vf(adev) ||
	if (amdgpu_sriov_vf(adev) || !adev->is_atom_fw ||
	    (adev->asic_type != CHIP_VEGA20 &&
	     adev->asic_type != CHIP_ARCTURUS))
		return;

	if (adev->is_atom_fw &&
			(amdgpu_atomfirmware_mem_ecc_supported(adev) ||
			 amdgpu_atomfirmware_sram_ecc_supported(adev)))
		*hw_supported = AMDGPU_RAS_BLOCK_MASK;
	if (amdgpu_atomfirmware_mem_ecc_supported(adev)) {
		DRM_INFO("HBM ECC is active.\n");
		*hw_supported |= (1 << AMDGPU_RAS_BLOCK__UMC |
				1 << AMDGPU_RAS_BLOCK__DF);
	} else
		DRM_INFO("HBM ECC is not presented.\n");

	if (amdgpu_atomfirmware_sram_ecc_supported(adev)) {
		DRM_INFO("SRAM ECC is active.\n");
		*hw_supported |= ~(1 << AMDGPU_RAS_BLOCK__UMC |
				1 << AMDGPU_RAS_BLOCK__DF);
	} else
		DRM_INFO("SRAM ECC is not presented.\n");

	/* hw_supported needs to be aligned with RAS block mask. */
	*hw_supported &= AMDGPU_RAS_BLOCK_MASK;

	*supported = amdgpu_ras_enable == 0 ?
			0 : *hw_supported & amdgpu_ras_mask;
+14 −24
Original line number Diff line number Diff line
@@ -922,31 +922,21 @@ static int gmc_v9_0_late_init(void *handle)
	if (r)
		return r;
	/* Check if ecc is available */
	if (!amdgpu_sriov_vf(adev)) {
		switch (adev->asic_type) {
		case CHIP_VEGA10:
		case CHIP_VEGA20:
		case CHIP_ARCTURUS:
	if (!amdgpu_sriov_vf(adev) && (adev->asic_type == CHIP_VEGA10)) {
		r = amdgpu_atomfirmware_mem_ecc_supported(adev);
		if (!r) {
			DRM_INFO("ECC is not present.\n");
			if (adev->df.funcs->enable_ecc_force_par_wr_rmw)
				adev->df.funcs->enable_ecc_force_par_wr_rmw(adev, false);
			} else {
		} else
			DRM_INFO("ECC is active.\n");
			}

		r = amdgpu_atomfirmware_sram_ecc_supported(adev);
			if (!r) {
		if (!r)
			DRM_INFO("SRAM ECC is not present.\n");
			} else {
		else
			DRM_INFO("SRAM ECC is active.\n");
	}
			break;
		default:
			break;
		}
	}

	if (adev->mmhub.funcs && adev->mmhub.funcs->reset_ras_error_count)
		adev->mmhub.funcs->reset_ras_error_count(adev);