Commit 5caf466a authored by xinhui pan's avatar xinhui pan Committed by Alex Deucher
Browse files

drm/amdgpu: add new member hw_supported



Currently, it is not clear how ras is supported. Both software and
hardware can set the `supported` field, which is confusing.

Fix it by adding new member hw_supported.

Signed-off-by: xinhui pan <xinhui.pan@amd.com>
Reviewed-by: Evan Quan <evan.quan@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
parent 2b9505e3
Loading
Loading
Loading
Loading
+30 −12
Original line number Diff line number Diff line
@@ -469,7 +469,9 @@ static struct ras_manager *amdgpu_ras_find_obj(struct amdgpu_device *adev,
static int amdgpu_ras_is_feature_allowed(struct amdgpu_device *adev,
		struct ras_common_if *head)
{
	return amdgpu_ras_enable && (amdgpu_ras_mask & BIT(head->block));
	struct amdgpu_ras *con = amdgpu_ras_get_context(adev);

	return con->hw_supported & BIT(head->block);
}

static int amdgpu_ras_is_feature_enabled(struct amdgpu_device *adev,
@@ -490,6 +492,12 @@ static int __amdgpu_ras_feature_enable(struct amdgpu_device *adev,
	struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
	struct ras_manager *obj = amdgpu_ras_find_obj(adev, head);

	/* If hardware does not support ras, then do not create the obj.
	 * But if hardware supports ras, we can create the obj.
	 * The ras framework checks con->hw_supported to see if it needs to do
	 * the corresponding initialization.
	 * IP checks con->supported to see if it needs to disable ras.
	 */
	if (!amdgpu_ras_is_feature_allowed(adev, head))
		return 0;
	if (!(!!enable ^ !!amdgpu_ras_is_feature_enabled(adev, head)))
@@ -1334,27 +1342,36 @@ static int amdgpu_ras_recovery_fini(struct amdgpu_device *adev)
}
/* recovery end */

static uint32_t amdgpu_ras_check_supported(struct amdgpu_device *adev)
/*
 * Check the hardware's ras ability, which will be saved in hw_supported.
 * If the hardware does not support ras, we can skip some ras initialization
 * and forbid some ras operations from IP.
 * If software itself, say a boot parameter, limits the ras ability, we still
 * need to allow IP to do some limited operations, like disable. In such a
 * case, we have to initialize ras as normal, but need to check whether the
 * operation is allowed or not in each function.
 */
static void amdgpu_ras_check_supported(struct amdgpu_device *adev,
		uint32_t *hw_supported, uint32_t *supported)
{
	uint32_t supported = 0;
	*hw_supported = 0;
	*supported = 0;

	if (amdgpu_ras_enable == 0 ||
			amdgpu_sriov_vf(adev) ||
	if (amdgpu_sriov_vf(adev) ||
			adev->asic_type != CHIP_VEGA20)
		return 0;
		return;

	if (amdgpu_atomfirmware_mem_ecc_supported(adev) ||
			amdgpu_atomfirmware_sram_ecc_supported(adev)) {
		supported = AMDGPU_RAS_BLOCK_MASK;
	}
			amdgpu_atomfirmware_sram_ecc_supported(adev))
		*hw_supported = AMDGPU_RAS_BLOCK_MASK;

	return supported & amdgpu_ras_mask;
	*supported = amdgpu_ras_enable == 0 ?
				0 : *hw_supported & amdgpu_ras_mask;
}

int amdgpu_ras_init(struct amdgpu_device *adev)
{
	struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
	uint32_t supported = amdgpu_ras_check_supported(adev);

	if (con)
		return 0;
@@ -1369,7 +1386,8 @@ int amdgpu_ras_init(struct amdgpu_device *adev)

	amdgpu_ras_set_context(adev, con);

	con->supported = supported;
	amdgpu_ras_check_supported(adev, &con->hw_supported,
			&con->supported);
	con->features = 0;
	INIT_LIST_HEAD(&con->head);

+3 −0
Original line number Diff line number Diff line
@@ -81,6 +81,9 @@ typedef int (*ras_ih_cb)(struct amdgpu_device *adev,

struct amdgpu_ras {
	/* ras infrastructure */
	/* for ras itself. */
	uint32_t hw_supported;
	/* for IP to check its ras ability. */
	uint32_t supported;
	uint32_t features;
	struct list_head head;