Commit e3526257 authored by Monk Liu, committed by Alex Deucher
Browse files

drm/amdgpu: introduce vram lost for reset (v2)



For SOC15/Vega10, the BACO reset and the mode1 reset can cause VRAM loss
in the high-end address range. The current KMD VRAM-lost check cannot
catch this, since it only checks the very beginning of the visible frame buffer.

v2:
cover NV as well

Signed-off-by: Monk Liu <Monk.Liu@amd.com>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
parent 514ad791
Loading
Loading
Loading
Loading
+1 −0
Original line number Diff line number Diff line
@@ -1151,6 +1151,7 @@ int emu_soc_asic_init(struct amdgpu_device *adev);
#define amdgpu_asic_get_pcie_usage(adev, cnt0, cnt1) ((adev)->asic_funcs->get_pcie_usage((adev), (cnt0), (cnt1)))
#define amdgpu_asic_need_reset_on_init(adev) (adev)->asic_funcs->need_reset_on_init((adev))
#define amdgpu_asic_get_pcie_replay_count(adev) ((adev)->asic_funcs->get_pcie_replay_count((adev)))
/*
 * Increment the device's vram_lost_counter via atomic_inc().
 * The expansion deliberately has no trailing semicolon, so the macro is
 * usable as a single statement (e.g. in an unbraced if/else); callers
 * supply their own ';'.
 */
#define amdgpu_inc_vram_lost(adev) atomic_inc(&((adev)->vram_lost_counter))

/* Common functions */
bool amdgpu_device_should_recover_gpu(struct amdgpu_device *adev);
+2 −2
Original line number Diff line number Diff line
@@ -3483,7 +3483,7 @@ error:
	amdgpu_virt_init_data_exchange(adev);
	amdgpu_virt_release_full_gpu(adev, true);
	if (!r && adev->virt.gim_feature & AMDGIM_FEATURE_GIM_FLR_VRAMLOST) {
		atomic_inc(&adev->vram_lost_counter);
		amdgpu_inc_vram_lost(adev);
		r = amdgpu_device_recover_vram(adev);
	}

@@ -3649,7 +3649,7 @@ static int amdgpu_do_asic_reset(struct amdgpu_hive_info *hive,
				vram_lost = amdgpu_device_check_vram_lost(tmp_adev);
				if (vram_lost) {
					DRM_INFO("VRAM is lost due to GPU reset!\n");
					atomic_inc(&tmp_adev->vram_lost_counter);
					amdgpu_inc_vram_lost(tmp_adev);
				}

				r = amdgpu_gtt_mgr_recover(
+5 −2
Original line number Diff line number Diff line
@@ -316,10 +316,13 @@ static int nv_asic_reset(struct amdgpu_device *adev)
	int ret = 0;
	struct smu_context *smu = &adev->smu;

	if (nv_asic_reset_method(adev) == AMD_RESET_METHOD_BACO)
	if (nv_asic_reset_method(adev) == AMD_RESET_METHOD_BACO) {
		amdgpu_inc_vram_lost(adev);
		ret = smu_baco_reset(smu);
	else
	} else {
		amdgpu_inc_vram_lost(adev);
		ret = nv_asic_mode1_reset(adev);
	}

	return ret;
}
+2 −0
Original line number Diff line number Diff line
@@ -558,10 +558,12 @@ static int soc15_asic_reset(struct amdgpu_device *adev)
{
	switch (soc15_asic_reset_method(adev)) {
		case AMD_RESET_METHOD_BACO:
			amdgpu_inc_vram_lost(adev);
			return soc15_asic_baco_reset(adev);
		case AMD_RESET_METHOD_MODE2:
			return soc15_mode2_reset(adev);
		default:
			amdgpu_inc_vram_lost(adev);
			return soc15_asic_mode1_reset(adev);
	}
}