Commit fdafb359 authored by Evan Quan, committed by Alex Deucher
Browse files

drm/amdgpu: fix MGPU fan boost enablement for XGMI reset



The MGPU fan boost feature should not be enabled until all
devices in the same hive are back from reset.

Signed-off-by: Evan Quan <evan.quan@amd.com>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
parent 4b22e7e3
Loading
Loading
Loading
Loading
+4 −0
Original line number Diff line number Diff line
@@ -1216,6 +1216,10 @@ int amdgpu_dm_display_resume(struct amdgpu_device *adev );
static inline int amdgpu_dm_display_resume(struct amdgpu_device *adev) { return 0; }
#endif


void amdgpu_register_gpu_instance(struct amdgpu_device *adev);
void amdgpu_unregister_gpu_instance(struct amdgpu_device *adev);

#include "amdgpu_object.h"

/* used by df_v3_6.c and amdgpu_pmu.c */
+13 −0
Original line number Diff line number Diff line
@@ -3559,6 +3559,12 @@ static int amdgpu_do_asic_reset(struct amdgpu_hive_info *hive,
				if (vram_lost)
					amdgpu_device_fill_reset_magic(tmp_adev);

				/*
				 * Add this ASIC back as tracked, since its reset
				 * has already completed successfully.
				 */
				amdgpu_register_gpu_instance(tmp_adev);

				r = amdgpu_device_ip_late_init(tmp_adev);
				if (r)
					goto out;
@@ -3693,6 +3699,13 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
		device_list_handle = &device_list;
	}

	/*
	 * Mark the ASICs that are about to be reset as untracked first,
	 * and add them back after the reset has completed.
	 */
	list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head)
		amdgpu_unregister_gpu_instance(tmp_adev);

	/* block all schedulers and reset given job's ring */
	list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {
		for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
+2 −2
Original line number Diff line number Diff line
@@ -44,7 +44,7 @@
#include "amdgpu_display.h"
#include "amdgpu_ras.h"

static void amdgpu_unregister_gpu_instance(struct amdgpu_device *adev)
void amdgpu_unregister_gpu_instance(struct amdgpu_device *adev)
{
	struct amdgpu_gpu_instance *gpu_instance;
	int i;
@@ -105,7 +105,7 @@ done_free:
	dev->dev_private = NULL;
}

static void amdgpu_register_gpu_instance(struct amdgpu_device *adev)
void amdgpu_register_gpu_instance(struct amdgpu_device *adev)
{
	struct amdgpu_gpu_instance *gpu_instance;