Commit 9593f4d6 authored by Rajneesh Bhardwaj's avatar Rajneesh Bhardwaj Committed by Alex Deucher
Browse files

drm/amdkfd: refactor runtime pm for baco



So far the kfd driver implemented same routines for runtime and system
wide suspend and resume (s2idle or mem). During system wide suspend the
kfd aquires an atomic lock that prevents any more user processes to
create queues and interact with kfd driver and amd gpu. This mechanism
created problem when amdgpu device is runtime suspended with BACO
enabled. Any application that relies on kfd driver fails to load because
the driver reports a locked kfd device since gpu is runtime suspended.

However, in an ideal case, when gpu is runtime  suspended the kfd driver
should be able to:

 - auto resume amdgpu driver whenever a client requests compute service
 - prevent runtime suspend for amdgpu  while kfd is in use

This change refactors the amdgpu and amdkfd drivers to support BACO and
runtime power management.

Reviewed-by: default avatarOak Zeng <oak.zeng@amd.com>
Reviewed-by: default avatarFelix Kuehling <felix.kuehling@amd.com>
Signed-off-by: default avatarRajneesh Bhardwaj <rajneesh.bhardwaj@amd.com>
Signed-off-by: default avatarAlex Deucher <alexander.deucher@amd.com>
parent 3c1224c0
Loading
Loading
Loading
Loading
+6 −6
Original line number Diff line number Diff line
@@ -178,18 +178,18 @@ void amdgpu_amdkfd_interrupt(struct amdgpu_device *adev,
		kgd2kfd_interrupt(adev->kfd.dev, ih_ring_entry);
}

void amdgpu_amdkfd_suspend(struct amdgpu_device *adev)
void amdgpu_amdkfd_suspend(struct amdgpu_device *adev, bool run_pm)
{
	if (adev->kfd.dev)
		kgd2kfd_suspend(adev->kfd.dev);
		kgd2kfd_suspend(adev->kfd.dev, run_pm);
}

int amdgpu_amdkfd_resume(struct amdgpu_device *adev)
int amdgpu_amdkfd_resume(struct amdgpu_device *adev, bool run_pm)
{
	int r = 0;

	if (adev->kfd.dev)
		r = kgd2kfd_resume(adev->kfd.dev);
		r = kgd2kfd_resume(adev->kfd.dev, run_pm);

	return r;
}
@@ -713,11 +713,11 @@ void kgd2kfd_exit(void)
{
}

void kgd2kfd_suspend(struct kfd_dev *kfd)
void kgd2kfd_suspend(struct kfd_dev *kfd, bool run_pm)
{
}

int kgd2kfd_resume(struct kfd_dev *kfd)
int kgd2kfd_resume(struct kfd_dev *kfd, bool run_pm)
{
	return 0;
}
+4 −4
Original line number Diff line number Diff line
@@ -122,8 +122,8 @@ struct amdkfd_process_info {
int amdgpu_amdkfd_init(void);
void amdgpu_amdkfd_fini(void);

void amdgpu_amdkfd_suspend(struct amdgpu_device *adev);
int amdgpu_amdkfd_resume(struct amdgpu_device *adev);
void amdgpu_amdkfd_suspend(struct amdgpu_device *adev, bool run_pm);
int amdgpu_amdkfd_resume(struct amdgpu_device *adev, bool run_pm);
void amdgpu_amdkfd_interrupt(struct amdgpu_device *adev,
			const void *ih_ring_entry);
void amdgpu_amdkfd_device_probe(struct amdgpu_device *adev);
@@ -249,8 +249,8 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd,
			 struct drm_device *ddev,
			 const struct kgd2kfd_shared_resources *gpu_resources);
void kgd2kfd_device_exit(struct kfd_dev *kfd);
void kgd2kfd_suspend(struct kfd_dev *kfd);
int kgd2kfd_resume(struct kfd_dev *kfd);
void kgd2kfd_suspend(struct kfd_dev *kfd, bool run_pm);
int kgd2kfd_resume(struct kfd_dev *kfd, bool run_pm);
int kgd2kfd_pre_reset(struct kfd_dev *kfd);
int kgd2kfd_post_reset(struct kfd_dev *kfd);
void kgd2kfd_interrupt(struct kfd_dev *kfd, const void *ih_ring_entry);
+2 −2
Original line number Diff line number Diff line
@@ -3341,7 +3341,7 @@ int amdgpu_device_suspend(struct drm_device *dev, bool fbcon)
		}
	}

	amdgpu_amdkfd_suspend(adev);
	amdgpu_amdkfd_suspend(adev, !fbcon);

	amdgpu_ras_suspend(adev);

@@ -3425,7 +3425,7 @@ int amdgpu_device_resume(struct drm_device *dev, bool fbcon)
			}
		}
	}
	r = amdgpu_amdkfd_resume(adev);
	r = amdgpu_amdkfd_resume(adev, !fbcon);
	if (r)
		return r;

+17 −12
Original line number Diff line number Diff line
@@ -710,7 +710,7 @@ out:
void kgd2kfd_device_exit(struct kfd_dev *kfd)
{
	if (kfd->init_complete) {
		kgd2kfd_suspend(kfd);
		kgd2kfd_suspend(kfd, false);
		device_queue_manager_uninit(kfd->dqm);
		kfd_interrupt_exit(kfd);
		kfd_topology_remove_device(kfd);
@@ -731,7 +731,7 @@ int kgd2kfd_pre_reset(struct kfd_dev *kfd)

	kfd->dqm->ops.pre_reset(kfd->dqm);

	kgd2kfd_suspend(kfd);
	kgd2kfd_suspend(kfd, false);

	kfd_signal_reset_event(kfd);
	return 0;
@@ -765,21 +765,23 @@ bool kfd_is_locked(void)
	return  (atomic_read(&kfd_locked) > 0);
}

void kgd2kfd_suspend(struct kfd_dev *kfd)
void kgd2kfd_suspend(struct kfd_dev *kfd, bool run_pm)
{
	if (!kfd->init_complete)
		return;

	/* for runtime suspend, skip locking kfd */
	if (!run_pm) {
		/* For first KFD device suspend all the KFD processes */
		if (atomic_inc_return(&kfd_locked) == 1)
			kfd_suspend_all_processes();
	}

	kfd->dqm->ops.stop(kfd->dqm);

	kfd_iommu_suspend(kfd);
}

int kgd2kfd_resume(struct kfd_dev *kfd)
int kgd2kfd_resume(struct kfd_dev *kfd, bool run_pm)
{
	int ret, count;

@@ -790,10 +792,13 @@ int kgd2kfd_resume(struct kfd_dev *kfd)
	if (ret)
		return ret;

	/* for runtime resume, skip unlocking kfd */
	if (!run_pm) {
		count = atomic_dec_return(&kfd_locked);
		WARN_ONCE(count < 0, "KFD suspend / resume ref. error");
		if (count == 0)
			ret = kfd_resume_all_processes();
	}

	return ret;
}
+1 −0
Original line number Diff line number Diff line
@@ -650,6 +650,7 @@ struct kfd_process_device {
	 * function.
	 */
	bool already_dequeued;
	bool runtime_inuse;

	/* Is this process/pasid bound to this device? (amd_iommu_bind_pasid) */
	enum kfd_pdd_bound bound;
Loading