Commit 30514dec authored by Chunming Zhou, committed by Alex Deucher

drm/amdgpu: fix dependency issue



The problem is that executing the jobs in the right order doesn't give you the right result,
because consecutive jobs executed on the same engine are pipelined.
In other words, job B does its buffer read before job A has written its result.
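To illustrate the idea outside the kernel, the following is a minimal user-space C sketch of the decision this patch encodes: a pending dependency produced on the same engine/scheduler is "optimized" (not waited on by the scheduler), so the consuming job must emit an explicit pipeline sync instead. The struct fence, dependency_optimized() and the context values below are simplified stand-ins for illustration only, not the kernel's dma_fence/scheduler API.

/*
 * Simplified stand-in types; not the kernel's dma_fence or scheduler API.
 */
#include <stdbool.h>
#include <stdio.h>

struct fence {
	bool signaled;      /* dependency already completed */
	unsigned context;   /* which engine/entity produced it */
};

/*
 * The scheduler may skip waiting on a dependency that runs on the same
 * ring (an "optimized" dependency).  Ordering of submission is still
 * correct, but the two jobs are pipelined, so the later job has to emit
 * an explicit pipeline sync before reading what the earlier one wrote.
 */
static bool dependency_optimized(const struct fence *dep, unsigned my_context)
{
	if (!dep || dep->signaled)
		return false;                   /* nothing pending, no sync needed */
	return dep->context == my_context;      /* same ring: pipelined, sync needed */
}

int main(void)
{
	struct fence dep = { .signaled = false, .context = 3 };
	bool need_pipeline_sync = dependency_optimized(&dep, 3);

	printf("emit pipeline sync before the IBs: %s\n",
	       need_pipeline_sync ? "yes" : "no");
	return 0;
}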

Signed-off-by: Chunming Zhou <David1.Zhou@amd.com>
Reviewed-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
parent cb3696fd
+1 −0
@@ -1129,6 +1129,7 @@ struct amdgpu_job {
 	void			*owner;
 	uint64_t		fence_ctx; /* the fence_context this job uses */
 	bool                    vm_needs_flush;
+	bool			need_pipeline_sync;
 	unsigned		vm_id;
 	uint64_t		vm_pd_addr;
 	uint32_t		gds_base, gds_size;
+2 −0
@@ -160,6 +160,8 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
 		dev_err(adev->dev, "scheduling IB failed (%d).\n", r);
 		return r;
 	}
+	if (ring->funcs->emit_pipeline_sync && job && job->need_pipeline_sync)
+		amdgpu_ring_emit_pipeline_sync(ring);
 
 	if (vm) {
 		r = amdgpu_vm_flush(ring, job);
+4 −0
@@ -57,6 +57,7 @@ int amdgpu_job_alloc(struct amdgpu_device *adev, unsigned num_ibs,
 	(*job)->vm = vm;
 	(*job)->ibs = (void *)&(*job)[1];
 	(*job)->num_ibs = num_ibs;
+	(*job)->need_pipeline_sync = false;
 
 	amdgpu_sync_create(&(*job)->sync);
 
@@ -152,6 +153,9 @@ static struct dma_fence *amdgpu_job_dependency(struct amd_sched_job *sched_job)
 		fence = amdgpu_sync_get_fence(&job->sync);
 	}
 
+	if (amd_sched_dependency_optimized(fence, sched_job->s_entity))
+		job->need_pipeline_sync = true;
+
 	return fence;
 }
 
+1 −1
@@ -614,7 +614,7 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job)
 	if (ring->funcs->init_cond_exec)
 		patch_offset = amdgpu_ring_init_cond_exec(ring);
 
-	if (ring->funcs->emit_pipeline_sync)
+	if (ring->funcs->emit_pipeline_sync && !job->need_pipeline_sync)
 		amdgpu_ring_emit_pipeline_sync(ring);
 
 	if (ring->funcs->emit_vm_flush && vm_flush_needed) {
+17 −0
@@ -236,6 +236,23 @@ static void amd_sched_entity_clear_dep(struct dma_fence *f, struct dma_fence_cb
 	dma_fence_put(f);
 }
 
+bool amd_sched_dependency_optimized(struct dma_fence* fence,
+				    struct amd_sched_entity *entity)
+{
+	struct amd_gpu_scheduler *sched = entity->sched;
+	struct amd_sched_fence *s_fence;
+
+	if (!fence || dma_fence_is_signaled(fence))
+		return false;
+	if (fence->context == entity->fence_context)
+		return true;
+	s_fence = to_amd_sched_fence(fence);
+	if (s_fence && s_fence->sched == sched)
+		return true;
+
+	return false;
+}
+
 static bool amd_sched_entity_add_dependency_cb(struct amd_sched_entity *entity)
 {
 	struct amd_gpu_scheduler *sched = entity->sched;