Commit e095fc17 authored by Christian König, committed by Alex Deucher
Browse files

drm/amdgpu: explicitly sync to VM updates v2



Allows us to reduce the overhead while syncing to fences a bit.

v2: also drop adev parameter from the functions

Signed-off-by: Christian König <christian.koenig@amd.com>
Reviewed-by: Felix Kuehling <Felix.Kuehling@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
parent 6ceeb144
Loading
Loading
Loading
Loading
+4 −4
Original line number Diff line number Diff line
@@ -358,7 +358,7 @@ static int vm_update_pds(struct amdgpu_vm *vm, struct amdgpu_sync *sync)
	if (ret)
		return ret;

	return amdgpu_sync_fence(NULL, sync, vm->last_update, false);
	return amdgpu_sync_fence(sync, vm->last_update, false);
}

static uint64_t get_pte_flags(struct amdgpu_device *adev, struct kgd_mem *mem)
@@ -751,7 +751,7 @@ static int unmap_bo_from_gpuvm(struct amdgpu_device *adev,

	amdgpu_vm_clear_freed(adev, vm, &bo_va->last_pt_update);

	amdgpu_sync_fence(NULL, sync, bo_va->last_pt_update, false);
	amdgpu_sync_fence(sync, bo_va->last_pt_update, false);

	return 0;
}
@@ -770,7 +770,7 @@ static int update_gpuvm_pte(struct amdgpu_device *adev,
		return ret;
	}

	return amdgpu_sync_fence(NULL, sync, bo_va->last_pt_update, false);
	return amdgpu_sync_fence(sync, bo_va->last_pt_update, false);
}

static int map_bo_to_gpuvm(struct amdgpu_device *adev,
@@ -2045,7 +2045,7 @@ int amdgpu_amdkfd_gpuvm_restore_process_bos(void *info, struct dma_fence **ef)
			pr_debug("Memory eviction: Validate BOs failed. Try again\n");
			goto validate_map_fail;
		}
		ret = amdgpu_sync_fence(NULL, &sync_obj, bo->tbo.moving, false);
		ret = amdgpu_sync_fence(&sync_obj, bo->tbo.moving, false);
		if (ret) {
			pr_debug("Memory eviction: Sync BO fence failed. Try again\n");
			goto validate_map_fail;
+6 −13
Original line number Diff line number Diff line
@@ -797,29 +797,23 @@ static int amdgpu_cs_vm_handling(struct amdgpu_cs_parser *p)
	if (r)
		return r;

	r = amdgpu_sync_fence(adev, &p->job->sync,
			      fpriv->prt_va->last_pt_update, false);
	r = amdgpu_sync_vm_fence(&p->job->sync, fpriv->prt_va->last_pt_update);
	if (r)
		return r;

	if (amdgpu_mcbp || amdgpu_sriov_vf(adev)) {
		struct dma_fence *f;

		bo_va = fpriv->csa_va;
		BUG_ON(!bo_va);
		r = amdgpu_vm_bo_update(adev, bo_va, false);
		if (r)
			return r;

		f = bo_va->last_pt_update;
		r = amdgpu_sync_fence(adev, &p->job->sync, f, false);
		r = amdgpu_sync_vm_fence(&p->job->sync, bo_va->last_pt_update);
		if (r)
			return r;
	}

	amdgpu_bo_list_for_each_entry(e, p->bo_list) {
		struct dma_fence *f;

		/* ignore duplicates */
		bo = ttm_to_amdgpu_bo(e->tv.bo);
		if (!bo)
@@ -833,8 +827,7 @@ static int amdgpu_cs_vm_handling(struct amdgpu_cs_parser *p)
		if (r)
			return r;

		f = bo_va->last_pt_update;
		r = amdgpu_sync_fence(adev, &p->job->sync, f, false);
		r = amdgpu_sync_vm_fence(&p->job->sync, bo_va->last_pt_update);
		if (r)
			return r;
	}
@@ -847,7 +840,7 @@ static int amdgpu_cs_vm_handling(struct amdgpu_cs_parser *p)
	if (r)
		return r;

	r = amdgpu_sync_fence(adev, &p->job->sync, vm->last_update, false);
	r = amdgpu_sync_vm_fence(&p->job->sync, vm->last_update);
	if (r)
		return r;

@@ -989,7 +982,7 @@ static int amdgpu_cs_process_fence_dep(struct amdgpu_cs_parser *p,
			dma_fence_put(old);
		}

		r = amdgpu_sync_fence(p->adev, &p->job->sync, fence, true);
		r = amdgpu_sync_fence(&p->job->sync, fence, true);
		dma_fence_put(fence);
		if (r)
			return r;
@@ -1011,7 +1004,7 @@ static int amdgpu_syncobj_lookup_and_add_to_sync(struct amdgpu_cs_parser *p,
		return r;
	}

	r = amdgpu_sync_fence(p->adev, &p->job->sync, fence, true);
	r = amdgpu_sync_fence(&p->job->sync, fence, true);
	dma_fence_put(fence);

	return r;
+6 −7
Original line number Diff line number Diff line
@@ -206,7 +206,7 @@ static int amdgpu_vmid_grab_idle(struct amdgpu_vm *vm,
	int r;

	if (ring->vmid_wait && !dma_fence_is_signaled(ring->vmid_wait))
		return amdgpu_sync_fence(adev, sync, ring->vmid_wait, false);
		return amdgpu_sync_fence(sync, ring->vmid_wait, false);

	fences = kmalloc_array(sizeof(void *), id_mgr->num_ids, GFP_KERNEL);
	if (!fences)
@@ -241,7 +241,7 @@ static int amdgpu_vmid_grab_idle(struct amdgpu_vm *vm,
			return -ENOMEM;
		}

		r = amdgpu_sync_fence(adev, sync, &array->base, false);
		r = amdgpu_sync_fence(sync, &array->base, false);
		dma_fence_put(ring->vmid_wait);
		ring->vmid_wait = &array->base;
		return r;
@@ -294,7 +294,7 @@ static int amdgpu_vmid_grab_reserved(struct amdgpu_vm *vm,
		tmp = amdgpu_sync_peek_fence(&(*id)->active, ring);
		if (tmp) {
			*id = NULL;
			r = amdgpu_sync_fence(adev, sync, tmp, false);
			r = amdgpu_sync_fence(sync, tmp, false);
			return r;
		}
		needs_flush = true;
@@ -303,7 +303,7 @@ static int amdgpu_vmid_grab_reserved(struct amdgpu_vm *vm,
	/* Good we can use this VMID. Remember this submission as
	* user of the VMID.
	*/
	r = amdgpu_sync_fence(ring->adev, &(*id)->active, fence, false);
	r = amdgpu_sync_fence(&(*id)->active, fence, false);
	if (r)
		return r;

@@ -375,7 +375,7 @@ static int amdgpu_vmid_grab_used(struct amdgpu_vm *vm,
		/* Good, we can use this VMID. Remember this submission as
		 * user of the VMID.
		 */
		r = amdgpu_sync_fence(ring->adev, &(*id)->active, fence, false);
		r = amdgpu_sync_fence(&(*id)->active, fence, false);
		if (r)
			return r;

@@ -435,8 +435,7 @@ int amdgpu_vmid_grab(struct amdgpu_vm *vm, struct amdgpu_ring *ring,
			id = idle;

			/* Remember this submission as user of the VMID */
			r = amdgpu_sync_fence(ring->adev, &id->active,
					      fence, false);
			r = amdgpu_sync_fence(&id->active, fence, false);
			if (r)
				goto error;

+1 −2
Original line number Diff line number Diff line
@@ -193,8 +193,7 @@ static struct dma_fence *amdgpu_job_dependency(struct drm_sched_job *sched_job,
	fence = amdgpu_sync_get_fence(&job->sync, &explicit);
	if (fence && explicit) {
		if (drm_sched_dependency_optimized(fence, s_entity)) {
			r = amdgpu_sync_fence(ring->adev, &job->sched_sync,
					      fence, false);
			r = amdgpu_sync_fence(&job->sched_sync, fence, false);
			if (r)
				DRM_ERROR("Error adding fence (%d)\n", r);
		}
+28 −10
Original line number Diff line number Diff line
@@ -129,7 +129,8 @@ static void amdgpu_sync_keep_later(struct dma_fence **keep,
 * Tries to add the fence to an existing hash entry. Returns true when an entry
 * was found, false otherwise.
 */
static bool amdgpu_sync_add_later(struct amdgpu_sync *sync, struct dma_fence *f, bool explicit)
static bool amdgpu_sync_add_later(struct amdgpu_sync *sync, struct dma_fence *f,
				  bool explicit)
{
	struct amdgpu_sync_entry *e;

@@ -151,19 +152,18 @@ static bool amdgpu_sync_add_later(struct amdgpu_sync *sync, struct dma_fence *f,
 * amdgpu_sync_fence - remember to sync to this fence
 *
 * @sync: sync object to add fence to
 * @fence: fence to sync to
 * @f: fence to sync to
 * @explicit: if this is an explicit dependency
 *
 * Add the fence to the sync object.
 */
int amdgpu_sync_fence(struct amdgpu_device *adev, struct amdgpu_sync *sync,
		      struct dma_fence *f, bool explicit)
int amdgpu_sync_fence(struct amdgpu_sync *sync, struct dma_fence *f,
		      bool explicit)
{
	struct amdgpu_sync_entry *e;

	if (!f)
		return 0;
	if (amdgpu_sync_same_dev(adev, f) &&
	    amdgpu_sync_get_owner(f) == AMDGPU_FENCE_OWNER_VM)
		amdgpu_sync_keep_later(&sync->last_vm_update, f);

	if (amdgpu_sync_add_later(sync, f, explicit))
		return 0;
@@ -179,6 +179,24 @@ int amdgpu_sync_fence(struct amdgpu_device *adev, struct amdgpu_sync *sync,
	return 0;
}

/**
 * amdgpu_sync_vm_fence - remember to sync to this VM fence
 *
 * @sync: sync object to add fence to
 * @fence: the VM fence to add
 *
 * Add the fence to the sync object and remember it as VM update.
 */
int amdgpu_sync_vm_fence(struct amdgpu_sync *sync, struct dma_fence *fence)
{
	if (!fence)
		return 0;

	amdgpu_sync_keep_later(&sync->last_vm_update, fence);
	return amdgpu_sync_fence(sync, fence, false);
}

/**
 * amdgpu_sync_resv - sync to a reservation object
 *
@@ -204,7 +222,7 @@ int amdgpu_sync_resv(struct amdgpu_device *adev,

	/* always sync to the exclusive fence */
	f = dma_resv_get_excl(resv);
	r = amdgpu_sync_fence(adev, sync, f, false);
	r = amdgpu_sync_fence(sync, f, false);

	flist = dma_resv_get_list(resv);
	if (!flist || r)
@@ -239,7 +257,7 @@ int amdgpu_sync_resv(struct amdgpu_device *adev,
				continue;
		}

		r = amdgpu_sync_fence(adev, sync, f, false);
		r = amdgpu_sync_fence(sync, f, false);
		if (r)
			break;
	}
@@ -340,7 +358,7 @@ int amdgpu_sync_clone(struct amdgpu_sync *source, struct amdgpu_sync *clone)
	hash_for_each_safe(source->fences, i, tmp, e, node) {
		f = e->fence;
		if (!dma_fence_is_signaled(f)) {
			r = amdgpu_sync_fence(NULL, clone, f, e->explicit);
			r = amdgpu_sync_fence(clone, f, e->explicit);
			if (r)
				return r;
		} else {
Loading