Commit 2624dd15, authored by Chunming Zhou, committed by Alex Deucher
Browse files

drm/amdgpu: add timeline support in amdgpu CS v3



Syncobj wait/signal operations are appended to the command submission.
v2: separate into two kinds of in/out_deps functions
v3: fix checking for timeline syncobj

Signed-off-by: Chunming Zhou <david1.zhou@amd.com>
Cc: Tobias Hector <Tobias.Hector@amd.com>
Cc: Jason Ekstrand <jason@jlekstrand.net>
Cc: Dave Airlie <airlied@redhat.com>
Cc: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Reviewed-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
parent ecc4946f
Loading
Loading
Loading
Loading
+8 −2
Original line number Diff line number Diff line
@@ -436,6 +436,12 @@ struct amdgpu_cs_chunk {
	void			*kdata;
};

/*
 * One syncobj to be updated with the submission's fence once the command
 * submission has been queued (see amdgpu_cs_post_dependencies()).
 */
struct amdgpu_cs_post_dep {
	/* Looked up with drm_syncobj_find(); put in amdgpu_cs_parser_fini(). */
	struct drm_syncobj *syncobj;
	/*
	 * Chain node allocated up front for a non-zero timeline point, NULL
	 * otherwise. Passed to drm_syncobj_add_point() and then cleared;
	 * whatever is still set here is kfree()d in amdgpu_cs_parser_fini().
	 */
	struct dma_fence_chain *chain;
	/* Timeline point copied from the chunk; 0 for plain SYNCOBJ_OUT entries. */
	u64 point;
};

struct amdgpu_cs_parser {
	struct amdgpu_device	*adev;
	struct drm_file		*filp;
@@ -465,8 +471,8 @@ struct amdgpu_cs_parser {
	/* user fence */
	struct amdgpu_bo_list_entry	uf_entry;

	unsigned num_post_dep_syncobjs;
	struct drm_syncobj **post_dep_syncobjs;
	unsigned			num_post_deps;
	struct amdgpu_cs_post_dep	*post_deps;
};

static inline u32 amdgpu_get_ib_value(struct amdgpu_cs_parser *p,
+128 −24
Original line number Diff line number Diff line
@@ -215,6 +215,8 @@ static int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, union drm_amdgpu_cs
		case AMDGPU_CHUNK_ID_SYNCOBJ_IN:
		case AMDGPU_CHUNK_ID_SYNCOBJ_OUT:
		case AMDGPU_CHUNK_ID_SCHEDULED_DEPENDENCIES:
		case AMDGPU_CHUNK_ID_SYNCOBJ_TIMELINE_WAIT:
		case AMDGPU_CHUNK_ID_SYNCOBJ_TIMELINE_SIGNAL:
			break;

		default:
@@ -804,9 +806,11 @@ static void amdgpu_cs_parser_fini(struct amdgpu_cs_parser *parser, int error,
		ttm_eu_backoff_reservation(&parser->ticket,
					   &parser->validated);

	for (i = 0; i < parser->num_post_dep_syncobjs; i++)
		drm_syncobj_put(parser->post_dep_syncobjs[i]);
	kfree(parser->post_dep_syncobjs);
	for (i = 0; i < parser->num_post_deps; i++) {
		drm_syncobj_put(parser->post_deps[i].syncobj);
		kfree(parser->post_deps[i].chain);
	}
	kfree(parser->post_deps);

	dma_fence_put(parser->fence);

@@ -1117,13 +1121,18 @@ static int amdgpu_cs_process_fence_dep(struct amdgpu_cs_parser *p,
}

static int amdgpu_syncobj_lookup_and_add_to_sync(struct amdgpu_cs_parser *p,
						 uint32_t handle)
						 uint32_t handle, u64 point,
						 u64 flags)
{
	int r;
	struct dma_fence *fence;
	r = drm_syncobj_find_fence(p->filp, handle, 0, 0, &fence);
	if (r)
	int r;

	r = drm_syncobj_find_fence(p->filp, handle, point, flags, &fence);
	if (r) {
		DRM_ERROR("syncobj %u failed to find fence @ %llu (%d)!\n",
			  handle, point, r);
		return r;
	}

	r = amdgpu_sync_fence(p->adev, &p->job->sync, fence, true);
	dma_fence_put(fence);
@@ -1134,46 +1143,118 @@ static int amdgpu_syncobj_lookup_and_add_to_sync(struct amdgpu_cs_parser *p,
/*
 * Handle an AMDGPU_CHUNK_ID_SYNCOBJ_IN chunk.
 *
 * chunk->kdata is read as an array of struct drm_amdgpu_cs_chunk_sem. Each
 * handle is passed to amdgpu_syncobj_lookup_and_add_to_sync() with point 0
 * and flags 0.
 *
 * Returns 0 on success or the first error reported by the lookup helper.
 */
static int amdgpu_cs_process_syncobj_in_dep(struct amdgpu_cs_parser *p,
					    struct amdgpu_cs_chunk *chunk)
{
	struct drm_amdgpu_cs_chunk_sem *deps;
	unsigned num_deps;
	int i, r;
	/*
	 * NOTE(review): duplicate of the declaration above — looks like the
	 * pre-patch line kept by the diff rendering; confirm against the tree.
	 */
	struct drm_amdgpu_cs_chunk_sem *deps;

	deps = (struct drm_amdgpu_cs_chunk_sem *)chunk->kdata;
	/* length_dw is presumably a count of 32-bit words, hence "* 4" for bytes. */
	num_deps = chunk->length_dw * 4 /
		sizeof(struct drm_amdgpu_cs_chunk_sem);
	for (i = 0; i < num_deps; ++i) {
		r = amdgpu_syncobj_lookup_and_add_to_sync(p, deps[i].handle,
							  0, 0);
		if (r)
			return r;
	}

	return 0;
}


/*
 * Handle an AMDGPU_CHUNK_ID_SYNCOBJ_TIMELINE_WAIT chunk.
 *
 * chunk->kdata is read as an array of struct drm_amdgpu_cs_chunk_syncobj.
 * Each entry's handle, point and flags are forwarded unchanged to
 * amdgpu_syncobj_lookup_and_add_to_sync().
 *
 * Returns 0 on success or the first error reported by the lookup helper.
 */
static int amdgpu_cs_process_syncobj_timeline_in_dep(struct amdgpu_cs_parser *p,
						     struct amdgpu_cs_chunk *chunk)
{
	struct drm_amdgpu_cs_chunk_syncobj *syncobj_deps;
	unsigned num_deps;
	int i, r;

	syncobj_deps = (struct drm_amdgpu_cs_chunk_syncobj *)chunk->kdata;
	/* length_dw is presumably a count of 32-bit words, hence "* 4" for bytes. */
	num_deps = chunk->length_dw * 4 /
		sizeof(struct drm_amdgpu_cs_chunk_syncobj);
	for (i = 0; i < num_deps; ++i) {
		/*
		 * NOTE(review): the next line uses "deps", which is not declared
		 * in this function, and the old two-argument call — it looks like
		 * a pre-patch line kept by the diff rendering; confirm.
		 */
		r = amdgpu_syncobj_lookup_and_add_to_sync(p, deps[i].handle);
		r = amdgpu_syncobj_lookup_and_add_to_sync(p,
							  syncobj_deps[i].handle,
							  syncobj_deps[i].point,
							  syncobj_deps[i].flags);
		if (r)
			return r;
	}

	return 0;
}

static int amdgpu_cs_process_syncobj_out_dep(struct amdgpu_cs_parser *p,
					     struct amdgpu_cs_chunk *chunk)
{
	struct drm_amdgpu_cs_chunk_sem *deps;
	unsigned num_deps;
	int i;
	struct drm_amdgpu_cs_chunk_sem *deps;

	deps = (struct drm_amdgpu_cs_chunk_sem *)chunk->kdata;
	num_deps = chunk->length_dw * 4 /
		sizeof(struct drm_amdgpu_cs_chunk_sem);

	p->post_dep_syncobjs = kmalloc_array(num_deps,
					     sizeof(struct drm_syncobj *),
	p->post_deps = kmalloc_array(num_deps, sizeof(*p->post_deps),
				     GFP_KERNEL);
	p->num_post_deps = 0;

	if (!p->post_deps)
		return -ENOMEM;


	for (i = 0; i < num_deps; ++i) {
		p->post_deps[i].syncobj =
			drm_syncobj_find(p->filp, deps[i].handle);
		if (!p->post_deps[i].syncobj)
			return -EINVAL;
		p->post_deps[i].chain = NULL;
		p->post_deps[i].point = 0;
		p->num_post_deps++;
	}

	return 0;
}


static int amdgpu_cs_process_syncobj_timeline_out_dep(struct amdgpu_cs_parser *p,
						      struct amdgpu_cs_chunk
						      *chunk)
{
	struct drm_amdgpu_cs_chunk_syncobj *syncobj_deps;
	unsigned num_deps;
	int i;

	syncobj_deps = (struct drm_amdgpu_cs_chunk_syncobj *)chunk->kdata;
	num_deps = chunk->length_dw * 4 /
		sizeof(struct drm_amdgpu_cs_chunk_syncobj);

	p->post_deps = kmalloc_array(num_deps, sizeof(*p->post_deps),
				     GFP_KERNEL);
	p->num_post_dep_syncobjs = 0;
	p->num_post_deps = 0;

	if (!p->post_dep_syncobjs)
	if (!p->post_deps)
		return -ENOMEM;

	for (i = 0; i < num_deps; ++i) {
		p->post_dep_syncobjs[i] = drm_syncobj_find(p->filp, deps[i].handle);
		if (!p->post_dep_syncobjs[i])
		struct amdgpu_cs_post_dep *dep = &p->post_deps[i];

		dep->chain = NULL;
		if (syncobj_deps[i].point) {
			dep->chain = kmalloc(sizeof(*dep->chain), GFP_KERNEL);
			if (!dep->chain)
				return -ENOMEM;
		}

		dep->syncobj = drm_syncobj_find(p->filp,
						syncobj_deps[i].handle);
		if (!dep->syncobj) {
			kfree(dep->chain);
			return -EINVAL;
		p->num_post_dep_syncobjs++;
		}
		dep->point = syncobj_deps[i].point;
		p->num_post_deps++;
	}

	return 0;
}

@@ -1187,19 +1268,33 @@ static int amdgpu_cs_dependencies(struct amdgpu_device *adev,

		chunk = &p->chunks[i];

		if (chunk->chunk_id == AMDGPU_CHUNK_ID_DEPENDENCIES ||
		    chunk->chunk_id == AMDGPU_CHUNK_ID_SCHEDULED_DEPENDENCIES) {
		switch (chunk->chunk_id) {
		case AMDGPU_CHUNK_ID_DEPENDENCIES:
		case AMDGPU_CHUNK_ID_SCHEDULED_DEPENDENCIES:
			r = amdgpu_cs_process_fence_dep(p, chunk);
			if (r)
				return r;
		} else if (chunk->chunk_id == AMDGPU_CHUNK_ID_SYNCOBJ_IN) {
			break;
		case AMDGPU_CHUNK_ID_SYNCOBJ_IN:
			r = amdgpu_cs_process_syncobj_in_dep(p, chunk);
			if (r)
				return r;
		} else if (chunk->chunk_id == AMDGPU_CHUNK_ID_SYNCOBJ_OUT) {
			break;
		case AMDGPU_CHUNK_ID_SYNCOBJ_OUT:
			r = amdgpu_cs_process_syncobj_out_dep(p, chunk);
			if (r)
				return r;
			break;
		case AMDGPU_CHUNK_ID_SYNCOBJ_TIMELINE_WAIT:
			r = amdgpu_cs_process_syncobj_timeline_in_dep(p, chunk);
			if (r)
				return r;
			break;
		case AMDGPU_CHUNK_ID_SYNCOBJ_TIMELINE_SIGNAL:
			r = amdgpu_cs_process_syncobj_timeline_out_dep(p, chunk);
			if (r)
				return r;
			break;
		}
	}

@@ -1210,8 +1305,17 @@ static void amdgpu_cs_post_dependencies(struct amdgpu_cs_parser *p)
{
	int i;

	/*
	 * NOTE(review): the next two lines use the old post_dep_syncobjs
	 * fields — they look like the pre-patch loop kept by the diff
	 * rendering; confirm against the tree.
	 */
	for (i = 0; i < p->num_post_dep_syncobjs; ++i)
		drm_syncobj_replace_fence(p->post_dep_syncobjs[i], p->fence);
	/* Publish p->fence on every recorded post dependency. */
	for (i = 0; i < p->num_post_deps; ++i) {
		if (p->post_deps[i].chain && p->post_deps[i].point) {
			/* Chain node and non-zero point: add p->fence at that point. */
			drm_syncobj_add_point(p->post_deps[i].syncobj,
					      p->post_deps[i].chain,
					      p->fence, p->post_deps[i].point);
			/*
			 * Clear the pointer so amdgpu_cs_parser_fini() does not
			 * kfree() the node; presumably the syncobj owns it now.
			 */
			p->post_deps[i].chain = NULL;
		} else {
			/* No chain node or point 0: replace the syncobj's fence. */
			drm_syncobj_replace_fence(p->post_deps[i].syncobj,
						  p->fence);
		}
	}
}

static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
+8 −0
Original line number Diff line number Diff line
@@ -528,6 +528,8 @@ struct drm_amdgpu_gem_va {
#define AMDGPU_CHUNK_ID_SYNCOBJ_OUT     0x05
#define AMDGPU_CHUNK_ID_BO_HANDLES      0x06
#define AMDGPU_CHUNK_ID_SCHEDULED_DEPENDENCIES	0x07
/*
 * Timeline syncobj chunks: the chunk data is parsed as an array of
 * struct drm_amdgpu_cs_chunk_syncobj.
 */
#define AMDGPU_CHUNK_ID_SYNCOBJ_TIMELINE_WAIT    0x08
#define AMDGPU_CHUNK_ID_SYNCOBJ_TIMELINE_SIGNAL  0x09

struct drm_amdgpu_cs_chunk {
	__u32		chunk_id;
@@ -608,6 +610,12 @@ struct drm_amdgpu_cs_chunk_sem {
	__u32 handle;
};

/*
 * One element of an AMDGPU_CHUNK_ID_SYNCOBJ_TIMELINE_WAIT or
 * AMDGPU_CHUNK_ID_SYNCOBJ_TIMELINE_SIGNAL chunk.
 */
struct drm_amdgpu_cs_chunk_syncobj {
       /* Syncobj handle, resolved against the submitting DRM file. */
       __u32 handle;
       /*
        * For wait chunks, passed through to drm_syncobj_find_fence().
        * The signal path shown in this change does not read it.
        */
       __u32 flags;
       /*
        * Timeline point to wait for or to signal. For signal chunks a
        * value of 0 replaces the syncobj's fence instead of adding a point.
        */
       __u64 point;
};

#define AMDGPU_FENCE_TO_HANDLE_GET_SYNCOBJ	0
#define AMDGPU_FENCE_TO_HANDLE_GET_SYNCOBJ_FD	1
#define AMDGPU_FENCE_TO_HANDLE_GET_SYNC_FILE_FD	2