Commit f77c9aff authored by Huang Rui, committed by Alex Deucher
Browse files

drm/amdgpu: Fix per-IB secure flag GFX hang



Since commit "Move to a per-IB secure flag (TMZ)",
we've been seeing hangs in GFX. We need to send
FRAME CONTROL stop/start back-to-back, every time
we flip the TMZ flag. That is, when we transition
from TMZ to non-TMZ we have to send a stop with
TMZ followed by a start with non-TMZ, and
similarly for transitioning from non-TMZ into TMZ.

This patch implements this, thus fixing the GFX
hang.

v1 -> v2:
As suggested by Luben, and accept part of the implementation from this patch:
- Put "secure" close to the loop and use optimization
- Change "secure" to bool again, and move "secure == -1" out of the loop.
v3: Small fixes/optimizations.

Reported-and-Tested-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>
Signed-off-by: Huang Rui <ray.huang@amd.com>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Luben Tuikov <luben.tuikov@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
parent b71a564e
Loading
Loading
Loading
Loading
+14 −14
Original line number Diff line number Diff line
@@ -218,7 +218,14 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
		amdgpu_ring_emit_cntxcntl(ring, status);
	}

	/* Setup initial TMZiness and send it off.
	 */
	secure = false;
	if (job && ring->funcs->emit_frame_cntl) {
		secure = ib->flags & AMDGPU_IB_FLAGS_SECURE;
		amdgpu_ring_emit_frame_cntl(ring, true, secure);
	}

	for (i = 0; i < num_ibs; ++i) {
		ib = &ibs[i];

@@ -230,27 +237,20 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
		    !amdgpu_sriov_vf(adev)) /* for SRIOV preemption, Preamble CE ib must be inserted anyway */
			continue;

		/* If this IB is TMZ, add frame TMZ start packet,
		 * else, turn off TMZ.
		 */
		if (ib->flags & AMDGPU_IB_FLAGS_SECURE && ring->funcs->emit_tmz) {
			if (!secure) {
				secure = true;
				amdgpu_ring_emit_tmz(ring, true);
		if (job && ring->funcs->emit_frame_cntl) {
			if (secure != !!(ib->flags & AMDGPU_IB_FLAGS_SECURE)) {
				amdgpu_ring_emit_frame_cntl(ring, false, secure);
				secure = !secure;
				amdgpu_ring_emit_frame_cntl(ring, true, secure);
			}
		} else if (secure) {
			secure = false;
			amdgpu_ring_emit_tmz(ring, false);
		}

		amdgpu_ring_emit_ib(ring, job, ib, status);
		status &= ~AMDGPU_HAVE_CTX_SWITCH;
	}

	if (secure) {
		secure = false;
		amdgpu_ring_emit_tmz(ring, false);
	}
	if (job && ring->funcs->emit_frame_cntl)
		amdgpu_ring_emit_frame_cntl(ring, false, secure);

#ifdef CONFIG_X86_64
	if (!(adev->flags & AMD_IS_APU))
+3 −2
Original line number Diff line number Diff line
@@ -177,7 +177,8 @@ struct amdgpu_ring_funcs {
	void (*emit_reg_write_reg_wait)(struct amdgpu_ring *ring,
					uint32_t reg0, uint32_t reg1,
					uint32_t ref, uint32_t mask);
	void (*emit_tmz)(struct amdgpu_ring *ring, bool start);
	void (*emit_frame_cntl)(struct amdgpu_ring *ring, bool start,
				bool secure);
	/* Try to soft recover the ring to make the fence signal */
	void (*soft_recovery)(struct amdgpu_ring *ring, unsigned vmid);
	int (*preempt_ib)(struct amdgpu_ring *ring);
@@ -256,7 +257,7 @@ struct amdgpu_ring {
#define amdgpu_ring_emit_wreg(r, d, v) (r)->funcs->emit_wreg((r), (d), (v))
#define amdgpu_ring_emit_reg_wait(r, d, v, m) (r)->funcs->emit_reg_wait((r), (d), (v), (m))
#define amdgpu_ring_emit_reg_write_reg_wait(r, d0, d1, v, m) (r)->funcs->emit_reg_write_reg_wait((r), (d0), (d1), (v), (m))
#define amdgpu_ring_emit_tmz(r, b) (r)->funcs->emit_tmz((r), (b))
#define amdgpu_ring_emit_frame_cntl(r, b, s) (r)->funcs->emit_frame_cntl((r), (b), (s))
#define amdgpu_ring_pad_ib(r, ib) ((r)->funcs->pad_ib((r), (ib)))
#define amdgpu_ring_init_cond_exec(r) (r)->funcs->init_cond_exec((r))
#define amdgpu_ring_patch_cond_exec(r,o) (r)->funcs->patch_cond_exec((r),(o))
+8 −7
Original line number Diff line number Diff line
@@ -3037,7 +3037,7 @@ static int gfx_v10_0_rlc_backdoor_autoload_enable(struct amdgpu_device *adev);
static int gfx_v10_0_wait_for_rlc_autoload_complete(struct amdgpu_device *adev);
static void gfx_v10_0_ring_emit_ce_meta(struct amdgpu_ring *ring, bool resume);
static void gfx_v10_0_ring_emit_de_meta(struct amdgpu_ring *ring, bool resume);
static void gfx_v10_0_ring_emit_tmz(struct amdgpu_ring *ring, bool start);
static void gfx_v10_0_ring_emit_frame_cntl(struct amdgpu_ring *ring, bool start, bool secure);

static void gfx10_kiq_set_resources(struct amdgpu_ring *kiq_ring, uint64_t queue_mask)
{
@@ -7599,12 +7599,13 @@ static void gfx_v10_0_ring_emit_de_meta(struct amdgpu_ring *ring, bool resume)
					   sizeof(de_payload) >> 2);
}

static void gfx_v10_0_ring_emit_tmz(struct amdgpu_ring *ring, bool start)
static void gfx_v10_0_ring_emit_frame_cntl(struct amdgpu_ring *ring, bool start,
				    bool secure)
{
	if (amdgpu_is_tmz(ring->adev)) {
	uint32_t v = secure ? FRAME_TMZ : 0;

	amdgpu_ring_write(ring, PACKET3(PACKET3_FRAME_CONTROL, 0));
		amdgpu_ring_write(ring, FRAME_TMZ | FRAME_CMD(start ? 0 : 1));
	}
	amdgpu_ring_write(ring, v | FRAME_CMD(start ? 0 : 1));
}

static void gfx_v10_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg,
@@ -8058,7 +8059,7 @@ static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_gfx = {
	.init_cond_exec = gfx_v10_0_ring_emit_init_cond_exec,
	.patch_cond_exec = gfx_v10_0_ring_emit_patch_cond_exec,
	.preempt_ib = gfx_v10_0_ring_preempt_ib,
	.emit_tmz = gfx_v10_0_ring_emit_tmz,
	.emit_frame_cntl = gfx_v10_0_ring_emit_frame_cntl,
	.emit_wreg = gfx_v10_0_ring_emit_wreg,
	.emit_reg_wait = gfx_v10_0_ring_emit_reg_wait,
	.emit_reg_write_reg_wait = gfx_v10_0_ring_emit_reg_write_reg_wait,
+7 −6
Original line number Diff line number Diff line
@@ -5442,12 +5442,13 @@ static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring)
	amdgpu_ring_write_multiple(ring, (void *)&de_payload, sizeof(de_payload) >> 2);
}

static void gfx_v9_0_ring_emit_tmz(struct amdgpu_ring *ring, bool start)
static void gfx_v9_0_ring_emit_frame_cntl(struct amdgpu_ring *ring, bool start,
				   bool secure)
{
	if (amdgpu_is_tmz(ring->adev)) {
	uint32_t v = secure ? FRAME_TMZ : 0;

	amdgpu_ring_write(ring, PACKET3(PACKET3_FRAME_CONTROL, 0));
		amdgpu_ring_write(ring, FRAME_TMZ | FRAME_CMD(start ? 0 : 1));
	}
	amdgpu_ring_write(ring, v | FRAME_CMD(start ? 0 : 1));
}

static void gfx_v9_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags)
@@ -6699,7 +6700,7 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_gfx = {
	.emit_cntxcntl = gfx_v9_ring_emit_cntxcntl,
	.init_cond_exec = gfx_v9_0_ring_emit_init_cond_exec,
	.patch_cond_exec = gfx_v9_0_ring_emit_patch_cond_exec,
	.emit_tmz = gfx_v9_0_ring_emit_tmz,
	.emit_frame_cntl = gfx_v9_0_ring_emit_frame_cntl,
	.emit_wreg = gfx_v9_0_ring_emit_wreg,
	.emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
	.emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,