Commit fdf83646, authored by Marek Olšák, committed by Alex Deucher
Browse files

drm/amdgpu: invalidate L2 before SDMA IBs (v2)



This fixes GPU hangs due to cache coherency issues.

v2: Split the version bump to a separate patch

Signed-off-by: Marek Olšák <marek.olsak@amd.com>
Reviewed-by: Christian König <christian.koenig@amd.com>
Tested-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Cc: stable@vger.kernel.org
parent c938628c
Loading
Loading
Loading
Loading
+16 −0
Original line number Diff line number Diff line
@@ -73,6 +73,22 @@
#define SDMA_OP_AQL_COPY  0
#define SDMA_OP_AQL_BARRIER_OR  0

/*
 * Flag bits and small bit-fields used to build an SDMA GCR request.
 *
 * All values are unsigned on purpose: users OR several flags together and
 * then shift the result further left (e.g. by 16) to place it in a packet
 * dword.  With plain signed constants that shift would move bit 15 into the
 * sign bit of an int, which is undefined behaviour in ISO C.
 *
 * The function-like macros mask their argument to two bits before shifting
 * it into position.
 */
#define SDMA_GCR_RANGE_IS_PA		(1U << 18)
#define SDMA_GCR_SEQ(x)			(((x) & 0x3U) << 16)
#define SDMA_GCR_GL2_WB			(1U << 15)
#define SDMA_GCR_GL2_INV		(1U << 14)
#define SDMA_GCR_GL2_DISCARD		(1U << 13)
#define SDMA_GCR_GL2_RANGE(x)		(((x) & 0x3U) << 11)
#define SDMA_GCR_GL2_US			(1U << 10)
#define SDMA_GCR_GL1_INV		(1U << 9)
#define SDMA_GCR_GLV_INV		(1U << 8)
#define SDMA_GCR_GLK_INV		(1U << 7)
#define SDMA_GCR_GLK_WB			(1U << 6)
#define SDMA_GCR_GLM_INV		(1U << 5)
#define SDMA_GCR_GLM_WB			(1U << 4)
#define SDMA_GCR_GL1_RANGE(x)		(((x) & 0x3U) << 2)
#define SDMA_GCR_GLI_INV(x)		(((x) & 0x3U) << 0)

/*define for op field*/
#define SDMA_PKT_HEADER_op_offset 0
#define SDMA_PKT_HEADER_op_mask   0x000000FF
+13 −1
Original line number Diff line number Diff line
@@ -382,6 +382,18 @@ static void sdma_v5_0_ring_emit_ib(struct amdgpu_ring *ring,
	unsigned vmid = AMDGPU_JOB_GET_VMID(job);
	uint64_t csa_mc_addr = amdgpu_sdma_get_csa_mc_addr(ring, vmid);

	/* Invalidate L2, because if we don't do it, we might get stale cache
	 * lines from previous IBs.
	 */
	amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_GCR_REQ));
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, (SDMA_GCR_GL2_INV |
				 SDMA_GCR_GL2_WB |
				 SDMA_GCR_GLM_INV |
				 SDMA_GCR_GLM_WB) << 16);
	amdgpu_ring_write(ring, 0xffffff80);
	amdgpu_ring_write(ring, 0xffff);

	/* An IB packet must end on a 8 DW boundary--the next dword
	 * must be on a 8-dword boundary. Our IB packet below is 6
	 * dwords long, thus add x number of NOPs, such that, in
@@ -1595,7 +1607,7 @@ static const struct amdgpu_ring_funcs sdma_v5_0_ring_funcs = {
		SOC15_FLUSH_GPU_TLB_NUM_WREG * 3 +
		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 6 * 2 +
		10 + 10 + 10, /* sdma_v5_0_ring_emit_fence x3 for user fence, vm fence */
	.emit_ib_size = 7 + 6, /* sdma_v5_0_ring_emit_ib */
	.emit_ib_size = 5 + 7 + 6, /* sdma_v5_0_ring_emit_ib */
	.emit_ib = sdma_v5_0_ring_emit_ib,
	.emit_fence = sdma_v5_0_ring_emit_fence,
	.emit_pipeline_sync = sdma_v5_0_ring_emit_pipeline_sync,