Commit 2fb7487a authored by Jordan Crouse's avatar Jordan Crouse Committed by Rob Clark
Browse files

drm/msm: Get rid of the REG_ADRENO offsets



As newer GPU families are added it makes less sense to maintain a
"generic" version functions for older families. Move adreno_submit()
and get_rptr() into the target specific code for a2xx, a3xx and a4xx.
Add a parameter to adreno_flush to pass the target specific WPTR register
instead of relying on the generic register.

All of this gets rid of the last of the REG_ADRENO offsets so remove all
all the register definitions and infrastructure.

Signed-off-by: default avatarJordan Crouse <jcrouse@codeaurora.org>
Signed-off-by: default avatarRob Clark <robdclark@chromium.org>
parent d3a569fc
Loading
Loading
Loading
Loading
+50 −15
Original line number Diff line number Diff line
@@ -10,6 +10,48 @@ extern bool hang_debug;
static void a2xx_dump(struct msm_gpu *gpu);
static bool a2xx_idle(struct msm_gpu *gpu);

static void a2xx_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit)
{
	struct msm_drm_private *priv = gpu->dev->dev_private;
	struct msm_ringbuffer *ring = submit->ring;
	unsigned int i;

	for (i = 0; i < submit->nr_cmds; i++) {
		switch (submit->cmd[i].type) {
		case MSM_SUBMIT_CMD_IB_TARGET_BUF:
			/* ignore IB-targets */
			break;
		case MSM_SUBMIT_CMD_CTX_RESTORE_BUF:
			/* ignore if there has not been a ctx switch: */
			if (priv->lastctx == submit->queue->ctx)
				break;
			fallthrough;
		case MSM_SUBMIT_CMD_BUF:
			OUT_PKT3(ring, CP_INDIRECT_BUFFER_PFD, 2);
			OUT_RING(ring, lower_32_bits(submit->cmd[i].iova));
			OUT_RING(ring, submit->cmd[i].size);
			OUT_PKT2(ring);
			break;
		}
	}

	OUT_PKT0(ring, REG_AXXX_CP_SCRATCH_REG2, 1);
	OUT_RING(ring, submit->seqno);

	/* wait for idle before cache flush/interrupt */
	OUT_PKT3(ring, CP_WAIT_FOR_IDLE, 1);
	OUT_RING(ring, 0x00000000);

	OUT_PKT3(ring, CP_EVENT_WRITE, 3);
	OUT_RING(ring, CACHE_FLUSH_TS);
	OUT_RING(ring, rbmemptr(ring, fence));
	OUT_RING(ring, submit->seqno);
	OUT_PKT3(ring, CP_INTERRUPT, 1);
	OUT_RING(ring, 0x80000000);

	adreno_flush(gpu, ring, REG_AXXX_CP_RB_WPTR);
}

static bool a2xx_me_init(struct msm_gpu *gpu)
{
	struct msm_ringbuffer *ring = gpu->rb[0];
@@ -53,7 +95,7 @@ static bool a2xx_me_init(struct msm_gpu *gpu)
	OUT_PKT3(ring, CP_SET_PROTECTED_MODE, 1);
	OUT_RING(ring, 1);

	gpu->funcs->flush(gpu, ring);
	adreno_flush(gpu, ring, REG_AXXX_CP_RB_WPTR);
	return a2xx_idle(gpu);
}

@@ -421,16 +463,11 @@ a2xx_create_address_space(struct msm_gpu *gpu, struct platform_device *pdev)
	return aspace;
}

/* Register offset defines for A2XX - copy of A3XX */
static const unsigned int a2xx_register_offsets[REG_ADRENO_REGISTER_MAX] = {
	REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_BASE, REG_AXXX_CP_RB_BASE),
	REG_ADRENO_SKIP(REG_ADRENO_CP_RB_BASE_HI),
	REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_RPTR_ADDR, REG_AXXX_CP_RB_RPTR_ADDR),
	REG_ADRENO_SKIP(REG_ADRENO_CP_RB_RPTR_ADDR_HI),
	REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_RPTR, REG_AXXX_CP_RB_RPTR),
	REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_WPTR, REG_AXXX_CP_RB_WPTR),
	REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_CNTL, REG_AXXX_CP_RB_CNTL),
};
static u32 a2xx_get_rptr(struct msm_gpu *gpu, struct msm_ringbuffer *ring)
{
	ring->memptrs->rptr = gpu_read(gpu, REG_AXXX_CP_RB_RPTR);
	return ring->memptrs->rptr;
}

static const struct adreno_gpu_funcs funcs = {
	.base = {
@@ -439,8 +476,7 @@ static const struct adreno_gpu_funcs funcs = {
		.pm_suspend = msm_gpu_pm_suspend,
		.pm_resume = msm_gpu_pm_resume,
		.recover = a2xx_recover,
		.submit = adreno_submit,
		.flush = adreno_flush,
		.submit = a2xx_submit,
		.active_ring = adreno_active_ring,
		.irq = a2xx_irq,
		.destroy = a2xx_destroy,
@@ -450,6 +486,7 @@ static const struct adreno_gpu_funcs funcs = {
		.gpu_state_get = a2xx_gpu_state_get,
		.gpu_state_put = adreno_gpu_state_put,
		.create_address_space = a2xx_create_address_space,
		.get_rptr = a2xx_get_rptr,
	},
};

@@ -491,8 +528,6 @@ struct msm_gpu *a2xx_gpu_init(struct drm_device *dev)
	else
		adreno_gpu->registers = a220_registers;

	adreno_gpu->reg_offsets = a2xx_register_offsets;

	ret = adreno_gpu_init(dev, pdev, adreno_gpu, &funcs, 1);
	if (ret)
		goto fail;
+63 −14
Original line number Diff line number Diff line
@@ -28,6 +28,61 @@ extern bool hang_debug;
static void a3xx_dump(struct msm_gpu *gpu);
static bool a3xx_idle(struct msm_gpu *gpu);

static void a3xx_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit)
{
	struct msm_drm_private *priv = gpu->dev->dev_private;
	struct msm_ringbuffer *ring = submit->ring;
	unsigned int i;

	for (i = 0; i < submit->nr_cmds; i++) {
		switch (submit->cmd[i].type) {
		case MSM_SUBMIT_CMD_IB_TARGET_BUF:
			/* ignore IB-targets */
			break;
		case MSM_SUBMIT_CMD_CTX_RESTORE_BUF:
			/* ignore if there has not been a ctx switch: */
			if (priv->lastctx == submit->queue->ctx)
				break;
			fallthrough;
		case MSM_SUBMIT_CMD_BUF:
			OUT_PKT3(ring, CP_INDIRECT_BUFFER_PFD, 2);
			OUT_RING(ring, lower_32_bits(submit->cmd[i].iova));
			OUT_RING(ring, submit->cmd[i].size);
			OUT_PKT2(ring);
			break;
		}
	}

	OUT_PKT0(ring, REG_AXXX_CP_SCRATCH_REG2, 1);
	OUT_RING(ring, submit->seqno);

	/* Flush HLSQ lazy updates to make sure there is nothing
	 * pending for indirect loads after the timestamp has
	 * passed:
	 */
	OUT_PKT3(ring, CP_EVENT_WRITE, 1);
	OUT_RING(ring, HLSQ_FLUSH);

	/* wait for idle before cache flush/interrupt */
	OUT_PKT3(ring, CP_WAIT_FOR_IDLE, 1);
	OUT_RING(ring, 0x00000000);

	/* BIT(31) of CACHE_FLUSH_TS triggers CACHE_FLUSH_TS IRQ from GPU */
	OUT_PKT3(ring, CP_EVENT_WRITE, 3);
	OUT_RING(ring, CACHE_FLUSH_TS | BIT(31));
	OUT_RING(ring, rbmemptr(ring, fence));
	OUT_RING(ring, submit->seqno);

#if 0
	/* Dummy set-constant to trigger context rollover */
	OUT_PKT3(ring, CP_SET_CONSTANT, 2);
	OUT_RING(ring, CP_REG(REG_A3XX_HLSQ_CL_KERNEL_GROUP_X_REG));
	OUT_RING(ring, 0x00000000);
#endif

	adreno_flush(gpu, ring, REG_AXXX_CP_RB_WPTR);
}

static bool a3xx_me_init(struct msm_gpu *gpu)
{
	struct msm_ringbuffer *ring = gpu->rb[0];
@@ -51,7 +106,7 @@ static bool a3xx_me_init(struct msm_gpu *gpu)
	OUT_RING(ring, 0x00000000);
	OUT_RING(ring, 0x00000000);

	gpu->funcs->flush(gpu, ring);
	adreno_flush(gpu, ring, REG_AXXX_CP_RB_WPTR);
	return a3xx_idle(gpu);
}

@@ -423,16 +478,11 @@ static struct msm_gpu_state *a3xx_gpu_state_get(struct msm_gpu *gpu)
	return state;
}

/* Register offset defines for A3XX */
static const unsigned int a3xx_register_offsets[REG_ADRENO_REGISTER_MAX] = {
	REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_BASE, REG_AXXX_CP_RB_BASE),
	REG_ADRENO_SKIP(REG_ADRENO_CP_RB_BASE_HI),
	REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_RPTR_ADDR, REG_AXXX_CP_RB_RPTR_ADDR),
	REG_ADRENO_SKIP(REG_ADRENO_CP_RB_RPTR_ADDR_HI),
	REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_RPTR, REG_AXXX_CP_RB_RPTR),
	REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_WPTR, REG_AXXX_CP_RB_WPTR),
	REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_CNTL, REG_AXXX_CP_RB_CNTL),
};
static u32 a3xx_get_rptr(struct msm_gpu *gpu, struct msm_ringbuffer *ring)
{
	ring->memptrs->rptr = gpu_read(gpu, REG_AXXX_CP_RB_RPTR);
	return ring->memptrs->rptr;
}

static const struct adreno_gpu_funcs funcs = {
	.base = {
@@ -441,8 +491,7 @@ static const struct adreno_gpu_funcs funcs = {
		.pm_suspend = msm_gpu_pm_suspend,
		.pm_resume = msm_gpu_pm_resume,
		.recover = a3xx_recover,
		.submit = adreno_submit,
		.flush = adreno_flush,
		.submit = a3xx_submit,
		.active_ring = adreno_active_ring,
		.irq = a3xx_irq,
		.destroy = a3xx_destroy,
@@ -452,6 +501,7 @@ static const struct adreno_gpu_funcs funcs = {
		.gpu_state_get = a3xx_gpu_state_get,
		.gpu_state_put = adreno_gpu_state_put,
		.create_address_space = adreno_iommu_create_address_space,
		.get_rptr = a3xx_get_rptr,
	},
};

@@ -490,7 +540,6 @@ struct msm_gpu *a3xx_gpu_init(struct drm_device *dev)
	gpu->num_perfcntrs = ARRAY_SIZE(perfcntrs);

	adreno_gpu->registers = a3xx_registers;
	adreno_gpu->reg_offsets = a3xx_register_offsets;

	ret = adreno_gpu_init(dev, pdev, adreno_gpu, &funcs, 1);
	if (ret)
+61 −21
Original line number Diff line number Diff line
@@ -22,6 +22,54 @@ extern bool hang_debug;
static void a4xx_dump(struct msm_gpu *gpu);
static bool a4xx_idle(struct msm_gpu *gpu);

static void a4xx_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit)
{
	struct msm_drm_private *priv = gpu->dev->dev_private;
	struct msm_ringbuffer *ring = submit->ring;
	unsigned int i;

	for (i = 0; i < submit->nr_cmds; i++) {
		switch (submit->cmd[i].type) {
		case MSM_SUBMIT_CMD_IB_TARGET_BUF:
			/* ignore IB-targets */
			break;
		case MSM_SUBMIT_CMD_CTX_RESTORE_BUF:
			/* ignore if there has not been a ctx switch: */
			if (priv->lastctx == submit->queue->ctx)
				break;
			fallthrough;
		case MSM_SUBMIT_CMD_BUF:
			OUT_PKT3(ring, CP_INDIRECT_BUFFER_PFE, 2);
			OUT_RING(ring, lower_32_bits(submit->cmd[i].iova));
			OUT_RING(ring, submit->cmd[i].size);
			OUT_PKT2(ring);
			break;
		}
	}

	OUT_PKT0(ring, REG_AXXX_CP_SCRATCH_REG2, 1);
	OUT_RING(ring, submit->seqno);

	/* Flush HLSQ lazy updates to make sure there is nothing
	 * pending for indirect loads after the timestamp has
	 * passed:
	 */
	OUT_PKT3(ring, CP_EVENT_WRITE, 1);
	OUT_RING(ring, HLSQ_FLUSH);

	/* wait for idle before cache flush/interrupt */
	OUT_PKT3(ring, CP_WAIT_FOR_IDLE, 1);
	OUT_RING(ring, 0x00000000);

	/* BIT(31) of CACHE_FLUSH_TS triggers CACHE_FLUSH_TS IRQ from GPU */
	OUT_PKT3(ring, CP_EVENT_WRITE, 3);
	OUT_RING(ring, CACHE_FLUSH_TS | BIT(31));
	OUT_RING(ring, rbmemptr(ring, fence));
	OUT_RING(ring, submit->seqno);

	adreno_flush(gpu, ring, REG_A4XX_CP_RB_WPTR);
}

/*
 * a4xx_enable_hwcg() - Program the clock control registers
 * @device: The adreno device pointer
@@ -129,7 +177,7 @@ static bool a4xx_me_init(struct msm_gpu *gpu)
	OUT_RING(ring, 0x00000000);
	OUT_RING(ring, 0x00000000);

	gpu->funcs->flush(gpu, ring);
	adreno_flush(gpu, ring, REG_A4XX_CP_RB_WPTR);
	return a4xx_idle(gpu);
}

@@ -515,17 +563,6 @@ static struct msm_gpu_state *a4xx_gpu_state_get(struct msm_gpu *gpu)
	return state;
}

/* Register offset defines for A4XX, in order of enum adreno_regs */
static const unsigned int a4xx_register_offsets[REG_ADRENO_REGISTER_MAX] = {
	REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_BASE, REG_A4XX_CP_RB_BASE),
	REG_ADRENO_SKIP(REG_ADRENO_CP_RB_BASE_HI),
	REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_RPTR_ADDR, REG_A4XX_CP_RB_RPTR_ADDR),
	REG_ADRENO_SKIP(REG_ADRENO_CP_RB_RPTR_ADDR_HI),
	REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_RPTR, REG_A4XX_CP_RB_RPTR),
	REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_WPTR, REG_A4XX_CP_RB_WPTR),
	REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_CNTL, REG_A4XX_CP_RB_CNTL),
};

static void a4xx_dump(struct msm_gpu *gpu)
{
	printk("status:   %08x\n",
@@ -576,6 +613,12 @@ static int a4xx_get_timestamp(struct msm_gpu *gpu, uint64_t *value)
	return 0;
}

static u32 a4xx_get_rptr(struct msm_gpu *gpu, struct msm_ringbuffer *ring)
{
	ring->memptrs->rptr = gpu_read(gpu, REG_A4XX_CP_RB_RPTR);
	return ring->memptrs->rptr;
}

static const struct adreno_gpu_funcs funcs = {
	.base = {
		.get_param = adreno_get_param,
@@ -583,8 +626,7 @@ static const struct adreno_gpu_funcs funcs = {
		.pm_suspend = a4xx_pm_suspend,
		.pm_resume = a4xx_pm_resume,
		.recover = a4xx_recover,
		.submit = adreno_submit,
		.flush = adreno_flush,
		.submit = a4xx_submit,
		.active_ring = adreno_active_ring,
		.irq = a4xx_irq,
		.destroy = a4xx_destroy,
@@ -594,6 +636,7 @@ static const struct adreno_gpu_funcs funcs = {
		.gpu_state_get = a4xx_gpu_state_get,
		.gpu_state_put = adreno_gpu_state_put,
		.create_address_space = adreno_iommu_create_address_space,
		.get_rptr = a4xx_get_rptr,
	},
	.get_timestamp = a4xx_get_timestamp,
};
@@ -631,15 +674,12 @@ struct msm_gpu *a4xx_gpu_init(struct drm_device *dev)

	adreno_gpu->registers = adreno_is_a405(adreno_gpu) ? a405_registers :
							     a4xx_registers;
	adreno_gpu->reg_offsets = a4xx_register_offsets;

	/* if needed, allocate gmem: */
	if (adreno_is_a4xx(adreno_gpu)) {
	ret = adreno_gpu_ocmem_init(dev->dev, adreno_gpu,
				    &a4xx_gpu->ocmem);
	if (ret)
		goto fail;
	}

	if (!gpu->aspace) {
		/* TODO we think it is possible to configure the GPU to
+0 −12
Original line number Diff line number Diff line
@@ -1121,17 +1121,6 @@ static irqreturn_t a5xx_irq(struct msm_gpu *gpu)
	return IRQ_HANDLED;
}

static const u32 a5xx_register_offsets[REG_ADRENO_REGISTER_MAX] = {
	REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_BASE, REG_A5XX_CP_RB_BASE),
	REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_BASE_HI, REG_A5XX_CP_RB_BASE_HI),
	REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_RPTR_ADDR, REG_A5XX_CP_RB_RPTR_ADDR),
	REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_RPTR_ADDR_HI,
		REG_A5XX_CP_RB_RPTR_ADDR_HI),
	REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_RPTR, REG_A5XX_CP_RB_RPTR),
	REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_WPTR, REG_A5XX_CP_RB_WPTR),
	REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_CNTL, REG_A5XX_CP_RB_CNTL),
};

static const u32 a5xx_registers[] = {
	0x0000, 0x0002, 0x0004, 0x0020, 0x0022, 0x0026, 0x0029, 0x002B,
	0x002E, 0x0035, 0x0038, 0x0042, 0x0044, 0x0044, 0x0047, 0x0095,
@@ -1587,7 +1576,6 @@ struct msm_gpu *a5xx_gpu_init(struct drm_device *dev)
	gpu = &adreno_gpu->base;

	adreno_gpu->registers = a5xx_registers;
	adreno_gpu->reg_offsets = a5xx_register_offsets;

	a5xx_gpu->lm_leakage = 0x4E001A;

+0 −13
Original line number Diff line number Diff line
@@ -1022,18 +1022,6 @@ static irqreturn_t a6xx_irq(struct msm_gpu *gpu)
	return IRQ_HANDLED;
}

static const u32 a6xx_register_offsets[REG_ADRENO_REGISTER_MAX] = {
	REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_BASE, REG_A6XX_CP_RB_BASE),
	REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_BASE_HI, REG_A6XX_CP_RB_BASE_HI),
	REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_RPTR_ADDR,
		REG_A6XX_CP_RB_RPTR_ADDR_LO),
	REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_RPTR_ADDR_HI,
		REG_A6XX_CP_RB_RPTR_ADDR_HI),
	REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_RPTR, REG_A6XX_CP_RB_RPTR),
	REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_WPTR, REG_A6XX_CP_RB_WPTR),
	REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_CNTL, REG_A6XX_CP_RB_CNTL),
};

static int a6xx_pm_resume(struct msm_gpu *gpu)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
@@ -1208,7 +1196,6 @@ struct msm_gpu *a6xx_gpu_init(struct drm_device *dev)
	gpu = &adreno_gpu->base;

	adreno_gpu->registers = NULL;
	adreno_gpu->reg_offsets = a6xx_register_offsets;

	if (adreno_is_a650(adreno_gpu))
		adreno_gpu->base.hw_apriv = true;
Loading