Commit 068cae74 authored by Ben Skeggs
Browse files

drm/nouveau/gr/gf100-: calculate and use sm mapping table



There are a number of places that require this data, so let's separate out
the calculations to ensure they remain consistent.

This is incorrect for GM200 and newer, but will produce the same results
as we did before.

Signed-off-by: Ben Skeggs <bskeggs@redhat.com>
parent d00ffc0c
Loading
Loading
Loading
Loading
+16 −24
Original line number Diff line number Diff line
@@ -1092,23 +1092,18 @@ gf100_grctx_generate_r4060a8(struct gf100_gr *gr)
	struct nvkm_device *device = gr->base.engine.subdev.device;
	const u8 gpcmax = nvkm_rd32(device, 0x022430);
	const u8 tpcmax = nvkm_rd32(device, 0x022434) * gpcmax;
	u8 tpcnr[GPC_MAX], data[TPC_MAX];
	int gpc, tpc, i;

	memcpy(tpcnr, gr->tpc_nr, sizeof(gr->tpc_nr));
	memset(data, 0x1f, sizeof(data));

	gpc = -1;
	for (tpc = 0; tpc < gr->tpc_total; tpc++) {
		do {
			gpc = (gpc + 1) % gr->gpc_nr;
		} while (!tpcnr[gpc]);
		tpcnr[gpc]--;
		data[tpc] = gpc;
	}
	int i, j, sm = 0;
	u32 data;

	for (i = 0; i < DIV_ROUND_UP(tpcmax, 4); i++)
		nvkm_wr32(device, 0x4060a8 + (i * 4), ((u32 *)data)[i]);
	for (i = 0; i < DIV_ROUND_UP(tpcmax, 4); i++) {
		for (data = 0, j = 0; j < 4; j++) {
			if (sm < gr->sm_nr)
				data |= gr->sm[sm++].gpc << (j * 8);
			else
				data |= 0x1f << (j * 8);
		}
		nvkm_wr32(device, 0x4060a8 + (i * 4), data);
	}
}

void
@@ -1326,16 +1321,13 @@ gf100_grctx_generate_floorsweep(struct gf100_gr *gr)
{
	struct nvkm_device *device = gr->base.engine.subdev.device;
	const struct gf100_grctx_func *func = gr->func->grctx;
	int tpc, gpc, sm, i, j;
	int gpc, sm, i, j;
	u32 data;

	for (tpc = 0, sm = 0; tpc < gr->tpc_max; tpc++) {
		for (gpc = 0; gpc < gr->gpc_nr; gpc++) {
			if (tpc < gr->tpc_nr[gpc])
				func->sm_id(gr, gpc, tpc, sm++);
	for (sm = 0; sm < gr->sm_nr; sm++) {
		func->sm_id(gr, gr->sm[sm].gpc, gr->sm[sm].tpc, sm);
		if (func->tpc_nr)
				func->tpc_nr(gr, gpc);
		}
			func->tpc_nr(gr, gr->sm[sm].gpc);
	}

	for (gpc = 0, i = 0; i < 4; i++) {
+6 −16
Original line number Diff line number Diff line
@@ -49,23 +49,13 @@ gm200_grctx_generate_smid_config(struct gf100_gr *gr)
	const u32 dist_nr = DIV_ROUND_UP(gr->tpc_total, 4);
	u32 dist[TPC_MAX / 4] = {};
	u32 gpcs[GPC_MAX] = {};
	u8  tpcnr[GPC_MAX];
	int tpc, gpc, i;
	u8  sm, i;

	memcpy(tpcnr, gr->tpc_nr, sizeof(gr->tpc_nr));

	/* won't result in the same distribution as the binary driver where
	 * some of the gpcs have more tpcs than others, but this shall do
	 * for the moment.  the code for earlier gpus has this issue too.
	 */
	for (gpc = -1, i = 0; i < gr->tpc_total; i++) {
		do {
			gpc = (gpc + 1) % gr->gpc_nr;
		} while(!tpcnr[gpc]);
		tpc = gr->tpc_nr[gpc] - tpcnr[gpc]--;

		dist[i / 4] |= ((gpc << 4) | tpc) << ((i % 4) * 8);
		gpcs[gpc] |= i << (tpc * 8);
	for (sm = 0; sm < gr->sm_nr; sm++) {
		const u8 gpc = gr->sm[sm].gpc;
		const u8 tpc = gr->sm[sm].tpc;
		dist[sm / 4] |= ((gpc << 4) | tpc) << ((sm % 4) * 8);
		gpcs[gpc] |= sm << (tpc * 8);
	}

	for (i = 0; i < dist_nr; i++)
+6 −16
Original line number Diff line number Diff line
@@ -95,23 +95,13 @@ gp100_grctx_generate_smid_config(struct gf100_gr *gr)
	struct nvkm_device *device = gr->base.engine.subdev.device;
	const u32 dist_nr = DIV_ROUND_UP(gr->tpc_total, 4);
	u32 dist[TPC_MAX / 4] = {}, gpcs[16] = {};
	u8  tpcnr[GPC_MAX];
	int tpc, gpc, i;
	u8  sm, i;

	memcpy(tpcnr, gr->tpc_nr, sizeof(gr->tpc_nr));

	/* won't result in the same distribution as the binary driver where
	 * some of the gpcs have more tpcs than others, but this shall do
	 * for the moment.  the code for earlier gpus has this issue too.
	 */
	for (gpc = -1, i = 0; i < gr->tpc_total; i++) {
		do {
			gpc = (gpc + 1) % gr->gpc_nr;
		} while(!tpcnr[gpc]);
		tpc = gr->tpc_nr[gpc] - tpcnr[gpc]--;

		dist[i / 4] |= ((gpc << 4) | tpc) << ((i % 4) * 8);
		gpcs[gpc + (gr->func->gpc_nr * (tpc / 4))] |= i << (tpc * 8);
	for (sm = 0; sm < gr->sm_nr; sm++) {
		const u8 gpc = gr->sm[sm].gpc;
		const u8 tpc = gr->sm[sm].tpc;
		dist[sm / 4] |= ((gpc << 4) | tpc) << ((sm % 4) * 8);
		gpcs[gpc + (gr->func->gpc_nr * (tpc / 4))] |= sm << ((tpc % 4) * 8);
	}

	for (i = 0; i < dist_nr; i++)
+17 −0
Original line number Diff line number Diff line
@@ -1652,6 +1652,21 @@ gf100_gr_init_ctxctl(struct gf100_gr *gr)
	return ret;
}

/* Fill in the gr->sm[] mapping table, recording a (gpc, tpc) pair for
 * every TPC that is present according to gr->tpc_nr[].
 *
 * The TPC index is iterated in the outer loop and the GPC index in the
 * inner loop, so consecutive table entries step through the GPCs before
 * moving on to the next TPC index.
 *
 * Entries are appended starting at the current value of gr->sm_nr, which
 * is incremented once per entry written; it is not reset here.
 */
void
gf100_gr_oneinit_sm_id(struct gf100_gr *gr)
{
	int gpc_idx, tpc_idx;

	for (tpc_idx = 0; tpc_idx < gr->tpc_max; tpc_idx++) {
		for (gpc_idx = 0; gpc_idx < gr->gpc_nr; gpc_idx++) {
			/* This GPC has no TPC at this index. */
			if (tpc_idx >= gr->tpc_nr[gpc_idx])
				continue;

			gr->sm[gr->sm_nr].gpc = gpc_idx;
			gr->sm[gr->sm_nr].tpc = tpc_idx;
			gr->sm_nr++;
		}
	}
}

void
gf100_gr_oneinit_tiles(struct gf100_gr *gr)
{
@@ -1769,6 +1784,7 @@ gf100_gr_oneinit(struct nvkm_gr *base)

	memset(gr->tile, 0xff, sizeof(gr->tile));
	gr->func->oneinit_tiles(gr);
	gr->func->oneinit_sm_id(gr);
	return 0;
}

@@ -2204,6 +2220,7 @@ gf100_gr_gpccs_ucode = {
static const struct gf100_gr_func
gf100_gr = {
	.oneinit_tiles = gf100_gr_oneinit_tiles,
	.oneinit_sm_id = gf100_gr_oneinit_sm_id,
	.init = gf100_gr_init,
	.init_gpc_mmu = gf100_gr_init_gpc_mmu,
	.init_vsc_stream_master = gf100_gr_init_vsc_stream_master,
+9 −0
Original line number Diff line number Diff line
@@ -110,6 +110,12 @@ struct gf100_gr {
	u8 screen_tile_row_offset;
	u8 tile[TPC_MAX];

	struct {
		u8 gpc;
		u8 tpc;
	} sm[TPC_MAX];
	u8 sm_nr;

	struct gf100_gr_data mmio_data[4];
	struct gf100_gr_mmio mmio_list[4096/8];
	u32  size;
@@ -125,6 +131,7 @@ void *gf100_gr_dtor(struct nvkm_gr *);
struct gf100_gr_func {
	void (*dtor)(struct gf100_gr *);
	void (*oneinit_tiles)(struct gf100_gr *);
	void (*oneinit_sm_id)(struct gf100_gr *);
	int (*init)(struct gf100_gr *);
	void (*init_gpc_mmu)(struct gf100_gr *);
	void (*init_r405a14)(struct gf100_gr *);
@@ -167,6 +174,7 @@ struct gf100_gr_func {

int gf100_gr_rops(struct gf100_gr *);
void gf100_gr_oneinit_tiles(struct gf100_gr *);
void gf100_gr_oneinit_sm_id(struct gf100_gr *);
int gf100_gr_init(struct gf100_gr *);
void gf100_gr_init_vsc_stream_master(struct gf100_gr *);
void gf100_gr_init_zcull(struct gf100_gr *);
@@ -195,6 +203,7 @@ void gm107_gr_init_400054(struct gf100_gr *);
int gk20a_gr_init(struct gf100_gr *);

void gm200_gr_oneinit_tiles(struct gf100_gr *);
void gm200_gr_oneinit_sm_id(struct gf100_gr *);
int gm200_gr_rops(struct gf100_gr *);
void gm200_gr_init_num_active_ltcs(struct gf100_gr *);
void gm200_gr_init_ds_hww_esr_2(struct gf100_gr *);
Loading