Commit e33fec48 authored by Andres Rodriguez, committed by Alex Deucher
Browse files

drm/amdgpu: allocate queues horizontally across pipes



Pipes provide better concurrency than queues, therefore we want to make
sure that apps use queues from different pipes whenever possible.

Optimize for the trivial case where an app will consume rings in order,
therefore we don't want adjacent rings to belong to the same pipe.

Reviewed-by: Edward O'Callaghan <funfunctor@folklore1984.net>
Acked-by: Felix Kuehling <Felix.Kuehling@amd.com>
Acked-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Andres Rodriguez <andresx7@gmail.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
parent 5e709562
Loading
Loading
Loading
Loading
+13 −0
Original line number Diff line number Diff line
@@ -1803,6 +1803,19 @@ amdgpu_get_sdma_instance(struct amdgpu_ring *ring)
		return NULL;
}

/*
 * Check whether a compute queue is set in the MEC queue bitmap.
 *
 * The (mec, pipe, queue) triple is flattened into one bit index in which
 * the queue varies fastest, then the pipe, then the mec:
 *
 *   bit = (mec * num_pipe_per_mec + pipe) * num_queue_per_pipe + queue
 *
 * Returns the state of that bit in adev->gfx.mec.queue_bitmap.
 */
static inline bool amdgpu_is_mec_queue_enabled(struct amdgpu_device *adev,
						int mec, int pipe, int queue)
{
	int bit = (mec * adev->gfx.mec.num_pipe_per_mec + pipe)
			* adev->gfx.mec.num_queue_per_pipe
		+ queue;

	return test_bit(bit, adev->gfx.mec.queue_bitmap);
}

/*
 * ASICs macro.
 */
+49 −34
Original line number Diff line number Diff line
@@ -4752,11 +4752,42 @@ static void gfx_v7_0_gpu_early_init(struct amdgpu_device *adev)
	adev->gfx.config.gb_addr_config = gb_addr_config;
}

/*
 * Set up the software state of one compute ring and initialize it.
 *
 * @adev:    device owning the ring
 * @ring_id: slot in adev->gfx.compute_ring[]; also used as the offset from
 *           AMDGPU_DOORBELL_MEC_RING0 for the ring's doorbell
 * @mec:     zero-based MEC index (stored in the ring as me = mec + 1)
 * @pipe:    pipe index within the MEC
 * @queue:   queue index within the pipe
 *
 * Returns 0 on success, otherwise the non-zero code from amdgpu_ring_init().
 */
static int gfx_v7_0_compute_ring_init(struct amdgpu_device *adev, int ring_id,
					int mec, int pipe, int queue)
{
	struct amdgpu_ring *ring = &adev->gfx.compute_ring[ring_id];
	unsigned irq_type;

	ring->ring_obj = NULL;
	ring->use_doorbell = true;
	ring->doorbell_index = AMDGPU_DOORBELL_MEC_RING0 + ring_id;

	/* mec0 is me1 */
	ring->me = mec + 1;
	ring->pipe = pipe;
	ring->queue = queue;
	sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue);

	/* one EOP interrupt source per (mec, pipe) pair */
	irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP
		+ ((ring->me - 1) * adev->gfx.mec.num_pipe_per_mec)
		+ ring->pipe;

	/* type-2 packets are deprecated on MEC, use type-3 instead */
	return amdgpu_ring_init(adev, ring, 1024,
				&adev->gfx.eop_irq, irq_type);
}

static int gfx_v7_0_sw_init(void *handle)
{
	struct amdgpu_ring *ring;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	int i, r, ring_id;
	int i, j, k, r, ring_id;

	/* EOP Event */
	r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 181, &adev->gfx.eop_irq);
@@ -4806,40 +4837,24 @@ static int gfx_v7_0_sw_init(void *handle)
			return r;
	}

	/* set up the compute queues */
	for (i = 0, ring_id = 0; i < AMDGPU_MAX_COMPUTE_QUEUES; i++) {
		unsigned irq_type;

		if (!test_bit(i, adev->gfx.mec.queue_bitmap))
	/* set up the compute queues - allocate horizontally across pipes */
	ring_id = 0;
	for (i = 0; i < adev->gfx.mec.num_mec; ++i) {
		for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) {
			for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) {
				if (!amdgpu_is_mec_queue_enabled(adev, i, k, j))
					continue;

		ring = &adev->gfx.compute_ring[ring_id];

		/* mec0 is me1 */
		ring->me = ((i / adev->gfx.mec.num_queue_per_pipe)
				/ adev->gfx.mec.num_pipe_per_mec)
				+ 1;
		ring->pipe = (i / adev->gfx.mec.num_queue_per_pipe)
				% adev->gfx.mec.num_pipe_per_mec;
		ring->queue = i % adev->gfx.mec.num_queue_per_pipe;

		ring->ring_obj = NULL;
		ring->use_doorbell = true;
		ring->doorbell_index = AMDGPU_DOORBELL_MEC_RING0 + ring_id;
		sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue);

		irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP
			+ ((ring->me - 1) * adev->gfx.mec.num_pipe_per_mec)
			+ ring->pipe;

		/* type-2 packets are deprecated on MEC, use type-3 instead */
		r = amdgpu_ring_init(adev, ring, 1024,
				     &adev->gfx.eop_irq, irq_type);
				r = gfx_v7_0_compute_ring_init(adev,
								ring_id,
								i, k, j);
				if (r)
					return r;

				ring_id++;
			}
		}
	}

	/* reserve GDS, GWS and OA resource for gfx */
	r = amdgpu_bo_create_kernel(adev, adev->gds.mem.gfx_partition_size,
+51 −35
Original line number Diff line number Diff line
@@ -2139,9 +2139,44 @@ static int gfx_v8_0_gpu_early_init(struct amdgpu_device *adev)
	return 0;
}

/*
 * Set up the software state of one compute ring and initialize it.
 *
 * @adev:    device owning the ring
 * @ring_id: slot in adev->gfx.compute_ring[]; also selects the ring's
 *           doorbell (AMDGPU_DOORBELL_MEC_RING0 + ring_id) and its EOP
 *           address (hpd_eop_gpu_addr + ring_id * GFX8_MEC_HPD_SIZE)
 * @mec:     zero-based MEC index (stored in the ring as me = mec + 1)
 * @pipe:    pipe index within the MEC
 * @queue:   queue index within the pipe
 *
 * Returns 0 on success, otherwise the non-zero code from amdgpu_ring_init().
 */
static int gfx_v8_0_compute_ring_init(struct amdgpu_device *adev, int ring_id,
					int mec, int pipe, int queue)
{
	unsigned irq_type;
	struct amdgpu_ring *ring = &adev->gfx.compute_ring[ring_id];

	/* mec0 is me1 */
	ring->me = mec + 1;
	ring->pipe = pipe;
	ring->queue = queue;

	ring->ring_obj = NULL;
	ring->use_doorbell = true;
	ring->doorbell_index = AMDGPU_DOORBELL_MEC_RING0 + ring_id;
	ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr
				+ (ring_id * GFX8_MEC_HPD_SIZE);
	sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue);

	/* one EOP interrupt source per (mec, pipe) pair */
	irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP
		+ ((ring->me - 1) * adev->gfx.mec.num_pipe_per_mec)
		+ ring->pipe;

	/* type-2 packets are deprecated on MEC, use type-3 instead */
	return amdgpu_ring_init(adev, ring, 1024,
				&adev->gfx.eop_irq, irq_type);
}

static int gfx_v8_0_sw_init(void *handle)
{
	int i, r, ring_id;
	int i, j, k, r, ring_id;
	struct amdgpu_ring *ring;
	struct amdgpu_kiq *kiq;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
@@ -2207,44 +2242,25 @@ static int gfx_v8_0_sw_init(void *handle)
			return r;
	}

	/* set up the compute queues */
	for (i = 0, ring_id = 0; i < AMDGPU_MAX_COMPUTE_QUEUES; i++) {
		unsigned irq_type;

		if (!test_bit(i, adev->gfx.mec.queue_bitmap))
	/* set up the compute queues - allocate horizontally across pipes */
	ring_id = 0;
	for (i = 0; i < adev->gfx.mec.num_mec; ++i) {
		for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) {
			for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) {
				if (!amdgpu_is_mec_queue_enabled(adev, i, k, j))
					continue;

		if (WARN_ON(ring_id >= AMDGPU_MAX_COMPUTE_RINGS))
			break;

		ring = &adev->gfx.compute_ring[ring_id];

		/* mec0 is me1 */
		ring->me = ((i / adev->gfx.mec.num_queue_per_pipe)
				/ adev->gfx.mec.num_pipe_per_mec)
				+ 1;
		ring->pipe = (i / adev->gfx.mec.num_queue_per_pipe)
				% adev->gfx.mec.num_pipe_per_mec;
		ring->queue = i % adev->gfx.mec.num_queue_per_pipe;

		ring->ring_obj = NULL;
		ring->use_doorbell = true;
		ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr + (ring_id * GFX8_MEC_HPD_SIZE);
		ring->doorbell_index = AMDGPU_DOORBELL_MEC_RING0 + ring_id;
		sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue);

		irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP
			+ ((ring->me - 1) * adev->gfx.mec.num_pipe_per_mec)
			+ ring->pipe;

		/* type-2 packets are deprecated on MEC, use type-3 instead */
		r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq,
				     irq_type);
				r = gfx_v8_0_compute_ring_init(adev,
								ring_id,
								i, k, j);
				if (r)
					return r;

				ring_id++;
			}
		}
	}

	r = gfx_v8_0_kiq_init(adev);
	if (r) {