Commit d0b63bb3 authored by Andres Rodriguez's avatar Andres Rodriguez Committed by Alex Deucher
Browse files

drm/amdkfd: allow split HQD on per-queue granularity v5



Update the KGD to KFD interface to allow sharing pipes with queue
granularity instead of pipe granularity.

This allows for more interesting pipe/queue splits.

v2: fix overflow check for res.queue_mask
v3: fix shift overflow when setting res.queue_mask
v4: fix comment in is_pipeline_enabled()
v5: clamp res.queue_mask to the first MEC only

Reviewed-by: default avatarEdward O'Callaghan <funfunctor@folklore1984.net>
Reviewed-by: default avatarFelix Kuehling <Felix.Kuehling@amd.com>
Acked-by: default avatarChristian König <christian.koenig@amd.com>
Signed-off-by: default avatarAndres Rodriguez <andresx7@gmail.com>
Signed-off-by: default avatarAlex Deucher <alexander.deucher@amd.com>
parent 763a47b8
Loading
Loading
Loading
Loading
+19 −3
Original line number Diff line number Diff line
@@ -95,14 +95,30 @@ void amdgpu_amdkfd_device_probe(struct amdgpu_device *adev)

void amdgpu_amdkfd_device_init(struct amdgpu_device *adev)
{
	int i;
	int last_valid_bit;
	if (adev->kfd) {
		struct kgd2kfd_shared_resources gpu_resources = {
			.compute_vmid_bitmap = 0xFF00,

			.first_compute_pipe = 1,
			.compute_pipe_count = 4 - 1,
			.num_mec = adev->gfx.mec.num_mec,
			.num_pipe_per_mec = adev->gfx.mec.num_pipe_per_mec,
			.num_queue_per_pipe = adev->gfx.mec.num_queue_per_pipe
		};

		/* this is going to have a few of the MSBs set that we need to
		 * clear */
		bitmap_complement(gpu_resources.queue_bitmap,
				  adev->gfx.mec.queue_bitmap,
				  KGD_MAX_QUEUES);

		/* According to linux/bitmap.h we shouldn't use bitmap_clear if
		 * nbits is not compile time constant */
		last_valid_bit = adev->gfx.mec.num_mec
				* adev->gfx.mec.num_pipe_per_mec
				* adev->gfx.mec.num_queue_per_pipe;
		for (i = last_valid_bit; i < KGD_MAX_QUEUES; ++i)
			clear_bit(i, gpu_resources.queue_bitmap);

		amdgpu_doorbell_get_kfd_info(adev,
				&gpu_resources.doorbell_physical_address,
				&gpu_resources.doorbell_aperture_size,
+4 −0
Original line number Diff line number Diff line
@@ -226,6 +226,10 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd,

	kfd->shared_resources = *gpu_resources;

	/* We only use the first MEC */
	if (kfd->shared_resources.num_mec > 1)
		kfd->shared_resources.num_mec = 1;

	/* calculate max size of mqds needed for queues */
	size = max_num_of_queues_per_device *
			kfd->device_info->mqd_size_aligned;
+70 −34
Original line number Diff line number Diff line
@@ -63,21 +63,44 @@ enum KFD_MQD_TYPE get_mqd_type_from_queue_type(enum kfd_queue_type type)
	return KFD_MQD_TYPE_CP;
}

unsigned int get_first_pipe(struct device_queue_manager *dqm)
static bool is_pipe_enabled(struct device_queue_manager *dqm, int mec, int pipe)
{
	int i;
	int pipe_offset = mec * dqm->dev->shared_resources.num_pipe_per_mec
		+ pipe * dqm->dev->shared_resources.num_queue_per_pipe;

	/* queue is available for KFD usage if bit is 1 */
	for (i = 0; i <  dqm->dev->shared_resources.num_queue_per_pipe; ++i)
		if (test_bit(pipe_offset + i,
			      dqm->dev->shared_resources.queue_bitmap))
			return true;
	return false;
}

unsigned int get_mec_num(struct device_queue_manager *dqm)
{
	BUG_ON(!dqm || !dqm->dev);
	return dqm->dev->shared_resources.first_compute_pipe;

	return dqm->dev->shared_resources.num_mec;
}

unsigned int get_pipes_num(struct device_queue_manager *dqm)
unsigned int get_queues_num(struct device_queue_manager *dqm)
{
	BUG_ON(!dqm || !dqm->dev);
	return dqm->dev->shared_resources.compute_pipe_count;
	return bitmap_weight(dqm->dev->shared_resources.queue_bitmap,
				KGD_MAX_QUEUES);
}

static inline unsigned int get_pipes_num_cpsch(void)
unsigned int get_queues_per_pipe(struct device_queue_manager *dqm)
{
	return PIPE_PER_ME_CP_SCHEDULING;
	BUG_ON(!dqm || !dqm->dev);
	return dqm->dev->shared_resources.num_queue_per_pipe;
}

unsigned int get_pipes_per_mec(struct device_queue_manager *dqm)
{
	BUG_ON(!dqm || !dqm->dev);
	return dqm->dev->shared_resources.num_pipe_per_mec;
}

void program_sh_mem_settings(struct device_queue_manager *dqm,
@@ -200,12 +223,16 @@ static int allocate_hqd(struct device_queue_manager *dqm, struct queue *q)

	set = false;

	for (pipe = dqm->next_pipe_to_allocate, i = 0; i < get_pipes_num(dqm);
			pipe = ((pipe + 1) % get_pipes_num(dqm)), ++i) {
	for (pipe = dqm->next_pipe_to_allocate, i = 0; i < get_pipes_per_mec(dqm);
			pipe = ((pipe + 1) % get_pipes_per_mec(dqm)), ++i) {

		if (!is_pipe_enabled(dqm, 0, pipe))
			continue;

		if (dqm->allocated_queues[pipe] != 0) {
			bit = find_first_bit(
				(unsigned long *)&dqm->allocated_queues[pipe],
				QUEUES_PER_PIPE);
				get_queues_per_pipe(dqm));

			clear_bit(bit,
				(unsigned long *)&dqm->allocated_queues[pipe]);
@@ -222,7 +249,7 @@ static int allocate_hqd(struct device_queue_manager *dqm, struct queue *q)
	pr_debug("kfd: DQM %s hqd slot - pipe (%d) queue(%d)\n",
				__func__, q->pipe, q->queue);
	/* horizontal hqd allocation */
	dqm->next_pipe_to_allocate = (pipe + 1) % get_pipes_num(dqm);
	dqm->next_pipe_to_allocate = (pipe + 1) % get_pipes_per_mec(dqm);

	return 0;
}
@@ -469,36 +496,25 @@ set_pasid_vmid_mapping(struct device_queue_manager *dqm, unsigned int pasid,
						vmid);
}

int init_pipelines(struct device_queue_manager *dqm,
			unsigned int pipes_num, unsigned int first_pipe)
{
	BUG_ON(!dqm || !dqm->dev);

	pr_debug("kfd: In func %s\n", __func__);

	return 0;
}

static void init_interrupts(struct device_queue_manager *dqm)
{
	unsigned int i;

	BUG_ON(dqm == NULL);

	for (i = 0 ; i < get_pipes_num(dqm) ; i++)
		dqm->dev->kfd2kgd->init_interrupts(dqm->dev->kgd,
				i + get_first_pipe(dqm));
	for (i = 0 ; i < get_pipes_per_mec(dqm) ; i++)
		if (is_pipe_enabled(dqm, 0, i))
			dqm->dev->kfd2kgd->init_interrupts(dqm->dev->kgd, i);
}

static int init_scheduler(struct device_queue_manager *dqm)
{
	int retval;
	int retval = 0;

	BUG_ON(!dqm);

	pr_debug("kfd: In %s\n", __func__);

	retval = init_pipelines(dqm, get_pipes_num(dqm), get_first_pipe(dqm));
	return retval;
}

@@ -509,21 +525,21 @@ static int initialize_nocpsch(struct device_queue_manager *dqm)
	BUG_ON(!dqm);

	pr_debug("kfd: In func %s num of pipes: %d\n",
			__func__, get_pipes_num(dqm));
			__func__, get_pipes_per_mec(dqm));

	mutex_init(&dqm->lock);
	INIT_LIST_HEAD(&dqm->queues);
	dqm->queue_count = dqm->next_pipe_to_allocate = 0;
	dqm->sdma_queue_count = 0;
	dqm->allocated_queues = kcalloc(get_pipes_num(dqm),
	dqm->allocated_queues = kcalloc(get_pipes_per_mec(dqm),
					sizeof(unsigned int), GFP_KERNEL);
	if (!dqm->allocated_queues) {
		mutex_destroy(&dqm->lock);
		return -ENOMEM;
	}

	for (i = 0; i < get_pipes_num(dqm); i++)
		dqm->allocated_queues[i] = (1 << QUEUES_PER_PIPE) - 1;
	for (i = 0; i < get_pipes_per_mec(dqm); i++)
		dqm->allocated_queues[i] = (1 << get_queues_per_pipe(dqm)) - 1;

	dqm->vmid_bitmap = (1 << VMID_PER_DEVICE) - 1;
	dqm->sdma_bitmap = (1 << CIK_SDMA_QUEUES) - 1;
@@ -630,18 +646,38 @@ static int create_sdma_queue_nocpsch(struct device_queue_manager *dqm,

static int set_sched_resources(struct device_queue_manager *dqm)
{
	int i, mec;
	struct scheduling_resources res;
	unsigned int queue_num, queue_mask;

	BUG_ON(!dqm);

	pr_debug("kfd: In func %s\n", __func__);

	queue_num = get_pipes_num_cpsch() * QUEUES_PER_PIPE;
	queue_mask = (1 << queue_num) - 1;
	res.vmid_mask = (1 << VMID_PER_DEVICE) - 1;
	res.vmid_mask <<= KFD_VMID_START_OFFSET;
	res.queue_mask = queue_mask << (get_first_pipe(dqm) * QUEUES_PER_PIPE);

	res.queue_mask = 0;
	for (i = 0; i < KGD_MAX_QUEUES; ++i) {
		mec = (i / dqm->dev->shared_resources.num_queue_per_pipe)
			/ dqm->dev->shared_resources.num_pipe_per_mec;

		if (!test_bit(i, dqm->dev->shared_resources.queue_bitmap))
			continue;

		/* only acquire queues from the first MEC */
		if (mec > 0)
			continue;

		/* This situation may be hit in the future if a new HW
		 * generation exposes more than 64 queues. If so, the
		 * definition of res.queue_mask needs updating */
		if (WARN_ON(i > (sizeof(res.queue_mask)*8))) {
			pr_err("Invalid queue enabled by amdgpu: %d\n", i);
			break;
		}

		res.queue_mask |= (1ull << i);
	}
	res.gws_mask = res.oac_mask = res.gds_heap_base =
						res.gds_heap_size = 0;

@@ -660,7 +696,7 @@ static int initialize_cpsch(struct device_queue_manager *dqm)
	BUG_ON(!dqm);

	pr_debug("kfd: In func %s num of pipes: %d\n",
			__func__, get_pipes_num_cpsch());
			__func__, get_pipes_per_mec(dqm));

	mutex_init(&dqm->lock);
	INIT_LIST_HEAD(&dqm->queues);
+4 −6
Original line number Diff line number Diff line
@@ -30,8 +30,6 @@
#include "kfd_mqd_manager.h"

#define QUEUE_PREEMPT_DEFAULT_TIMEOUT_MS	(500)
#define QUEUES_PER_PIPE				(8)
#define PIPE_PER_ME_CP_SCHEDULING		(3)
#define CIK_VMID_NUM				(8)
#define KFD_VMID_START_OFFSET			(8)
#define VMID_PER_DEVICE				CIK_VMID_NUM
@@ -182,10 +180,10 @@ void device_queue_manager_init_cik(struct device_queue_manager_asic_ops *ops);
void device_queue_manager_init_vi(struct device_queue_manager_asic_ops *ops);
void program_sh_mem_settings(struct device_queue_manager *dqm,
					struct qcm_process_device *qpd);
int init_pipelines(struct device_queue_manager *dqm,
		unsigned int pipes_num, unsigned int first_pipe);
unsigned int get_first_pipe(struct device_queue_manager *dqm);
unsigned int get_pipes_num(struct device_queue_manager *dqm);
unsigned int get_mec_num(struct device_queue_manager *dqm);
unsigned int get_queues_num(struct device_queue_manager *dqm);
unsigned int get_queues_per_pipe(struct device_queue_manager *dqm);
unsigned int get_pipes_per_mec(struct device_queue_manager *dqm);

static inline unsigned int get_sh_mem_bases_32(struct kfd_process_device *pdd)
{
+1 −1
Original line number Diff line number Diff line
@@ -151,5 +151,5 @@ static void init_sdma_vm(struct device_queue_manager *dqm, struct queue *q,

static int initialize_cpsch_cik(struct device_queue_manager *dqm)
{
	return init_pipelines(dqm, get_pipes_num(dqm), get_first_pipe(dqm));
	return 0;
}
Loading