Commit 59d7115d authored by Mukul Joshi, committed by Alex Deucher
Browse files

drm/amdkfd: Move process doorbell allocation into kfd device



Move doorbell allocation for a process into the kfd device and
allocate doorbell space in each PDD during process creation.
Currently, KFD manages its own doorbell space, but for some
devices, amdgpu would allocate the complete doorbell
space instead of leaving a chunk of doorbell space for KFD to
manage. In a system with a mix of such devices, KFD would need
to request process doorbell space based on the type of device,
either from amdgpu or from its own doorbell space.

Signed-off-by: Mukul Joshi <mukul.joshi@amd.com>
Reviewed-by: Felix Kuehling <Felix.Kuehling@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
parent b7b6c385
Loading
Loading
Loading
Loading
+18 −12
Original line number Diff line number Diff line
@@ -1291,18 +1291,6 @@ static int kfd_ioctl_alloc_memory_of_gpu(struct file *filep,
		return -EINVAL;
	}

	if (flags & KFD_IOC_ALLOC_MEM_FLAGS_DOORBELL) {
		if (args->size != kfd_doorbell_process_slice(dev))
			return -EINVAL;
		offset = kfd_get_process_doorbells(dev, p);
	} else if (flags & KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP) {
		if (args->size != PAGE_SIZE)
			return -EINVAL;
		offset = amdgpu_amdkfd_get_mmio_remap_phys_addr(dev->kgd);
		if (!offset)
			return -ENOMEM;
	}

	mutex_lock(&p->mutex);

	pdd = kfd_bind_process_to_device(dev, p);
@@ -1311,6 +1299,24 @@ static int kfd_ioctl_alloc_memory_of_gpu(struct file *filep,
		goto err_unlock;
	}

	if (flags & KFD_IOC_ALLOC_MEM_FLAGS_DOORBELL) {
		if (args->size != kfd_doorbell_process_slice(dev)) {
			err = -EINVAL;
			goto err_unlock;
		}
		offset = kfd_get_process_doorbells(pdd);
	} else if (flags & KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP) {
		if (args->size != PAGE_SIZE) {
			err = -EINVAL;
			goto err_unlock;
		}
		offset = amdgpu_amdkfd_get_mmio_remap_phys_addr(dev->kgd);
		if (!offset) {
			err = -ENOMEM;
			goto err_unlock;
		}
	}

	err = amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
		dev->kgd, args->va_addr, args->size,
		pdd->vm, (struct kgd_mem **) &mem, &offset,
+3 −0
Original line number Diff line number Diff line
@@ -583,6 +583,8 @@ struct kfd_dev *kgd2kfd_probe(struct kgd_dev *kgd,

	atomic_set(&kfd->sram_ecc_flag, 0);

	ida_init(&kfd->doorbell_ida);

	return kfd;
}

@@ -798,6 +800,7 @@ void kgd2kfd_device_exit(struct kfd_dev *kfd)
		kfd_interrupt_exit(kfd);
		kfd_topology_remove_device(kfd);
		kfd_doorbell_fini(kfd);
		ida_destroy(&kfd->doorbell_ida);
		kfd_gtt_sa_fini(kfd);
		amdgpu_amdkfd_free_gtt_mem(kfd->kgd, kfd->gtt_mem);
		if (kfd->gws)
+1 −2
Original line number Diff line number Diff line
@@ -191,9 +191,8 @@ static int allocate_doorbell(struct qcm_process_device *qpd, struct queue *q)
	}

	q->properties.doorbell_off =
		kfd_get_doorbell_dw_offset_in_bar(dev, q->process,
		kfd_get_doorbell_dw_offset_in_bar(dev, qpd_to_pdd(qpd),
					  q->doorbell_id);

	return 0;
}

+20 −20
Original line number Diff line number Diff line
@@ -31,9 +31,6 @@
 * kernel queues using the first doorbell page reserved for the kernel.
 */

static DEFINE_IDA(doorbell_ida);
static unsigned int max_doorbell_slices;

/*
 * Each device exposes a doorbell aperture, a PCI MMIO aperture that
 * receives 32-bit writes that are passed to queues as wptr values.
@@ -84,9 +81,9 @@ int kfd_doorbell_init(struct kfd_dev *kfd)
	else
		return -ENOSPC;

	if (!max_doorbell_slices ||
	    doorbell_process_limit < max_doorbell_slices)
		max_doorbell_slices = doorbell_process_limit;
	if (!kfd->max_doorbell_slices ||
	    doorbell_process_limit < kfd->max_doorbell_slices)
		kfd->max_doorbell_slices = doorbell_process_limit;

	kfd->doorbell_base = kfd->shared_resources.doorbell_physical_address +
				doorbell_start_offset;
@@ -130,6 +127,7 @@ int kfd_doorbell_mmap(struct kfd_dev *dev, struct kfd_process *process,
		      struct vm_area_struct *vma)
{
	phys_addr_t address;
	struct kfd_process_device *pdd;

	/*
	 * For simplicity we only allow mapping of the entire doorbell
@@ -138,9 +136,12 @@ int kfd_doorbell_mmap(struct kfd_dev *dev, struct kfd_process *process,
	if (vma->vm_end - vma->vm_start != kfd_doorbell_process_slice(dev))
		return -EINVAL;

	/* Calculate physical address of doorbell */
	address = kfd_get_process_doorbells(dev, process);
	pdd = kfd_get_process_device_data(dev, process);
	if (!pdd)
		return -EINVAL;

	/* Calculate physical address of doorbell */
	address = kfd_get_process_doorbells(pdd);
	vma->vm_flags |= VM_IO | VM_DONTCOPY | VM_DONTEXPAND | VM_NORESERVE |
				VM_DONTDUMP | VM_PFNMAP;

@@ -226,7 +227,7 @@ void write_kernel_doorbell64(void __iomem *db, u64 value)
}

unsigned int kfd_get_doorbell_dw_offset_in_bar(struct kfd_dev *kfd,
					struct kfd_process *process,
					struct kfd_process_device *pdd,
					unsigned int doorbell_id)
{
	/*
@@ -236,7 +237,7 @@ unsigned int kfd_get_doorbell_dw_offset_in_bar(struct kfd_dev *kfd,
	 * units regardless of the ASIC-dependent doorbell size.
	 */
	return kfd->doorbell_base_dw_offset +
		process->doorbell_index
		pdd->doorbell_index
		* kfd_doorbell_process_slice(kfd) / sizeof(u32) +
		doorbell_id * kfd->device_info->doorbell_size / sizeof(u32);
}
@@ -251,25 +252,24 @@ uint64_t kfd_get_number_elems(struct kfd_dev *kfd)

}

phys_addr_t kfd_get_process_doorbells(struct kfd_dev *dev,
					struct kfd_process *process)
phys_addr_t kfd_get_process_doorbells(struct kfd_process_device *pdd)
{
	return dev->doorbell_base +
		process->doorbell_index * kfd_doorbell_process_slice(dev);
	return pdd->dev->doorbell_base +
		pdd->doorbell_index * kfd_doorbell_process_slice(pdd->dev);
}

int kfd_alloc_process_doorbells(struct kfd_process *process)
int kfd_alloc_process_doorbells(struct kfd_dev *kfd, unsigned int *doorbell_index)
{
	int r = ida_simple_get(&doorbell_ida, 1, max_doorbell_slices,
	int r = ida_simple_get(&kfd->doorbell_ida, 1, kfd->max_doorbell_slices,
				GFP_KERNEL);
	if (r > 0)
		process->doorbell_index = r;
		*doorbell_index = r;

	return r;
}

void kfd_free_process_doorbells(struct kfd_process *process)
void kfd_free_process_doorbells(struct kfd_dev *kfd, unsigned int doorbell_index)
{
	if (process->doorbell_index)
		ida_simple_remove(&doorbell_ida, process->doorbell_index);
	if (doorbell_index)
		ida_simple_remove(&kfd->doorbell_ida, doorbell_index);
}
+10 −7
Original line number Diff line number Diff line
@@ -314,6 +314,9 @@ struct kfd_dev {
	spinlock_t smi_lock;

	uint32_t reset_seq_num;

	struct ida doorbell_ida;
	unsigned int max_doorbell_slices;
};

enum kfd_mempool {
@@ -699,6 +702,7 @@ struct kfd_process_device {
	struct attribute attr_evict;

	struct kobject *kobj_stats;
	unsigned int doorbell_index;
};

#define qpd_to_pdd(x) container_of(x, struct kfd_process_device, qpd)
@@ -736,7 +740,6 @@ struct kfd_process {
	struct mmu_notifier mmu_notifier;

	uint16_t pasid;
	unsigned int doorbell_index;

	/*
	 * List of kfd_process_device structures,
@@ -869,13 +872,13 @@ u32 read_kernel_doorbell(u32 __iomem *db);
void write_kernel_doorbell(void __iomem *db, u32 value);
void write_kernel_doorbell64(void __iomem *db, u64 value);
unsigned int kfd_get_doorbell_dw_offset_in_bar(struct kfd_dev *kfd,
					struct kfd_process *process,
					struct kfd_process_device *pdd,
					unsigned int doorbell_id);
phys_addr_t kfd_get_process_doorbells(struct kfd_dev *dev,
					struct kfd_process *process);
int kfd_alloc_process_doorbells(struct kfd_process *process);
void kfd_free_process_doorbells(struct kfd_process *process);

phys_addr_t kfd_get_process_doorbells(struct kfd_process_device *pdd);
int kfd_alloc_process_doorbells(struct kfd_dev *kfd,
				unsigned int *doorbell_index);
void kfd_free_process_doorbells(struct kfd_dev *kfd,
				unsigned int doorbell_index);
/* GTT Sub-Allocator */

int kfd_gtt_sa_allocate(struct kfd_dev *kfd, unsigned int size,
Loading