Commit 482f0777 authored by Felix Kuehling, committed by Oded Gabbay

drm/amdkfd: Simplify event ID and signal slot management



Signal slots are identical to event IDs.

Replace the used_slot_bitmap and events hash table with an IDR to
allocate and lookup event IDs and signal slots more efficiently.
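For illustration, the IDR pattern the driver moves to looks roughly like the sketch below. This is a minimal, self-contained example, not the driver code itself: the names (my_event, my_event_idr, my_event_create) and the signal-slot limit are made up for the illustration; only idr_init/idr_alloc/idr_find/idr_remove are the kernel API actually used by the patch.

	#include <linux/idr.h>
	#include <linux/kernel.h>
	#include <linux/slab.h>

	struct my_event {
		u32 event_id;
	};

	/* One IDR replaces both the used-slot bitmap and the event hash table. */
	static DEFINE_IDR(my_event_idr);

	static int my_event_create(struct my_event *ev, bool is_signal)
	{
		/* Signal events get small IDs that double as signal-slot
		 * indices; other events come from the upper half of the ID
		 * space, mirroring KFD_FIRST_NONSIGNAL_EVENT_ID. The limit
		 * of 4096 is arbitrary for this sketch.
		 */
		int start = is_signal ? 0 : (INT_MAX >> 1) + 1;
		int end = is_signal ? 4096 : 0;	/* end <= 0 means "no upper bound" */
		int id = idr_alloc(&my_event_idr, ev, start, end, GFP_KERNEL);

		if (id < 0)
			return id;	/* -ENOSPC if the range is full, -ENOMEM otherwise */
		ev->event_id = id;
		return 0;
	}

	static struct my_event *my_event_lookup(u32 id)
	{
		/* Radix-tree lookup by ID, no hash-bucket walk needed. */
		return idr_find(&my_event_idr, id);
	}

	static void my_event_destroy(struct my_event *ev)
	{
		idr_remove(&my_event_idr, ev->event_id);
		kfree(ev);
	}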

Signed-off-by: Felix Kuehling <Felix.Kuehling@amd.com>
Acked-by: Oded Gabbay <oded.gabbay@gmail.com>
Signed-off-by: Oded Gabbay <oded.gabbay@gmail.com>
parent 50cb7dd9
+70 −160
@@ -41,24 +41,16 @@ struct kfd_event_waiter {
	bool activated;		 /* Becomes true when event is signaled */
};

#define SLOTS_PER_PAGE KFD_SIGNAL_EVENT_LIMIT
#define SLOT_BITMAP_LONGS BITS_TO_LONGS(SLOTS_PER_PAGE)

/*
 * Over-complicated pooled allocator for event notification slots.
 *
 * Each signal event needs a 64-bit signal slot where the signaler will write
 * a 1 before sending an interrupt.l (This is needed because some interrupts
 * a 1 before sending an interrupt. (This is needed because some interrupts
 * do not contain enough spare data bits to identify an event.)
 * We get whole pages from vmalloc and map them to the process VA.
 * Individual signal events are then allocated a slot in a page.
 * We get whole pages and map them to the process VA.
 * Individual signal events use their event_id as slot index.
 */

struct kfd_signal_page {
	uint64_t *kernel_address;
	uint64_t __user *user_address;
	unsigned int free_slots;
	unsigned long used_slot_bitmap[SLOT_BITMAP_LONGS];
};

/*
@@ -73,34 +65,6 @@ static uint64_t *page_slots(struct kfd_signal_page *page)
	return page->kernel_address;
}

static bool allocate_free_slot(struct kfd_process *process,
			       unsigned int *out_slot_index)
{
	struct kfd_signal_page *page = process->signal_page;
	unsigned int slot;

	if (!page || page->free_slots == 0) {
		pr_debug("No free event signal slots were found for process %p\n",
			 process);

		return false;
	}

	slot = find_first_zero_bit(page->used_slot_bitmap, SLOTS_PER_PAGE);

	__set_bit(slot, page->used_slot_bitmap);
	page->free_slots--;

	page_slots(page)[slot] = UNSIGNALED_EVENT_SLOT;

	*out_slot_index = slot;

	pr_debug("Allocated event signal slot in page %p, slot %d\n",
		 page, slot);

	return true;
}

static struct kfd_signal_page *allocate_signal_page(struct kfd_process *p)
{
	void *backing_store;
@@ -110,8 +74,6 @@ static struct kfd_signal_page *allocate_signal_page(struct kfd_process *p)
	if (!page)
		return NULL;

	page->free_slots = SLOTS_PER_PAGE;

	backing_store = (void *) __get_free_pages(GFP_KERNEL,
					get_order(KFD_SIGNAL_EVENT_LIMIT * 8));
	if (!backing_store)
@@ -132,28 +94,26 @@ fail_alloc_signal_store:
	return NULL;
}

static bool allocate_event_notification_slot(struct kfd_process *p,
					     unsigned int *signal_slot_index)
static int allocate_event_notification_slot(struct kfd_process *p,
					    struct kfd_event *ev)
{
	int id;

	if (!p->signal_page) {
		p->signal_page = allocate_signal_page(p);
		if (!p->signal_page)
			return false;
			return -ENOMEM;
	}

	return allocate_free_slot(p, signal_slot_index);
}
	id = idr_alloc(&p->event_idr, ev, 0, KFD_SIGNAL_EVENT_LIMIT,
		       GFP_KERNEL);
	if (id < 0)
		return id;

/* Assumes that the process's event_mutex is locked. */
static void release_event_notification_slot(struct kfd_signal_page *page,
						size_t slot_index)
{
	__clear_bit(slot_index, page->used_slot_bitmap);
	page->free_slots++;
	ev->event_id = id;
	page_slots(p->signal_page)[id] = UNSIGNALED_EVENT_SLOT;

	/* We don't free signal pages, they are retained by the process
	 * and reused until it exits.
	 */
	return 0;
}

/*
@@ -162,89 +122,32 @@ static void release_event_notification_slot(struct kfd_signal_page *page,
 */
static struct kfd_event *lookup_event_by_id(struct kfd_process *p, uint32_t id)
{
	struct kfd_event *ev;

	hash_for_each_possible(p->events, ev, events, id)
		if (ev->event_id == id)
			return ev;

	return NULL;
}

/*
 * Produce a kfd event id for a nonsignal event.
 * These are arbitrary numbers, so we do a sequential search through
 * the hash table for an unused number.
 */
static u32 make_nonsignal_event_id(struct kfd_process *p)
{
	u32 id;

	for (id = p->next_nonsignal_event_id;
		id < KFD_LAST_NONSIGNAL_EVENT_ID &&
		lookup_event_by_id(p, id);
		id++)
		;

	if (id < KFD_LAST_NONSIGNAL_EVENT_ID) {

		/*
		 * What if id == LAST_NONSIGNAL_EVENT_ID - 1?
		 * Then next_nonsignal_event_id = LAST_NONSIGNAL_EVENT_ID so
		 * the first loop fails immediately and we proceed with the
		 * wraparound loop below.
		 */
		p->next_nonsignal_event_id = id + 1;

		return id;
	}

	for (id = KFD_FIRST_NONSIGNAL_EVENT_ID;
		id < KFD_LAST_NONSIGNAL_EVENT_ID &&
		lookup_event_by_id(p, id);
		id++)
		;


	if (id < KFD_LAST_NONSIGNAL_EVENT_ID) {
		p->next_nonsignal_event_id = id + 1;
		return id;
	}

	p->next_nonsignal_event_id = KFD_FIRST_NONSIGNAL_EVENT_ID;
	return 0;
}

static struct kfd_event *lookup_event_by_page_slot(struct kfd_process *p,
						unsigned int signal_slot)
{
	return lookup_event_by_id(p, signal_slot);
	return idr_find(&p->event_idr, id);
}

static int create_signal_event(struct file *devkfd,
				struct kfd_process *p,
				struct kfd_event *ev)
{
	int ret;

	if (p->signal_event_count == KFD_SIGNAL_EVENT_LIMIT) {
		if (!p->signal_event_limit_reached) {
			pr_warn("Signal event wasn't created because limit was reached\n");
			p->signal_event_limit_reached = true;
		}
		return -ENOMEM;
		return -ENOSPC;
	}

	if (!allocate_event_notification_slot(p, &ev->signal_slot_index)) {
	ret = allocate_event_notification_slot(p, ev);
	if (ret) {
		pr_warn("Signal event wasn't created because out of kernel memory\n");
		return -ENOMEM;
		return ret;
	}

	p->signal_event_count++;

	ev->user_signal_address =
			&p->signal_page->user_address[ev->signal_slot_index];

	ev->event_id = ev->signal_slot_index;

	ev->user_signal_address = &p->signal_page->user_address[ev->event_id];
	pr_debug("Signal event number %zu created with id %d, address %p\n",
			p->signal_event_count, ev->event_id,
			ev->user_signal_address);
@@ -252,16 +155,20 @@ static int create_signal_event(struct file *devkfd,
	return 0;
}

/*
 * No non-signal events are supported yet.
 * We create them as events that never signal.
 * Set event calls from user-mode are failed.
 */
static int create_other_event(struct kfd_process *p, struct kfd_event *ev)
{
	ev->event_id = make_nonsignal_event_id(p);
	if (ev->event_id == 0)
		return -ENOMEM;
	/* Cast KFD_LAST_NONSIGNAL_EVENT to uint32_t. This allows an
	 * intentional integer overflow to -1 without a compiler
	 * warning. idr_alloc treats a negative value as "maximum
	 * signed integer".
	 */
	int id = idr_alloc(&p->event_idr, ev, KFD_FIRST_NONSIGNAL_EVENT_ID,
			   (uint32_t)KFD_LAST_NONSIGNAL_EVENT_ID + 1,
			   GFP_KERNEL);

	if (id < 0)
		return id;
	ev->event_id = id;

	return 0;
}
@@ -269,9 +176,8 @@ static int create_other_event(struct kfd_process *p, struct kfd_event *ev)
void kfd_event_init_process(struct kfd_process *p)
{
	mutex_init(&p->event_mutex);
	hash_init(p->events);
	idr_init(&p->event_idr);
	p->signal_page = NULL;
	p->next_nonsignal_event_id = KFD_FIRST_NONSIGNAL_EVENT_ID;
	p->signal_event_count = 0;
}

@@ -284,25 +190,22 @@ static void destroy_event(struct kfd_process *p, struct kfd_event *ev)
		waiter->event = NULL;
	wake_up_all(&ev->wq);

	if ((ev->type == KFD_EVENT_TYPE_SIGNAL ||
	     ev->type == KFD_EVENT_TYPE_DEBUG) && p->signal_page) {
		release_event_notification_slot(p->signal_page,
						ev->signal_slot_index);
	if (ev->type == KFD_EVENT_TYPE_SIGNAL ||
	    ev->type == KFD_EVENT_TYPE_DEBUG)
		p->signal_event_count--;
	}

	hash_del(&ev->events);
	idr_remove(&p->event_idr, ev->event_id);
	kfree(ev);
}

static void destroy_events(struct kfd_process *p)
{
	struct kfd_event *ev;
	struct hlist_node *tmp;
	unsigned int hash_bkt;
	uint32_t id;

	hash_for_each_safe(p->events, hash_bkt, tmp, ev, events)
	idr_for_each_entry(&p->event_idr, ev, id)
		destroy_event(p, ev);
	idr_destroy(&p->event_idr);
}

/*
@@ -365,7 +268,7 @@ int kfd_event_create(struct file *devkfd, struct kfd_process *p,
		if (!ret) {
			*event_page_offset = KFD_MMAP_EVENTS_MASK;
			*event_page_offset <<= PAGE_SHIFT;
			*event_slot_index = ev->signal_slot_index;
			*event_slot_index = ev->event_id;
		}
		break;
	default:
@@ -374,8 +277,6 @@ int kfd_event_create(struct file *devkfd, struct kfd_process *p,
	}

	if (!ret) {
		hash_add(p->events, &ev->events, ev->event_id);

		*event_id = ev->event_id;
		*event_trigger_data = ev->event_id;
	} else {
@@ -469,17 +370,7 @@ int kfd_reset_event(struct kfd_process *p, uint32_t event_id)

static void acknowledge_signal(struct kfd_process *p, struct kfd_event *ev)
{
	page_slots(p->signal_page)[ev->signal_slot_index] =
						UNSIGNALED_EVENT_SLOT;
}

static bool is_slot_signaled(struct kfd_process *p, unsigned int index)
{
	if (!p->signal_page)
		return false;
	else
		return page_slots(p->signal_page)[index] !=
			UNSIGNALED_EVENT_SLOT;
	page_slots(p->signal_page)[ev->event_id] = UNSIGNALED_EVENT_SLOT;
}

static void set_event_from_interrupt(struct kfd_process *p,
@@ -518,13 +409,31 @@ void kfd_signal_event_interrupt(unsigned int pasid, uint32_t partial_id,
		 * ignore it, but we could use any bits we did receive to
		 * search faster.
		 */
		unsigned int i;
		uint64_t *slots = page_slots(p->signal_page);
		uint32_t id;

		if (p->signal_event_count < KFD_SIGNAL_EVENT_LIMIT/2) {
			/* With relatively few events, it's faster to
			 * iterate over the event IDR
			 */
			idr_for_each_entry(&p->event_idr, ev, id) {
				if (id >= KFD_SIGNAL_EVENT_LIMIT)
					break;

		for (i = 0; i < SLOTS_PER_PAGE; i++)
			if (is_slot_signaled(p, i)) {
				ev = lookup_event_by_page_slot(p, i);
				if (slots[id] != UNSIGNALED_EVENT_SLOT)
					set_event_from_interrupt(p, ev);
			}
		} else {
			/* With relatively many events, it's faster to
			 * iterate over the signal slots and lookup
			 * only signaled events from the IDR.
			 */
			for (id = 0; id < KFD_SIGNAL_EVENT_LIMIT; id++)
				if (slots[id] != UNSIGNALED_EVENT_SLOT) {
					ev = lookup_event_by_id(p, id);
					set_event_from_interrupt(p, ev);
				}
		}
	}

	mutex_unlock(&p->event_mutex);
@@ -836,12 +745,13 @@ static void lookup_events_by_type_and_signal(struct kfd_process *p,
{
	struct kfd_hsa_memory_exception_data *ev_data;
	struct kfd_event *ev;
	int bkt;
	uint32_t id;
	bool send_signal = true;

	ev_data = (struct kfd_hsa_memory_exception_data *) event_data;

	hash_for_each(p->events, bkt, ev, events)
	id = KFD_FIRST_NONSIGNAL_EVENT_ID;
	idr_for_each_entry_continue(&p->event_idr, ev, id)
		if (ev->type == type) {
			send_signal = false;
			dev_dbg(kfd_device,
+7 −7
@@ -31,9 +31,13 @@
#include "kfd_priv.h"
#include <uapi/linux/kfd_ioctl.h>

#define KFD_EVENT_ID_NONSIGNAL_MASK 0x80000000U
#define KFD_FIRST_NONSIGNAL_EVENT_ID KFD_EVENT_ID_NONSIGNAL_MASK
#define KFD_LAST_NONSIGNAL_EVENT_ID UINT_MAX
/*
 * IDR supports non-negative integer IDs. Small IDs are used for
 * signal events to match their signal slot. Use the upper half of the
 * ID space for non-signal events.
 */
#define KFD_FIRST_NONSIGNAL_EVENT_ID ((INT_MAX >> 1) + 1)
#define KFD_LAST_NONSIGNAL_EVENT_ID INT_MAX

/*
 * Written into kfd_signal_slot_t to indicate that the event is not signaled.
@@ -47,9 +51,6 @@ struct kfd_event_waiter;
struct signal_page;

struct kfd_event {
	/* All events in process, rooted at kfd_process.events. */
	struct hlist_node events;

	u32 event_id;

	bool signaled;
@@ -60,7 +61,6 @@ struct kfd_event {
	wait_queue_head_t wq; /* List of event waiters. */

	/* Only for signal events. */
	unsigned int signal_slot_index;
	uint64_t __user *user_signal_address;

	/* type specific data */
+3 −3
@@ -31,6 +31,7 @@
#include <linux/workqueue.h>
#include <linux/spinlock.h>
#include <linux/kfd_ioctl.h>
#include <linux/idr.h>
#include <kgd_kfd_interface.h>

#include "amd_shared.h"
@@ -538,11 +539,10 @@ struct kfd_process {

	/* Event-related data */
	struct mutex event_mutex;
	/* All events in process hashed by ID, linked on kfd_event.events. */
	DECLARE_HASHTABLE(events, 4);
	/* Event ID allocator and lookup */
	struct idr event_idr;
	/* Event page */
	struct kfd_signal_page *signal_page;
	u32 next_nonsignal_event_id;
	size_t signal_event_count;
	bool signal_event_limit_reached;
};