Commit 32cb59f3, authored by Mukul Joshi and committed by Alex Deucher
Browse files

drm/amdkfd: Track SDMA utilization per process



Track SDMA usage on a per-process basis and report it through sysfs.
The value in the sysfs file indicates the amount of time SDMA has
been in use by this process since the creation of the process.
This value is reported with microsecond granularity.

Signed-off-by: Mukul Joshi <mukul.joshi@amd.com>
Reviewed-by: Felix Kuehling <Felix.Kuehling@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
parent 2cdc9c20
Loading
Loading
Loading
Loading
+57 −0
Original line number Diff line number Diff line
@@ -153,6 +153,52 @@ void decrement_queue_count(struct device_queue_manager *dqm,
		dqm->active_cp_queue_count--;
}

/*
 * read_sdma_queue_counter - fetch the activity counter of an SDMA queue
 * @q:   queue whose counter is read
 * @val: receives the counter on success; left untouched on failure
 *
 * The counter is a 64-bit value that lives in user memory directly after the
 * queue's read pointer. It is fetched with get_user(), so the caller has to
 * be running with the owning process' address space active.
 *
 * Return: 0 on success, -EINVAL if @q or @val is NULL, -EFAULT if the counter
 * location fails access_ok(), otherwise the error returned by get_user().
 */
int read_sdma_queue_counter(struct queue *q, uint64_t *val)
{
	uint64_t __user *counter;
	uint64_t tmp = 0;
	int ret;

	if (!q || !val)
		return -EINVAL;

	/*
	 * SDMA activity counter is stored at queue's RPTR + 0x8 location.
	 * Compute the address once and keep the __user annotation so the
	 * same pointer is validated and read.
	 */
	counter = (uint64_t __user *)((uint64_t)q->properties.read_ptr +
				      sizeof(uint64_t));

	if (!access_ok(counter, sizeof(*counter))) {
		pr_err("Can't access sdma queue activity counter\n");
		return -EFAULT;
	}

	/* Only publish the value once the user read has succeeded. */
	ret = get_user(tmp, counter);
	if (!ret)
		*val = tmp;

	return ret;
}

static int update_sdma_queue_past_activity_stats(struct kfd_process_device *pdd,
						 struct queue *q)
{
	int ret;
	uint64_t val = 0;

	if (!pdd)
		return -ENODEV;

	ret = read_sdma_queue_counter(q, &val);
	if (ret) {
		pr_err("Failed to read SDMA queue counter for queue: %d\n",
				q->properties.queue_id);
		return ret;
	}

	pdd->sdma_past_activity_counter += val;

	return ret;
}

static int allocate_doorbell(struct qcm_process_device *qpd, struct queue *q)
{
	struct kfd_dev *dev = qpd->dqm->dev;
@@ -487,6 +533,12 @@ static int destroy_queue_nocpsch_locked(struct device_queue_manager *dqm,
	if (retval == -ETIME)
		qpd->reset_wavefronts = true;

	/* Get the SDMA queue stats */
        if ((q->properties.type == KFD_QUEUE_TYPE_SDMA) ||
            (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)) {
                update_sdma_queue_past_activity_stats(qpd_to_pdd(qpd), q);
        }

	mqd_mgr->free_mqd(mqd_mgr, q->mqd, q->mqd_mem_obj);

	list_del(&q->list);
@@ -1468,6 +1520,11 @@ static int destroy_queue_cpsch(struct device_queue_manager *dqm,
		}
	}

	/* Get the SDMA queue stats */
	if ((q->properties.type == KFD_QUEUE_TYPE_SDMA) ||
	    (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)) {
		update_sdma_queue_past_activity_stats(qpd_to_pdd(qpd), q);
	}
	/*
	 * Unconditionally decrement this counter, regardless of the queue's
	 * type
+2 −0
Original line number Diff line number Diff line
@@ -251,4 +251,6 @@ static inline void dqm_unlock(struct device_queue_manager *dqm)
	mutex_unlock(&dqm->lock_hidden);
}

/*
 * Read the 64-bit SDMA activity counter of queue @q into @val.
 * Returns 0 on success or a negative errno; @val is only written on success.
 */
int read_sdma_queue_counter(struct queue *q, uint64_t *val);

#endif /* KFD_DEVICE_QUEUE_MANAGER_H_ */
+14 −2
Original line number Diff line number Diff line
@@ -629,7 +629,14 @@ enum kfd_pdd_bound {
	PDD_BOUND_SUSPENDED,
};

#define MAX_VRAM_FILENAME_LEN 11
/*
 * Size in bytes, including the terminating NUL, of the buffers that hold the
 * per-device sysfs file names ("vram_<gpuid>" and "sdma_<gpuid>").
 * NOTE(review): with a 5-character prefix this leaves room for a 5-digit id;
 * a longer id would be truncated by snprintf() - confirm the id range.
 */
#define MAX_SYSFS_FILENAME_LEN 11

/*
 * SDMA counter runs at 100MHz frequency, i.e. 100 ticks per microsecond.
 * We display SDMA activity in microsecond granularity in sysfs.
 * As a result, the divisor is 100.
 */
#define SDMA_ACTIVITY_DIVISOR  100

/* Data that is per-process-per device. */
struct kfd_process_device {
@@ -677,7 +684,12 @@ struct kfd_process_device {
	/* VRAM usage */
	uint64_t vram_usage;
	struct attribute attr_vram;
	char vram_filename[MAX_VRAM_FILENAME_LEN];
	char vram_filename[MAX_SYSFS_FILENAME_LEN];

	/* SDMA activity tracking */
	uint64_t sdma_past_activity_counter;
	struct attribute attr_sdma;
	char sdma_filename[MAX_SYSFS_FILENAME_LEN];
};

#define qpd_to_pdd(x) container_of(x, struct kfd_process_device, qpd)
+125 −12
Original line number Diff line number Diff line
@@ -25,6 +25,7 @@
#include <linux/sched.h>
#include <linux/sched/mm.h>
#include <linux/sched/task.h>
#include <linux/mmu_context.h>
#include <linux/slab.h>
#include <linux/amd-iommu.h>
#include <linux/notifier.h>
@@ -76,6 +77,74 @@ struct kfd_procfs_tree {

static struct kfd_procfs_tree procfs;

/*
 * Structure for SDMA activity tracking
 *
 * Carries the request and the result of one SDMA activity query between the
 * sysfs show handler and kfd_sdma_activity_worker().
 */
struct kfd_sdma_activity_handler_workarea {
	/* Work item that is initialised with kfd_sdma_activity_worker() */
	struct work_struct sdma_activity_work;
	/* Input: per-process device data whose SDMA queues are sampled */
	struct kfd_process_device *pdd;
	/*
	 * Output: past activity plus the current counters of the live SDMA
	 * queues, in raw counter units (the reader divides by
	 * SDMA_ACTIVITY_DIVISOR before printing).
	 */
	uint64_t sdma_activity_counter;
};

/*
 * kfd_sdma_activity_worker - compute the total SDMA activity of one
 * process on one device
 * @work: the sdma_activity_work member of a
 *        struct kfd_sdma_activity_handler_workarea
 *
 * Stores in workarea->sdma_activity_counter the sum of the activity already
 * accumulated from destroyed queues (pdd->sdma_past_activity_counter) and the
 * current counter of every SDMA / SDMA_XGMI queue still on the process' queue
 * list. The counters live in the process' user memory, so the worker adopts
 * the lead thread's mm while reading them.
 *
 * The result is always written: it is 0 when the query cannot be carried out
 * (no pdd, no dqm, or the process has no mm any more), so the waiter never
 * reads an uninitialised value.
 */
static void kfd_sdma_activity_worker(struct work_struct *work)
{
	struct kfd_sdma_activity_handler_workarea *workarea;
	struct kfd_process_device *pdd;
	struct device_queue_manager *dqm;
	struct qcm_process_device *qpd;
	struct mm_struct *mm;
	struct queue *q;
	uint64_t val;
	int ret;

	/* container_of() on a valid work pointer cannot yield NULL. */
	workarea = container_of(work, struct kfd_sdma_activity_handler_workarea,
				sdma_activity_work);

	/* Define the result up front so every early return reports 0. */
	workarea->sdma_activity_counter = 0;

	/* Validate pdd before it is dereferenced to reach the dqm. */
	pdd = workarea->pdd;
	if (!pdd)
		return;

	dqm = pdd->dev->dqm;
	if (!dqm)
		return;

	qpd = &pdd->qpd;

	/* Pin the process' mm; it may already be gone if the task exited. */
	mm = get_task_mm(pdd->process->lead_thread);
	if (!mm)
		return;

	/* get_user() in read_sdma_queue_counter() needs the process' mm. */
	use_mm(mm);

	/*
	 * NOTE(review): the user reads below may fault and sleep while the
	 * dqm lock is held - confirm this cannot deadlock against paths that
	 * take the dqm lock during fault handling.
	 */
	dqm_lock(dqm);

	/*
	 * Total SDMA activity is current SDMA activity + past SDMA activity
	 */
	workarea->sdma_activity_counter = pdd->sdma_past_activity_counter;

	/*
	 * Get the current activity counters for all active SDMA queues
	 */
	list_for_each_entry(q, &qpd->queues_list, list) {
		if (q->properties.type != KFD_QUEUE_TYPE_SDMA &&
		    q->properties.type != KFD_QUEUE_TYPE_SDMA_XGMI)
			continue;

		val = 0;
		ret = read_sdma_queue_counter(q, &val);
		if (ret) {
			/* Best effort: skip this queue, keep summing the rest. */
			pr_debug("Failed to read SDMA queue active counter for queue id: %d",
				 q->properties.queue_id);
			continue;
		}

		workarea->sdma_activity_counter += val;
	}

	dqm_unlock(dqm);
	unuse_mm(mm);
	mmput(mm);
}

static ssize_t kfd_procfs_show(struct kobject *kobj, struct attribute *attr,
			       char *buffer)
{
@@ -87,8 +156,24 @@ static ssize_t kfd_procfs_show(struct kobject *kobj, struct attribute *attr,
	} else if (strncmp(attr->name, "vram_", 5) == 0) {
		struct kfd_process_device *pdd = container_of(attr, struct kfd_process_device,
							      attr_vram);
		if (pdd)
		return snprintf(buffer, PAGE_SIZE, "%llu\n", READ_ONCE(pdd->vram_usage));
	} else if (strncmp(attr->name, "sdma_", 5) == 0) {
		struct kfd_process_device *pdd = container_of(attr, struct kfd_process_device,
							      attr_sdma);
		struct kfd_sdma_activity_handler_workarea sdma_activity_work_handler;

		INIT_WORK(&sdma_activity_work_handler.sdma_activity_work,
					kfd_sdma_activity_worker);

		sdma_activity_work_handler.pdd = pdd;

		schedule_work(&sdma_activity_work_handler.sdma_activity_work);

		flush_work(&sdma_activity_work_handler.sdma_activity_work);

		return snprintf(buffer, PAGE_SIZE, "%llu\n",
				(sdma_activity_work_handler.sdma_activity_counter)/
				 SDMA_ACTIVITY_DIVISOR);
	} else {
		pr_err("Invalid attribute");
		return -EINVAL;
@@ -210,7 +295,24 @@ int kfd_procfs_add_queue(struct queue *q)
	return 0;
}

int kfd_procfs_add_vram_usage(struct kfd_process *p)
/*
 * kfd_sysfs_create_file - expose one attribute under a process' kobject
 * @p:    process whose kobject becomes the parent of the file
 * @attr: attribute to initialise and register
 * @name: file name; stored by pointer in @attr, so it must outlive the file
 *
 * Return: -EINVAL if any argument is NULL, otherwise the result of
 * sysfs_create_file().
 */
static int kfd_sysfs_create_file(struct kfd_process *p, struct attribute *attr,
				 char *name)
{
	if (!p || !attr || !name)
		return -EINVAL;

	/* Fill in the attribute before handing it to sysfs. */
	attr->name = name;
	attr->mode = KFD_SYSFS_FILE_MODE;
	sysfs_attr_init(attr);

	return sysfs_create_file(p->kobj, attr);
}

int kfd_procfs_add_sysfs_files(struct kfd_process *p)
{
	int ret = 0;
	struct kfd_process_device *pdd;
@@ -221,17 +323,25 @@ int kfd_procfs_add_vram_usage(struct kfd_process *p)
	if (!p->kobj)
		return -EFAULT;

	/* Create proc/<pid>/vram_<gpuid> file for each GPU */
	/*
	 * Create sysfs files for each GPU:
	 * - proc/<pid>/vram_<gpuid>
	 * - proc/<pid>/sdma_<gpuid>
	 */
	list_for_each_entry(pdd, &p->per_device_data, per_device_list) {
		snprintf(pdd->vram_filename, MAX_VRAM_FILENAME_LEN, "vram_%u",
		snprintf(pdd->vram_filename, MAX_SYSFS_FILENAME_LEN, "vram_%u",
			 pdd->dev->id);
		pdd->attr_vram.name = pdd->vram_filename;
		pdd->attr_vram.mode = KFD_SYSFS_FILE_MODE;
		sysfs_attr_init(&pdd->attr_vram);
		ret = sysfs_create_file(p->kobj, &pdd->attr_vram);
		ret = kfd_sysfs_create_file(p, &pdd->attr_vram, pdd->vram_filename);
		if (ret)
			pr_warn("Creating vram usage for gpu id %d failed",
				(int)pdd->dev->id);

		snprintf(pdd->sdma_filename, MAX_SYSFS_FILENAME_LEN, "sdma_%u",
			 pdd->dev->id);
		ret = kfd_sysfs_create_file(p, &pdd->attr_sdma, pdd->sdma_filename);
		if (ret)
			pr_warn("Creating sdma usage for gpu id %d failed",
				(int)pdd->dev->id);
	}

	return ret;
@@ -444,9 +554,9 @@ struct kfd_process *kfd_create_process(struct file *filep)
		if (!process->kobj_queues)
			pr_warn("Creating KFD proc/queues folder failed");

		ret = kfd_procfs_add_vram_usage(process);
		ret = kfd_procfs_add_sysfs_files(process);
		if (ret)
			pr_warn("Creating vram usage file for pid %d failed",
			pr_warn("Creating sysfs usage file for pid %d failed",
				(int)process->lead_thread->pid);
	}
out:
@@ -597,8 +707,10 @@ static void kfd_process_wq_release(struct work_struct *work)
		kobject_put(p->kobj_queues);
		p->kobj_queues = NULL;

		list_for_each_entry(pdd, &p->per_device_data, per_device_list)
		list_for_each_entry(pdd, &p->per_device_data, per_device_list) {
			sysfs_remove_file(p->kobj, &pdd->attr_vram);
			sysfs_remove_file(p->kobj, &pdd->attr_sdma);
		}

		kobject_del(p->kobj);
		kobject_put(p->kobj);
@@ -906,6 +1018,7 @@ struct kfd_process_device *kfd_create_process_device_data(struct kfd_dev *dev,
	pdd->already_dequeued = false;
	pdd->runtime_inuse = false;
	pdd->vram_usage = 0;
	pdd->sdma_past_activity_counter = 0;
	list_add(&pdd->per_device_list, &p->per_device_data);

	/* Init idr used for memory handle translation */