Commit 4327bed2 authored by Philip Cox, committed by Alex Deucher
Browse files

drm/amdkfd: Add process eviction counters to sysfs



Add per-process eviction counters to sysfs to keep track of
how long each process has spent evicted on each device.

v2: rename the stats dir, and track all evictions per process, per device.
v3: Simplify the stats kobject handling and cleanup.
v4: more code cleanup

Signed-off-by: Philip Cox <Philip.Cox@amd.com>
Reviewed-by: Felix Kuehling <Felix.Kuehling@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
parent 8a491bb3
Loading
Loading
Loading
Loading
+8 −0
Original line number Diff line number Diff line
@@ -653,6 +653,7 @@ static int evict_process_queues_nocpsch(struct device_queue_manager *dqm,
	pr_info_ratelimited("Evicting PASID 0x%x queues\n",
			    pdd->process->pasid);

	pdd->last_evict_timestamp = get_jiffies_64();
	/* Mark all queues as evicted. Deactivate all active queues on
	 * the qpd.
	 */
@@ -714,6 +715,7 @@ static int evict_process_queues_cpsch(struct device_queue_manager *dqm,
		q->properties.is_active = false;
		decrement_queue_count(dqm, q->properties.type);
	}
	pdd->last_evict_timestamp = get_jiffies_64();
	retval = execute_queues_cpsch(dqm,
				qpd->is_debug ?
				KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES :
@@ -732,6 +734,7 @@ static int restore_process_queues_nocpsch(struct device_queue_manager *dqm,
	struct mqd_manager *mqd_mgr;
	struct kfd_process_device *pdd;
	uint64_t pd_base;
	uint64_t eviction_duration;
	int retval, ret = 0;

	pdd = qpd_to_pdd(qpd);
@@ -799,6 +802,8 @@ static int restore_process_queues_nocpsch(struct device_queue_manager *dqm,
			ret = retval;
	}
	qpd->evicted = 0;
	eviction_duration = get_jiffies_64() - pdd->last_evict_timestamp;
	atomic64_add(eviction_duration, &pdd->evict_duration_counter);
out:
	if (mm)
		mmput(mm);
@@ -812,6 +817,7 @@ static int restore_process_queues_cpsch(struct device_queue_manager *dqm,
	struct queue *q;
	struct kfd_process_device *pdd;
	uint64_t pd_base;
	uint64_t eviction_duration;
	int retval = 0;

	pdd = qpd_to_pdd(qpd);
@@ -845,6 +851,8 @@ static int restore_process_queues_cpsch(struct device_queue_manager *dqm,
	retval = execute_queues_cpsch(dqm,
				KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0);
	qpd->evicted = 0;
	eviction_duration = get_jiffies_64() - pdd->last_evict_timestamp;
	atomic64_add(eviction_duration, &pdd->evict_duration_counter);
out:
	dqm_unlock(dqm);
	return retval;
+8 −1
Original line number Diff line number Diff line
@@ -631,7 +631,7 @@ enum kfd_pdd_bound {
	PDD_BOUND_SUSPENDED,
};

#define MAX_SYSFS_FILENAME_LEN 11
#define MAX_SYSFS_FILENAME_LEN 15

/*
 * SDMA counter runs at 100MHz frequency.
@@ -692,6 +692,13 @@ struct kfd_process_device {
	uint64_t sdma_past_activity_counter;
	struct attribute attr_sdma;
	char sdma_filename[MAX_SYSFS_FILENAME_LEN];

	/* Eviction activity tracking */
	uint64_t last_evict_timestamp;
	atomic64_t evict_duration_counter;
	struct attribute attr_evict;

	struct kobject *kobj_stats;
};

#define qpd_to_pdd(x) container_of(x, struct kfd_process_device, qpd)
+100 −0
Original line number Diff line number Diff line
@@ -344,6 +344,26 @@ static ssize_t kfd_procfs_queue_show(struct kobject *kobj,

	return 0;
}
/*
 * sysfs show callback for the attributes living in a per-device stats
 * directory.
 *
 * Only the "evicted_ms" attribute is recognised: the accumulated eviction
 * time stored in the owning kfd_process_device (kept in jiffies) is converted
 * to milliseconds and printed as a single decimal line.
 *
 * Returns the number of characters produced by snprintf(), or 0 after logging
 * an error when the attribute name is not recognised.
 */
static ssize_t kfd_procfs_stats_show(struct kobject *kobj,
				     struct attribute *attr, char *buffer)
{
	struct kfd_process_device *owner;
	uint64_t total_jiffies;

	/* Anything other than "evicted_ms" is not served from here. */
	if (strcmp(attr->name, "evicted_ms") != 0) {
		pr_err("Invalid attribute");
		return 0;
	}

	/* The attribute is embedded in the pdd, so recover the pdd from it. */
	owner = container_of(attr, struct kfd_process_device, attr_evict);
	total_jiffies = atomic64_read(&owner->evict_duration_counter);

	return snprintf(buffer, PAGE_SIZE, "%llu\n",
			jiffies64_to_msecs(total_jiffies));
}

static struct attribute attr_queue_size = {
	.name = "size",
@@ -376,6 +396,19 @@ static struct kobj_type procfs_queue_type = {
	.default_attrs = procfs_queue_attrs,
};

/*
 * sysfs operations for the per-device stats kobjects. Only a .show handler
 * is installed; no .store handler is set.
 */
static const struct sysfs_ops procfs_stats_ops = {
	.show = kfd_procfs_stats_show,
};

/*
 * Default attribute list for the stats kobjects. It is intentionally empty:
 * the "evicted_ms" file is added per device with sysfs_create_file() in
 * kfd_procfs_add_sysfs_stats().
 */
static struct attribute *procfs_stats_attrs[] = {
	NULL
};

/*
 * kobj_type used for the stats_<gpuid> directories created in
 * kfd_procfs_add_sysfs_stats().
 *
 * NOTE(review): no .release callback is set here. The kernel kobject
 * documentation expects one for dynamically allocated kobjects -- confirm
 * how the kobj_stats allocation is meant to be freed after kobject_put().
 */
static struct kobj_type procfs_stats_type = {
	.sysfs_ops = &procfs_stats_ops,
	.default_attrs = procfs_stats_attrs,
};

int kfd_procfs_add_queue(struct queue *q)
{
	struct kfd_process *proc;
@@ -417,6 +450,58 @@ static int kfd_sysfs_create_file(struct kfd_process *p, struct attribute *attr,
	return ret;
}

/*
 * Create the per-GPU stats directories of a KFD process in sysfs.
 *
 * For every entry on @p's per_device_data list a "stats_<gpuid>" kobject is
 * added below p->kobj, and an "evicted_ms" file is created inside it:
 * - proc/<pid>/stats_<gpuid>/
 * - proc/<pid>/stats_<gpuid>/evicted_ms
 *
 * Returns -EINVAL if @p is NULL, -EFAULT if p->kobj is not set, -ENOMEM if a
 * kobject cannot be allocated, or the error from kobject_init_and_add(); the
 * latter two stop the loop, leaving the remaining devices without a stats
 * directory. A sysfs_create_file() failure is only logged and the loop
 * carries on, so after a full pass the return value is the result for the
 * last device (0 if the list is empty).
 */
static int kfd_procfs_add_sysfs_stats(struct kfd_process *p)
{
	int ret = 0;
	struct kfd_process_device *pdd;
	char stats_dir_filename[MAX_SYSFS_FILENAME_LEN];

	if (!p)
		return -EINVAL;

	if (!p->kobj)
		return -EFAULT;

	list_for_each_entry(pdd, &p->per_device_data, per_device_list) {
		struct kobject *kobj_stats;

		snprintf(stats_dir_filename, sizeof(stats_dir_filename),
				"stats_%u", pdd->dev->id);
		kobj_stats = kfd_alloc_struct(kobj_stats);
		if (!kobj_stats)
			return -ENOMEM;

		ret = kobject_init_and_add(kobj_stats,
						&procfs_stats_type,
						p->kobj,
						stats_dir_filename);

		if (ret) {
			/*
			 * stats_dir_filename already carries the "stats_"
			 * prefix, so print it as-is instead of prepending the
			 * prefix a second time.
			 */
			pr_warn("Creating KFD proc/%s folder failed",
					stats_dir_filename);
			/*
			 * A kobject that went through kobject_init_and_add()
			 * must be dropped with kobject_put(), even on failure.
			 */
			kobject_put(kobj_stats);
			goto err;
		}

		pdd->kobj_stats = kobj_stats;
		pdd->attr_evict.name = "evicted_ms";
		pdd->attr_evict.mode = KFD_SYSFS_FILE_MODE;
		sysfs_attr_init(&pdd->attr_evict);
		ret = sysfs_create_file(kobj_stats, &pdd->attr_evict);
		/* Best effort: keep the directory and move on to the next GPU. */
		if (ret)
			pr_warn("Creating eviction stats for gpuid %d failed",
					(int)pdd->dev->id);
	}
err:
	return ret;
}


static int kfd_procfs_add_sysfs_files(struct kfd_process *p)
{
	int ret = 0;
@@ -660,6 +745,16 @@ struct kfd_process *kfd_create_process(struct file *filep)
		if (!process->kobj_queues)
			pr_warn("Creating KFD proc/queues folder failed");

		ret = kfd_procfs_add_sysfs_stats(process);
		if (ret)
			pr_warn("Creating sysfs stats dir for pid %d failed",
				(int)process->lead_thread->pid);

		ret = kfd_procfs_add_sysfs_stats(process);
		if (ret)
			pr_warn("Creating sysfs stats dir for pid %d failed",
				(int)process->lead_thread->pid);

		ret = kfd_procfs_add_sysfs_files(process);
		if (ret)
			pr_warn("Creating sysfs usage file for pid %d failed",
@@ -816,6 +911,10 @@ static void kfd_process_wq_release(struct work_struct *work)
		list_for_each_entry(pdd, &p->per_device_data, per_device_list) {
			sysfs_remove_file(p->kobj, &pdd->attr_vram);
			sysfs_remove_file(p->kobj, &pdd->attr_sdma);
			sysfs_remove_file(p->kobj, &pdd->attr_evict);
			kobject_del(pdd->kobj_stats);
			kobject_put(pdd->kobj_stats);
			pdd->kobj_stats = NULL;
		}

		kobject_del(p->kobj);
@@ -1125,6 +1224,7 @@ struct kfd_process_device *kfd_create_process_device_data(struct kfd_dev *dev,
	pdd->runtime_inuse = false;
	pdd->vram_usage = 0;
	pdd->sdma_past_activity_counter = 0;
	atomic64_set(&pdd->evict_duration_counter, 0);
	list_add(&pdd->per_device_list, &p->per_device_data);

	/* Init idr used for memory handle translation */