Commit 938a0650 authored by Amber Lin's avatar Amber Lin Committed by Alex Deucher
Browse files

drm/amdkfd: Provide SMI events watch



When the compute is malfunctioning or performance drops, the system admin
will use SMI (System Management Interface) tool to monitor/diagnostic what
went wrong. This patch provides an event watch interface for the user
space to register devices and subscribe events they are interested. After
registered, the user can use annoymous file descriptor's poll function
with wait-time specified and wait for events to happen. Once an event
happens, the user can use read() to retrieve information related to the
event.

VM fault event is done in this patch.

v2: - remove UNREGISTER and add event ENABLE/DISABLE
    - correct kfifo usage
    - move event message API to kfd_ioctl.h
v3: send the event msg in text than in binary
v4: support multiple clients
v5: move events enablement from ioctl to fd write
v6: sparse fix

Signed-off-by: default avatarAmber Lin <Amber.Lin@amd.com>
Reviewed-by: default avatarFelix Kuehling <Felix.Kuehling@amd.com>
Signed-off-by: default avatarAlex Deucher <alexander.deucher@amd.com>
parent 85e7151b
Loading
Loading
Loading
Loading
+1 −0
Original line number Diff line number Diff line
@@ -53,6 +53,7 @@ AMDKFD_FILES := $(AMDKFD_PATH)/kfd_module.o \
		$(AMDKFD_PATH)/kfd_int_process_v9.o \
		$(AMDKFD_PATH)/kfd_dbgdev.o \
		$(AMDKFD_PATH)/kfd_dbgmgr.o \
		$(AMDKFD_PATH)/kfd_smi_events.o \
		$(AMDKFD_PATH)/kfd_crat.o

ifneq ($(CONFIG_AMD_IOMMU_V2),)
+2 −0
Original line number Diff line number Diff line
@@ -24,6 +24,7 @@
#include "kfd_events.h"
#include "cik_int.h"
#include "amdgpu_amdkfd.h"
#include "kfd_smi_events.h"

static bool cik_event_interrupt_isr(struct kfd_dev *dev,
					const uint32_t *ih_ring_entry,
@@ -107,6 +108,7 @@ static void cik_event_interrupt_wq(struct kfd_dev *dev,
		ihre->source_id == CIK_INTSRC_GFX_MEM_PROT_FAULT) {
		struct kfd_vm_fault_info info;

		kfd_smi_event_update_vmfault(dev, pasid);
		kfd_process_vm_fault(dev->dqm, pasid);

		memset(&info, 0, sizeof(info));
+18 −0
Original line number Diff line number Diff line
@@ -39,6 +39,7 @@
#include "kfd_device_queue_manager.h"
#include "kfd_dbgmgr.h"
#include "amdgpu_amdkfd.h"
#include "kfd_smi_events.h"

static long kfd_ioctl(struct file *, unsigned int, unsigned long);
static int kfd_open(struct inode *, struct file *);
@@ -1740,6 +1741,20 @@ err_unlock:
	return r;
}

/* Handle requests for watching SMI events */
static int kfd_ioctl_smi_events(struct file *filep,
				struct kfd_process *p, void *data)
{
	struct kfd_ioctl_smi_events_args *args = data;
	struct kfd_dev *dev;

	dev = kfd_device_by_id(args->gpuid);
	if (!dev)
		return -EINVAL;

	return kfd_smi_event_open(dev, &args->anon_fd);
}

#define AMDKFD_IOCTL_DEF(ioctl, _func, _flags) \
	[_IOC_NR(ioctl)] = {.cmd = ioctl, .func = _func, .flags = _flags, \
			    .cmd_drv = 0, .name = #ioctl}
@@ -1835,6 +1850,9 @@ static const struct amdkfd_ioctl_desc amdkfd_ioctls[] = {

	AMDKFD_IOCTL_DEF(AMDKFD_IOC_ALLOC_QUEUE_GWS,
			kfd_ioctl_alloc_queue_gws, 0),

	AMDKFD_IOCTL_DEF(AMDKFD_IOC_SMI_EVENTS,
			kfd_ioctl_smi_events, 0),
};

#define AMDKFD_CORE_IOCTL_COUNT	ARRAY_SIZE(amdkfd_ioctls)
+7 −0
Original line number Diff line number Diff line
@@ -635,6 +635,11 @@ static int kfd_gws_init(struct kfd_dev *kfd)
	return ret;
}

static void kfd_smi_init(struct kfd_dev *dev) {
	INIT_LIST_HEAD(&dev->smi_clients);
	spin_lock_init(&dev->smi_lock);
}

bool kgd2kfd_device_init(struct kfd_dev *kfd,
			 struct drm_device *ddev,
			 const struct kgd2kfd_shared_resources *gpu_resources)
@@ -749,6 +754,8 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd,
		goto kfd_topology_add_device_error;
	}

	kfd_smi_init(kfd);

	kfd->init_complete = true;
	dev_info(kfd_device, "added device %x:%x\n", kfd->pdev->vendor,
		 kfd->pdev->device);
+2 −0
Original line number Diff line number Diff line
@@ -24,6 +24,7 @@
#include "kfd_events.h"
#include "soc15_int.h"
#include "kfd_device_queue_manager.h"
#include "kfd_smi_events.h"

static bool event_interrupt_isr_v9(struct kfd_dev *dev,
					const uint32_t *ih_ring_entry,
@@ -117,6 +118,7 @@ static void event_interrupt_wq_v9(struct kfd_dev *dev,
		info.prot_read  = ring_id & 0x10;
		info.prot_write = ring_id & 0x20;

		kfd_smi_event_update_vmfault(dev, pasid);
		kfd_process_vm_fault(dev->dqm, pasid);
		kfd_signal_vm_fault_event(dev, pasid, &info);
	}
Loading