Commit cf04dfd0, authored by Tao Zhou, committed by Alex Deucher
Browse files

drm/amdgpu: allow ras interrupt callback to return error data



Add error data as a parameter to the RAS interrupt callback and process the returned data in the interrupt handler.

Signed-off-by: Tao Zhou <tao.zhou1@amd.com>
Reviewed-by: Dennis Li <dennis.li@amd.com>
Reviewed-by: Hawking Zhang <Hawking.Zhang@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
parent 8c948103
Loading
Loading
Loading
Loading
+3 −3
Original line number Diff line number Diff line
@@ -1005,7 +1005,7 @@ static void amdgpu_ras_interrupt_handler(struct ras_manager *obj)
	struct ras_ih_data *data = &obj->ih_data;
	struct amdgpu_iv_entry entry;
	int ret;
	struct ras_err_data err_data = {0, 0};
	struct ras_err_data err_data = {0, 0, 0, NULL};

	while (data->rptr != data->wptr) {
		rmb();
@@ -1020,14 +1020,14 @@ static void amdgpu_ras_interrupt_handler(struct ras_manager *obj)
		 * from the callback to update the error type/count, etc
		 */
		if (data->cb) {
			ret = data->cb(obj->adev, &entry);
			ret = data->cb(obj->adev, &err_data, &entry);
			/* ue will trigger an interrupt, and in that case
			 * we need do a reset to recovery the whole system.
			 * But leave IP do that recovery, here we just dispatch
			 * the error.
			 */
			if (ret == AMDGPU_RAS_UE) {
				obj->err_data.ue_count++;
				obj->err_data.ue_count += err_data.ue_count;
			}
			/* Might need get ce count by register, but not all IP
			 * saves ce count, some IP just use one bit or two bits
+19 −18
Original line number Diff line number Diff line
@@ -76,9 +76,6 @@ struct ras_common_if {
	char name[32];
};

/*
 * IP callback type stored in struct ras_ih_data and invoked by the RAS
 * interrupt handler.
 *
 * adev:  the amdgpu device (the handler passes obj->adev)
 * entry: the interrupt vector entry currently being processed
 *
 * The handler compares the returned value against AMDGPU_RAS_UE.
 */
typedef int (*ras_ih_cb)(struct amdgpu_device *adev,
		struct amdgpu_iv_entry *entry);

struct amdgpu_ras {
	/* ras infrastructure */
	/* for ras itself. */
@@ -108,21 +105,6 @@ struct amdgpu_ras {
	uint32_t flags;
};

/*
 * Interrupt-handling state of a RAS object: the deferred work item, the IP
 * callback, and a ring of entries with its read/write positions.
 */
struct ras_ih_data {
	/* interrupt bottom half */
	struct work_struct ih_work;
	/* NOTE(review): presumably marks the handler as set up/active -- confirm */
	int inuse;
	/* IP callback */
	ras_ih_cb cb;
	/* ring storage, full of entries */
	unsigned char *ring;
	/* NOTE(review): sizes are presumably in bytes -- confirm at the allocation site */
	unsigned int ring_size;
	unsigned int element_size;
	unsigned int aligned_element_size;
	/* read/write positions; the interrupt handler loops while rptr != wptr */
	unsigned int rptr;
	unsigned int wptr;
};

struct ras_fs_data {
	char sysfs_name[32];
	char debugfs_name[32];
@@ -149,6 +131,25 @@ struct ras_err_handler_data {
	int last_reserved;
};

/*
 * IP callback type stored in struct ras_ih_data and invoked by the RAS
 * interrupt handler.
 *
 * adev:     the amdgpu device (the handler passes obj->adev)
 * err_data: error data passed to the callback by pointer; when the callback
 *           returns AMDGPU_RAS_UE the handler adds err_data->ue_count to
 *           the object's ue_count
 * entry:    the interrupt vector entry currently being processed
 *
 * The handler compares the returned value against AMDGPU_RAS_UE.
 */
typedef int (*ras_ih_cb)(struct amdgpu_device *adev,
		struct ras_err_data *err_data,
		struct amdgpu_iv_entry *entry);

/*
 * Interrupt-handling state of a RAS object: the deferred work item, the IP
 * callback, and a ring of entries with its read/write positions.
 */
struct ras_ih_data {
	/* interrupt bottom half */
	struct work_struct ih_work;
	/* NOTE(review): presumably marks the handler as set up/active -- confirm */
	int inuse;
	/* IP callback */
	ras_ih_cb cb;
	/* ring storage, full of entries */
	unsigned char *ring;
	/* NOTE(review): sizes are presumably in bytes -- confirm at the allocation site */
	unsigned int ring_size;
	unsigned int element_size;
	unsigned int aligned_element_size;
	/* read/write positions; the interrupt handler loops while rptr != wptr */
	unsigned int rptr;
	unsigned int wptr;
};

struct ras_manager {
	struct ras_common_if head;
	/* reference count */