Commit bfbe1744 authored by Dave Airlie
Browse files

Merge tag 'amd-drm-next-5.8-2020-05-19' of git://people.freedesktop.org/~agd5f/linux into drm-next

amd-drm-next-5.8-2020-05-19:

amdgpu:
- Improved handling for CTF (Critical Thermal Fault) situations
- Clarify AC/DC mode switches
- SR-IOV fixes
- XGMI fixes for RAS
- Misc cleanups
- Add autodump debugfs node to aid in GPU hang debugging

UAPI:
- Add a MEM_SYNC IB flag for handling proper acquire memory semantics if UMDs expect the kernel to handle this
  Used by AMDVLK: https://github.com/GPUOpen-Drivers/pal/blob/dev/src/core/os/amdgpu/amdgpuQueue.cpp#L1262



Signed-off-by: Dave Airlie <airlied@redhat.com>
From: Alex Deucher <alexdeucher@gmail.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20200519202505.4126-1-alexander.deucher@amd.com
parents 1493bddc 43c8546b
Loading
Loading
Loading
Loading
+2 −0
Original line number Diff line number Diff line
@@ -989,6 +989,8 @@ struct amdgpu_device {
	char				product_number[16];
	char				product_name[32];
	char				serial[16];

	struct amdgpu_autodump		autodump;
};

static inline struct amdgpu_device *amdgpu_ttm_adev(struct ttm_bo_device *bdev)
+77 −1
Original line number Diff line number Diff line
@@ -27,7 +27,7 @@
#include <linux/pci.h>
#include <linux/uaccess.h>
#include <linux/pm_runtime.h>

#include <linux/poll.h>
#include <drm/drm_debugfs.h>

#include "amdgpu.h"
@@ -74,7 +74,81 @@ int amdgpu_debugfs_add_files(struct amdgpu_device *adev,
	return 0;
}

/**
 * amdgpu_debugfs_wait_dump - let an autodump client finish before GPU recovery
 *
 * @adev: amdgpu device whose autodump state is used
 *
 * Wakes everything sleeping on the autodump.gpu_hang wait queue, then blocks
 * until the autodump.dumping completion is signalled, the wait is interrupted,
 * or 600 seconds have elapsed. The completion is left in the signalled state
 * by amdgpu_debugfs_autodump_init() and by the release handler, so the wait
 * returns immediately unless the autodump file is currently held open.
 *
 * When CONFIG_DEBUG_FS is not enabled this function does nothing.
 *
 * Returns 0 if the completion was signalled (or debugfs is disabled),
 * -ETIMEDOUT if the timeout expired, or the negative error code reported by
 * the interrupted wait.
 */
int amdgpu_debugfs_wait_dump(struct amdgpu_device *adev)
{
#if defined(CONFIG_DEBUG_FS)
	unsigned long timeout = 600 * HZ;
	/* wait_for_completion_interruptible_timeout() returns long, not int */
	long ret;

	wake_up_interruptible(&adev->autodump.gpu_hang);

	ret = wait_for_completion_interruptible_timeout(&adev->autodump.dumping, timeout);
	if (ret == 0) {
		pr_err("autodump: timeout, move on to gpu recovery\n");
		return -ETIMEDOUT;
	}
	/* An interrupted wait is not a finished dump; report it to the caller. */
	if (ret < 0)
		return (int)ret;
#endif
	return 0;
}

#if defined(CONFIG_DEBUG_FS)

/**
 * amdgpu_debugfs_autodump_open - open handler for the autodump debugfs file
 *
 * @inode: inode whose i_private carries the amdgpu device
 * @file: file being opened; its private_data is set to the device
 *
 * Under adev->lock_reset, checks whether the autodump.dumping completion is
 * in the done state. If it is, the completion is re-initialised and the open
 * succeeds; otherwise the open is refused.
 *
 * Returns 0 on success, -EBUSY if the completion was not in the done state.
 */
static int amdgpu_debugfs_autodump_open(struct inode *inode, struct file *file)
{
	struct amdgpu_device *adev = inode->i_private;
	int status = -EBUSY;

	file->private_data = adev;

	mutex_lock(&adev->lock_reset);
	if (adev->autodump.dumping.done) {
		/* Re-arm the completion so a later wait actually blocks. */
		reinit_completion(&adev->autodump.dumping);
		status = 0;
	}
	mutex_unlock(&adev->lock_reset);

	return status;
}

/**
 * amdgpu_debugfs_autodump_release - release handler for the autodump file
 *
 * @inode: unused
 * @file: file being released; private_data holds the amdgpu device
 *
 * Signals the autodump.dumping completion for all waiters.
 *
 * Always returns 0.
 */
static int amdgpu_debugfs_autodump_release(struct inode *inode, struct file *file)
{
	struct amdgpu_device *adev = file->private_data;
	struct completion *dump_done = &adev->autodump.dumping;

	complete_all(dump_done);

	return 0;
}

/**
 * amdgpu_debugfs_autodump_poll - poll handler for the autodump debugfs file
 *
 * @file: polled file; private_data holds the amdgpu device
 * @poll_table: poll table supplied by the caller
 *
 * Registers the caller on the autodump.gpu_hang wait queue and then checks
 * adev->in_gpu_reset.
 *
 * Returns POLLIN | POLLRDNORM | POLLWRNORM while adev->in_gpu_reset is set,
 * 0 otherwise.
 */
static unsigned int amdgpu_debugfs_autodump_poll(struct file *file, struct poll_table_struct *poll_table)
{
	struct amdgpu_device *adev = file->private_data;
	unsigned int events = 0;

	/* Queue on the wait head first, then sample the reset flag. */
	poll_wait(file, &adev->autodump.gpu_hang, poll_table);

	if (adev->in_gpu_reset)
		events = POLLIN | POLLRDNORM | POLLWRNORM;

	return events;
}

/* File operations backing the "amdgpu_autodump" debugfs node. */
static const struct file_operations autodump_debug_fops = {
	.owner   = THIS_MODULE,
	.open    = amdgpu_debugfs_autodump_open,
	.release = amdgpu_debugfs_autodump_release,
	.poll    = amdgpu_debugfs_autodump_poll,
};

static void amdgpu_debugfs_autodump_init(struct amdgpu_device *adev)
{
	init_completion(&adev->autodump.dumping);
	complete_all(&adev->autodump.dumping);
	init_waitqueue_head(&adev->autodump.gpu_hang);

	debugfs_create_file("amdgpu_autodump", 0600,
		adev->ddev->primary->debugfs_root,
		adev, &autodump_debug_fops);
}

/**
 * amdgpu_debugfs_process_reg_op - Handle MMIO register reads/writes
@@ -1434,6 +1508,8 @@ int amdgpu_debugfs_init(struct amdgpu_device *adev)

	amdgpu_ras_debugfs_create_all(adev);

	amdgpu_debugfs_autodump_init(adev);

	return amdgpu_debugfs_add_files(adev, amdgpu_debugfs_list,
					ARRAY_SIZE(amdgpu_debugfs_list));
}
+6 −0
Original line number Diff line number Diff line
@@ -31,6 +31,11 @@ struct amdgpu_debugfs {
	unsigned		num_files;
};

/* Per-device state shared by the autodump debugfs file and the reset path. */
struct amdgpu_autodump {
	/*
	 * Signalled at init and when the autodump file is released; re-armed
	 * when the file is opened. amdgpu_debugfs_wait_dump() waits on it.
	 */
	struct completion		dumping;
	/*
	 * Wait queue that pollers of the autodump file sleep on; woken by
	 * amdgpu_debugfs_wait_dump().
	 */
	struct wait_queue_head		gpu_hang;
};

int amdgpu_debugfs_regs_init(struct amdgpu_device *adev);
int amdgpu_debugfs_init(struct amdgpu_device *adev);
void amdgpu_debugfs_fini(struct amdgpu_device *adev);
@@ -40,3 +45,4 @@ int amdgpu_debugfs_add_files(struct amdgpu_device *adev,
int amdgpu_debugfs_fence_init(struct amdgpu_device *adev);
int amdgpu_debugfs_firmware_init(struct amdgpu_device *adev);
int amdgpu_debugfs_gem_init(struct amdgpu_device *adev);
int amdgpu_debugfs_wait_dump(struct amdgpu_device *adev);
+2 −0
Original line number Diff line number Diff line
@@ -3927,6 +3927,8 @@ static int amdgpu_device_pre_asic_reset(struct amdgpu_device *adev,
	int i, r = 0;
	bool need_full_reset  = *need_full_reset_arg;

	amdgpu_debugfs_wait_dump(adev);

	/* block all schedulers and reset given job's ring */
	for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
		struct amdgpu_ring *ring = adev->rings[i];
+10 −0
Original line number Diff line number Diff line
@@ -1188,3 +1188,13 @@ int amdgpu_dpm_set_df_cstate(struct amdgpu_device *adev,

	return ret;
}

/**
 * amdgpu_dpm_allow_xgmi_power_down - forward an XGMI power-down setting to the SMU
 *
 * @adev: amdgpu device
 * @en: value passed through to smu_allow_xgmi_power_down()
 *
 * Does nothing when is_support_sw_smu() reports false for the device.
 *
 * Returns the result of smu_allow_xgmi_power_down() when the software SMU is
 * supported, 0 otherwise.
 */
int amdgpu_dpm_allow_xgmi_power_down(struct amdgpu_device *adev, bool en)
{
	struct smu_context *smu_ctx = &adev->smu;

	if (!is_support_sw_smu(adev))
		return 0;

	return smu_allow_xgmi_power_down(smu_ctx, en);
}
 No newline at end of file
Loading