Commit b265bdbd authored by Evan Quan's avatar Evan Quan Committed by Alex Deucher
Browse files

drm/amdgpu: added a sysfs interface for thermal throttling related V4



User can check and set the enablement of throttling logging and
the interval between each logging.

V2: simplify the sysfs interface(no string parsing)
V3: add proper lock protection on updating throttling_logging_rs.interval
V4: documentation cosmetic per Luben's suggestion

Signed-off-by: default avatarEvan Quan <evan.quan@amd.com>
Reviewed-by: default avatarChristian König <christian.koenig@amd.com>
Reviewed-by: default avatarLuben Tuikov <luben.tuikov@amd.com>
Signed-off-by: default avatarAlex Deucher <alexander.deucher@amd.com>
parent bcdc7c05
Loading
Loading
Loading
Loading
+3 −0
Original line number Diff line number Diff line
@@ -993,6 +993,9 @@ struct amdgpu_device {
	char				serial[16];

	struct amdgpu_autodump		autodump;

	atomic_t			throttling_logging_enabled;
	struct ratelimit_state		throttling_logging_rs;
};

static inline struct amdgpu_device *amdgpu_ttm_adev(struct ttm_bo_device *bdev)
+11 −0
Original line number Diff line number Diff line
@@ -3035,6 +3035,17 @@ int amdgpu_device_init(struct amdgpu_device *adev,
	adev->gfx.gfx_off_req_count = 1;
	adev->pm.ac_power = power_supply_is_system_supplied() > 0;

	atomic_set(&adev->throttling_logging_enabled, 1);
	/*
	 * If throttling continues, logging will be performed every minute
	 * to avoid log flooding. "-1" is subtracted since the thermal
	 * throttling interrupt comes every second. Thus, the total logging
	 * interval is 59 seconds(retelimited printk interval) + 1(waiting
	 * for throttling interrupt) = 60 seconds.
	 */
	ratelimit_state_init(&adev->throttling_logging_rs, (60 - 1) * HZ, 1);
	ratelimit_set_flags(&adev->throttling_logging_rs, RATELIMIT_MSG_ON_RELEASE);

	/* Registers mapping */
	/* TODO: block userspace mapping of io register */
	if (adev->asic_type >= CHIP_BONAIRE) {
+68 −0
Original line number Diff line number Diff line
@@ -1808,6 +1808,73 @@ static ssize_t amdgpu_get_unique_id(struct device *dev,
	return 0;
}

/**
 * DOC: thermal_throttling_logging
 *
 * Thermal throttling pulls down the clock frequency and thus the performance.
 * It's an useful mechanism to protect the chip from overheating. Since it
 * impacts performance, the user controls whether it is enabled and if so,
 * the log frequency.
 *
 * Reading back the file shows you the status(enabled or disabled) and
 * the interval(in seconds) between each thermal logging.
 *
 * Writing an integer to the file, sets a new logging interval, in seconds.
 * The value should be between 1 and 3600. If the value is less than 1,
 * thermal logging is disabled. Values greater than 3600 are ignored.
 */
static ssize_t amdgpu_get_thermal_throttling_logging(struct device *dev,
						     struct device_attribute *attr,
						     char *buf)
{
	struct drm_device *ddev = dev_get_drvdata(dev);
	struct amdgpu_device *adev = ddev->dev_private;

	return snprintf(buf, PAGE_SIZE, "%s: thermal throttling logging %s, with interval %d seconds\n",
			adev->ddev->unique,
			atomic_read(&adev->throttling_logging_enabled) ? "enabled" : "disabled",
			adev->throttling_logging_rs.interval / HZ + 1);
}

static ssize_t amdgpu_set_thermal_throttling_logging(struct device *dev,
						     struct device_attribute *attr,
						     const char *buf,
						     size_t count)
{
	struct drm_device *ddev = dev_get_drvdata(dev);
	struct amdgpu_device *adev = ddev->dev_private;
	long throttling_logging_interval;
	unsigned long flags;
	int ret = 0;

	ret = kstrtol(buf, 0, &throttling_logging_interval);
	if (ret)
		return ret;

	if (throttling_logging_interval > 3600)
		return -EINVAL;

	if (throttling_logging_interval > 0) {
		raw_spin_lock_irqsave(&adev->throttling_logging_rs.lock, flags);
		/*
		 * Reset the ratelimit timer internals.
		 * This can effectively restart the timer.
		 */
		adev->throttling_logging_rs.interval =
			(throttling_logging_interval - 1) * HZ;
		adev->throttling_logging_rs.begin = 0;
		adev->throttling_logging_rs.printed = 0;
		adev->throttling_logging_rs.missed = 0;
		raw_spin_unlock_irqrestore(&adev->throttling_logging_rs.lock, flags);

		atomic_set(&adev->throttling_logging_enabled, 1);
	} else {
		atomic_set(&adev->throttling_logging_enabled, 0);
	}

	return count;
}

static struct amdgpu_device_attr amdgpu_device_attrs[] = {
	AMDGPU_DEVICE_ATTR_RW(power_dpm_state,				ATTR_FLAG_BASIC|ATTR_FLAG_ONEVF),
	AMDGPU_DEVICE_ATTR_RW(power_dpm_force_performance_level,	ATTR_FLAG_BASIC|ATTR_FLAG_ONEVF),
@@ -1830,6 +1897,7 @@ static struct amdgpu_device_attr amdgpu_device_attrs[] = {
	AMDGPU_DEVICE_ATTR_RO(pcie_bw,					ATTR_FLAG_BASIC),
	AMDGPU_DEVICE_ATTR_RW(pp_features,				ATTR_FLAG_BASIC),
	AMDGPU_DEVICE_ATTR_RO(unique_id,				ATTR_FLAG_BASIC),
	AMDGPU_DEVICE_ATTR_RW(thermal_throttling_logging,		ATTR_FLAG_BASIC),
};

static int default_attr_update(struct amdgpu_device *adev, struct amdgpu_device_attr *attr,
+4 −6
Original line number Diff line number Diff line
@@ -1533,11 +1533,6 @@ static int smu_v11_0_irq_process(struct amdgpu_device *adev,
	 */
	uint32_t ctxid = entry->src_data[0];
	uint32_t data;
	/*
	 * if the throttling continues, the logging will be performed every
	 * minute to avoid log flooding.
	 */
	static DEFINE_RATELIMIT_STATE(ratelimit_state, 60 * HZ, 1);

	if (client_id == SOC15_IH_CLIENTID_THM) {
		switch (src_id) {
@@ -1582,7 +1577,10 @@ static int smu_v11_0_irq_process(struct amdgpu_device *adev,
				smu_v11_0_ack_ac_dc_interrupt(&adev->smu);
				break;
			case 0x7:
				if (__ratelimit(&ratelimit_state))
				if (!atomic_read(&adev->throttling_logging_enabled))
					return 0;

				if (__ratelimit(&adev->throttling_logging_rs))
					smu_log_thermal_throttling(smu);

				break;