Commit fddc9fcb authored by Wu Hao's avatar Wu Hao Committed by Moritz Fischer
Browse files

fpga: dfl: fme: add power management support



This patch adds support for power management private feature under
FPGA Management Engine (FME). This private feature driver registers
a hwmon for power (power1_input), thresholds information, e.g.
(power1_max / crit / max_alarm / crit_alarm) and also read-only sysfs
interfaces for other power management information. For configuration,
user could write threshold values via above power1_max / crit sysfs
interface under hwmon too.

Signed-off-by: Luwei Kang <luwei.kang@intel.com>
Signed-off-by: Xu Yilun <yilun.xu@intel.com>
Signed-off-by: Wu Hao <hao.wu@intel.com>
Acked-by: Guenter Roeck <linux@roeck-us.net>
Reviewed-by: Moritz Fischer <mdf@kernel.org>
Signed-off-by: Moritz Fischer <mdf@kernel.org>
parent 4284c65a
Loading
Loading
Loading
Loading
+68 −0
Original line number Diff line number Diff line
@@ -114,6 +114,7 @@ Contact: Wu Hao <hao.wu@intel.com>
Description:	Read-Only. Read this file to get the name of hwmon device, it
		supports values:
		    'dfl_fme_thermal' - thermal hwmon device name
		    'dfl_fme_power'   - power hwmon device name

What:		/sys/bus/platform/devices/dfl-fme.0/hwmon/hwmonX/temp1_input
Date:		October 2019
@@ -170,3 +171,70 @@ Description: Read-Only. Read this file to get the policy of hardware threshold1
		(see 'temp1_max'). It only supports two values (policies):
		    0 - AP2 state (90% throttling)
		    1 - AP1 state (50% throttling)

What:		/sys/bus/platform/devices/dfl-fme.0/hwmon/hwmonX/power1_input
Date:		October 2019
KernelVersion:	5.5
Contact:	Wu Hao <hao.wu@intel.com>
Description:	Read-Only. It returns current FPGA power consumption in uW.

What:		/sys/bus/platform/devices/dfl-fme.0/hwmon/hwmonX/power1_max
Date:		October 2019
KernelVersion:	5.5
Contact:	Wu Hao <hao.wu@intel.com>
Description:	Read-Write. Read this file to get current hardware power
		threshold1 in uW. If power consumption rises at or above
		this threshold, hardware starts 50% throttling.
		Write this file to set current hardware power threshold1 in uW.
		As hardware only accepts values in Watts, the input value is
		rounded down to whole Watts (any fraction below 1 Watt is
		discarded) and clamped to the range from 0 to 127 Watts.
		Write fails with -EINVAL if input parsing fails.

What:		/sys/bus/platform/devices/dfl-fme.0/hwmon/hwmonX/power1_crit
Date:		October 2019
KernelVersion:	5.5
Contact:	Wu Hao <hao.wu@intel.com>
Description:	Read-Write. Read this file to get current hardware power
		threshold2 in uW. If power consumption rises at or above
		this threshold, hardware starts 90% throttling.
		Write this file to set current hardware power threshold2 in uW.
		As hardware only accepts values in Watts, the input value is
		rounded down to whole Watts (any fraction below 1 Watt is
		discarded) and clamped to the range from 0 to 127 Watts.
		Write fails with -EINVAL if input parsing fails.

What:		/sys/bus/platform/devices/dfl-fme.0/hwmon/hwmonX/power1_max_alarm
Date:		October 2019
KernelVersion:	5.5
Contact:	Wu Hao <hao.wu@intel.com>
Description:	Read-only. It returns 1 if power consumption is currently at or
		above hardware threshold1 (see 'power1_max'), otherwise 0.

What:		/sys/bus/platform/devices/dfl-fme.0/hwmon/hwmonX/power1_crit_alarm
Date:		October 2019
KernelVersion:	5.5
Contact:	Wu Hao <hao.wu@intel.com>
Description:	Read-only. It returns 1 if power consumption is currently at or
		above hardware threshold2 (see 'power1_crit'), otherwise 0.

What:		/sys/bus/platform/devices/dfl-fme.0/hwmon/hwmonX/power1_xeon_limit
Date:		October 2019
KernelVersion:	5.5
Contact:	Wu Hao <hao.wu@intel.com>
Description:	Read-Only. It returns power limit for XEON in uW.

What:		/sys/bus/platform/devices/dfl-fme.0/hwmon/hwmonX/power1_fpga_limit
Date:		October 2019
KernelVersion:	5.5
Contact:	Wu Hao <hao.wu@intel.com>
Description:	Read-Only. It returns power limit for FPGA in uW.

What:		/sys/bus/platform/devices/dfl-fme.0/hwmon/hwmonX/power1_ltr
Date:		October 2019
KernelVersion:	5.5
Contact:	Wu Hao <hao.wu@intel.com>
Description:	Read-only. Read this file to get current Latency Tolerance
		Reporting (ltr) value. It returns 1 if all Accelerated
		Function Units (AFUs) can tolerate latency >= 40us for memory
		access or 0 if any AFU is latency sensitive (< 40us).
+207 −0
Original line number Diff line number Diff line
@@ -355,6 +355,209 @@ static const struct dfl_feature_ops fme_thermal_mgmt_ops = {
	.init = fme_thermal_mgmt_init,
};

/*
 * FME power management private feature registers.
 *
 * The FME_PWR_* values are byte offsets added to feature->ioaddr; the
 * macros listed under each offset are bit fields of that 64-bit register.
 */

/* Power status: consumed power (scaled by 1000000 for power1_input) and LTR */
#define FME_PWR_STATUS		0x8
#define FME_LATENCY_TOLERANCE	BIT_ULL(18)
#define PWR_CONSUMED		GENMASK_ULL(17, 0)

/*
 * Power thresholds: threshold1 backs power1_max, threshold2 backs
 * power1_crit; the two status bits are reported as power1_max_alarm and
 * power1_crit_alarm.
 */
#define FME_PWR_THRESHOLD	0x10
#define PWR_THRESHOLD1		GENMASK_ULL(6, 0)	/* in Watts */
#define PWR_THRESHOLD2		GENMASK_ULL(14, 8)	/* in Watts */
#define PWR_THRESHOLD_MAX	0x7f			/* in Watts */
#define PWR_THRESHOLD1_STATUS	BIT_ULL(16)
#define PWR_THRESHOLD2_STATUS	BIT_ULL(17)

/*
 * XEON / FPGA power limits: the limit field is only reported to user space
 * when the matching *_PWR_EN bit is set, otherwise 0 is reported.
 */
#define FME_PWR_XEON_LIMIT	0x18
#define XEON_PWR_LIMIT		GENMASK_ULL(14, 0)	/* in 0.1 Watts */
#define XEON_PWR_EN		BIT_ULL(15)
#define FME_PWR_FPGA_LIMIT	0x20
#define FPGA_PWR_LIMIT		GENMASK_ULL(14, 0)	/* in 0.1 Watts */
#define FPGA_PWR_EN		BIT_ULL(15)

/*
 * hwmon read callback for the power channel.
 *
 * Reads the register that backs @attr and stores the decoded value in @val.
 * Power and threshold fields are multiplied by 1000000 before being
 * reported; the alarm attributes report the raw status bit.
 *
 * Returns 0 on success or -EOPNOTSUPP for an attribute that is not handled.
 */
static int power_hwmon_read(struct device *dev, enum hwmon_sensor_types type,
			    u32 attr, int channel, long *val)
{
	struct dfl_feature *feature = dev_get_drvdata(dev);
	unsigned int offset;
	u64 reg;

	/* Pick the backing register first; reject unknown attributes early. */
	switch (attr) {
	case hwmon_power_input:
		offset = FME_PWR_STATUS;
		break;
	case hwmon_power_max:
	case hwmon_power_crit:
	case hwmon_power_max_alarm:
	case hwmon_power_crit_alarm:
		offset = FME_PWR_THRESHOLD;
		break;
	default:
		return -EOPNOTSUPP;
	}

	reg = readq(feature->ioaddr + offset);

	/* Decode the field that belongs to the requested attribute. */
	switch (attr) {
	case hwmon_power_input:
		*val = (long)(FIELD_GET(PWR_CONSUMED, reg) * 1000000);
		break;
	case hwmon_power_max:
		*val = (long)(FIELD_GET(PWR_THRESHOLD1, reg) * 1000000);
		break;
	case hwmon_power_crit:
		*val = (long)(FIELD_GET(PWR_THRESHOLD2, reg) * 1000000);
		break;
	case hwmon_power_max_alarm:
		*val = (long)FIELD_GET(PWR_THRESHOLD1_STATUS, reg);
		break;
	case hwmon_power_crit_alarm:
		*val = (long)FIELD_GET(PWR_THRESHOLD2_STATUS, reg);
		break;
	}

	return 0;
}

/*
 * hwmon write callback for the power channel.
 *
 * @val is divided by 1000000 (integer division) and clamped to
 * [0, PWR_THRESHOLD_MAX] before being programmed into the threshold1
 * (power1_max) or threshold2 (power1_crit) field. The register
 * read-modify-write is done while holding pdata->lock.
 *
 * Returns 0 on success or -EOPNOTSUPP for an attribute that is not writable.
 */
static int power_hwmon_write(struct device *dev, enum hwmon_sensor_types type,
			     u32 attr, int channel, long val)
{
	struct dfl_feature *feature = dev_get_drvdata(dev);
	struct dfl_feature_platform_data *pdata = dev_get_platdata(dev->parent);
	u64 reg;
	int err = 0;

	val /= 1000000;
	val = clamp_val(val, 0, PWR_THRESHOLD_MAX);

	mutex_lock(&pdata->lock);

	if (attr == hwmon_power_max) {
		reg = readq(feature->ioaddr + FME_PWR_THRESHOLD);
		reg = (reg & ~PWR_THRESHOLD1) | FIELD_PREP(PWR_THRESHOLD1, val);
		writeq(reg, feature->ioaddr + FME_PWR_THRESHOLD);
	} else if (attr == hwmon_power_crit) {
		reg = readq(feature->ioaddr + FME_PWR_THRESHOLD);
		reg = (reg & ~PWR_THRESHOLD2) | FIELD_PREP(PWR_THRESHOLD2, val);
		writeq(reg, feature->ioaddr + FME_PWR_THRESHOLD);
	} else {
		err = -EOPNOTSUPP;
	}

	mutex_unlock(&pdata->lock);

	return err;
}

/*
 * hwmon visibility callback: returns the sysfs mode for each power
 * attribute. The two thresholds are writable (0644), the input and alarm
 * attributes are read-only (0444), and anything else gets mode 0.
 */
static umode_t power_hwmon_attrs_visible(const void *drvdata,
					 enum hwmon_sensor_types type,
					 u32 attr, int channel)
{
	switch (attr) {
	case hwmon_power_max:
	case hwmon_power_crit:
		return 0644;
	case hwmon_power_input:
	case hwmon_power_max_alarm:
	case hwmon_power_crit_alarm:
		return 0444;
	default:
		return 0;
	}
}

/* Callbacks wired into the power hwmon device. */
static const struct hwmon_ops power_hwmon_ops = {
	.read		= power_hwmon_read,
	.write		= power_hwmon_write,
	.is_visible	= power_hwmon_attrs_visible,
};

/* One power channel: input, both thresholds and their alarms. */
static const struct hwmon_channel_info *power_hwmon_info[] = {
	HWMON_CHANNEL_INFO(power,
			   HWMON_P_INPUT |
			   HWMON_P_MAX | HWMON_P_CRIT |
			   HWMON_P_MAX_ALARM | HWMON_P_CRIT_ALARM),
	NULL
};

/* Chip description passed to the hwmon core at registration time. */
static const struct hwmon_chip_info power_hwmon_chip_info = {
	.info	= power_hwmon_info,
	.ops	= &power_hwmon_ops,
};

static ssize_t power1_xeon_limit_show(struct device *dev,
				      struct device_attribute *attr, char *buf)
{
	struct dfl_feature *feature = dev_get_drvdata(dev);
	u16 xeon_limit = 0;
	u64 v;

	v = readq(feature->ioaddr + FME_PWR_XEON_LIMIT);

	if (FIELD_GET(XEON_PWR_EN, v))
		xeon_limit = FIELD_GET(XEON_PWR_LIMIT, v);

	return sprintf(buf, "%u\n", xeon_limit * 100000);
}

static ssize_t power1_fpga_limit_show(struct device *dev,
				      struct device_attribute *attr, char *buf)
{
	struct dfl_feature *feature = dev_get_drvdata(dev);
	u16 fpga_limit = 0;
	u64 v;

	v = readq(feature->ioaddr + FME_PWR_FPGA_LIMIT);

	if (FIELD_GET(FPGA_PWR_EN, v))
		fpga_limit = FIELD_GET(FPGA_PWR_LIMIT, v);

	return sprintf(buf, "%u\n", fpga_limit * 100000);
}

/*
 * sysfs show handler for power1_ltr: prints the FME_LATENCY_TOLERANCE bit
 * of the FME_PWR_STATUS register followed by a newline.
 *
 * Returns the number of bytes written to @buf.
 */
static ssize_t power1_ltr_show(struct device *dev,
			       struct device_attribute *attr, char *buf)
{
	struct dfl_feature *feature = dev_get_drvdata(dev);
	u64 status = readq(feature->ioaddr + FME_PWR_STATUS);
	unsigned int ltr = (unsigned int)FIELD_GET(FME_LATENCY_TOLERANCE,
						   status);

	return sprintf(buf, "%u\n", ltr);
}

/*
 * Read-only device attributes backed by the power1_*_show() handlers above.
 */
static DEVICE_ATTR_RO(power1_xeon_limit);
static DEVICE_ATTR_RO(power1_fpga_limit);
static DEVICE_ATTR_RO(power1_ltr);

/*
 * Extra attributes that do not go through the hwmon channel info;
 * NULL-terminated.
 */
static struct attribute *power_extra_attrs[] = {
	&dev_attr_power1_xeon_limit.attr,
	&dev_attr_power1_fpga_limit.attr,
	&dev_attr_power1_ltr.attr,
	NULL
};

/* Provides power_extra_groups, which is handed to the hwmon registration. */
ATTRIBUTE_GROUPS(power_extra);

/*
 * Init callback of the FME power management private feature.
 *
 * Registers a device-managed hwmon device named "dfl_fme_power" on @pdev,
 * with @feature as its driver data and the extra sysfs attribute groups.
 *
 * Returns 0 on success or the error code from the hwmon registration.
 */
static int fme_power_mgmt_init(struct platform_device *pdev,
			       struct dfl_feature *feature)
{
	struct device *hwmon_dev;

	hwmon_dev = devm_hwmon_device_register_with_info(&pdev->dev,
							 "dfl_fme_power",
							 feature,
							 &power_hwmon_chip_info,
							 power_extra_groups);
	if (!IS_ERR(hwmon_dev))
		return 0;

	dev_err(&pdev->dev, "Fail to register power hwmon\n");
	return PTR_ERR(hwmon_dev);
}

/* Feature IDs handled by the power management ops; zero-terminated. */
static const struct dfl_feature_id fme_power_mgmt_id_table[] = {
	{ .id = FME_FEATURE_ID_POWER_MGMT },
	{ 0 }
};

/* Feature ops for power management: only an init callback is provided. */
static const struct dfl_feature_ops fme_power_mgmt_ops = {
	.init = fme_power_mgmt_init,
};

static struct dfl_feature_driver fme_feature_drvs[] = {
	{
		.id_table = fme_hdr_id_table,
@@ -372,6 +575,10 @@ static struct dfl_feature_driver fme_feature_drvs[] = {
		.id_table = fme_thermal_mgmt_id_table,
		.ops = &fme_thermal_mgmt_ops,
	},
	{
		.id_table = fme_power_mgmt_id_table,
		.ops = &fme_power_mgmt_ops,
	},
	{
		.ops = NULL,
	},