Commit e4cf4bf5 authored by Jonathan Kim, committed by Alex Deucher
Browse files

drm/amdgpu: update df_v3_6 for xgmi perfmons (v2)



add pmu attribute groups and structures for perf events.
add sysfs to track available df perfmon counters
fix overflow handling in perfmon counter reads.

v2: squash in fix (Alex)

Signed-off-by: Jonathan Kim <Jonathan.Kim@amd.com>
Reviewed-by: Felix Kuehling <Felix.Kuehling@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
parent 496091fa
Loading
Loading
Loading
Loading
+18 −0
Original line number Diff line number Diff line
@@ -679,6 +679,7 @@ struct amdgpu_nbio_funcs {

struct amdgpu_df_funcs {
	void (*init)(struct amdgpu_device *adev);
	void (*sw_init)(struct amdgpu_device *adev);
	void (*enable_broadcast_mode)(struct amdgpu_device *adev,
				      bool enable);
	u32 (*get_fb_channel_number)(struct amdgpu_device *adev);
@@ -729,6 +730,7 @@ struct amd_powerplay {
};

#define AMDGPU_RESET_MAGIC_NUM 64
#define AMDGPU_MAX_DF_PERFMONS 4
struct amdgpu_device {
	struct device			*dev;
	struct drm_device		*ddev;
@@ -959,6 +961,7 @@ struct amdgpu_device {
	long				compute_timeout;

	uint64_t			unique_id;
	uint64_t	df_perfmon_config_assign_mask[AMDGPU_MAX_DF_PERFMONS];
};

static inline struct amdgpu_device *amdgpu_ttm_adev(struct ttm_bo_device *bdev)
@@ -1198,4 +1201,19 @@ static inline int amdgpu_dm_display_resume(struct amdgpu_device *adev) { return
#endif

#include "amdgpu_object.h"

/* used by df_v3_6.c and amdgpu_pmu.c */
/*
 * AMDGPU_PMU_ATTR(_name, _object) - define a read-only sysfs PMU attribute.
 *
 * Expands to a _name##_show() handler that prints the string literal
 * _object followed by a newline into the sysfs page buffer, plus a
 * matching `struct device_attribute pmu_attr_##_name` declared via
 * __ATTR_RO().  The BUILD_BUG_ON rejects, at compile time, any _object
 * string that would not fit in a single sysfs page.
 */
#define AMDGPU_PMU_ATTR(_name, _object)					\
static ssize_t								\
_name##_show(struct device *dev,					\
			       struct device_attribute *attr,		\
			       char *page)				\
{									\
	BUILD_BUG_ON(sizeof(_object) >= PAGE_SIZE - 1);			\
	return sprintf(page, _object "\n");				\
}									\
									\
static struct device_attribute pmu_attr_##_name = __ATTR_RO(_name)

#endif
+197 −243
Original line number Diff line number Diff line
@@ -30,8 +30,104 @@
/*
 * Channel-count lookup table; presumably indexed by the hardware
 * fb-channel encoding returned by the DF block, with 0 marking
 * reserved/invalid encodings — TODO confirm against register spec.
 */
static u32 df_v3_6_channel_number[] = {1, 2, 0, 4, 0, 8, 0,
				       16, 32, 0, 0, 0, 2, 4, 8};

static void df_v3_6_init(struct amdgpu_device *adev)
/*
 * perf "format" attributes: describe how the fields of a perf event
 * config are laid out (event selector, instance, unit mask).  Each
 * AMDGPU_PMU_ATTR() call defines a show handler returning the literal
 * bit-range string below.
 */
AMDGPU_PMU_ATTR(event,		"config:0-7");
AMDGPU_PMU_ATTR(instance,	"config:8-15");
AMDGPU_PMU_ATTR(umask,		"config:16-23");

/* NULL-terminated list of the format attributes defined above */
static struct attribute *df_v3_6_format_attrs[] = {
	&pmu_attr_event.attr,
	&pmu_attr_instance.attr,
	&pmu_attr_umask.attr,
	NULL
};

/* sysfs group exposing the format attributes under "format/" */
static struct attribute_group df_v3_6_format_attr_group = {
	.name = "format",
	.attrs = df_v3_6_format_attrs,
};

/*
 * Named perf events for the DF perfmon counters.  Each string encodes
 * the event/instance/umask config for one counter; instances 0x46/0x47
 * appear to correspond to the two cake (xgmi) links — TODO confirm
 * against the DF spec.
 */
AMDGPU_PMU_ATTR(cake0_pcsout_txdata,
		      "event=0x7,instance=0x46,umask=0x2");
AMDGPU_PMU_ATTR(cake1_pcsout_txdata,
		      "event=0x7,instance=0x47,umask=0x2");
AMDGPU_PMU_ATTR(cake0_pcsout_txmeta,
		      "event=0x7,instance=0x46,umask=0x4");
AMDGPU_PMU_ATTR(cake1_pcsout_txmeta,
		      "event=0x7,instance=0x47,umask=0x4");
AMDGPU_PMU_ATTR(cake0_ftiinstat_reqalloc,
		      "event=0xb,instance=0x46,umask=0x4");
AMDGPU_PMU_ATTR(cake1_ftiinstat_reqalloc,
		      "event=0xb,instance=0x47,umask=0x4");
AMDGPU_PMU_ATTR(cake0_ftiinstat_rspalloc,
		      "event=0xb,instance=0x46,umask=0x8");
AMDGPU_PMU_ATTR(cake1_ftiinstat_rspalloc,
		      "event=0xb,instance=0x47,umask=0x8");

/* NULL-terminated list of the event attributes defined above */
static struct attribute *df_v3_6_event_attrs[] = {
	&pmu_attr_cake0_pcsout_txdata.attr,
	&pmu_attr_cake1_pcsout_txdata.attr,
	&pmu_attr_cake0_pcsout_txmeta.attr,
	&pmu_attr_cake1_pcsout_txmeta.attr,
	&pmu_attr_cake0_ftiinstat_reqalloc.attr,
	&pmu_attr_cake1_ftiinstat_reqalloc.attr,
	&pmu_attr_cake0_ftiinstat_rspalloc.attr,
	&pmu_attr_cake1_ftiinstat_rspalloc.attr,
	NULL
};

/* sysfs group exposing the named events under "events/" */
static struct attribute_group df_v3_6_event_attr_group = {
	.name = "events",
	.attrs = df_v3_6_event_attrs
};

/*
 * Combined attribute-group list (format + events); non-static so it can
 * be referenced via the extern declaration in the df_v3_6 header.
 */
const struct attribute_group *df_v3_6_attr_groups[] = {
		&df_v3_6_format_attr_group,
		&df_v3_6_event_attr_group,
		NULL
};

/*
 * sysfs show handler: report how many DF perfmon counters are currently
 * unassigned.  A zeroed slot in df_perfmon_config_assign_mask means the
 * corresponding counter is free.
 */
static ssize_t df_v3_6_get_df_cntr_avail(struct device *dev,
		struct device_attribute *attr,
		char *buf)
{
	struct drm_device *ddev = dev_get_drvdata(dev);
	struct amdgpu_device *adev = ddev->dev_private;
	int avail = 0;
	int i;

	for (i = 0; i < DF_V3_6_MAX_COUNTERS; i++) {
		if (!adev->df_perfmon_config_assign_mask[i])
			avail++;
	}

	return snprintf(buf, PAGE_SIZE, "%i\n", avail);
}

/* read-only device attribute publishing the free-counter count */
static DEVICE_ATTR(df_cntr_avail, S_IRUGO, df_v3_6_get_df_cntr_avail, NULL);

/*
 * Software init for the DF perfmons: expose the free-counter count via
 * sysfs and mark every perfmon counter as unassigned.
 */
static void df_v3_6_sw_init(struct amdgpu_device *adev)
{
	int i;

	/* failure to create the sysfs file is non-fatal; just log it */
	if (device_create_file(adev->dev, &dev_attr_df_cntr_avail))
		DRM_ERROR("failed to create file for available df counters\n");

	/* a zero mask entry means the counter is free for assignment */
	for (i = 0; i < AMDGPU_MAX_DF_PERFMONS; i++)
		adev->df_perfmon_config_assign_mask[i] = 0;
}

static void df_v3_6_enable_broadcast_mode(struct amdgpu_device *adev,
@@ -105,28 +201,19 @@ static void df_v3_6_get_clockgating_state(struct amdgpu_device *adev,
		*flags |= AMD_CG_SUPPORT_DF_MGCG;
}

/* hold counter assignment per gpu struct */
struct df_v3_6_event_mask {
		struct amdgpu_device gpu;
		uint64_t config_assign_mask[AMDGPU_DF_MAX_COUNTERS];
};

/* get assigned df perfmon ctr as int */
static void df_v3_6_pmc_config_2_cntr(struct amdgpu_device *adev,
				      uint64_t config,
				      int *counter)
static int df_v3_6_pmc_config_2_cntr(struct amdgpu_device *adev,
				      uint64_t config)
{
	struct df_v3_6_event_mask *mask;
	int i;

	mask = container_of(adev, struct df_v3_6_event_mask, gpu);

	for (i = 0; i < AMDGPU_DF_MAX_COUNTERS; i++) {
		if ((config & 0x0FFFFFFUL) == mask->config_assign_mask[i]) {
			*counter = i;
			return;
		}
	for (i = 0; i < DF_V3_6_MAX_COUNTERS; i++) {
		if ((config & 0x0FFFFFFUL) ==
					adev->df_perfmon_config_assign_mask[i])
			return i;
	}

	return -EINVAL;
}

/* get address based on counter assignment */
@@ -136,10 +223,7 @@ static void df_v3_6_pmc_get_addr(struct amdgpu_device *adev,
				 uint32_t *lo_base_addr,
				 uint32_t *hi_base_addr)
{

	int target_cntr = -1;

	df_v3_6_pmc_config_2_cntr(adev, config, &target_cntr);
	int target_cntr = df_v3_6_pmc_config_2_cntr(adev, config);

	if (target_cntr < 0)
		return;
@@ -184,33 +268,29 @@ static int df_v3_6_pmc_get_ctrl_settings(struct amdgpu_device *adev,
					  uint32_t *lo_val,
					  uint32_t *hi_val)
{

	uint32_t eventsel, instance, unitmask;
	uint32_t es_5_0, es_13_0, es_13_6, es_13_12, es_11_8, es_7_0;

	df_v3_6_pmc_get_addr(adev, config, 1, lo_base_addr, hi_base_addr);

	if (lo_val == NULL || hi_val == NULL)
		return -EINVAL;

	if ((*lo_base_addr == 0) || (*hi_base_addr == 0)) {
		DRM_ERROR("DF PMC addressing not retrieved! Lo: %x, Hi: %x",
		DRM_ERROR("[DF PMC] addressing not retrieved! Lo: %x, Hi: %x",
				*lo_base_addr, *hi_base_addr);
		return -ENXIO;
	}

	eventsel = GET_EVENT(config);
	instance = GET_INSTANCE(config);
	unitmask = GET_UNITMASK(config);
	if (lo_val && hi_val) {
		uint32_t eventsel, instance, unitmask;
		uint32_t instance_10, instance_5432, instance_76;

		eventsel = DF_V3_6_GET_EVENT(config) & 0x3f;
		unitmask = DF_V3_6_GET_UNITMASK(config) & 0xf;
		instance = DF_V3_6_GET_INSTANCE(config);

	es_5_0 = eventsel & 0x3FUL;
	es_13_6 = instance;
	es_13_0 = (es_13_6 << 6) + es_5_0;
	es_13_12 = (es_13_0 & 0x03000UL) >> 12;
	es_11_8 = (es_13_0 & 0x0F00UL) >> 8;
	es_7_0 = es_13_0 & 0x0FFUL;
	*lo_val = (es_7_0 & 0xFFUL) | ((unitmask & 0x0FUL) << 8);
	*hi_val = (es_11_8 | ((es_13_12)<<(29)));
		instance_10 = instance & 0x3;
		instance_5432 = (instance >> 2) & 0xf;
		instance_76 = (instance >> 6) & 0x3;

		*lo_val = (unitmask << 8) | (instance_10 << 6) | eventsel;
		*hi_val = (instance_76 << 29) | instance_5432;
	}

	return 0;
}
@@ -220,26 +300,21 @@ static int df_v3_6_pmc_assign_cntr(struct amdgpu_device *adev,
				   uint64_t config,
				   int *is_assigned)
{

	struct df_v3_6_event_mask *mask;
	int i, target_cntr;

	target_cntr = -1;

	*is_assigned = 0;

	df_v3_6_pmc_config_2_cntr(adev, config, &target_cntr);
	target_cntr = df_v3_6_pmc_config_2_cntr(adev, config);

	if (target_cntr >= 0) {
		*is_assigned = 1;
		return 0;
	}

	mask = container_of(adev, struct df_v3_6_event_mask, gpu);

	for (i = 0; i < AMDGPU_DF_MAX_COUNTERS; i++) {
		if (mask->config_assign_mask[i] == 0ULL) {
			mask->config_assign_mask[i] = config & 0x0FFFFFFUL;
	for (i = 0; i < DF_V3_6_MAX_COUNTERS; i++) {
		if (adev->df_perfmon_config_assign_mask[i] == 0U) {
			adev->df_perfmon_config_assign_mask[i] =
							config & 0x0FFFFFFUL;
			return 0;
		}
	}
@@ -251,66 +326,17 @@ static int df_v3_6_pmc_assign_cntr(struct amdgpu_device *adev,
static void df_v3_6_pmc_release_cntr(struct amdgpu_device *adev,
				     uint64_t config)
{

	struct df_v3_6_event_mask *mask;
	int target_cntr;

	target_cntr = -1;

	df_v3_6_pmc_config_2_cntr(adev, config, &target_cntr);

	mask = container_of(adev, struct df_v3_6_event_mask, gpu);
	int target_cntr = df_v3_6_pmc_config_2_cntr(adev, config);

	if (target_cntr >= 0)
		mask->config_assign_mask[target_cntr] = 0ULL;

		adev->df_perfmon_config_assign_mask[target_cntr] = 0ULL;
}

/*
 * get xgmi link counters via programmable data fabric (df) counters (max 4)
 * using cake tx event.
 *
 * @adev -> amdgpu device
 * @instance-> currently cake has 2 links to poll on vega20
 * @count -> counters to pass
 *
 */

static void df_v3_6_get_xgmi_link_cntr(struct amdgpu_device *adev,
				       int instance,
				       uint64_t *count)
{
	uint32_t lo_base_addr, hi_base_addr, lo_val, hi_val;
	uint64_t config;

	config = GET_INSTANCE_CONFIG(instance);

	df_v3_6_pmc_get_read_settings(adev, config, &lo_base_addr,
				      &hi_base_addr);

	if ((lo_base_addr == 0) || (hi_base_addr == 0))
		return;

	lo_val = RREG32_PCIE(lo_base_addr);
	hi_val = RREG32_PCIE(hi_base_addr);

	*count  = ((hi_val | 0ULL) << 32) | (lo_val | 0ULL);
}

/*
 * reset xgmi link counters
 *
 * @adev -> amdgpu device
 * @instance-> currently cake has 2 links to poll on vega20
 *
 */
static void df_v3_6_reset_xgmi_link_cntr(struct amdgpu_device *adev,
					 int instance)
static void df_v3_6_reset_perfmon_cntr(struct amdgpu_device *adev,
					 uint64_t config)
{
	uint32_t lo_base_addr, hi_base_addr;
	uint64_t config;

	config = 0ULL | (0x7ULL) | ((0x46ULL + instance) << 8) | (0x2 << 16);

	df_v3_6_pmc_get_read_settings(adev, config, &lo_base_addr,
				      &hi_base_addr);
@@ -322,26 +348,13 @@ static void df_v3_6_reset_xgmi_link_cntr(struct amdgpu_device *adev,
	WREG32_PCIE(hi_base_addr, 0UL);
}

/*
 * add xgmi link counters
 *
 * @adev -> amdgpu device
 * @instance-> currently cake has 2 links to poll on vega20
 *
 */

static int df_v3_6_add_xgmi_link_cntr(struct amdgpu_device *adev,
				      int instance)
static int df_v3_6_add_perfmon_cntr(struct amdgpu_device *adev,
				      uint64_t config)
{
	uint32_t lo_base_addr, hi_base_addr, lo_val, hi_val;
	uint64_t config;
	int ret, is_assigned;

	if (instance < 0 || instance > 1)
		return -EINVAL;

	config = GET_INSTANCE_CONFIG(instance);

	ret = df_v3_6_pmc_assign_cntr(adev, config, &is_assigned);

	if (ret || is_assigned)
@@ -357,125 +370,83 @@ static int df_v3_6_add_xgmi_link_cntr(struct amdgpu_device *adev,
	if (ret)
		return ret;

	DRM_DEBUG_DRIVER("config=%llx addr=%08x:%08x val=%08x:%08x",
			config, lo_base_addr, hi_base_addr, lo_val, hi_val);

	WREG32_PCIE(lo_base_addr, lo_val);
	WREG32_PCIE(hi_base_addr, hi_val);

	return ret;
}


/*
 * start xgmi link counters
 *
 * @adev -> amdgpu device
 * @instance-> currently cake has 2 links to poll on vega20
 * @is_enable -> either resume or assign event via df perfmon
 *
 */

static int df_v3_6_start_xgmi_link_cntr(struct amdgpu_device *adev,
					int instance,
static int df_v3_6_pmc_start(struct amdgpu_device *adev, uint64_t config,
			     int is_enable)
{
	uint32_t lo_base_addr, hi_base_addr, lo_val;
	uint64_t config;
	int ret;

	if (instance < 0 || instance > 1)
		return -EINVAL;

	if (is_enable) {
	int ret = 0;

		ret = df_v3_6_add_xgmi_link_cntr(adev, instance);
	switch (adev->asic_type) {
	case CHIP_VEGA20:

		if (ret)
			return ret;
		df_v3_6_reset_perfmon_cntr(adev, config);

		if (is_enable) {
			ret = df_v3_6_add_perfmon_cntr(adev, config);
		} else {

		config = GET_INSTANCE_CONFIG(instance);

		df_v3_6_pmc_get_ctrl_settings(adev,
			ret = df_v3_6_pmc_get_ctrl_settings(adev,
					config,
					&lo_base_addr,
					&hi_base_addr,
					NULL,
					NULL);

		if (lo_base_addr == 0)
			return -EINVAL;
			if (ret)
				return ret;

			lo_val = RREG32_PCIE(lo_base_addr);

			DRM_DEBUG_DRIVER("config=%llx addr=%08x:%08x val=%08x",
				config, lo_base_addr, hi_base_addr, lo_val);

			WREG32_PCIE(lo_base_addr, lo_val | (1ULL << 22));
		}

		ret = 0;
		break;
	default:
		break;
	}

	return ret;

}

/*
 * start xgmi link counters
 *
 * @adev -> amdgpu device
 * @instance-> currently cake has 2 links to poll on vega20
 * @is_enable -> either pause or unassign event via df perfmon
 *
 */

static int df_v3_6_stop_xgmi_link_cntr(struct amdgpu_device *adev,
				       int instance,
static int df_v3_6_pmc_stop(struct amdgpu_device *adev, uint64_t config,
			    int is_disable)
{

	uint32_t lo_base_addr, hi_base_addr, lo_val;
	uint64_t config;

	config = GET_INSTANCE_CONFIG(instance);

	if (is_disable) {
		df_v3_6_reset_xgmi_link_cntr(adev, instance);
		df_v3_6_pmc_release_cntr(adev, config);
	} else {
	int ret = 0;

		df_v3_6_pmc_get_ctrl_settings(adev,
	switch (adev->asic_type) {
	case CHIP_VEGA20:
		ret = df_v3_6_pmc_get_ctrl_settings(adev,
			config,
			&lo_base_addr,
			&hi_base_addr,
			NULL,
			NULL);

		if ((lo_base_addr == 0) || (hi_base_addr == 0))
			return -EINVAL;
		if (ret)
			return ret;

		lo_val = RREG32_PCIE(lo_base_addr);

		WREG32_PCIE(lo_base_addr, lo_val & ~(1ULL << 22));
	}
		DRM_DEBUG_DRIVER("config=%llx addr=%08x:%08x val=%08x",
				config, lo_base_addr, hi_base_addr, lo_val);

	return 0;
}

static int df_v3_6_pmc_start(struct amdgpu_device *adev, uint64_t config,
			     int is_enable)
{
	int xgmi_tx_link, ret = 0;

	switch (adev->asic_type) {
	case CHIP_VEGA20:
		xgmi_tx_link = IS_DF_XGMI_0_TX(config) ? 0
					: (IS_DF_XGMI_1_TX(config) ? 1 : -1);

		if (xgmi_tx_link >= 0)
			ret = df_v3_6_start_xgmi_link_cntr(adev, xgmi_tx_link,
						      is_enable);
		WREG32_PCIE(lo_base_addr, lo_val & ~(1ULL << 22));

		if (ret)
			return ret;
		if (is_disable)
			df_v3_6_pmc_release_cntr(adev, config);

		ret = 0;
		break;
	default:
		break;
@@ -484,59 +455,42 @@ static int df_v3_6_pmc_start(struct amdgpu_device *adev, uint64_t config,
	return ret;
}

static int df_v3_6_pmc_stop(struct amdgpu_device *adev, uint64_t config,
			    int is_disable)
static void df_v3_6_pmc_get_count(struct amdgpu_device *adev,
				  uint64_t config,
				  uint64_t *count)
{
	int xgmi_tx_link, ret = 0;
	uint32_t lo_base_addr, hi_base_addr, lo_val, hi_val;
	*count = 0;

	switch (adev->asic_type) {
	case CHIP_VEGA20:
			xgmi_tx_link = IS_DF_XGMI_0_TX(config) ? 0
				: (IS_DF_XGMI_1_TX(config) ? 1 : -1);

			if (xgmi_tx_link >= 0) {
				ret = df_v3_6_stop_xgmi_link_cntr(adev,
								  xgmi_tx_link,
								  is_disable);
				if (ret)
					return ret;
			}

			ret = 0;
			break;
	default:
		break;
	}
		df_v3_6_pmc_get_read_settings(adev, config, &lo_base_addr,
				      &hi_base_addr);

	return ret;
}
		if ((lo_base_addr == 0) || (hi_base_addr == 0))
			return;

static void df_v3_6_pmc_get_count(struct amdgpu_device *adev,
				  uint64_t config,
				  uint64_t *count)
{
		lo_val = RREG32_PCIE(lo_base_addr);
		hi_val = RREG32_PCIE(hi_base_addr);

	int xgmi_tx_link;
		*count  = ((hi_val | 0ULL) << 32) | (lo_val | 0ULL);

	switch (adev->asic_type) {
	case CHIP_VEGA20:
		xgmi_tx_link = IS_DF_XGMI_0_TX(config) ? 0
					: (IS_DF_XGMI_1_TX(config) ? 1 : -1);
		if (*count >= DF_V3_6_PERFMON_OVERFLOW)
			*count = 0;

		if (xgmi_tx_link >= 0) {
			df_v3_6_reset_xgmi_link_cntr(adev, xgmi_tx_link);
			df_v3_6_get_xgmi_link_cntr(adev, xgmi_tx_link, count);
		}
		DRM_DEBUG_DRIVER("config=%llx addr=%08x:%08x val=%08x:%08x",
			config, lo_base_addr, hi_base_addr, lo_val, hi_val);

		break;

	default:
		break;
	}

}

const struct amdgpu_df_funcs df_v3_6_funcs = {
	.init = df_v3_6_init,
	.sw_init = df_v3_6_sw_init,
	.enable_broadcast_mode = df_v3_6_enable_broadcast_mode,
	.get_fb_channel_number = df_v3_6_get_fb_channel_number,
	.get_hbm_channel_number = df_v3_6_get_hbm_channel_number,
+6 −13
Original line number Diff line number Diff line
@@ -36,22 +36,15 @@ enum DF_V3_6_MGCG {
};

/* Defined in global_features.h as FTI_PERFMON_VISIBLE */
#define AMDGPU_DF_MAX_COUNTERS		4
#define DF_V3_6_MAX_COUNTERS		4

/* get flags from df perfmon config */
#define GET_EVENT(x)			(x & 0xFFUL)
#define GET_INSTANCE(x)			((x >> 8) & 0xFFUL)
#define GET_UNITMASK(x)			((x >> 16) & 0xFFUL)
#define GET_INSTANCE_CONFIG(x)		(0ULL | (0x07ULL) \
					| ((0x046ULL + x) << 8) \
					| (0x02 << 16))

/* df event conf macros */
#define IS_DF_XGMI_0_TX(x) (GET_EVENT(x) == 0x7 \
		&& GET_INSTANCE(x) == 0x46 && GET_UNITMASK(x) == 0x2)
#define IS_DF_XGMI_1_TX(x) (GET_EVENT(x) == 0x7 \
		&& GET_INSTANCE(x) == 0x47 && GET_UNITMASK(x) == 0x2)
#define DF_V3_6_GET_EVENT(x)		(x & 0xFFUL)
#define DF_V3_6_GET_INSTANCE(x)		((x >> 8) & 0xFFUL)
#define DF_V3_6_GET_UNITMASK(x)		((x >> 16) & 0xFFUL)
#define DF_V3_6_PERFMON_OVERFLOW	0xFFFFFFFFFFFFULL

extern const struct attribute_group *df_v3_6_attr_groups[];
extern const struct amdgpu_df_funcs df_v3_6_funcs;

#endif
+3 −0
Original line number Diff line number Diff line
@@ -1034,6 +1034,8 @@ static int soc15_common_sw_init(void *handle)
	if (amdgpu_sriov_vf(adev))
		xgpu_ai_mailbox_add_irq_id(adev);

	adev->df_funcs->sw_init(adev);

	return 0;
}

@@ -1080,6 +1082,7 @@ static int soc15_common_hw_init(void *handle)
	 */
	if (adev->nbio_funcs->remap_hdp_registers)
		adev->nbio_funcs->remap_hdp_registers(adev);

	/* enable the doorbell aperture */
	soc15_enable_doorbell_aperture(adev, true);
	/* HW doorbell routing policy: doorbell writing not