Commit 64743e65 authored by Linus Torvalds's avatar Linus Torvalds
Browse files

Merge tag 'x86_cache_for_v5.10' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull x86 cache resource control updates from Borislav Petkov:

 - Misc cleanups to the resctrl code in preparation for the ARM side
   (James Morse)

 - Add support for controlling per-thread memory bandwidth throttling
   delay values on hw which supports it (Fenghua Yu)

* tag 'x86_cache_for_v5.10' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  x86/resctrl: Enable user to view thread or core throttling mode
  x86/resctrl: Enumerate per-thread MBA controls
  cacheinfo: Move resctrl's get_cache_id() to the cacheinfo header file
  x86/resctrl: Add struct rdt_cache::arch_has_{sparse, empty}_bitmaps
  x86/resctrl: Merge AMD/Intel parse_bw() calls
  x86/resctrl: Add struct rdt_membw::arch_needs_linear to explain AMD/Intel MBA difference
  x86/resctrl: Use is_closid_match() in more places
  x86/resctrl: Include pid.h
  x86/resctrl: Use container_of() in delayed_work handlers
  x86/resctrl: Fix stale comment
  x86/resctrl: Remove struct rdt_membw::max_delay
  x86/resctrl: Remove unused struct mbm_state::chunks_bw
parents f94ab231 29b6bd41
Loading
Loading
Loading
Loading
+16 −2
Original line number Diff line number Diff line
@@ -138,6 +138,18 @@ with respect to allocation:
		non-linear. This field is purely informational
		only.

"thread_throttle_mode":
		Indicator on Intel systems of how tasks running on threads
		of a physical core are throttled in cases where they
		request different memory bandwidth percentages:

		"max":
			the smallest percentage is applied
			to all threads
		"per-thread":
			bandwidth percentages are directly applied to
			the threads running on the core

If RDT monitoring is available there will be an "L3_MON" directory
with the following files:

@@ -364,8 +376,10 @@ to the next control step available on the hardware.

The bandwidth throttling is a core specific mechanism on some of Intel
SKUs. Using a high bandwidth and a low bandwidth setting on two threads
sharing a core will result in both threads being throttled to use the
low bandwidth. The fact that Memory bandwidth allocation(MBA) is a core
sharing a core may result in both threads being throttled to use the
low bandwidth (see "thread_throttle_mode").

The fact that Memory bandwidth allocation(MBA) may be a core
specific mechanism where as memory bandwidth monitoring(MBM) is done at
the package level may lead to confusion when users try to apply control
via the MBA and then monitor the bandwidth to see if the controls are
+1 −0
Original line number Diff line number Diff line
@@ -288,6 +288,7 @@
#define X86_FEATURE_FENCE_SWAPGS_USER	(11*32+ 4) /* "" LFENCE in user entry SWAPGS path */
#define X86_FEATURE_FENCE_SWAPGS_KERNEL	(11*32+ 5) /* "" LFENCE in kernel entry SWAPGS path */
#define X86_FEATURE_SPLIT_LOCK_DETECT	(11*32+ 6) /* #AC for split lock */
#define X86_FEATURE_PER_THREAD_MBA	(11*32+ 7) /* "" Per-thread Memory Bandwidth Allocation */

/* Intel-defined CPU features, CPUID level 0x00000007:1 (EAX), word 12 */
#define X86_FEATURE_AVX512_BF16		(12*32+ 5) /* AVX512 BFLOAT16 instructions */
+1 −0
Original line number Diff line number Diff line
@@ -70,6 +70,7 @@ static const struct cpuid_dep cpuid_deps[] = {
	{ X86_FEATURE_CQM_MBM_LOCAL,		X86_FEATURE_CQM_LLC   },
	{ X86_FEATURE_AVX512_BF16,		X86_FEATURE_AVX512VL  },
	{ X86_FEATURE_ENQCMD,			X86_FEATURE_XSAVES    },
	{ X86_FEATURE_PER_THREAD_MBA,		X86_FEATURE_MBA       },
	{}
};

+29 −27
Original line number Diff line number Diff line
@@ -168,6 +168,7 @@ struct rdt_resource rdt_resources_all[] = {
		.name			= "MB",
		.domains		= domain_init(RDT_RESOURCE_MBA),
		.cache_level		= 3,
		.parse_ctrlval		= parse_bw,
		.format_str		= "%d=%*u",
		.fflags			= RFTYPE_RES_MB,
	},
@@ -254,22 +255,30 @@ static bool __get_mem_config_intel(struct rdt_resource *r)
{
	union cpuid_0x10_3_eax eax;
	union cpuid_0x10_x_edx edx;
	u32 ebx, ecx;
	u32 ebx, ecx, max_delay;

	cpuid_count(0x00000010, 3, &eax.full, &ebx, &ecx, &edx.full);
	r->num_closid = edx.split.cos_max + 1;
	r->membw.max_delay = eax.split.max_delay + 1;
	max_delay = eax.split.max_delay + 1;
	r->default_ctrl = MAX_MBA_BW;
	r->membw.arch_needs_linear = true;
	if (ecx & MBA_IS_LINEAR) {
		r->membw.delay_linear = true;
		r->membw.min_bw = MAX_MBA_BW - r->membw.max_delay;
		r->membw.bw_gran = MAX_MBA_BW - r->membw.max_delay;
		r->membw.min_bw = MAX_MBA_BW - max_delay;
		r->membw.bw_gran = MAX_MBA_BW - max_delay;
	} else {
		if (!rdt_get_mb_table(r))
			return false;
		r->membw.arch_needs_linear = false;
	}
	r->data_width = 3;

	if (boot_cpu_has(X86_FEATURE_PER_THREAD_MBA))
		r->membw.throttle_mode = THREAD_THROTTLE_PER_THREAD;
	else
		r->membw.throttle_mode = THREAD_THROTTLE_MAX;
	thread_throttle_mode_init();

	r->alloc_capable = true;
	r->alloc_enabled = true;

@@ -288,7 +297,13 @@ static bool __rdt_get_mem_config_amd(struct rdt_resource *r)

	/* AMD does not use delay */
	r->membw.delay_linear = false;
	r->membw.arch_needs_linear = false;

	/*
	 * AMD does not use memory delay throttle model to control
	 * the allocation like Intel does.
	 */
	r->membw.throttle_mode = THREAD_THROTTLE_UNDEFINED;
	r->membw.min_bw = 0;
	r->membw.bw_gran = 1;
	/* Max value is 2048, Data width should be 4 in decimal */
@@ -346,19 +361,6 @@ static void rdt_get_cdp_l2_config(void)
	rdt_get_cdp_config(RDT_RESOURCE_L2, RDT_RESOURCE_L2CODE);
}

/*
 * Return the id of the cache at @level for @cpu, or -1 if @cpu has no
 * cache at that level.
 */
static int get_cache_id(int cpu, int level)
{
	struct cpu_cacheinfo *ci = get_cpu_cacheinfo(cpu);
	int idx = 0;

	while (idx < ci->num_leaves) {
		if (ci->info_list[idx].level == level)
			return ci->info_list[idx].id;
		idx++;
	}

	return -1;
}

static void
mba_wrmsr_amd(struct rdt_domain *d, struct msr_param *m, struct rdt_resource *r)
{
@@ -556,7 +558,7 @@ static int domain_setup_mon_state(struct rdt_resource *r, struct rdt_domain *d)
 */
static void domain_add_cpu(int cpu, struct rdt_resource *r)
{
	int id = get_cache_id(cpu, r->cache_level);
	int id = get_cpu_cacheinfo_id(cpu, r->cache_level);
	struct list_head *add_pos = NULL;
	struct rdt_domain *d;

@@ -602,7 +604,7 @@ static void domain_add_cpu(int cpu, struct rdt_resource *r)

static void domain_remove_cpu(int cpu, struct rdt_resource *r)
{
	int id = get_cache_id(cpu, r->cache_level);
	int id = get_cpu_cacheinfo_id(cpu, r->cache_level);
	struct rdt_domain *d;

	d = rdt_find_domain(r, id, NULL);
@@ -918,12 +920,12 @@ static __init void rdt_init_res_defs_intel(void)
		    r->rid == RDT_RESOURCE_L3CODE ||
		    r->rid == RDT_RESOURCE_L2 ||
		    r->rid == RDT_RESOURCE_L2DATA ||
		    r->rid == RDT_RESOURCE_L2CODE)
			r->cbm_validate = cbm_validate_intel;
		else if (r->rid == RDT_RESOURCE_MBA) {
		    r->rid == RDT_RESOURCE_L2CODE) {
			r->cache.arch_has_sparse_bitmaps = false;
			r->cache.arch_has_empty_bitmaps = false;
		} else if (r->rid == RDT_RESOURCE_MBA) {
			r->msr_base = MSR_IA32_MBA_THRTL_BASE;
			r->msr_update = mba_wrmsr_intel;
			r->parse_ctrlval = parse_bw_intel;
		}
	}
}
@@ -938,12 +940,12 @@ static __init void rdt_init_res_defs_amd(void)
		    r->rid == RDT_RESOURCE_L3CODE ||
		    r->rid == RDT_RESOURCE_L2 ||
		    r->rid == RDT_RESOURCE_L2DATA ||
		    r->rid == RDT_RESOURCE_L2CODE)
			r->cbm_validate = cbm_validate_amd;
		else if (r->rid == RDT_RESOURCE_MBA) {
		    r->rid == RDT_RESOURCE_L2CODE) {
			r->cache.arch_has_sparse_bitmaps = true;
			r->cache.arch_has_empty_bitmaps = true;
		} else if (r->rid == RDT_RESOURCE_MBA) {
			r->msr_base = MSR_IA32_MBA_BW_BASE;
			r->msr_update = mba_wrmsr_amd;
			r->parse_ctrlval = parse_bw_amd;
		}
	}
}
+13 −79
Original line number Diff line number Diff line
@@ -21,53 +21,6 @@
#include <linux/slab.h>
#include "internal.h"

/*
 * Check whether MBA bandwidth percentage value is correct. The value is
 * checked against the minimum and maximum bandwidth values specified by
 * the hardware. The allocated bandwidth percentage is rounded to the next
 * control step available on the hardware.
 */
static bool bw_validate_amd(char *buf, unsigned long *data,
			    struct rdt_resource *r)
{
	unsigned long val;

	/* Reject anything that is not a plain decimal integer. */
	if (kstrtoul(buf, 10, &val)) {
		rdt_last_cmd_printf("Non-decimal digit in MB value %s\n", buf);
		return false;
	}

	/* Enforce the hardware-advertised [min_bw, default_ctrl] range. */
	if (val < r->membw.min_bw || val > r->default_ctrl) {
		rdt_last_cmd_printf("MB value %ld out of range [%d,%d]\n", val,
				    r->membw.min_bw, r->default_ctrl);
		return false;
	}

	/* Round up to the next bandwidth granularity step. */
	*data = roundup(val, (unsigned long)r->membw.bw_gran);
	return true;
}

/*
 * Parse a user-supplied AMD MBA bandwidth value for one resource domain.
 * Each domain may be written at most once per schemata update; a second
 * value for the same domain is rejected. On success the validated
 * (range-checked, granularity-rounded) value is staged in @d->new_ctrl.
 *
 * Returns 0 on success, -EINVAL on a duplicate domain or invalid value.
 */
int parse_bw_amd(struct rdt_parse_data *data, struct rdt_resource *r,
		 struct rdt_domain *d)
{
	unsigned long bw_val;

	/* A domain can only appear once per update. */
	if (d->have_new_ctrl) {
		rdt_last_cmd_printf("Duplicate domain %d\n", d->id);
		return -EINVAL;
	}

	if (!bw_validate_amd(data->buf, &bw_val, r))
		return -EINVAL;

	/* Stage the value; it is applied later when the update commits. */
	d->new_ctrl = bw_val;
	d->have_new_ctrl = true;

	return 0;
}

/*
 * Check whether MBA bandwidth percentage value is correct. The value is
 * checked against the minimum and max bandwidth values specified by the
@@ -82,7 +35,7 @@ static bool bw_validate(char *buf, unsigned long *data, struct rdt_resource *r)
	/*
	 * Only linear delay values is supported for current Intel SKUs.
	 */
	if (!r->membw.delay_linear) {
	if (!r->membw.delay_linear && r->membw.arch_needs_linear) {
		rdt_last_cmd_puts("No support for non-linear MB domains\n");
		return false;
	}
@@ -104,7 +57,7 @@ static bool bw_validate(char *buf, unsigned long *data, struct rdt_resource *r)
	return true;
}

int parse_bw_intel(struct rdt_parse_data *data, struct rdt_resource *r,
int parse_bw(struct rdt_parse_data *data, struct rdt_resource *r,
	     struct rdt_domain *d)
{
	unsigned long bw_val;
@@ -123,12 +76,14 @@ int parse_bw_intel(struct rdt_parse_data *data, struct rdt_resource *r,
}

/*
 * Check whether a cache bit mask is valid. The SDM says:
 * Check whether a cache bit mask is valid.
 * For Intel the SDM says:
 *	Please note that all (and only) contiguous '1' combinations
 *	are allowed (e.g. FFFFH, 0FF0H, 003CH, etc.).
 * Additionally Haswell requires at least two bits set.
 * AMD allows non-contiguous bitmasks.
 */
bool cbm_validate_intel(char *buf, u32 *data, struct rdt_resource *r)
static bool cbm_validate(char *buf, u32 *data, struct rdt_resource *r)
{
	unsigned long first_bit, zero_bit, val;
	unsigned int cbm_len = r->cache.cbm_len;
@@ -140,7 +95,8 @@ bool cbm_validate_intel(char *buf, u32 *data, struct rdt_resource *r)
		return false;
	}

	if (val == 0 || val > r->default_ctrl) {
	if ((!r->cache.arch_has_empty_bitmaps && val == 0) ||
	    val > r->default_ctrl) {
		rdt_last_cmd_puts("Mask out of range\n");
		return false;
	}
@@ -148,7 +104,9 @@ bool cbm_validate_intel(char *buf, u32 *data, struct rdt_resource *r)
	first_bit = find_first_bit(&val, cbm_len);
	zero_bit = find_next_zero_bit(&val, cbm_len, first_bit);

	if (find_next_bit(&val, cbm_len, zero_bit) < cbm_len) {
	/* Are non-contiguous bitmaps allowed? */
	if (!r->cache.arch_has_sparse_bitmaps &&
	    (find_next_bit(&val, cbm_len, zero_bit) < cbm_len)) {
		rdt_last_cmd_printf("The mask %lx has non-consecutive 1-bits\n", val);
		return false;
	}
@@ -163,30 +121,6 @@ bool cbm_validate_intel(char *buf, u32 *data, struct rdt_resource *r)
	return true;
}

/*
 * Check whether a cache bit mask is valid. AMD allows non-contiguous
 * bitmasks, so only a parse check and a range check are required here.
 */
bool cbm_validate_amd(char *buf, u32 *data, struct rdt_resource *r)
{
	unsigned long mask;

	/* The mask is supplied in hexadecimal. */
	if (kstrtoul(buf, 16, &mask)) {
		rdt_last_cmd_printf("Non-hex character in the mask %s\n", buf);
		return false;
	}

	/* An all-zero mask is allowed on AMD; only the upper bound matters. */
	if (mask > r->default_ctrl) {
		rdt_last_cmd_puts("Mask out of range\n");
		return false;
	}

	*data = mask;
	return true;
}

/*
 * Read one cache bit mask (hex). Check that it is valid for the current
 * resource type.
@@ -212,7 +146,7 @@ int parse_cbm(struct rdt_parse_data *data, struct rdt_resource *r,
		return -EINVAL;
	}

	if (!r->cbm_validate(data->buf, &cbm_val, r))
	if (!cbm_validate(data->buf, &cbm_val, r))
		return -EINVAL;

	if ((rdtgrp->mode == RDT_MODE_EXCLUSIVE ||
Loading