Commit 64743e65 authored by Linus Torvalds's avatar Linus Torvalds
Browse files

Merge tag 'x86_cache_for_v5.10' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull x86 cache resource control updates from Borislav Petkov:

 - Misc cleanups to the resctrl code in preparation for the ARM side
   (James Morse)

 - Add support for controlling per-thread memory bandwidth throttling
   delay values on hw which supports it (Fenghua Yu)

* tag 'x86_cache_for_v5.10' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  x86/resctrl: Enable user to view thread or core throttling mode
  x86/resctrl: Enumerate per-thread MBA controls
  cacheinfo: Move resctrl's get_cache_id() to the cacheinfo header file
  x86/resctrl: Add struct rdt_cache::arch_has_{sparse, empty}_bitmaps
  x86/resctrl: Merge AMD/Intel parse_bw() calls
  x86/resctrl: Add struct rdt_membw::arch_needs_linear to explain AMD/Intel MBA difference
  x86/resctrl: Use is_closid_match() in more places
  x86/resctrl: Include pid.h
  x86/resctrl: Use container_of() in delayed_work handlers
  x86/resctrl: Fix stale comment
  x86/resctrl: Remove struct rdt_membw::max_delay
  x86/resctrl: Remove unused struct mbm_state::chunks_bw
parents f94ab231 29b6bd41
Loading
Loading
Loading
Loading
+16 −2
Original line number Diff line number Diff line
@@ -138,6 +138,18 @@ with respect to allocation:
		non-linear. This field is purely informational
		only.

"thread_throttle_mode":
		Indicator on Intel systems of how tasks running on threads
		of a physical core are throttled in cases where they
		request different memory bandwidth percentages:

		"max":
			the smallest percentage is applied
			to all threads
		"per-thread":
			bandwidth percentages are directly applied to
			the threads running on the core

If RDT monitoring is available there will be an "L3_MON" directory
with the following files:

@@ -364,8 +376,10 @@ to the next control step available on the hardware.

The bandwidth throttling is a core specific mechanism on some of Intel
SKUs. Using a high bandwidth and a low bandwidth setting on two threads
sharing a core will result in both threads being throttled to use the
low bandwidth. The fact that Memory bandwidth allocation(MBA) is a core
sharing a core may result in both threads being throttled to use the
low bandwidth (see "thread_throttle_mode").

The fact that Memory bandwidth allocation(MBA) may be a core
specific mechanism where as memory bandwidth monitoring(MBM) is done at
the package level may lead to confusion when users try to apply control
via the MBA and then monitor the bandwidth to see if the controls are
+1 −0
Original line number Diff line number Diff line
@@ -288,6 +288,7 @@
#define X86_FEATURE_FENCE_SWAPGS_USER	(11*32+ 4) /* "" LFENCE in user entry SWAPGS path */
#define X86_FEATURE_FENCE_SWAPGS_KERNEL	(11*32+ 5) /* "" LFENCE in kernel entry SWAPGS path */
#define X86_FEATURE_SPLIT_LOCK_DETECT	(11*32+ 6) /* #AC for split lock */
#define X86_FEATURE_PER_THREAD_MBA	(11*32+ 7) /* "" Per-thread Memory Bandwidth Allocation */

/* Intel-defined CPU features, CPUID level 0x00000007:1 (EAX), word 12 */
#define X86_FEATURE_AVX512_BF16		(12*32+ 5) /* AVX512 BFLOAT16 instructions */
+1 −0
Original line number Diff line number Diff line
@@ -70,6 +70,7 @@ static const struct cpuid_dep cpuid_deps[] = {
	{ X86_FEATURE_CQM_MBM_LOCAL,		X86_FEATURE_CQM_LLC   },
	{ X86_FEATURE_AVX512_BF16,		X86_FEATURE_AVX512VL  },
	{ X86_FEATURE_ENQCMD,			X86_FEATURE_XSAVES    },
	{ X86_FEATURE_PER_THREAD_MBA,		X86_FEATURE_MBA       },
	{}
};

+29 −27
Original line number Diff line number Diff line
@@ -168,6 +168,7 @@ struct rdt_resource rdt_resources_all[] = {
		.name			= "MB",
		.domains		= domain_init(RDT_RESOURCE_MBA),
		.cache_level		= 3,
		.parse_ctrlval		= parse_bw,
		.format_str		= "%d=%*u",
		.fflags			= RFTYPE_RES_MB,
	},
@@ -254,22 +255,30 @@ static bool __get_mem_config_intel(struct rdt_resource *r)
{
	union cpuid_0x10_3_eax eax;
	union cpuid_0x10_x_edx edx;
	u32 ebx, ecx;
	u32 ebx, ecx, max_delay;

	cpuid_count(0x00000010, 3, &eax.full, &ebx, &ecx, &edx.full);
	r->num_closid = edx.split.cos_max + 1;
	r->membw.max_delay = eax.split.max_delay + 1;
	max_delay = eax.split.max_delay + 1;
	r->default_ctrl = MAX_MBA_BW;
	r->membw.arch_needs_linear = true;
	if (ecx & MBA_IS_LINEAR) {
		r->membw.delay_linear = true;
		r->membw.min_bw = MAX_MBA_BW - r->membw.max_delay;
		r->membw.bw_gran = MAX_MBA_BW - r->membw.max_delay;
		r->membw.min_bw = MAX_MBA_BW - max_delay;
		r->membw.bw_gran = MAX_MBA_BW - max_delay;
	} else {
		if (!rdt_get_mb_table(r))
			return false;
		r->membw.arch_needs_linear = false;
	}
	r->data_width = 3;

	if (boot_cpu_has(X86_FEATURE_PER_THREAD_MBA))
		r->membw.throttle_mode = THREAD_THROTTLE_PER_THREAD;
	else
		r->membw.throttle_mode = THREAD_THROTTLE_MAX;
	thread_throttle_mode_init();

	r->alloc_capable = true;
	r->alloc_enabled = true;

@@ -288,7 +297,13 @@ static bool __rdt_get_mem_config_amd(struct rdt_resource *r)

	/* AMD does not use delay */
	r->membw.delay_linear = false;
	r->membw.arch_needs_linear = false;

	/*
	 * AMD does not use memory delay throttle model to control
	 * the allocation like Intel does.
	 */
	r->membw.throttle_mode = THREAD_THROTTLE_UNDEFINED;
	r->membw.min_bw = 0;
	r->membw.bw_gran = 1;
	/* Max value is 2048, Data width should be 4 in decimal */
@@ -346,19 +361,6 @@ static void rdt_get_cdp_l2_config(void)
	rdt_get_cdp_config(RDT_RESOURCE_L2, RDT_RESOURCE_L2CODE);
}

/*
 * Return the id of the cache at @level for @cpu, or -1 if @cpu has no
 * cache at that level.
 */
static int get_cache_id(int cpu, int level)
{
	struct cpu_cacheinfo *ci = get_cpu_cacheinfo(cpu);
	int idx = 0;

	while (idx < ci->num_leaves) {
		if (ci->info_list[idx].level == level)
			return ci->info_list[idx].id;
		idx++;
	}

	return -1;
}

static void
mba_wrmsr_amd(struct rdt_domain *d, struct msr_param *m, struct rdt_resource *r)
{
@@ -556,7 +558,7 @@ static int domain_setup_mon_state(struct rdt_resource *r, struct rdt_domain *d)
 */
static void domain_add_cpu(int cpu, struct rdt_resource *r)
{
	int id = get_cache_id(cpu, r->cache_level);
	int id = get_cpu_cacheinfo_id(cpu, r->cache_level);
	struct list_head *add_pos = NULL;
	struct rdt_domain *d;

@@ -602,7 +604,7 @@ static void domain_add_cpu(int cpu, struct rdt_resource *r)

static void domain_remove_cpu(int cpu, struct rdt_resource *r)
{
	int id = get_cache_id(cpu, r->cache_level);
	int id = get_cpu_cacheinfo_id(cpu, r->cache_level);
	struct rdt_domain *d;

	d = rdt_find_domain(r, id, NULL);
@@ -918,12 +920,12 @@ static __init void rdt_init_res_defs_intel(void)
		    r->rid == RDT_RESOURCE_L3CODE ||
		    r->rid == RDT_RESOURCE_L2 ||
		    r->rid == RDT_RESOURCE_L2DATA ||
		    r->rid == RDT_RESOURCE_L2CODE)
			r->cbm_validate = cbm_validate_intel;
		else if (r->rid == RDT_RESOURCE_MBA) {
		    r->rid == RDT_RESOURCE_L2CODE) {
			r->cache.arch_has_sparse_bitmaps = false;
			r->cache.arch_has_empty_bitmaps = false;
		} else if (r->rid == RDT_RESOURCE_MBA) {
			r->msr_base = MSR_IA32_MBA_THRTL_BASE;
			r->msr_update = mba_wrmsr_intel;
			r->parse_ctrlval = parse_bw_intel;
		}
	}
}
@@ -938,12 +940,12 @@ static __init void rdt_init_res_defs_amd(void)
		    r->rid == RDT_RESOURCE_L3CODE ||
		    r->rid == RDT_RESOURCE_L2 ||
		    r->rid == RDT_RESOURCE_L2DATA ||
		    r->rid == RDT_RESOURCE_L2CODE)
			r->cbm_validate = cbm_validate_amd;
		else if (r->rid == RDT_RESOURCE_MBA) {
		    r->rid == RDT_RESOURCE_L2CODE) {
			r->cache.arch_has_sparse_bitmaps = true;
			r->cache.arch_has_empty_bitmaps = true;
		} else if (r->rid == RDT_RESOURCE_MBA) {
			r->msr_base = MSR_IA32_MBA_BW_BASE;
			r->msr_update = mba_wrmsr_amd;
			r->parse_ctrlval = parse_bw_amd;
		}
	}
}
+13 −79
Original line number Diff line number Diff line
@@ -21,53 +21,6 @@
#include <linux/slab.h>
#include "internal.h"

/*
 * Check whether MBA bandwidth percentage value is correct. The value is
 * checked against the minimum and maximum bandwidth values specified by
 * the hardware. The allocated bandwidth percentage is rounded to the next
 * control step available on the hardware.
 */
static bool bw_validate_amd(char *buf, unsigned long *data,
			    struct rdt_resource *r)
{
	unsigned long val;

	/* Reject anything that is not a plain decimal integer. */
	if (kstrtoul(buf, 10, &val)) {
		rdt_last_cmd_printf("Non-decimal digit in MB value %s\n", buf);
		return false;
	}

	/* Enforce the hardware-advertised [min_bw, default_ctrl] range. */
	if (val < r->membw.min_bw || val > r->default_ctrl) {
		rdt_last_cmd_printf("MB value %ld out of range [%d,%d]\n", val,
				    r->membw.min_bw, r->default_ctrl);
		return false;
	}

	/* Round up to the next bandwidth granularity step. */
	*data = roundup(val, (unsigned long)r->membw.bw_gran);
	return true;
}

/*
 * Parse a user-supplied AMD MBA bandwidth value for one resource domain.
 * Each domain may be written at most once per schemata update; a second
 * value for the same domain is rejected. On success the validated
 * (range-checked, granularity-rounded) value is staged in @d->new_ctrl.
 *
 * Returns 0 on success, -EINVAL on a duplicate domain or invalid value.
 */
int parse_bw_amd(struct rdt_parse_data *data, struct rdt_resource *r,
		 struct rdt_domain *d)
{
	unsigned long bw_val;

	/* A domain can only appear once per update. */
	if (d->have_new_ctrl) {
		rdt_last_cmd_printf("Duplicate domain %d\n", d->id);
		return -EINVAL;
	}

	if (!bw_validate_amd(data->buf, &bw_val, r))
		return -EINVAL;

	/* Stage the value; it is applied later when the update commits. */
	d->new_ctrl = bw_val;
	d->have_new_ctrl = true;

	return 0;
}

/*
 * Check whether MBA bandwidth percentage value is correct. The value is
 * checked against the minimum and max bandwidth values specified by the
@@ -82,7 +35,7 @@ static bool bw_validate(char *buf, unsigned long *data, struct rdt_resource *r)
	/*
	 * Only linear delay values is supported for current Intel SKUs.
	 */
	if (!r->membw.delay_linear) {
	if (!r->membw.delay_linear && r->membw.arch_needs_linear) {
		rdt_last_cmd_puts("No support for non-linear MB domains\n");
		return false;
	}
@@ -104,7 +57,7 @@ static bool bw_validate(char *buf, unsigned long *data, struct rdt_resource *r)
	return true;
}

int parse_bw_intel(struct rdt_parse_data *data, struct rdt_resource *r,
int parse_bw(struct rdt_parse_data *data, struct rdt_resource *r,
	     struct rdt_domain *d)
{
	unsigned long bw_val;
@@ -123,12 +76,14 @@ int parse_bw_intel(struct rdt_parse_data *data, struct rdt_resource *r,
}

/*
 * Check whether a cache bit mask is valid. The SDM says:
 * Check whether a cache bit mask is valid.
 * For Intel the SDM says:
 *	Please note that all (and only) contiguous '1' combinations
 *	are allowed (e.g. FFFFH, 0FF0H, 003CH, etc.).
 * Additionally Haswell requires at least two bits set.
 * AMD allows non-contiguous bitmasks.
 */
bool cbm_validate_intel(char *buf, u32 *data, struct rdt_resource *r)
static bool cbm_validate(char *buf, u32 *data, struct rdt_resource *r)
{
	unsigned long first_bit, zero_bit, val;
	unsigned int cbm_len = r->cache.cbm_len;
@@ -140,7 +95,8 @@ bool cbm_validate_intel(char *buf, u32 *data, struct rdt_resource *r)
		return false;
	}

	if (val == 0 || val > r->default_ctrl) {
	if ((!r->cache.arch_has_empty_bitmaps && val == 0) ||
	    val > r->default_ctrl) {
		rdt_last_cmd_puts("Mask out of range\n");
		return false;
	}
@@ -148,7 +104,9 @@ bool cbm_validate_intel(char *buf, u32 *data, struct rdt_resource *r)
	first_bit = find_first_bit(&val, cbm_len);
	zero_bit = find_next_zero_bit(&val, cbm_len, first_bit);

	if (find_next_bit(&val, cbm_len, zero_bit) < cbm_len) {
	/* Are non-contiguous bitmaps allowed? */
	if (!r->cache.arch_has_sparse_bitmaps &&
	    (find_next_bit(&val, cbm_len, zero_bit) < cbm_len)) {
		rdt_last_cmd_printf("The mask %lx has non-consecutive 1-bits\n", val);
		return false;
	}
@@ -163,30 +121,6 @@ bool cbm_validate_intel(char *buf, u32 *data, struct rdt_resource *r)
	return true;
}

/*
 * Check whether a cache bit mask is valid. AMD allows non-contiguous
 * bitmasks, so only a parse check and a range check are required here.
 */
bool cbm_validate_amd(char *buf, u32 *data, struct rdt_resource *r)
{
	unsigned long mask;

	/* The mask is supplied in hexadecimal. */
	if (kstrtoul(buf, 16, &mask)) {
		rdt_last_cmd_printf("Non-hex character in the mask %s\n", buf);
		return false;
	}

	/* An all-zero mask is allowed on AMD; only the upper bound matters. */
	if (mask > r->default_ctrl) {
		rdt_last_cmd_puts("Mask out of range\n");
		return false;
	}

	*data = mask;
	return true;
}

/*
 * Read one cache bit mask (hex). Check that it is valid for the current
 * resource type.
@@ -212,7 +146,7 @@ int parse_cbm(struct rdt_parse_data *data, struct rdt_resource *r,
		return -EINVAL;
	}

	if (!r->cbm_validate(data->buf, &cbm_val, r))
	if (!cbm_validate(data->buf, &cbm_val, r))
		return -EINVAL;

	if ((rdtgrp->mode == RDT_MODE_EXCLUSIVE ||
Loading