From a0a0999507752574b80d7fbd179cce052c92791b Mon Sep 17 00:00:00 2001 From: Chen Yu Date: Fri, 26 Sep 2025 12:17:22 +0800 Subject: [PATCH 01/12] x86/resctrl: Support Sub-NUMA Cluster (SNC) mode on Clearwater Forest Clearwater Forest supports SNC mode. Add it to the snc_cpu_ids[] table. Signed-off-by: Chen Yu Signed-off-by: Borislav Petkov (AMD) Reviewed-by: Reinette Chatre Acked-by: Tony Luck --- arch/x86/kernel/cpu/resctrl/monitor.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/x86/kernel/cpu/resctrl/monitor.c b/arch/x86/kernel/cpu/resctrl/monitor.c index c8945610d455..93f4c2d8bb6b 100644 --- a/arch/x86/kernel/cpu/resctrl/monitor.c +++ b/arch/x86/kernel/cpu/resctrl/monitor.c @@ -355,6 +355,7 @@ static const struct x86_cpu_id snc_cpu_ids[] __initconst = { X86_MATCH_VFM(INTEL_EMERALDRAPIDS_X, 0), X86_MATCH_VFM(INTEL_GRANITERAPIDS_X, 0), X86_MATCH_VFM(INTEL_ATOM_CRESTMONT_X, 0), + X86_MATCH_VFM(INTEL_ATOM_DARKMONT_X, 0), {} }; From 5a88a6e92bbf5963fadeb0e8f8c747c2fb425ba8 Mon Sep 17 00:00:00 2001 From: Reinette Chatre Date: Mon, 17 Nov 2025 16:42:45 -0800 Subject: [PATCH 02/12] fs/resctrl: Consider sparse masks when initializing new group's allocation A new resource group is intended to be created with sane defaults. For a cache resource this means all cache portions the new group could possibly allocate into. This includes unused cache portions and shareable cache portions used by other groups and hardware. New resource group creation does not take sparse masks into account. After determining the bitmask reflecting the new group's possible allocations the bitmask is forced to be contiguous even if the system supports sparse masks. For example, a new group could by default allocate into a large portion of cache represented by 0xff0f, but it is instead created with a mask of 0xf. Do not force a contiguous allocation range if the system supports sparse masks. Signed-off-by: Reinette Chatre Signed-off-by: Borislav Petkov (AMD) Link: https://patch.msgid.link/abbbb008bc09d982d715e79d3b885c10f92c64e0.1763426240.git.reinette.chatre@intel.com --- fs/resctrl/rdtgroup.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/fs/resctrl/rdtgroup.c b/fs/resctrl/rdtgroup.c index 0320360cd7a6..41ce4b377af4 100644 --- a/fs/resctrl/rdtgroup.c +++ b/fs/resctrl/rdtgroup.c @@ -3383,11 +3383,12 @@ static u32 cbm_ensure_valid(u32 _val, struct rdt_resource *r) { unsigned int cbm_len = r->cache.cbm_len; unsigned long first_bit, zero_bit; - unsigned long val = _val; + unsigned long val; - if (!val) - return 0; + if (!_val || r->cache.arch_has_sparse_bitmasks) + return _val; + val = _val; first_bit = find_first_bit(&val, cbm_len); zero_bit = find_next_zero_bit(&val, cbm_len, first_bit); From 3767def18f4cc394dc98cb93e78c3cc9afc4c515 Mon Sep 17 00:00:00 2001 From: Babu Moger Date: Wed, 12 Nov 2025 18:57:27 -0600 Subject: [PATCH 03/12] x86/cpufeatures: Add support for L3 Smart Data Cache Injection Allocation Enforcement Smart Data Cache Injection (SDCI) is a mechanism that enables direct insertion of data from I/O devices into the L3 cache. By directly caching data from I/O devices rather than first storing the I/O data in DRAM, SDCI reduces demands on DRAM bandwidth and reduces latency to the processor consuming the I/O data. The SDCIAE (SDCI Allocation Enforcement) PQE feature allows system software to control the portion of the L3 cache used for SDCI. When enabled, SDCIAE forces all SDCI lines to be placed into the L3 cache partitions identified by the highest-supported L3_MASK_n register, where n is the maximum supported CLOSID. Add CPUID feature bit that can be used to configure SDCIAE. The SDCIAE feature details are documented in: AMD64 Architecture Programmer's Manual Volume 2: System Programming Publication # 24593 Revision 3.41 section 19.4.7 L3 Smart Data Cache Injection Allocation Enforcement (SDCIAE). available at https://bugzilla.kernel.org/show_bug.cgi?id=206537 Signed-off-by: Babu Moger Signed-off-by: Borislav Petkov (AMD) Reviewed-by: Reinette Chatre Acked-by: Borislav Petkov (AMD) Link: https://patch.msgid.link/83ca10d981c48e86df2c3ad9658bb3ba3544c763.1762995456.git.babu.moger@amd.com --- arch/x86/include/asm/cpufeatures.h | 2 ++ arch/x86/kernel/cpu/cpuid-deps.c | 1 + arch/x86/kernel/cpu/scattered.c | 1 + 3 files changed, 4 insertions(+) diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index 4091a776e37a..2abee3e9dc75 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -500,6 +500,8 @@ #define X86_FEATURE_ABMC (21*32+15) /* Assignable Bandwidth Monitoring Counters */ #define X86_FEATURE_MSR_IMM (21*32+16) /* MSR immediate form instructions */ +#define X86_FEATURE_SDCIAE (21*32+18) /* L3 Smart Data Cache Injection Allocation Enforcement */ + /* * BUG word(s) */ diff --git a/arch/x86/kernel/cpu/cpuid-deps.c b/arch/x86/kernel/cpu/cpuid-deps.c index 46efcbd6afa4..87e78586395b 100644 --- a/arch/x86/kernel/cpu/cpuid-deps.c +++ b/arch/x86/kernel/cpu/cpuid-deps.c @@ -72,6 +72,7 @@ static const struct cpuid_dep cpuid_deps[] = { { X86_FEATURE_CQM_MBM_LOCAL, X86_FEATURE_CQM_LLC }, { X86_FEATURE_BMEC, X86_FEATURE_CQM_MBM_TOTAL }, { X86_FEATURE_BMEC, X86_FEATURE_CQM_MBM_LOCAL }, + { X86_FEATURE_SDCIAE, X86_FEATURE_CAT_L3 }, { X86_FEATURE_AVX512_BF16, X86_FEATURE_AVX512VL }, { X86_FEATURE_AVX512_FP16, X86_FEATURE_AVX512BW }, { X86_FEATURE_ENQCMD, X86_FEATURE_XSAVES }, diff --git a/arch/x86/kernel/cpu/scattered.c b/arch/x86/kernel/cpu/scattered.c index caa4dc885c21..d113863a8eab 100644 --- a/arch/x86/kernel/cpu/scattered.c +++ b/arch/x86/kernel/cpu/scattered.c @@ -53,6 +53,7 @@ static const struct cpuid_bit cpuid_bits[] = { { X86_FEATURE_SMBA, CPUID_EBX, 2, 0x80000020, 0 }, { X86_FEATURE_BMEC, CPUID_EBX, 3, 0x80000020, 0 }, { X86_FEATURE_ABMC, CPUID_EBX, 5, 0x80000020, 0 }, + { X86_FEATURE_SDCIAE, CPUID_EBX, 6, 0x80000020, 0 }, { X86_FEATURE_TSA_SQ_NO, CPUID_ECX, 1, 0x80000021, 0 }, { X86_FEATURE_TSA_L1_NO, CPUID_ECX, 2, 0x80000021, 0 }, { X86_FEATURE_AMD_WORKLOAD_CLASS, CPUID_EAX, 22, 0x80000021, 0 }, From 4d4840b1251acc194e4b59d9a5bfba23cd573ed3 Mon Sep 17 00:00:00 2001 From: Babu Moger Date: Wed, 12 Nov 2025 18:57:28 -0600 Subject: [PATCH 04/12] x86/resctrl: Add SDCIAE feature in the command line options Add a kernel command-line parameter to enable or disable the exposure of the L3 Smart Data Cache Injection Allocation Enforcement (SDCIAE) hardware feature to resctrl. Signed-off-by: Babu Moger Signed-off-by: Borislav Petkov (AMD) Reviewed-by: Reinette Chatre Link: https://patch.msgid.link/c623edf7cb369ba9da966de47d9f1b666778a40e.1762995456.git.babu.moger@amd.com --- .../admin-guide/kernel-parameters.txt | 2 +- Documentation/filesystems/resctrl.rst | 23 ++++++++++--------- arch/x86/kernel/cpu/resctrl/core.c | 2 ++ 3 files changed, 15 insertions(+), 12 deletions(-) diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index 6c42061ca20e..29db32a86815 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -6207,7 +6207,7 @@ rdt= [HW,X86,RDT] Turn on/off individual RDT features. List is: cmt, mbmtotal, mbmlocal, l3cat, l3cdp, l2cat, l2cdp, - mba, smba, bmec, abmc. + mba, smba, bmec, abmc, sdciae. E.g. to turn on cmt and turn off mba use: rdt=cmt,!mba diff --git a/Documentation/filesystems/resctrl.rst b/Documentation/filesystems/resctrl.rst index b7f35b07876a..d7a51cae6b26 100644 --- a/Documentation/filesystems/resctrl.rst +++ b/Documentation/filesystems/resctrl.rst @@ -17,17 +17,18 @@ AMD refers to this feature as AMD Platform Quality of Service(AMD QoS). This feature is enabled by the CONFIG_X86_CPU_RESCTRL and the x86 /proc/cpuinfo flag bits: -=============================================== ================================ -RDT (Resource Director Technology) Allocation "rdt_a" -CAT (Cache Allocation Technology) "cat_l3", "cat_l2" -CDP (Code and Data Prioritization) "cdp_l3", "cdp_l2" -CQM (Cache QoS Monitoring) "cqm_llc", "cqm_occup_llc" -MBM (Memory Bandwidth Monitoring) "cqm_mbm_total", "cqm_mbm_local" -MBA (Memory Bandwidth Allocation) "mba" -SMBA (Slow Memory Bandwidth Allocation) "" -BMEC (Bandwidth Monitoring Event Configuration) "" -ABMC (Assignable Bandwidth Monitoring Counters) "" -=============================================== ================================ +=============================================================== ================================ +RDT (Resource Director Technology) Allocation "rdt_a" +CAT (Cache Allocation Technology) "cat_l3", "cat_l2" +CDP (Code and Data Prioritization) "cdp_l3", "cdp_l2" +CQM (Cache QoS Monitoring) "cqm_llc", "cqm_occup_llc" +MBM (Memory Bandwidth Monitoring) "cqm_mbm_total", "cqm_mbm_local" +MBA (Memory Bandwidth Allocation) "mba" +SMBA (Slow Memory Bandwidth Allocation) "" +BMEC (Bandwidth Monitoring Event Configuration) "" +ABMC (Assignable Bandwidth Monitoring Counters) "" +SDCIAE (Smart Data Cache Injection Allocation Enforcement) "" +=============================================================== ================================ Historically, new features were made visible by default in /proc/cpuinfo. This resulted in the feature flags becoming hard to parse by humans. Adding a new diff --git a/arch/x86/kernel/cpu/resctrl/core.c b/arch/x86/kernel/cpu/resctrl/core.c index 06ca5a30140c..2b2935b3df8d 100644 --- a/arch/x86/kernel/cpu/resctrl/core.c +++ b/arch/x86/kernel/cpu/resctrl/core.c @@ -719,6 +719,7 @@ enum { RDT_FLAG_SMBA, RDT_FLAG_BMEC, RDT_FLAG_ABMC, + RDT_FLAG_SDCIAE, }; #define RDT_OPT(idx, n, f) \ @@ -745,6 +746,7 @@ static struct rdt_options rdt_options[] __ro_after_init = { RDT_OPT(RDT_FLAG_SMBA, "smba", X86_FEATURE_SMBA), RDT_OPT(RDT_FLAG_BMEC, "bmec", X86_FEATURE_BMEC), RDT_OPT(RDT_FLAG_ABMC, "abmc", X86_FEATURE_ABMC), + RDT_OPT(RDT_FLAG_SDCIAE, "sdciae", X86_FEATURE_SDCIAE), }; #define NUM_RDT_OPTIONS ARRAY_SIZE(rdt_options) From 7923ae7698cf9728501974d76d8ea712686281bc Mon Sep 17 00:00:00 2001 From: Babu Moger Date: Wed, 12 Nov 2025 18:57:29 -0600 Subject: [PATCH 05/12] x86,fs/resctrl: Detect io_alloc feature AMD's SDCIAE (SDCI Allocation Enforcement) PQE feature enables system software to control the portions of L3 cache used for direct insertion of data from I/O devices into the L3 cache. Introduce a generic resctrl cache resource property "io_alloc_capable" as the first part of the new "io_alloc" resctrl feature that will support AMD's SDCIAE. Any architecture can set a cache resource as "io_alloc_capable" if a portion of the cache can be allocated for I/O traffic. Set the "io_alloc_capable" property for the L3 cache resource on x86 (AMD) systems that support SDCIAE. Signed-off-by: Babu Moger Signed-off-by: Borislav Petkov (AMD) Reviewed-by: Reinette Chatre Link: https://patch.msgid.link/df85a9a6081674fd3ef6b4170920485512ce2ded.1762995456.git.babu.moger@amd.com --- arch/x86/kernel/cpu/resctrl/core.c | 7 +++++++ include/linux/resctrl.h | 3 +++ 2 files changed, 10 insertions(+) diff --git a/arch/x86/kernel/cpu/resctrl/core.c b/arch/x86/kernel/cpu/resctrl/core.c index 2b2935b3df8d..3792ab4819dc 100644 --- a/arch/x86/kernel/cpu/resctrl/core.c +++ b/arch/x86/kernel/cpu/resctrl/core.c @@ -274,6 +274,11 @@ static void rdt_get_cdp_config(int level) rdt_resources_all[level].r_resctrl.cdp_capable = true; } +static void rdt_set_io_alloc_capable(struct rdt_resource *r) +{ + r->cache.io_alloc_capable = true; +} + static void rdt_get_cdp_l3_config(void) { rdt_get_cdp_config(RDT_RESOURCE_L3); @@ -855,6 +860,8 @@ static __init bool get_rdt_alloc_resources(void) rdt_get_cache_alloc_cfg(1, r); if (rdt_cpu_has(X86_FEATURE_CDP_L3)) rdt_get_cdp_l3_config(); + if (rdt_cpu_has(X86_FEATURE_SDCIAE)) + rdt_set_io_alloc_capable(r); ret = true; } if (rdt_cpu_has(X86_FEATURE_CAT_L2)) { diff --git a/include/linux/resctrl.h b/include/linux/resctrl.h index a7d92718b653..533f240dbe21 100644 --- a/include/linux/resctrl.h +++ b/include/linux/resctrl.h @@ -206,6 +206,8 @@ struct rdt_mon_domain { * @arch_has_sparse_bitmasks: True if a bitmask like f00f is valid. * @arch_has_per_cpu_cfg: True if QOS_CFG register for this cache * level has CPU scope. + * @io_alloc_capable: True if portion of the cache can be configured + * for I/O traffic. */ struct resctrl_cache { unsigned int cbm_len; @@ -213,6 +215,7 @@ struct resctrl_cache { unsigned int shareable_bits; bool arch_has_sparse_bitmasks; bool arch_has_per_cpu_cfg; + bool io_alloc_capable; }; /** From 556d2892aa715286d840a74216c8fff885559261 Mon Sep 17 00:00:00 2001 From: Babu Moger Date: Wed, 12 Nov 2025 18:57:30 -0600 Subject: [PATCH 06/12] x86,fs/resctrl: Implement "io_alloc" enable/disable handlers "io_alloc" is the generic name of the new resctrl feature that enables system software to configure the portion of cache allocated for I/O traffic. On AMD systems, "io_alloc" resctrl feature is backed by AMD's L3 Smart Data Cache Injection Allocation Enforcement (SDCIAE). Introduce the architecture-specific functions that resctrl fs should call to enable, disable, or check status of the "io_alloc" feature. Change SDCIAE state by setting (to enable) or clearing (to disable) bit 1 of MSR_IA32_L3_QOS_EXT_CFG on all logical processors within the cache domain. Signed-off-by: Babu Moger Signed-off-by: Borislav Petkov (AMD) Reviewed-by: Reinette Chatre Link: https://patch.msgid.link/9e9070100c320eab5368e088a3642443dee95ed7.1762995456.git.babu.moger@amd.com --- arch/x86/kernel/cpu/resctrl/ctrlmondata.c | 40 +++++++++++++++++++++++ arch/x86/kernel/cpu/resctrl/internal.h | 5 +++ include/linux/resctrl.h | 21 ++++++++++++ 3 files changed, 66 insertions(+) diff --git a/arch/x86/kernel/cpu/resctrl/ctrlmondata.c b/arch/x86/kernel/cpu/resctrl/ctrlmondata.c index 1189c0df4ad7..b20e705606b8 100644 --- a/arch/x86/kernel/cpu/resctrl/ctrlmondata.c +++ b/arch/x86/kernel/cpu/resctrl/ctrlmondata.c @@ -91,3 +91,43 @@ u32 resctrl_arch_get_config(struct rdt_resource *r, struct rdt_ctrl_domain *d, return hw_dom->ctrl_val[idx]; } + +bool resctrl_arch_get_io_alloc_enabled(struct rdt_resource *r) +{ + return resctrl_to_arch_res(r)->sdciae_enabled; +} + +static void resctrl_sdciae_set_one_amd(void *arg) +{ + bool *enable = arg; + + if (*enable) + msr_set_bit(MSR_IA32_L3_QOS_EXT_CFG, SDCIAE_ENABLE_BIT); + else + msr_clear_bit(MSR_IA32_L3_QOS_EXT_CFG, SDCIAE_ENABLE_BIT); +} + +static void _resctrl_sdciae_enable(struct rdt_resource *r, bool enable) +{ + struct rdt_ctrl_domain *d; + + /* Walking r->ctrl_domains, ensure it can't race with cpuhp */ + lockdep_assert_cpus_held(); + + /* Update MSR_IA32_L3_QOS_EXT_CFG MSR on all the CPUs in all domains */ + list_for_each_entry(d, &r->ctrl_domains, hdr.list) + on_each_cpu_mask(&d->hdr.cpu_mask, resctrl_sdciae_set_one_amd, &enable, 1); +} + +int resctrl_arch_io_alloc_enable(struct rdt_resource *r, bool enable) +{ + struct rdt_hw_resource *hw_res = resctrl_to_arch_res(r); + + if (hw_res->r_resctrl.cache.io_alloc_capable && + hw_res->sdciae_enabled != enable) { + _resctrl_sdciae_enable(r, enable); + hw_res->sdciae_enabled = enable; + } + + return 0; +} diff --git a/arch/x86/kernel/cpu/resctrl/internal.h b/arch/x86/kernel/cpu/resctrl/internal.h index 9f4c2f0aaf5c..4a916c84a322 100644 --- a/arch/x86/kernel/cpu/resctrl/internal.h +++ b/arch/x86/kernel/cpu/resctrl/internal.h @@ -46,6 +46,9 @@ struct arch_mbm_state { #define ABMC_EXTENDED_EVT_ID BIT(31) #define ABMC_EVT_ID BIT(0) +/* Setting bit 1 in MSR_IA32_L3_QOS_EXT_CFG enables the SDCIAE feature. */ +#define SDCIAE_ENABLE_BIT 1 + /** * struct rdt_hw_ctrl_domain - Arch private attributes of a set of CPUs that share * a resource for a control function @@ -112,6 +115,7 @@ struct msr_param { * @mbm_width: Monitor width, to detect and correct for overflow. * @cdp_enabled: CDP state of this resource * @mbm_cntr_assign_enabled: ABMC feature is enabled + * @sdciae_enabled: SDCIAE feature (backing "io_alloc") is enabled. * * Members of this structure are either private to the architecture * e.g. mbm_width, or accessed via helpers that provide abstraction. e.g. @@ -126,6 +130,7 @@ struct rdt_hw_resource { unsigned int mbm_width; bool cdp_enabled; bool mbm_cntr_assign_enabled; + bool sdciae_enabled; }; static inline struct rdt_hw_resource *resctrl_to_arch_res(struct rdt_resource *r) diff --git a/include/linux/resctrl.h b/include/linux/resctrl.h index 533f240dbe21..54701668b3df 100644 --- a/include/linux/resctrl.h +++ b/include/linux/resctrl.h @@ -657,6 +657,27 @@ void resctrl_arch_reset_cntr(struct rdt_resource *r, struct rdt_mon_domain *d, u32 closid, u32 rmid, int cntr_id, enum resctrl_event_id eventid); +/** + * resctrl_arch_io_alloc_enable() - Enable/disable io_alloc feature. + * @r: The resctrl resource. + * @enable: Enable (true) or disable (false) io_alloc on resource @r. + * + * This can be called from any CPU. + * + * Return: + * 0 on success, <0 on error. + */ +int resctrl_arch_io_alloc_enable(struct rdt_resource *r, bool enable); + +/** + * resctrl_arch_get_io_alloc_enabled() - Get io_alloc feature state. + * @r: The resctrl resource. + * + * Return: + * true if io_alloc is enabled or false if disabled. + */ +bool resctrl_arch_get_io_alloc_enabled(struct rdt_resource *r); + extern unsigned int resctrl_rmid_realloc_threshold; extern unsigned int resctrl_rmid_realloc_limit; From 48068e565045ee0c77fdb34225ac6dedb5871fc2 Mon Sep 17 00:00:00 2001 From: Babu Moger Date: Wed, 12 Nov 2025 18:57:31 -0600 Subject: [PATCH 07/12] fs/resctrl: Introduce interface to display "io_alloc" support Introduce the "io_alloc" resctrl file to the "info" area of a cache resource, for example /sys/fs/resctrl/info/L3/io_alloc. "io_alloc" indicates support for the "io_alloc" feature that allows direct insertion of data from I/O devices into the cache. Restrict exposing support for "io_alloc" to the L3 resource that is the only resource where this feature can be backed by AMD's L3 Smart Data Cache Injection Allocation Enforcement (SDCIAE). With that, the "io_alloc" file is only visible to user space if the L3 resource supports "io_alloc". Doing so makes the file visible for all cache resources though, for example also L2 cache (if it supports cache allocation). As a consequence, add capability for file to report expected "enabled" and "disabled", as well as "not supported". Signed-off-by: Babu Moger Signed-off-by: Borislav Petkov (AMD) Reviewed-by: Reinette Chatre Link: https://patch.msgid.link/e8b116a8f424128b227734bb1d433c14af478d90.1762995456.git.babu.moger@amd.com --- Documentation/filesystems/resctrl.rst | 15 +++++++++++++++ fs/resctrl/ctrlmondata.c | 21 +++++++++++++++++++++ fs/resctrl/internal.h | 1 + fs/resctrl/rdtgroup.c | 22 ++++++++++++++++++++++ 4 files changed, 59 insertions(+) diff --git a/Documentation/filesystems/resctrl.rst b/Documentation/filesystems/resctrl.rst index d7a51cae6b26..108995640ca5 100644 --- a/Documentation/filesystems/resctrl.rst +++ b/Documentation/filesystems/resctrl.rst @@ -137,6 +137,21 @@ related to allocation: "1": Non-contiguous 1s value in CBM is supported. +"io_alloc": + "io_alloc" enables system software to configure the portion of + the cache allocated for I/O traffic. File may only exist if the + system supports this feature on some of its cache resources. + + "disabled": + Resource supports "io_alloc" but the feature is disabled. + Portions of cache used for allocation of I/O traffic cannot + be configured. + "enabled": + Portions of cache used for allocation of I/O traffic + can be configured using "io_alloc_cbm". + "not supported": + Support not available for this resource. + Memory bandwidth(MB) subdirectory contains the following files with respect to allocation: diff --git a/fs/resctrl/ctrlmondata.c b/fs/resctrl/ctrlmondata.c index 0d0ef54fc4de..78a8e7b4ba24 100644 --- a/fs/resctrl/ctrlmondata.c +++ b/fs/resctrl/ctrlmondata.c @@ -676,3 +676,24 @@ out: rdtgroup_kn_unlock(of->kn); return ret; } + +int resctrl_io_alloc_show(struct kernfs_open_file *of, struct seq_file *seq, void *v) +{ + struct resctrl_schema *s = rdt_kn_parent_priv(of->kn); + struct rdt_resource *r = s->res; + + mutex_lock(&rdtgroup_mutex); + + if (r->cache.io_alloc_capable) { + if (resctrl_arch_get_io_alloc_enabled(r)) + seq_puts(seq, "enabled\n"); + else + seq_puts(seq, "disabled\n"); + } else { + seq_puts(seq, "not supported\n"); + } + + mutex_unlock(&rdtgroup_mutex); + + return 0; +} diff --git a/fs/resctrl/internal.h b/fs/resctrl/internal.h index cf1fd82dc5a9..a18ed8889396 100644 --- a/fs/resctrl/internal.h +++ b/fs/resctrl/internal.h @@ -426,6 +426,7 @@ int mbm_L3_assignments_show(struct kernfs_open_file *of, struct seq_file *s, voi ssize_t mbm_L3_assignments_write(struct kernfs_open_file *of, char *buf, size_t nbytes, loff_t off); +int resctrl_io_alloc_show(struct kernfs_open_file *of, struct seq_file *seq, void *v); #ifdef CONFIG_RESCTRL_FS_PSEUDO_LOCK int rdtgroup_locksetup_enter(struct rdtgroup *rdtgrp); diff --git a/fs/resctrl/rdtgroup.c b/fs/resctrl/rdtgroup.c index 41ce4b377af4..c1a603bb0c79 100644 --- a/fs/resctrl/rdtgroup.c +++ b/fs/resctrl/rdtgroup.c @@ -1947,6 +1947,12 @@ static struct rftype res_common_files[] = { .kf_ops = &rdtgroup_kf_single_ops, .seq_show = rdt_thread_throttle_mode_show, }, + { + .name = "io_alloc", + .mode = 0444, + .kf_ops = &rdtgroup_kf_single_ops, + .seq_show = resctrl_io_alloc_show, + }, { .name = "max_threshold_occupancy", .mode = 0644, @@ -2138,6 +2144,20 @@ static void thread_throttle_mode_init(void) RFTYPE_CTRL_INFO | RFTYPE_RES_MB); } +/* + * The resctrl file "io_alloc" is added using L3 resource. However, it results + * in this file being visible for *all* cache resources (eg. L2 cache), + * whether it supports "io_alloc" or not. + */ +static void io_alloc_init(void) +{ + struct rdt_resource *r = resctrl_arch_get_resource(RDT_RESOURCE_L3); + + if (r->cache.io_alloc_capable) + resctrl_file_fflags_init("io_alloc", RFTYPE_CTRL_INFO | + RFTYPE_RES_CACHE); +} + void resctrl_file_fflags_init(const char *config, unsigned long fflags) { struct rftype *rft; @@ -4409,6 +4429,8 @@ int resctrl_init(void) thread_throttle_mode_init(); + io_alloc_init(); + ret = resctrl_mon_resource_init(); if (ret) return ret; From 9445c7059c1c3b097ec8e924eb374df84770bee5 Mon Sep 17 00:00:00 2001 From: Babu Moger Date: Wed, 12 Nov 2025 18:57:32 -0600 Subject: [PATCH 08/12] fs/resctrl: Add user interface to enable/disable io_alloc feature AMD's SDCIAE forces all SDCI lines to be placed into the L3 cache portions identified by the highest-supported L3_MASK_n register, where n is the maximum supported CLOSID. To support this, when io_alloc resctrl feature is enabled, reserve the highest CLOSID exclusively for I/O allocation traffic making it no longer available for general CPU cache allocation. Introduce user interface to enable/disable io_alloc feature and encourage users to enable io_alloc only when running workloads that can benefit from this functionality. On enable, initialize the io_alloc CLOSID with all usable CBMs across all the domains. Since CLOSIDs are managed by resctrl fs, it is least invasive to make "io_alloc is supported by maximum supported CLOSID" part of the initial resctrl fs support for io_alloc. Take care to minimally (only in error messages) expose this use of CLOSID for io_alloc to user space so that this is not required from other architectures that may support io_alloc differently in the future. When resctrl is mounted with "-o cdp" to enable code/data prioritization, there are two L3 resources that can support I/O allocation: L3CODE and L3DATA. From resctrl fs perspective the two resources share a CLOSID and the architecture's available CLOSID are halved to support this. The architecture's underlying CLOSID used by SDCIAE when CDP is enabled is the CLOSID associated with the CDP_CODE resource, but from resctrl's perspective there is only one CLOSID for both CDP_CODE and CDP_DATA. CDP_DATA is thus not usable for general (CPU) cache allocation nor I/O allocation. Keep the CDP_CODE and CDP_DATA I/O alloc status in sync to avoid any confusion to user space. That is, enabling io_alloc on CDP_CODE does so on CDP_DATA and vice-versa, and keep the I/O allocation CBMs of CDP_CODE and CDP_DATA in sync. Signed-off-by: Babu Moger Signed-off-by: Borislav Petkov (AMD) Reviewed-by: Reinette Chatre Link: https://patch.msgid.link/c7d3037795e653e22b02d8fc73ca80d9b075031c.1762995456.git.babu.moger@amd.com --- Documentation/filesystems/resctrl.rst | 30 ++++++ fs/resctrl/ctrlmondata.c | 126 ++++++++++++++++++++++++++ fs/resctrl/internal.h | 11 +++ fs/resctrl/rdtgroup.c | 24 ++++- 4 files changed, 188 insertions(+), 3 deletions(-) diff --git a/Documentation/filesystems/resctrl.rst b/Documentation/filesystems/resctrl.rst index 108995640ca5..91c71e254bbd 100644 --- a/Documentation/filesystems/resctrl.rst +++ b/Documentation/filesystems/resctrl.rst @@ -73,6 +73,11 @@ The 'info' directory contains information about the enabled resources. Each resource has its own subdirectory. The subdirectory names reflect the resource names. +Most of the files in the resource's subdirectory are read-only, and +describe properties of the resource. Resources that support global +configuration options also include writable files that can be used +to modify those settings. + Each subdirectory contains the following files with respect to allocation: @@ -152,6 +157,31 @@ related to allocation: "not supported": Support not available for this resource. + The feature can be modified by writing to the interface, for example: + + To enable:: + + # echo 1 > /sys/fs/resctrl/info/L3/io_alloc + + To disable:: + + # echo 0 > /sys/fs/resctrl/info/L3/io_alloc + + The underlying implementation may reduce resources available to + general (CPU) cache allocation. See architecture specific notes + below. Depending on usage requirements the feature can be enabled + or disabled. + + On AMD systems, io_alloc feature is supported by the L3 Smart + Data Cache Injection Allocation Enforcement (SDCIAE). The CLOSID for + io_alloc is the highest CLOSID supported by the resource. When + io_alloc is enabled, the highest CLOSID is dedicated to io_alloc and + no longer available for general (CPU) cache allocation. When CDP is + enabled, io_alloc routes I/O traffic using the highest CLOSID allocated + for the instruction cache (CDP_CODE), making this CLOSID no longer + available for general (CPU) cache allocation for both the CDP_CODE + and CDP_DATA resources. + Memory bandwidth(MB) subdirectory contains the following files with respect to allocation: diff --git a/fs/resctrl/ctrlmondata.c b/fs/resctrl/ctrlmondata.c index 78a8e7b4ba24..454fdf3b9f3c 100644 --- a/fs/resctrl/ctrlmondata.c +++ b/fs/resctrl/ctrlmondata.c @@ -697,3 +697,129 @@ int resctrl_io_alloc_show(struct kernfs_open_file *of, struct seq_file *seq, voi return 0; } + +/* + * resctrl_io_alloc_closid_supported() - io_alloc feature utilizes the + * highest CLOSID value to direct I/O traffic. Ensure that io_alloc_closid + * is in the supported range. + */ +static bool resctrl_io_alloc_closid_supported(u32 io_alloc_closid) +{ + return io_alloc_closid < closids_supported(); +} + +/* + * Initialize io_alloc CLOSID cache resource CBM with all usable (shared + * and unused) cache portions. + */ +static int resctrl_io_alloc_init_cbm(struct resctrl_schema *s, u32 closid) +{ + enum resctrl_conf_type peer_type; + struct rdt_resource *r = s->res; + struct rdt_ctrl_domain *d; + int ret; + + rdt_staged_configs_clear(); + + ret = rdtgroup_init_cat(s, closid); + if (ret < 0) + goto out; + + /* Keep CDP_CODE and CDP_DATA of io_alloc CLOSID's CBM in sync. */ + if (resctrl_arch_get_cdp_enabled(r->rid)) { + peer_type = resctrl_peer_type(s->conf_type); + list_for_each_entry(d, &s->res->ctrl_domains, hdr.list) + memcpy(&d->staged_config[peer_type], + &d->staged_config[s->conf_type], + sizeof(d->staged_config[0])); + } + + ret = resctrl_arch_update_domains(r, closid); +out: + rdt_staged_configs_clear(); + return ret; +} + +/* + * resctrl_io_alloc_closid() - io_alloc feature routes I/O traffic using + * the highest available CLOSID. Retrieve the maximum CLOSID supported by the + * resource. Note that if Code Data Prioritization (CDP) is enabled, the number + * of available CLOSIDs is reduced by half. + */ +static u32 resctrl_io_alloc_closid(struct rdt_resource *r) +{ + if (resctrl_arch_get_cdp_enabled(r->rid)) + return resctrl_arch_get_num_closid(r) / 2 - 1; + else + return resctrl_arch_get_num_closid(r) - 1; +} + +ssize_t resctrl_io_alloc_write(struct kernfs_open_file *of, char *buf, + size_t nbytes, loff_t off) +{ + struct resctrl_schema *s = rdt_kn_parent_priv(of->kn); + struct rdt_resource *r = s->res; + char const *grp_name; + u32 io_alloc_closid; + bool enable; + int ret; + + ret = kstrtobool(buf, &enable); + if (ret) + return ret; + + cpus_read_lock(); + mutex_lock(&rdtgroup_mutex); + + rdt_last_cmd_clear(); + + if (!r->cache.io_alloc_capable) { + rdt_last_cmd_printf("io_alloc is not supported on %s\n", s->name); + ret = -ENODEV; + goto out_unlock; + } + + /* If the feature is already up to date, no action is needed. */ + if (resctrl_arch_get_io_alloc_enabled(r) == enable) + goto out_unlock; + + io_alloc_closid = resctrl_io_alloc_closid(r); + if (!resctrl_io_alloc_closid_supported(io_alloc_closid)) { + rdt_last_cmd_printf("io_alloc CLOSID (ctrl_hw_id) %u is not available\n", + io_alloc_closid); + ret = -EINVAL; + goto out_unlock; + } + + if (enable) { + if (!closid_alloc_fixed(io_alloc_closid)) { + grp_name = rdtgroup_name_by_closid(io_alloc_closid); + WARN_ON_ONCE(!grp_name); + rdt_last_cmd_printf("CLOSID (ctrl_hw_id) %u for io_alloc is used by %s group\n", + io_alloc_closid, grp_name ? grp_name : "another"); + ret = -ENOSPC; + goto out_unlock; + } + + ret = resctrl_io_alloc_init_cbm(s, io_alloc_closid); + if (ret) { + rdt_last_cmd_puts("Failed to initialize io_alloc allocations\n"); + closid_free(io_alloc_closid); + goto out_unlock; + } + } else { + closid_free(io_alloc_closid); + } + + ret = resctrl_arch_io_alloc_enable(r, enable); + if (enable && ret) { + rdt_last_cmd_puts("Failed to enable io_alloc feature\n"); + closid_free(io_alloc_closid); + } + +out_unlock: + mutex_unlock(&rdtgroup_mutex); + cpus_read_unlock(); + + return ret ?: nbytes; +} diff --git a/fs/resctrl/internal.h b/fs/resctrl/internal.h index a18ed8889396..145e22f9a350 100644 --- a/fs/resctrl/internal.h +++ b/fs/resctrl/internal.h @@ -390,6 +390,8 @@ void rdt_staged_configs_clear(void); bool closid_allocated(unsigned int closid); +bool closid_alloc_fixed(u32 closid); + int resctrl_find_cleanest_closid(void); void *rdt_kn_parent_priv(struct kernfs_node *kn); @@ -428,6 +430,15 @@ ssize_t mbm_L3_assignments_write(struct kernfs_open_file *of, char *buf, size_t loff_t off); int resctrl_io_alloc_show(struct kernfs_open_file *of, struct seq_file *seq, void *v); +int rdtgroup_init_cat(struct resctrl_schema *s, u32 closid); + +enum resctrl_conf_type resctrl_peer_type(enum resctrl_conf_type my_type); + +ssize_t resctrl_io_alloc_write(struct kernfs_open_file *of, char *buf, + size_t nbytes, loff_t off); + +const char *rdtgroup_name_by_closid(u32 closid); + #ifdef CONFIG_RESCTRL_FS_PSEUDO_LOCK int rdtgroup_locksetup_enter(struct rdtgroup *rdtgrp); diff --git a/fs/resctrl/rdtgroup.c b/fs/resctrl/rdtgroup.c index c1a603bb0c79..9f9bd31e9662 100644 --- a/fs/resctrl/rdtgroup.c +++ b/fs/resctrl/rdtgroup.c @@ -226,6 +226,11 @@ bool closid_allocated(unsigned int closid) return !test_bit(closid, closid_free_map); } +bool closid_alloc_fixed(u32 closid) +{ + return __test_and_clear_bit(closid, closid_free_map); +} + /** * rdtgroup_mode_by_closid - Return mode of resource group with closid * @closid: closid if the resource group @@ -1247,7 +1252,7 @@ static int rdtgroup_mode_show(struct kernfs_open_file *of, return 0; } -static enum resctrl_conf_type resctrl_peer_type(enum resctrl_conf_type my_type) +enum resctrl_conf_type resctrl_peer_type(enum resctrl_conf_type my_type) { switch (my_type) { case CDP_CODE: @@ -1838,6 +1843,18 @@ void resctrl_bmec_files_show(struct rdt_resource *r, struct kernfs_node *l3_mon_ kernfs_put(mon_kn); } +const char *rdtgroup_name_by_closid(u32 closid) +{ + struct rdtgroup *rdtgrp; + + list_for_each_entry(rdtgrp, &rdt_all_groups, rdtgroup_list) { + if (rdtgrp->closid == closid) + return rdt_kn_name(rdtgrp->kn); + } + + return NULL; +} + /* rdtgroup information files for one cache resource. */ static struct rftype res_common_files[] = { { @@ -1949,9 +1966,10 @@ static struct rftype res_common_files[] = { }, { .name = "io_alloc", - .mode = 0444, + .mode = 0644, .kf_ops = &rdtgroup_kf_single_ops, .seq_show = resctrl_io_alloc_show, + .write = resctrl_io_alloc_write, }, { .name = "max_threshold_occupancy", @@ -3501,7 +3519,7 @@ static int __init_one_rdt_domain(struct rdt_ctrl_domain *d, struct resctrl_schem * If there are no more shareable bits available on any domain then * the entire allocation will fail. */ -static int rdtgroup_init_cat(struct resctrl_schema *s, u32 closid) +int rdtgroup_init_cat(struct resctrl_schema *s, u32 closid) { struct rdt_ctrl_domain *d; int ret; From 77b662326200135fe72cedc47fb1b0e5679d604d Mon Sep 17 00:00:00 2001 From: Babu Moger Date: Wed, 12 Nov 2025 18:57:33 -0600 Subject: [PATCH 09/12] fs/resctrl: Introduce interface to display io_alloc CBMs Introduce the "io_alloc_cbm" resctrl file to display the capacity bitmasks (CBMs) that represent the portions of each cache instance allocated for I/O traffic on a cache resource that supports the "io_alloc" feature. io_alloc_cbm resides in the info directory of a cache resource, for example, /sys/fs/resctrl/info/L3/. Since the resource name is part of the path, it is not necessary to display the resource name as done in the schemata file. When CDP is enabled, io_alloc routes traffic using the highest CLOSID associated with the CDP_CODE resource and that CLOSID becomes unusable for the CDP_DATA resource. The highest CLOSID of CDP_CODE and CDP_DATA resources will be kept in sync to ensure consistent user interface. In preparation for this, access the CBMs for I/O traffic through highest CLOSID of either CDP_CODE or CDP_DATA resource. Signed-off-by: Babu Moger Signed-off-by: Borislav Petkov (AMD) Reviewed-by: Reinette Chatre Link: https://patch.msgid.link/55a3ff66a70e7ce8239f022e62b334e9d64af604.1762995456.git.babu.moger@amd.com --- Documentation/filesystems/resctrl.rst | 19 +++++++++++ fs/resctrl/ctrlmondata.c | 45 +++++++++++++++++++++++++-- fs/resctrl/internal.h | 2 ++ fs/resctrl/rdtgroup.c | 11 ++++++- 4 files changed, 73 insertions(+), 4 deletions(-) diff --git a/Documentation/filesystems/resctrl.rst b/Documentation/filesystems/resctrl.rst index 91c71e254bbd..e7994538e0ce 100644 --- a/Documentation/filesystems/resctrl.rst +++ b/Documentation/filesystems/resctrl.rst @@ -182,6 +182,25 @@ related to allocation: available for general (CPU) cache allocation for both the CDP_CODE and CDP_DATA resources. +"io_alloc_cbm": + Capacity bitmasks that describe the portions of cache instances to + which I/O traffic from supported I/O devices are routed when "io_alloc" + is enabled. + + CBMs are displayed in the following format: + + =;=;... + + Example:: + + # cat /sys/fs/resctrl/info/L3/io_alloc_cbm + 0=ffff;1=ffff + + When CDP is enabled "io_alloc_cbm" associated with the CDP_DATA and CDP_CODE + resources may reflect the same values. For example, values read from and + written to /sys/fs/resctrl/info/L3DATA/io_alloc_cbm may be reflected by + /sys/fs/resctrl/info/L3CODE/io_alloc_cbm and vice versa. + Memory bandwidth(MB) subdirectory contains the following files with respect to allocation: diff --git a/fs/resctrl/ctrlmondata.c b/fs/resctrl/ctrlmondata.c index 454fdf3b9f3c..1ac89b107e6f 100644 --- a/fs/resctrl/ctrlmondata.c +++ b/fs/resctrl/ctrlmondata.c @@ -381,7 +381,8 @@ out: return ret ?: nbytes; } -static void show_doms(struct seq_file *s, struct resctrl_schema *schema, int closid) +static void show_doms(struct seq_file *s, struct resctrl_schema *schema, + char *resource_name, int closid) { struct rdt_resource *r = schema->res; struct rdt_ctrl_domain *dom; @@ -391,7 +392,8 @@ static void show_doms(struct seq_file *s, struct resctrl_schema *schema, int clo /* Walking r->domains, ensure it can't race with cpuhp */ lockdep_assert_cpus_held(); - seq_printf(s, "%*s:", max_name_width, schema->name); + if (resource_name) + seq_printf(s, "%*s:", max_name_width, resource_name); list_for_each_entry(dom, &r->ctrl_domains, hdr.list) { if (sep) seq_puts(s, ";"); @@ -437,7 +439,7 @@ int rdtgroup_schemata_show(struct kernfs_open_file *of, closid = rdtgrp->closid; list_for_each_entry(schema, &resctrl_schema_all, list) { if (closid < schema->num_closid) - show_doms(s, schema, closid); + show_doms(s, schema, schema->name, closid); } } } else { @@ -823,3 +825,40 @@ out_unlock: return ret ?: nbytes; } + +int resctrl_io_alloc_cbm_show(struct kernfs_open_file *of, struct seq_file *seq, void *v) +{ + struct resctrl_schema *s = rdt_kn_parent_priv(of->kn); + struct rdt_resource *r = s->res; + int ret = 0; + + cpus_read_lock(); + mutex_lock(&rdtgroup_mutex); + + rdt_last_cmd_clear(); + + if (!r->cache.io_alloc_capable) { + rdt_last_cmd_printf("io_alloc is not supported on %s\n", s->name); + ret = -ENODEV; + goto out_unlock; + } + + if (!resctrl_arch_get_io_alloc_enabled(r)) { + rdt_last_cmd_printf("io_alloc is not enabled on %s\n", s->name); + ret = -EINVAL; + goto out_unlock; + } + + /* + * When CDP is enabled, the CBMs of the highest CLOSID of CDP_CODE and + * CDP_DATA are kept in sync. As a result, the io_alloc CBMs shown for + * either CDP resource are identical and accurately represent the CBMs + * used for I/O. + */ + show_doms(seq, s, NULL, resctrl_io_alloc_closid(r)); + +out_unlock: + mutex_unlock(&rdtgroup_mutex); + cpus_read_unlock(); + return ret; +} diff --git a/fs/resctrl/internal.h b/fs/resctrl/internal.h index 145e22f9a350..779a575e0828 100644 --- a/fs/resctrl/internal.h +++ b/fs/resctrl/internal.h @@ -438,6 +438,8 @@ ssize_t resctrl_io_alloc_write(struct kernfs_open_file *of, char *buf, size_t nbytes, loff_t off); const char *rdtgroup_name_by_closid(u32 closid); +int resctrl_io_alloc_cbm_show(struct kernfs_open_file *of, struct seq_file *seq, + void *v); #ifdef CONFIG_RESCTRL_FS_PSEUDO_LOCK int rdtgroup_locksetup_enter(struct rdtgroup *rdtgrp); diff --git a/fs/resctrl/rdtgroup.c b/fs/resctrl/rdtgroup.c index 9f9bd31e9662..ac326db9b5ba 100644 --- a/fs/resctrl/rdtgroup.c +++ b/fs/resctrl/rdtgroup.c @@ -1971,6 +1971,12 @@ static struct rftype res_common_files[] = { .seq_show = resctrl_io_alloc_show, .write = resctrl_io_alloc_write, }, + { + .name = "io_alloc_cbm", + .mode = 0444, + .kf_ops = &rdtgroup_kf_single_ops, + .seq_show = resctrl_io_alloc_cbm_show, + }, { .name = "max_threshold_occupancy", .mode = 0644, @@ -2171,9 +2177,12 @@ static void io_alloc_init(void) { struct rdt_resource *r = resctrl_arch_get_resource(RDT_RESOURCE_L3); - if (r->cache.io_alloc_capable) + if (r->cache.io_alloc_capable) { resctrl_file_fflags_init("io_alloc", RFTYPE_CTRL_INFO | RFTYPE_RES_CACHE); + resctrl_file_fflags_init("io_alloc_cbm", + RFTYPE_CTRL_INFO | RFTYPE_RES_CACHE); + } } void resctrl_file_fflags_init(const char *config, unsigned long fflags) From af1242eeca50b20076d1bc9a41653005634a8a4f Mon Sep 17 00:00:00 2001 From: Babu Moger Date: Wed, 12 Nov 2025 18:57:34 -0600 Subject: [PATCH 10/12] fs/resctrl: Modify struct rdt_parse_data to pass mode and CLOSID parse_cbm() requires resource group mode and CLOSID to validate the capacity bitmask (CBM). It is passed via struct rdtgroup in struct rdt_parse_data. The io_alloc feature also uses CBMs to indicate which portions of cache are allocated for I/O traffic. The CBMs are provided by user space and need to be validated the same as CBMs provided for general (CPU) cache allocation. parse_cbm() cannot be used as-is since io_alloc does not have rdtgroup context. Pass the resource group mode and CLOSID directly to parse_cbm() via struct rdt_parse_data, instead of through the rdtgroup struct, to facilitate calling parse_cbm() to verify the CBM of the io_alloc feature. Signed-off-by: Babu Moger Signed-off-by: Borislav Petkov (AMD) Reviewed-by: Reinette Chatre Link: https://patch.msgid.link/f8ec6ab5cf594d906a3fe75f56793d5fbd63f38f.1762995456.git.babu.moger@amd.com --- fs/resctrl/ctrlmondata.c | 24 +++++++++++++----------- 1 file changed, 13 insertions(+), 11 deletions(-) diff --git a/fs/resctrl/ctrlmondata.c b/fs/resctrl/ctrlmondata.c index 1ac89b107e6f..c43bedea70d7 100644 --- a/fs/resctrl/ctrlmondata.c +++ b/fs/resctrl/ctrlmondata.c @@ -24,7 +24,8 @@ #include "internal.h" struct rdt_parse_data { - struct rdtgroup *rdtgrp; + u32 closid; + enum rdtgrp_mode mode; char *buf; }; @@ -77,8 +78,8 @@ static int parse_bw(struct rdt_parse_data *data, struct resctrl_schema *s, struct rdt_ctrl_domain *d) { struct resctrl_staged_config *cfg; - u32 closid = data->rdtgrp->closid; struct rdt_resource *r = s->res; + u32 closid = data->closid; u32 bw_val; cfg = &d->staged_config[s->conf_type]; @@ -156,9 +157,10 @@ static bool cbm_validate(char *buf, u32 *data, struct rdt_resource *r) static int parse_cbm(struct rdt_parse_data *data, struct resctrl_schema *s, struct rdt_ctrl_domain *d) { - struct rdtgroup *rdtgrp = data->rdtgrp; + enum rdtgrp_mode mode = data->mode; struct resctrl_staged_config *cfg; struct rdt_resource *r = s->res; + u32 closid = data->closid; u32 cbm_val; cfg = &d->staged_config[s->conf_type]; @@ -171,7 +173,7 @@ static int parse_cbm(struct rdt_parse_data *data, struct resctrl_schema *s, * Cannot set up more than one pseudo-locked region in a cache * hierarchy. */ - if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP && + if (mode == RDT_MODE_PSEUDO_LOCKSETUP && rdtgroup_pseudo_locked_in_hierarchy(d)) { rdt_last_cmd_puts("Pseudo-locked region in hierarchy\n"); return -EINVAL; @@ -180,8 +182,7 @@ static int parse_cbm(struct rdt_parse_data *data, struct resctrl_schema *s, if (!cbm_validate(data->buf, &cbm_val, r)) return -EINVAL; - if ((rdtgrp->mode == RDT_MODE_EXCLUSIVE || - rdtgrp->mode == RDT_MODE_SHAREABLE) && + if ((mode == RDT_MODE_EXCLUSIVE || mode == RDT_MODE_SHAREABLE) && rdtgroup_cbm_overlaps_pseudo_locked(d, cbm_val)) { rdt_last_cmd_puts("CBM overlaps with pseudo-locked region\n"); return -EINVAL; @@ -191,14 +192,14 @@ static int parse_cbm(struct rdt_parse_data *data, struct resctrl_schema *s, * The CBM may not overlap with the CBM of another closid if * either is exclusive. */ - if (rdtgroup_cbm_overlaps(s, d, cbm_val, rdtgrp->closid, true)) { + if (rdtgroup_cbm_overlaps(s, d, cbm_val, closid, true)) { rdt_last_cmd_puts("Overlaps with exclusive group\n"); return -EINVAL; } - if (rdtgroup_cbm_overlaps(s, d, cbm_val, rdtgrp->closid, false)) { - if (rdtgrp->mode == RDT_MODE_EXCLUSIVE || - rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP) { + if (rdtgroup_cbm_overlaps(s, d, cbm_val, closid, false)) { + if (mode == RDT_MODE_EXCLUSIVE || + mode == RDT_MODE_PSEUDO_LOCKSETUP) { rdt_last_cmd_puts("Overlaps with other group\n"); return -EINVAL; } @@ -262,7 +263,8 @@ next: list_for_each_entry(d, &r->ctrl_domains, hdr.list) { if (d->hdr.id == dom_id) { data.buf = dom; - data.rdtgrp = rdtgrp; + data.closid = rdtgrp->closid; + data.mode = rdtgrp->mode; if (parse_ctrlval(&data, s, d)) return -EINVAL; if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP) { From 28fa2cce7a8388f09e457f1e24241ca6d5e985d8 Mon Sep 17 00:00:00 2001 From: Babu Moger Date: Wed, 12 Nov 2025 18:57:35 -0600 Subject: [PATCH 11/12] fs/resctrl: Introduce interface to modify io_alloc capacity bitmasks The io_alloc feature in resctrl enables system software to configure the portion of the cache allocated for I/O traffic. When supported, the io_alloc_cbm file in resctrl provides access to capacity bitmasks (CBMs) allocated for I/O devices. Enable users to modify io_alloc CBMs by writing to the io_alloc_cbm resctrl file when the io_alloc feature is enabled. Mirror the CBMs between CDP_CODE and CDP_DATA when CDP is enabled to present consistent I/O allocation information to user space. Signed-off-by: Babu Moger Signed-off-by: Borislav Petkov (AMD) Reviewed-by: Reinette Chatre Link: https://patch.msgid.link/67609641b03ccfba18a8ee0bf9dbd1f3dcbecda3.1762995456.git.babu.moger@amd.com --- Documentation/filesystems/resctrl.rst | 12 ++++ fs/resctrl/ctrlmondata.c | 93 +++++++++++++++++++++++++++ fs/resctrl/internal.h | 2 + fs/resctrl/rdtgroup.c | 3 +- 4 files changed, 109 insertions(+), 1 deletion(-) diff --git a/Documentation/filesystems/resctrl.rst b/Documentation/filesystems/resctrl.rst index e7994538e0ce..bbc4b6cbb71d 100644 --- a/Documentation/filesystems/resctrl.rst +++ b/Documentation/filesystems/resctrl.rst @@ -196,6 +196,18 @@ related to allocation: # cat /sys/fs/resctrl/info/L3/io_alloc_cbm 0=ffff;1=ffff + CBMs can be configured by writing to the interface. + + Example:: + + # echo 1=ff > /sys/fs/resctrl/info/L3/io_alloc_cbm + # cat /sys/fs/resctrl/info/L3/io_alloc_cbm + 0=ffff;1=00ff + + # echo "0=ff;1=f" > /sys/fs/resctrl/info/L3/io_alloc_cbm + # cat /sys/fs/resctrl/info/L3/io_alloc_cbm + 0=00ff;1=000f + When CDP is enabled "io_alloc_cbm" associated with the CDP_DATA and CDP_CODE resources may reflect the same values. For example, values read from and written to /sys/fs/resctrl/info/L3DATA/io_alloc_cbm may be reflected by diff --git a/fs/resctrl/ctrlmondata.c b/fs/resctrl/ctrlmondata.c index c43bedea70d7..332918fc961a 100644 --- a/fs/resctrl/ctrlmondata.c +++ b/fs/resctrl/ctrlmondata.c @@ -864,3 +864,96 @@ out_unlock: cpus_read_unlock(); return ret; } + +static int resctrl_io_alloc_parse_line(char *line, struct rdt_resource *r, + struct resctrl_schema *s, u32 closid) +{ + enum resctrl_conf_type peer_type; + struct rdt_parse_data data; + struct rdt_ctrl_domain *d; + char *dom = NULL, *id; + unsigned long dom_id; + +next: + if (!line || line[0] == '\0') + return 0; + + dom = strsep(&line, ";"); + id = strsep(&dom, "="); + if (!dom || kstrtoul(id, 10, &dom_id)) { + rdt_last_cmd_puts("Missing '=' or non-numeric domain\n"); + return -EINVAL; + } + + dom = strim(dom); + list_for_each_entry(d, &r->ctrl_domains, hdr.list) { + if (d->hdr.id == dom_id) { + data.buf = dom; + data.mode = RDT_MODE_SHAREABLE; + data.closid = closid; + if (parse_cbm(&data, s, d)) + return -EINVAL; + /* + * Keep io_alloc CLOSID's CBM of CDP_CODE and CDP_DATA + * in sync. + */ + if (resctrl_arch_get_cdp_enabled(r->rid)) { + peer_type = resctrl_peer_type(s->conf_type); + memcpy(&d->staged_config[peer_type], + &d->staged_config[s->conf_type], + sizeof(d->staged_config[0])); + } + goto next; + } + } + + return -EINVAL; +} + +ssize_t resctrl_io_alloc_cbm_write(struct kernfs_open_file *of, char *buf, + size_t nbytes, loff_t off) +{ + struct resctrl_schema *s = rdt_kn_parent_priv(of->kn); + struct rdt_resource *r = s->res; + u32 io_alloc_closid; + int ret = 0; + + /* Valid input requires a trailing newline */ + if (nbytes == 0 || buf[nbytes - 1] != '\n') + return -EINVAL; + + buf[nbytes - 1] = '\0'; + + cpus_read_lock(); + mutex_lock(&rdtgroup_mutex); + rdt_last_cmd_clear(); + + if (!r->cache.io_alloc_capable) { + rdt_last_cmd_printf("io_alloc is not supported on %s\n", s->name); + ret = -ENODEV; + goto out_unlock; + } + + if (!resctrl_arch_get_io_alloc_enabled(r)) { + rdt_last_cmd_printf("io_alloc is not enabled on %s\n", s->name); + ret = -EINVAL; + goto out_unlock; + } + + io_alloc_closid = resctrl_io_alloc_closid(r); + + rdt_staged_configs_clear(); + ret = resctrl_io_alloc_parse_line(buf, r, s, io_alloc_closid); + if (ret) + goto out_clear_configs; + + ret = resctrl_arch_update_domains(r, io_alloc_closid); + +out_clear_configs: + rdt_staged_configs_clear(); +out_unlock: + mutex_unlock(&rdtgroup_mutex); + cpus_read_unlock(); + + return ret ?: nbytes; +} diff --git a/fs/resctrl/internal.h b/fs/resctrl/internal.h index 779a575e0828..e1eda74881a9 100644 --- a/fs/resctrl/internal.h +++ b/fs/resctrl/internal.h @@ -440,6 +440,8 @@ ssize_t resctrl_io_alloc_write(struct kernfs_open_file *of, char *buf, const char *rdtgroup_name_by_closid(u32 closid); int resctrl_io_alloc_cbm_show(struct kernfs_open_file *of, struct seq_file *seq, void *v); +ssize_t resctrl_io_alloc_cbm_write(struct kernfs_open_file *of, char *buf, + size_t nbytes, loff_t off); #ifdef CONFIG_RESCTRL_FS_PSEUDO_LOCK int rdtgroup_locksetup_enter(struct rdtgroup *rdtgrp); diff --git a/fs/resctrl/rdtgroup.c b/fs/resctrl/rdtgroup.c index ac326db9b5ba..361497489d87 100644 --- a/fs/resctrl/rdtgroup.c +++ b/fs/resctrl/rdtgroup.c @@ -1973,9 +1973,10 @@ static struct rftype res_common_files[] = { }, { .name = "io_alloc_cbm", - .mode = 0444, + .mode = 0644, .kf_ops = &rdtgroup_kf_single_ops, .seq_show = resctrl_io_alloc_cbm_show, + .write = resctrl_io_alloc_cbm_write, }, { .name = "max_threshold_occupancy", From ac7de456a37f9b126eb53b89c2bb27d625dc5fd9 Mon Sep 17 00:00:00 2001 From: Babu Moger Date: Wed, 12 Nov 2025 18:57:36 -0600 Subject: [PATCH 12/12] fs/resctrl: Update bit_usage to reflect io_alloc The "shareable_bits" and "bit_usage" resctrl files associated with cache resources give insight into how instances of a cache is used. Update the annotated capacity bitmasks displayed by "bit_usage" to include the cache portions allocated for I/O via the "io_alloc" feature. "shareable_bits" is a global bitmask of shareable cache with I/O and can thus not present the per-domain I/O allocations possible with the "io_alloc" feature. Revise the "shareable_bits" documentation to direct users to "bit_usage" for accurate cache usage information. Signed-off-by: Babu Moger Signed-off-by: Borislav Petkov (AMD) Reviewed-by: Reinette Chatre Link: https://patch.msgid.link/e02a0d424129fd7f3e45822a559b1c614ae4652a.1762995456.git.babu.moger@amd.com --- Documentation/filesystems/resctrl.rst | 35 ++++++++++++++++----------- fs/resctrl/ctrlmondata.c | 2 +- fs/resctrl/internal.h | 1 + fs/resctrl/rdtgroup.c | 21 ++++++++++++++-- 4 files changed, 42 insertions(+), 17 deletions(-) diff --git a/Documentation/filesystems/resctrl.rst b/Documentation/filesystems/resctrl.rst index bbc4b6cbb71d..8c8ce678148a 100644 --- a/Documentation/filesystems/resctrl.rst +++ b/Documentation/filesystems/resctrl.rst @@ -96,12 +96,19 @@ related to allocation: must be set when writing a mask. "shareable_bits": - Bitmask of shareable resource with other executing - entities (e.g. I/O). User can use this when - setting up exclusive cache partitions. Note that - some platforms support devices that have their - own settings for cache use which can over-ride - these bits. + Bitmask of shareable resource with other executing entities + (e.g. I/O). Applies to all instances of this resource. User + can use this when setting up exclusive cache partitions. + Note that some platforms support devices that have their + own settings for cache use which can over-ride these bits. + + When "io_alloc" is enabled, a portion of each cache instance can + be configured for shared use between hardware and software. + "bit_usage" should be used to see which portions of each cache + instance is configured for hardware use via "io_alloc" feature + because every cache instance can have its "io_alloc" bitmask + configured independently via "io_alloc_cbm". + "bit_usage": Annotated capacity bitmasks showing how all instances of the resource are used. The legend is: @@ -115,16 +122,16 @@ related to allocation: "H": Corresponding region is used by hardware only but available for software use. If a resource - has bits set in "shareable_bits" but not all - of these bits appear in the resource groups' - schematas then the bits appearing in - "shareable_bits" but no resource group will - be marked as "H". + has bits set in "shareable_bits" or "io_alloc_cbm" + but not all of these bits appear in the resource + groups' schemata then the bits appearing in + "shareable_bits" or "io_alloc_cbm" but no + resource group will be marked as "H". "X": Corresponding region is available for sharing and - used by hardware and software. These are the - bits that appear in "shareable_bits" as - well as a resource group's allocation. + used by hardware and software. These are the bits + that appear in "shareable_bits" or "io_alloc_cbm" + as well as a resource group's allocation. "S": Corresponding region is used by software and available for sharing. diff --git a/fs/resctrl/ctrlmondata.c b/fs/resctrl/ctrlmondata.c index 332918fc961a..b2d178d3556e 100644 --- a/fs/resctrl/ctrlmondata.c +++ b/fs/resctrl/ctrlmondata.c @@ -750,7 +750,7 @@ out: * resource. Note that if Code Data Prioritization (CDP) is enabled, the number * of available CLOSIDs is reduced by half. */ -static u32 resctrl_io_alloc_closid(struct rdt_resource *r) +u32 resctrl_io_alloc_closid(struct rdt_resource *r) { if (resctrl_arch_get_cdp_enabled(r->rid)) return resctrl_arch_get_num_closid(r) / 2 - 1; diff --git a/fs/resctrl/internal.h b/fs/resctrl/internal.h index e1eda74881a9..bff4a54ae333 100644 --- a/fs/resctrl/internal.h +++ b/fs/resctrl/internal.h @@ -442,6 +442,7 @@ int resctrl_io_alloc_cbm_show(struct kernfs_open_file *of, struct seq_file *seq, void *v); ssize_t resctrl_io_alloc_cbm_write(struct kernfs_open_file *of, char *buf, size_t nbytes, loff_t off); +u32 resctrl_io_alloc_closid(struct rdt_resource *r); #ifdef CONFIG_RESCTRL_FS_PSEUDO_LOCK int rdtgroup_locksetup_enter(struct rdtgroup *rdtgrp); diff --git a/fs/resctrl/rdtgroup.c b/fs/resctrl/rdtgroup.c index 361497489d87..8e39dfda56bc 100644 --- a/fs/resctrl/rdtgroup.c +++ b/fs/resctrl/rdtgroup.c @@ -1062,15 +1062,17 @@ static int rdt_bit_usage_show(struct kernfs_open_file *of, cpus_read_lock(); mutex_lock(&rdtgroup_mutex); - hw_shareable = r->cache.shareable_bits; list_for_each_entry(dom, &r->ctrl_domains, hdr.list) { if (sep) seq_putc(seq, ';'); + hw_shareable = r->cache.shareable_bits; sw_shareable = 0; exclusive = 0; seq_printf(seq, "%d=", dom->hdr.id); for (i = 0; i < closids_supported(); i++) { - if (!closid_allocated(i)) + if (!closid_allocated(i) || + (resctrl_arch_get_io_alloc_enabled(r) && + i == resctrl_io_alloc_closid(r))) continue; ctrl_val = resctrl_arch_get_config(r, dom, i, s->conf_type); @@ -1098,6 +1100,21 @@ static int rdt_bit_usage_show(struct kernfs_open_file *of, break; } } + + /* + * When the "io_alloc" feature is enabled, a portion of the cache + * is configured for shared use between hardware and software. + * Also, when CDP is enabled the CBMs of CDP_CODE and CDP_DATA + * resources are kept in sync. So, the CBMs for "io_alloc" can + * be accessed through either resource. + */ + if (resctrl_arch_get_io_alloc_enabled(r)) { + ctrl_val = resctrl_arch_get_config(r, dom, + resctrl_io_alloc_closid(r), + s->conf_type); + hw_shareable |= ctrl_val; + } + for (i = r->cache.cbm_len - 1; i >= 0; i--) { pseudo_locked = dom->plr ? dom->plr->cbm : 0; hwb = test_bit(i, &hw_shareable);