Memory b/w allocation(MBA) is part of the Intel Resource Director
Technology (RDT). RDT helps monitor and share processor shared
resources. MBA helps enforce a limit on the memory bandwidth(b/w)
threads can use when they are scheduled. OS does the enforcement using
MSR(model specific register) interface and mapping the threads to
architecture specific CLOSids(class of service IDs).
The user interface is via the common resctrl fs for RDT features.
This can be used along with MBM (memory b/w monitoring) and cache
allocation to control and monitor the applications cache and memory
resources as per the QoS or other performance requirements. Use cases
could be large serverclusters, VM, cloud and container based services
where the admin or orchestration tools can use this framework to
manage/allocate these processor shared resources like other memory/cpu
resources to provide QoS guarantees. Detailed use case examples are
documented in the 01/08 Documentation patch.
Patches are dependent on :
https://marc.info/?l=linux-kernel&m=149125583824700
V3 changes
----------
Thanks to Thomas for all the feed back and below are the changes as per
the feedback:
- Did a lot of changes to make code more readable and follow convention
like moving the defines/variables, renaming, removing unnecessary return
value changes, unnecessary globals.
- Changed all change logs to follow context, problem, solution format.
- Fixed the issue where rdt init may return success even if all features
fail to init.
- Added warn_on to code that should be reached in case when system does
not support non_linear delay.
- The schemata interface is changed to allow user enter a value which
does not follow the granularity requirements and intermediate values are
just rounded off to next available value.
- Changed documentation to reflect these details and remove all
architectural details.
V2 changes
----------
Thanks to Thomas for all the feed back and below are the changes as per
the feedback:
- Sent the RDT generic improvements patch series seperately
- Split the info file/validate apis as prep patches to MBA
changes
- Fixed a lot of coding convention issues, variable initializations,
and removing unnecessary allocations etc.
- Changed the hotcpu handling for CLOS/RMID to zero the CLOSid/RMID when
cpu is offlined.
- Changed some naming conventions.
- Changed the interface to show the bandwidth percentage that can be
requested rather than showing h/w specific delay values which do not map
to a particular b/w percentage in non-linear scale
[PATCH 1/8] Documentation, x86: Intel Memory bandwidth allocation
[PATCH 2/8] x86/intel_rdt/mba: Generalize the naming to get ready for
[PATCH 3/8] x86/intel_rdt/mba: Memory b/w allocation feature detect
[PATCH 4/8] x86/intel_rct/mba: Add MBA structures and initialize MBA
[PATCH 5/8] x86/intel_rdt: Prep to add info files for MBA
[PATCH 6/8] x86/intel_rdt/mba: Add info directory files for MBA
[PATCH 7/8] x86/intel_rdt: Prep to add schemata file for MBA
[PATCH 8/8] x86/intel_rdt/mba: Add schemata file support for MBA
The files in the info directory for MBA are as follows:
num_closids
The maximum number of CLOSids available for MBA
min_bandwidth
The minimum memory bandwidth percentage value
bandwidth_gran
The granularity of the bandwidth control in percent for the
particular CPU SKU. Intermediate values entered are rounded off
to the previous control step available. Available bandwidth
control steps are minimum_bandwidth + N * bandwidth_gran.
delay_linear
When set, the OS writes a linear percentage based value to the
control MSRs ranging from minimum_bandwidth to 100 percent.
This value is informational and has no influence on the values
written to the schemata files. The values written to the
schemata are always bandwidth percentage that is requested.
Signed-off-by: Vikas Shivappa <[email protected]>
---
arch/x86/include/asm/intel_rdt.h | 1 +
arch/x86/kernel/cpu/intel_rdt.c | 1 +
arch/x86/kernel/cpu/intel_rdt_rdtgroup.c | 64 ++++++++++++++++++++++++++++++++
3 files changed, 66 insertions(+)
diff --git a/arch/x86/include/asm/intel_rdt.h b/arch/x86/include/asm/intel_rdt.h
index e58e1d4..b42fba5 100644
--- a/arch/x86/include/asm/intel_rdt.h
+++ b/arch/x86/include/asm/intel_rdt.h
@@ -155,6 +155,7 @@ struct rdt_resource {
};
void rdt_get_cache_infofile(struct rdt_resource *r);
+void rdt_get_mba_infofile(struct rdt_resource *r);
extern struct mutex rdtgroup_mutex;
diff --git a/arch/x86/kernel/cpu/intel_rdt.c b/arch/x86/kernel/cpu/intel_rdt.c
index b50cb35..fd65548 100644
--- a/arch/x86/kernel/cpu/intel_rdt.c
+++ b/arch/x86/kernel/cpu/intel_rdt.c
@@ -197,6 +197,7 @@ static bool rdt_get_mem_config(struct rdt_resource *r)
return false;
}
r->data_width = 3;
+ rdt_get_mba_infofile(r);
r->capable = true;
r->enabled = true;
diff --git a/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c b/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c
index aee4a19..ce5b173 100644
--- a/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c
+++ b/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c
@@ -518,6 +518,36 @@ static int rdt_min_cbm_bits_show(struct kernfs_open_file *of,
return 0;
}
+static int rdt_min_bw_show(struct kernfs_open_file *of,
+ struct seq_file *seq, void *v)
+{
+ struct rdt_resource *r = of->kn->parent->priv;
+
+ seq_printf(seq, "%d\n", r->min_bw);
+
+ return 0;
+}
+
+static int rdt_bw_gran_show(struct kernfs_open_file *of,
+ struct seq_file *seq, void *v)
+{
+ struct rdt_resource *r = of->kn->parent->priv;
+
+ seq_printf(seq, "%d\n", r->bw_gran);
+
+ return 0;
+}
+
+static int rdt_delay_linear_show(struct kernfs_open_file *of,
+ struct seq_file *seq, void *v)
+{
+ struct rdt_resource *r = of->kn->parent->priv;
+
+ seq_printf(seq, "%d\n", r->delay_linear);
+
+ return 0;
+}
+
/* rdtgroup information files for one cache resource. */
static struct rftype res_cache_info_files[] = {
{
@@ -540,6 +570,40 @@ static int rdt_min_cbm_bits_show(struct kernfs_open_file *of,
},
};
+/* rdtgroup information files for memory bandwidth. */
+static struct rftype res_mba_info_files[] = {
+ {
+ .name = "num_closids",
+ .mode = 0444,
+ .kf_ops = &rdtgroup_kf_single_ops,
+ .seq_show = rdt_num_closids_show,
+ },
+ {
+ .name = "min_bandwidth",
+ .mode = 0444,
+ .kf_ops = &rdtgroup_kf_single_ops,
+ .seq_show = rdt_min_bw_show,
+ },
+ {
+ .name = "bandwidth_gran",
+ .mode = 0444,
+ .kf_ops = &rdtgroup_kf_single_ops,
+ .seq_show = rdt_bw_gran_show,
+ },
+ {
+ .name = "delay_linear",
+ .mode = 0444,
+ .kf_ops = &rdtgroup_kf_single_ops,
+ .seq_show = rdt_delay_linear_show,
+ },
+};
+
+void rdt_get_mba_infofile(struct rdt_resource *r)
+{
+ r->info_files = res_mba_info_files;
+ r->nr_info_files = ARRAY_SIZE(res_mba_info_files);
+}
+
void rdt_get_cache_infofile(struct rdt_resource *r)
{
r->info_files = res_cache_info_files;
--
1.9.1
Update the 'intel_rdt_ui' documentation to have Memory bandwidth(b/w)
allocation interface usage.
Signed-off-by: Vikas Shivappa <[email protected]>
---
Documentation/x86/intel_rdt_ui.txt | 107 ++++++++++++++++++++++++++++++-------
1 file changed, 87 insertions(+), 20 deletions(-)
diff --git a/Documentation/x86/intel_rdt_ui.txt b/Documentation/x86/intel_rdt_ui.txt
index 3ea1984..f223a3b 100644
--- a/Documentation/x86/intel_rdt_ui.txt
+++ b/Documentation/x86/intel_rdt_ui.txt
@@ -4,6 +4,7 @@ Copyright (C) 2016 Intel Corporation
Fenghua Yu <[email protected]>
Tony Luck <[email protected]>
+Vikas Shivappa <[email protected]>
This feature is enabled by the CONFIG_INTEL_RDT_A Kconfig and the
X86 /proc/cpuinfo flag bits "rdt", "cat_l3" and "cdp_l3".
@@ -22,19 +23,34 @@ Info directory
The 'info' directory contains information about the enabled
resources. Each resource has its own subdirectory. The subdirectory
-names reflect the resource names. Each subdirectory contains the
-following files:
+names reflect the resource names.
+Cache resource(L3/L2) subdirectory contains the following files:
-"num_closids": The number of CLOSIDs which are valid for this
- resource. The kernel uses the smallest number of
- CLOSIDs of all enabled resources as limit.
+"num_closids": The number of CLOSIDs which are valid for this
+ resource. The kernel uses the smallest number of
+ CLOSIDs of all enabled resources as limit.
-"cbm_mask": The bitmask which is valid for this resource. This
- mask is equivalent to 100%.
+"cbm_mask": The bitmask which is valid for this resource.
+ This mask is equivalent to 100%.
-"min_cbm_bits": The minimum number of consecutive bits which must be
- set when writing a mask.
+"min_cbm_bits": The minimum number of consecutive bits which
+ must be set when writing a mask.
+Memory bandwitdh(MB) subdirectory contains the following files:
+
+"min_bandwidth": The minimum memory bandwidth percentage which
+ user can request.
+
+"bandwidth_gran": The granularity in which the memory bandwidth
+ percentage is allocated. The allocated
+ b/w percentage is rounded off to the next
+ control step available on the hardware. The
+ available bandwidth control steps are:
+ min_bandwidth + N * bandwidth_gran.
+
+"delay_linear": Indicates if the delay scale is linear or
+ non-linear. This field is purely informational
+ only.
Resource groups
---------------
@@ -107,6 +123,22 @@ and 0xA are not. On a system with a 20-bit mask each bit represents 5%
of the capacity of the cache. You could partition the cache into four
equal parts with masks: 0x1f, 0x3e0, 0x7c00, 0xf8000.
+Memory bandwidth(b/w) percentage
+--------------------------------
+For Memory b/w resource, user controls the resource by indicating the
+percentage of total memory b/w.
+
+The minimum bandwidth percentage value for each cpu model is predefined
+and can be looked up through "info/MB/min_bandwidth". The bandwidth
+granularity that is allocated is also dependent on the cpu model and can
+be looked up at "info/MB/bandwidth_gran". The available bandwidth
+control steps are: min_bw + N * bw_gran. Intermediate values are rounded
+to the next control step available on the hardware.
+
+The bandwidth throttling is a core specific mechanism on some of Intel
+SKUs. Using a high bandwidth and a low bandwidth setting on two threads
+sharing a core will result in both threads being throttled to use the
+low bandwidth.
L3 details (code and data prioritization disabled)
--------------------------------------------------
@@ -129,6 +161,13 @@ schemata format is always:
L2:<cache_id0>=<cbm>;<cache_id1>=<cbm>;...
+Memory b/w Allocation details
+-----------------------------
+
+Memory b/w domain is L3 cache.
+
+ MB:<cache_id0>=bandwidth0;<cache_id1>=bandwidth1;...
+
Reading/writing the schemata file
---------------------------------
Reading the schemata file will show the state of all resources
@@ -146,13 +185,14 @@ L3CODE:0=fffff;1=fffff;2=fffff;3=fffff
Example 1
---------
On a two socket machine (one L3 cache per socket) with just four bits
-for cache bit masks
+for cache bit masks, minimum b/w of 10% with a memory bandwidth
+granularity of 10%
# mount -t resctrl resctrl /sys/fs/resctrl
# cd /sys/fs/resctrl
# mkdir p0 p1
-# echo "L3:0=3;1=c" > /sys/fs/resctrl/p0/schemata
-# echo "L3:0=3;1=3" > /sys/fs/resctrl/p1/schemata
+# echo "L3:0=3;1=c\nMB:0=50;1=50" > /sys/fs/resctrl/p0/schemata
+# echo "L3:0=3;1=3\nMB:0=50;1=50" > /sys/fs/resctrl/p1/schemata
The default resource group is unmodified, so we have access to all parts
of all caches (its schemata file reads "L3:0=f;1=f").
@@ -161,6 +201,14 @@ Tasks that are under the control of group "p0" may only allocate from the
"lower" 50% on cache ID 0, and the "upper" 50% of cache ID 1.
Tasks in group "p1" use the "lower" 50% of cache on both sockets.
+Similarly, tasks that are under the control of group "p0" may use a
+maximum memory b/w of 50% on socket0 and 50% on socket 1.
+Tasks in group "p1" may also use 50% memory b/w on both sockets.
+Note that unlike cache masks, memory b/w cannot specify whether these
+allocations can overlap or not. The allocations specifies the maximum
+b/w that the group may be able to use and the system admin can configure
+the b/w accordingly.
+
Example 2
---------
Again two sockets, but this time with a more realistic 20-bit mask.
@@ -174,9 +222,10 @@ of L3 cache on socket 0.
# cd /sys/fs/resctrl
First we reset the schemata for the default group so that the "upper"
-50% of the L3 cache on socket 0 cannot be used by ordinary tasks:
+50% of the L3 cache on socket 0 and 50% of memory b/w cannot be used by
+ordinary tasks:
-# echo "L3:0=3ff;1=fffff" > schemata
+# echo "L3:0=3ff;1=fffff\nMB:0=50;1=100" > schemata
Next we make a resource group for our first real time task and give
it access to the "top" 25% of the cache on socket 0.
@@ -199,6 +248,20 @@ Ditto for the second real time task (with the remaining 25% of cache):
# echo 5678 > p1/tasks
# taskset -cp 2 5678
+For the same 2 socket system with memory b/w resource and CAT L3 the
+schemata would look like(Assume min_bandwidth 10 and bandwidth_gran is
+10):
+
+For our first real time task this would request 20% memory b/w on socket
+0.
+
+# echo -e "L3:0=f8000;1=fffff\nMB:0=20;1=100" > p0/schemata
+
+For our second real time task this would request an other 20% memory b/w
+on socket 0.
+
+# echo -e "L3:0=f8000;1=fffff\nMB:0=20;1=100" > p0/schemata
+
Example 3
---------
@@ -212,18 +275,22 @@ the tasks.
# cd /sys/fs/resctrl
First we reset the schemata for the default group so that the "upper"
-50% of the L3 cache on socket 0 cannot be used by ordinary tasks:
+50% of the L3 cache on socket 0, and 50% of memory bandwidth on socket 0
+cannot be used by ordinary tasks:
-# echo "L3:0=3ff" > schemata
+# echo "L3:0=3ff\nMB:0=50" > schemata
-Next we make a resource group for our real time cores and give
-it access to the "top" 50% of the cache on socket 0.
+Next we make a resource group for our real time cores and give it access
+to the "top" 50% of the cache on socket 0 and 50% of memory bandwidth on
+socket 0.
# mkdir p0
-# echo "L3:0=ffc00;" > p0/schemata
+# echo "L3:0=ffc00\nMB:0=50" > p0/schemata
Finally we move core 4-7 over to the new group and make sure that the
-kernel and the tasks running there get 50% of the cache.
+kernel and the tasks running there get 50% of the cache. They should
+also get 50% of memory bandwidth assuming that the cores 4-7 are SMT
+siblings and only the real time threads are scheduled on the cores 4-7.
# echo C0 > p0/cpus
--
1.9.1
User updates RDT resource controls by updating the schemata file. OS
then parses these updates and updates the corresponding QOS_MSRs.
However the parsing and MSR update functions are specific to cache
resource type and do not support memory resource type.
Define resource specific function pointers to dynamically choose such
functions in order to prepare to add MBA support.
Signed-off-by: Vikas Shivappa <[email protected]>
---
arch/x86/include/asm/intel_rdt.h | 8 +++++++-
arch/x86/kernel/cpu/intel_rdt.c | 16 ++++++++++------
arch/x86/kernel/cpu/intel_rdt_schemata.c | 23 +++++++++++++----------
3 files changed, 30 insertions(+), 17 deletions(-)
diff --git a/arch/x86/include/asm/intel_rdt.h b/arch/x86/include/asm/intel_rdt.h
index b42fba5..6fc82a3 100644
--- a/arch/x86/include/asm/intel_rdt.h
+++ b/arch/x86/include/asm/intel_rdt.h
@@ -109,6 +109,9 @@ struct msr_param {
* @data_width: Character width of data when displaying
* @min_cbm_bits: Minimum number of consecutive bits to be set
* in a cache bit mask
+ * @format_str: Per resource format string to show domain value
+ * @parse_ctrlval: Per resource function pointer to parse
+ * the ctrl values
* @msr_update: Function pointer to update QOS MSRs
* @info_files: resctrl info files for the resource
* @nr_info_files: Number of info files
@@ -137,6 +140,9 @@ struct rdt_resource {
int min_cbm_bits;
u32 default_ctrl;
int data_width;
+ const char *format_str;
+ int (*parse_ctrlval) (char *buf, struct rdt_resource *r,
+ struct rdt_domain *d);
void (*msr_update) (struct rdt_domain *d, struct msr_param *m,
struct rdt_resource *r);
struct rftype *info_files;
@@ -146,7 +152,6 @@ struct rdt_resource {
u32 bw_gran;
u32 delay_linear;
u32 *mb_map;
-
struct list_head domains;
int msr_base;
int cache_level;
@@ -156,6 +161,7 @@ struct rdt_resource {
void rdt_get_cache_infofile(struct rdt_resource *r);
void rdt_get_mba_infofile(struct rdt_resource *r);
+int parse_cbm(char *buf, struct rdt_resource *r, struct rdt_domain *d);
extern struct mutex rdtgroup_mutex;
diff --git a/arch/x86/kernel/cpu/intel_rdt.c b/arch/x86/kernel/cpu/intel_rdt.c
index fd65548..2c18142 100644
--- a/arch/x86/kernel/cpu/intel_rdt.c
+++ b/arch/x86/kernel/cpu/intel_rdt.c
@@ -58,7 +58,9 @@ struct rdt_resource rdt_resources_all[] = {
.name = "L3",
.domains = domain_init(RDT_RESOURCE_L3),
.msr_base = IA32_L3_CBM_BASE,
+ .parse_ctrlval = parse_cbm,
.msr_update = cqm_wrmsr,
+ .format_str = "%d=%0*x",
.min_cbm_bits = 1,
.cache_level = 3,
.cbm_idx_multi = 1,
@@ -68,7 +70,9 @@ struct rdt_resource rdt_resources_all[] = {
.name = "L3DATA",
.domains = domain_init(RDT_RESOURCE_L3DATA),
.msr_base = IA32_L3_CBM_BASE,
+ .parse_ctrlval = parse_cbm,
.msr_update = cqm_wrmsr,
+ .format_str = "%d=%0*x",
.min_cbm_bits = 1,
.cache_level = 3,
.cbm_idx_multi = 2,
@@ -78,7 +82,9 @@ struct rdt_resource rdt_resources_all[] = {
.name = "L3CODE",
.domains = domain_init(RDT_RESOURCE_L3CODE),
.msr_base = IA32_L3_CBM_BASE,
+ .parse_ctrlval = parse_cbm,
.msr_update = cqm_wrmsr,
+ .format_str = "%d=%0*x",
.min_cbm_bits = 1,
.cache_level = 3,
.cbm_idx_multi = 2,
@@ -88,7 +94,9 @@ struct rdt_resource rdt_resources_all[] = {
.name = "L2",
.domains = domain_init(RDT_RESOURCE_L2),
.msr_base = IA32_L2_CBM_BASE,
+ .parse_ctrlval = parse_cbm,
.msr_update = cqm_wrmsr,
+ .format_str = "%d=%0*x",
.min_cbm_bits = 1,
.cache_level = 2,
.cbm_idx_multi = 1,
@@ -349,7 +357,7 @@ void rdt_ctrl_update(void *arg)
{
struct msr_param *m = (struct msr_param *)arg;
struct rdt_resource *r = m->res;
- int i, cpu = smp_processor_id();
+ int cpu = smp_processor_id();
struct rdt_domain *d;
list_for_each_entry(d, &r->domains, list) {
@@ -363,11 +371,7 @@ void rdt_ctrl_update(void *arg)
return;
found:
- for (i = m->low; i < m->high; i++) {
- int idx = cbm_idx(r, i);
-
- wrmsrl(r->msr_base + idx, d->ctrl_val[i]);
- }
+ r->msr_update(d, m, r);
}
/*
diff --git a/arch/x86/kernel/cpu/intel_rdt_schemata.c b/arch/x86/kernel/cpu/intel_rdt_schemata.c
index 7695179..03f9a70 100644
--- a/arch/x86/kernel/cpu/intel_rdt_schemata.c
+++ b/arch/x86/kernel/cpu/intel_rdt_schemata.c
@@ -34,9 +34,14 @@
* are allowed (e.g. FFFFH, 0FF0H, 003CH, etc.).
* Additionally Haswell requires at least two bits set.
*/
-static bool cbm_validate(unsigned long var, struct rdt_resource *r)
+static bool cbm_validate(char *buf, unsigned long *data, struct rdt_resource *r)
{
- unsigned long first_bit, zero_bit;
+ unsigned long first_bit, zero_bit, var;
+ int ret;
+
+ ret = kstrtoul(buf, 16, &var);
+ if (ret)
+ return false;
if (var == 0 || var > r->default_ctrl)
return false;
@@ -49,6 +54,8 @@ static bool cbm_validate(unsigned long var, struct rdt_resource *r)
if ((zero_bit - first_bit) < r->min_cbm_bits)
return false;
+
+ *data = var;
return true;
}
@@ -56,18 +63,14 @@ static bool cbm_validate(unsigned long var, struct rdt_resource *r)
* Read one cache bit mask (hex). Check that it is valid for the current
* resource type.
*/
-static int parse_cbm(char *buf, struct rdt_resource *r, struct rdt_domain *d)
+int parse_cbm(char *buf, struct rdt_resource *r, struct rdt_domain *d)
{
unsigned long data;
- int ret;
if (d->have_new_ctrl)
return -EINVAL;
- ret = kstrtoul(buf, 16, &data);
- if (ret)
- return ret;
- if (!cbm_validate(data, r))
+ if(!cbm_validate(buf, &data, r))
return -EINVAL;
d->new_ctrl = data;
d->have_new_ctrl = true;
@@ -96,7 +99,7 @@ static int parse_line(char *line, struct rdt_resource *r)
return -EINVAL;
list_for_each_entry(d, &r->domains, list) {
if (d->id == dom_id) {
- if (parse_cbm(dom, r, d))
+ if (r->parse_ctrlval(dom, r, d))
return -EINVAL;
goto next;
}
@@ -207,7 +210,7 @@ static void show_doms(struct seq_file *s, struct rdt_resource *r, int closid)
list_for_each_entry(dom, &r->domains, list) {
if (sep)
seq_puts(s, ";");
- seq_printf(s, "%d=%0*x", dom->id, max_data_width,
+ seq_printf(s, r->format_str, dom->id, max_data_width,
dom->ctrl_val[closid]);
sep = true;
}
--
1.9.1
Add support to update the MBA bandwidth values for the domains. The MBA
bandwidth values are specified by updating the schemata.
We do the following to parse the bandwidth(bw) value from
schemata and update the PQOS_MSRS:
1. check the bw to satisfy the minimum and max bandwidth requirements.
2. To meet the granularity requirement intermediate values are rounded
to the next control step available on the hardware.
3. map the bw to delay values and write the delay values to
corresponding domain PQOS_MSRs which are indexed from 0xD50. For
linear scale the delay value = 100 - bw. Currently no Intel SKUs
support non-linear delay values.
Signed-off-by: Vikas Shivappa <[email protected]>
---
arch/x86/include/asm/intel_rdt.h | 1 +
arch/x86/kernel/cpu/intel_rdt.c | 2 ++
arch/x86/kernel/cpu/intel_rdt_schemata.c | 46 ++++++++++++++++++++++++++++++++
3 files changed, 49 insertions(+)
diff --git a/arch/x86/include/asm/intel_rdt.h b/arch/x86/include/asm/intel_rdt.h
index 6fc82a3..7a77fec 100644
--- a/arch/x86/include/asm/intel_rdt.h
+++ b/arch/x86/include/asm/intel_rdt.h
@@ -162,6 +162,7 @@ struct rdt_resource {
void rdt_get_cache_infofile(struct rdt_resource *r);
void rdt_get_mba_infofile(struct rdt_resource *r);
int parse_cbm(char *buf, struct rdt_resource *r, struct rdt_domain *d);
+int parse_bw(char *buf, struct rdt_resource *r, struct rdt_domain *d);
extern struct mutex rdtgroup_mutex;
diff --git a/arch/x86/kernel/cpu/intel_rdt.c b/arch/x86/kernel/cpu/intel_rdt.c
index 2c18142..34bcfbc 100644
--- a/arch/x86/kernel/cpu/intel_rdt.c
+++ b/arch/x86/kernel/cpu/intel_rdt.c
@@ -106,7 +106,9 @@ struct rdt_resource rdt_resources_all[] = {
.name = "MB",
.domains = domain_init(RDT_RESOURCE_MBA),
.msr_base = IA32_MBA_THRTL_BASE,
+ .parse_ctrlval = parse_bw,
.msr_update = mba_wrmsr,
+ .format_str = "%d=%*d",
.cache_level = 3,
.cbm_idx_multi = 1,
.cbm_idx_offset = 0
diff --git a/arch/x86/kernel/cpu/intel_rdt_schemata.c b/arch/x86/kernel/cpu/intel_rdt_schemata.c
index 03f9a70..9154695 100644
--- a/arch/x86/kernel/cpu/intel_rdt_schemata.c
+++ b/arch/x86/kernel/cpu/intel_rdt_schemata.c
@@ -29,6 +29,52 @@
#include <asm/intel_rdt.h>
/*
+ * Check whether MBA bandwidth percentage value is correct.
+ * The value is checked against the minimum and max bandwidth
+ * values specified by the hardware. The allocated b/w
+ * percentage is rounded off the next control step
+ * available on the hardware.
+ */
+static bool bw_validate(char *buf, unsigned long *data, struct rdt_resource *r)
+{
+ unsigned long bw, m;
+ int ret;
+
+ /*
+ * Only linear delay values is supported for current Intel SKUs.
+ */
+ if (!r->delay_linear)
+ return false;
+
+ ret = kstrtoul(buf, 10, &bw);
+ if (ret)
+ return false;
+
+ if (bw < r->min_bw || bw > r->default_ctrl)
+ return false;
+
+ m = (bw + r->bw_gran - 1) / r->bw_gran;
+ *data = m * r->bw_gran;
+
+ return true;
+}
+
+int parse_bw(char *buf, struct rdt_resource *r, struct rdt_domain *d)
+{
+ unsigned long data;
+
+ if (d->have_new_ctrl)
+ return -EINVAL;
+
+ if (!bw_validate(buf, &data, r))
+ return -EINVAL;
+ d->new_ctrl = data;
+ d->have_new_ctrl = true;
+
+ return 0;
+}
+
+/*
* Check whether a cache bit mask is valid. The SDM says:
* Please note that all (and only) contiguous '1' combinations
* are allowed (e.g. FFFFH, 0FF0H, 003CH, etc.).
--
1.9.1
Each RDT resource has a separate directory with in the
"/sys/fs/resctrl/info" which has files to display information about the
resource. Currently this list of files is a static list of files
specific to cache resource.
As a preparatory patch to add MBA info files, extend this
implementation support dynamic list of files.
Signed-off-by: Vikas Shivappa <[email protected]>
---
arch/x86/include/asm/intel_rdt.h | 6 ++++++
arch/x86/kernel/cpu/intel_rdt.c | 1 +
arch/x86/kernel/cpu/intel_rdt_rdtgroup.c | 18 ++++++++++++++----
3 files changed, 21 insertions(+), 4 deletions(-)
diff --git a/arch/x86/include/asm/intel_rdt.h b/arch/x86/include/asm/intel_rdt.h
index 285cdeb..e58e1d4 100644
--- a/arch/x86/include/asm/intel_rdt.h
+++ b/arch/x86/include/asm/intel_rdt.h
@@ -110,6 +110,8 @@ struct msr_param {
* @min_cbm_bits: Minimum number of consecutive bits to be set
* in a cache bit mask
* @msr_update: Function pointer to update QOS MSRs
+ * @info_files: resctrl info files for the resource
+ * @nr_info_files: Number of info files
* @max_delay: Max throttle delay. Delay is the hardware
* understandable value for memory bandwidth.
* @min_bw: Minimum memory bandwidth percentage user
@@ -137,6 +139,8 @@ struct rdt_resource {
int data_width;
void (*msr_update) (struct rdt_domain *d, struct msr_param *m,
struct rdt_resource *r);
+ struct rftype *info_files;
+ int nr_info_files;
u32 max_delay;
u32 min_bw;
u32 bw_gran;
@@ -150,6 +154,8 @@ struct rdt_resource {
int cbm_idx_offset;
};
+void rdt_get_cache_infofile(struct rdt_resource *r);
+
extern struct mutex rdtgroup_mutex;
extern struct rdt_resource rdt_resources_all[];
diff --git a/arch/x86/kernel/cpu/intel_rdt.c b/arch/x86/kernel/cpu/intel_rdt.c
index be272b9..b50cb35 100644
--- a/arch/x86/kernel/cpu/intel_rdt.c
+++ b/arch/x86/kernel/cpu/intel_rdt.c
@@ -215,6 +215,7 @@ static void rdt_get_cache_config(int idx, struct rdt_resource *r)
r->cbm_len = eax.split.cbm_len + 1;
r->default_ctrl = BIT_MASK(eax.split.cbm_len + 1) - 1;
r->data_width = (r->cbm_len + 3) / 4;
+ rdt_get_cache_infofile(r);
r->capable = true;
r->enabled = true;
}
diff --git a/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c b/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c
index 5ffe637..aee4a19 100644
--- a/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c
+++ b/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c
@@ -519,7 +519,7 @@ static int rdt_min_cbm_bits_show(struct kernfs_open_file *of,
}
/* rdtgroup information files for one cache resource. */
-static struct rftype res_info_files[] = {
+static struct rftype res_cache_info_files[] = {
{
.name = "num_closids",
.mode = 0444,
@@ -540,11 +540,18 @@ static int rdt_min_cbm_bits_show(struct kernfs_open_file *of,
},
};
+void rdt_get_cache_infofile(struct rdt_resource *r)
+{
+ r->info_files = res_cache_info_files;
+ r->nr_info_files = ARRAY_SIZE(res_cache_info_files);
+}
+
static int rdtgroup_create_info_dir(struct kernfs_node *parent_kn)
{
struct kernfs_node *kn_subdir;
+ struct rftype *res_info_files;
struct rdt_resource *r;
- int ret;
+ int ret, len;
/* create the directory */
kn_info = kernfs_create_dir(parent_kn, "info", parent_kn->mode, NULL);
@@ -563,8 +570,11 @@ static int rdtgroup_create_info_dir(struct kernfs_node *parent_kn)
ret = rdtgroup_kn_set_ugid(kn_subdir);
if (ret)
goto out_destroy;
- ret = rdtgroup_add_files(kn_subdir, res_info_files,
- ARRAY_SIZE(res_info_files));
+
+ res_info_files = r->info_files;
+ len = r->nr_info_files;
+
+ ret = rdtgroup_add_files(kn_subdir, res_info_files, len);
if (ret)
goto out_destroy;
kernfs_activate(kn_subdir);
--
1.9.1
The MBA feature details like minimum bandwidth supported, b/w
granularity etc are obtained via executing CPUID with EAX=10H
,ECX=3.
Setup and initialize the MBA specific extensions to data structures like
global list of RDT resources, RDT resource structure and RDT domain
structure.
Signed-off-by: Vikas Shivappa <[email protected]>
---
arch/x86/include/asm/intel_rdt.h | 80 +++++++++++++--------
arch/x86/kernel/cpu/intel_rdt.c | 151 ++++++++++++++++++++++++++++++++++++---
2 files changed, 190 insertions(+), 41 deletions(-)
diff --git a/arch/x86/include/asm/intel_rdt.h b/arch/x86/include/asm/intel_rdt.h
index 4c94f18..285cdeb 100644
--- a/arch/x86/include/asm/intel_rdt.h
+++ b/arch/x86/include/asm/intel_rdt.h
@@ -12,6 +12,7 @@
#define IA32_L3_QOS_CFG 0xc81
#define IA32_L3_CBM_BASE 0xc90
#define IA32_L2_CBM_BASE 0xd10
+#define IA32_MBA_THRTL_BASE 0xd50
#define L3_QOS_CDP_ENABLE 0x01ULL
@@ -69,6 +70,36 @@ struct rftype {
};
/**
+ * struct rdt_domain - group of cpus sharing an RDT resource
+ * @list: all instances of this resource
+ * @id: unique id for this instance
+ * @cpu_mask: which cpus share this resource
+ * @ctrl_val: array of cache or mem ctrl values (indexed by CLOSID)
+ * @new_cbm: new cbm value to be loaded
+ * @have_new_cbm: did user provide new_cbm for this domain
+ */
+struct rdt_domain {
+ struct list_head list;
+ int id;
+ struct cpumask cpu_mask;
+ u32 *ctrl_val;
+ u32 new_ctrl;
+ bool have_new_ctrl;
+};
+
+/**
+ * struct msr_param - set a range of MSRs from a domain
+ * @res: The resource to use
+ * @low: Beginning index from base MSR
+ * @high: End index
+ */
+struct msr_param {
+ struct rdt_resource *res;
+ int low;
+ int high;
+};
+
+/**
* struct rdt_resource - attributes of an RDT resource
* @enabled: Is this feature enabled on this machine
* @capable: Is this feature available on this machine
@@ -78,6 +109,16 @@ struct rftype {
* @data_width: Character width of data when displaying
* @min_cbm_bits: Minimum number of consecutive bits to be set
* in a cache bit mask
+ * @msr_update: Function pointer to update QOS MSRs
+ * @max_delay: Max throttle delay. Delay is the hardware
+ * understandable value for memory bandwidth.
+ * @min_bw: Minimum memory bandwidth percentage user
+ * can request
+ * @bw_gran: Granularity at which the memory bandwidth
+ * is allocated
+ * @delay_linear: True if memory b/w delay is in linear scale
+ * @mb_map: Mapping of memory b/w percentage to
+ * memory b/w delay values
* @domains: All domains for this resource
* @msr_base: Base MSR address for CBMs
* @cache_level: Which cache level defines scope of this domain
@@ -94,6 +135,14 @@ struct rdt_resource {
int min_cbm_bits;
u32 default_ctrl;
int data_width;
+ void (*msr_update) (struct rdt_domain *d, struct msr_param *m,
+ struct rdt_resource *r);
+ u32 max_delay;
+ u32 min_bw;
+ u32 bw_gran;
+ u32 delay_linear;
+ u32 *mb_map;
+
struct list_head domains;
int msr_base;
int cache_level;
@@ -101,36 +150,6 @@ struct rdt_resource {
int cbm_idx_offset;
};
-/**
- * struct rdt_domain - group of cpus sharing an RDT resource
- * @list: all instances of this resource
- * @id: unique id for this instance
- * @cpu_mask: which cpus share this resource
- * @ctrl_val: array of cache or mem ctrl values (indexed by CLOSID)
- * @new_ctrl: new ctrl value to be loaded
- * @have_new_ctrl: did user provide new_ctrl for this domain
- */
-struct rdt_domain {
- struct list_head list;
- int id;
- struct cpumask cpu_mask;
- u32 *ctrl_val;
- u32 new_ctrl;
- bool have_new_ctrl;
-};
-
-/**
- * struct msr_param - set a range of MSRs from a domain
- * @res: The resource to use
- * @low: Beginning index from base MSR
- * @high: End index
- */
-struct msr_param {
- struct rdt_resource *res;
- int low;
- int high;
-};
-
extern struct mutex rdtgroup_mutex;
extern struct rdt_resource rdt_resources_all[];
@@ -144,6 +163,7 @@ enum {
RDT_RESOURCE_L3DATA,
RDT_RESOURCE_L3CODE,
RDT_RESOURCE_L2,
+ RDT_RESOURCE_MBA,
/* Must be the last */
RDT_NUM_RESOURCES,
diff --git a/arch/x86/kernel/cpu/intel_rdt.c b/arch/x86/kernel/cpu/intel_rdt.c
index c4cf2e8..be272b9 100644
--- a/arch/x86/kernel/cpu/intel_rdt.c
+++ b/arch/x86/kernel/cpu/intel_rdt.c
@@ -32,6 +32,9 @@
#include <asm/intel-family.h>
#include <asm/intel_rdt.h>
+#define MAX_MBA_BW 100u
+#define MBA_IS_LINEAR 0x4
+
/* Mutex to protect rdtgroup access. */
DEFINE_MUTEX(rdtgroup_mutex);
@@ -45,11 +48,17 @@
*/
int max_name_width, max_data_width;
+static void
+mba_wrmsr(struct rdt_domain *d, struct msr_param *m, struct rdt_resource *r);
+static void
+cqm_wrmsr(struct rdt_domain *d, struct msr_param *m, struct rdt_resource *r);
+
struct rdt_resource rdt_resources_all[] = {
{
.name = "L3",
.domains = domain_init(RDT_RESOURCE_L3),
.msr_base = IA32_L3_CBM_BASE,
+ .msr_update = cqm_wrmsr,
.min_cbm_bits = 1,
.cache_level = 3,
.cbm_idx_multi = 1,
@@ -59,6 +68,7 @@ struct rdt_resource rdt_resources_all[] = {
.name = "L3DATA",
.domains = domain_init(RDT_RESOURCE_L3DATA),
.msr_base = IA32_L3_CBM_BASE,
+ .msr_update = cqm_wrmsr,
.min_cbm_bits = 1,
.cache_level = 3,
.cbm_idx_multi = 2,
@@ -68,6 +78,7 @@ struct rdt_resource rdt_resources_all[] = {
.name = "L3CODE",
.domains = domain_init(RDT_RESOURCE_L3CODE),
.msr_base = IA32_L3_CBM_BASE,
+ .msr_update = cqm_wrmsr,
.min_cbm_bits = 1,
.cache_level = 3,
.cbm_idx_multi = 2,
@@ -77,11 +88,21 @@ struct rdt_resource rdt_resources_all[] = {
.name = "L2",
.domains = domain_init(RDT_RESOURCE_L2),
.msr_base = IA32_L2_CBM_BASE,
+ .msr_update = cqm_wrmsr,
.min_cbm_bits = 1,
.cache_level = 2,
.cbm_idx_multi = 1,
.cbm_idx_offset = 0
},
+ {
+ .name = "MB",
+ .domains = domain_init(RDT_RESOURCE_MBA),
+ .msr_base = IA32_MBA_THRTL_BASE,
+ .msr_update = mba_wrmsr,
+ .cache_level = 3,
+ .cbm_idx_multi = 1,
+ .cbm_idx_offset = 0
+ },
};
static int cbm_idx(struct rdt_resource *r, int closid)
@@ -136,6 +157,53 @@ static inline bool cache_alloc_hsw_probe(void)
return false;
}
+/*
+ * rdt_get_mb_table() - get a mapping of bandwidth(b/w) percentage values
+ * exposed to user interface and the h/w understandable delay values.
+ *
+ * The non-linear delay values have the granularity of power of two
+ * and also the h/w does not guarantee a curve for configured delay
+ * values vs. actual b/w enforced.
+ * Hence we need a mapping that is pre calibrated so the user can
+ * express the memory b/w as a percentage value.
+ */
+static inline bool rdt_get_mb_table(struct rdt_resource *r)
+{
+ /*
+ * There are no Intel SKUs as of now to support non-linear delay.
+ */
+ pr_info("MBA b/w map not implemented for cpu:%d, model:%d",
+ boot_cpu_data.x86, boot_cpu_data.x86_model);
+
+ return false;
+}
+
+static bool rdt_get_mem_config(struct rdt_resource *r)
+{
+ union cpuid_0x10_3_eax eax;
+ union cpuid_0x10_x_edx edx;
+ u32 ebx, ecx;
+
+ cpuid_count(0x00000010, 3, &eax.full, &ebx, &ecx, &edx.full);
+ r->num_closid = edx.split.cos_max + 1;
+ r->max_delay = eax.split.max_delay + 1;
+ r->default_ctrl = MAX_MBA_BW;
+ if (ecx & MBA_IS_LINEAR) {
+ r->delay_linear = true;
+ r->min_bw = MAX_MBA_BW - r->max_delay;
+ r->bw_gran = MAX_MBA_BW - r->max_delay;
+ } else {
+ if (!rdt_get_mb_table(r))
+ return false;
+ }
+ r->data_width = 3;
+
+ r->capable = true;
+ r->enabled = true;
+
+ return true;
+}
+
static void rdt_get_cache_config(int idx, struct rdt_resource *r)
{
union cpuid_0x10_1_eax eax;
@@ -212,7 +280,8 @@ static inline bool get_rdt_resources(void)
ret = true;
}
- if (boot_cpu_has(X86_FEATURE_MBA))
+ if (boot_cpu_has(X86_FEATURE_MBA) &&
+ rdt_get_mem_config(&rdt_resources_all[RDT_RESOURCE_MBA]))
ret = true;
rdt_init_padding();
@@ -233,6 +302,47 @@ static int get_cache_id(int cpu, int level)
return -1;
}
+/*
+ * Map the memory b/w percentage value to delay values
+ * that can be written to QOS_MSRs.
+ * There are currently no SKUs which support non linear delay values.
+ */
+static u32 delay_bw_map(unsigned long bw, struct rdt_resource *r)
+{
+ if (r->delay_linear)
+ return MAX_MBA_BW - bw;
+
+ WARN_ONCE(1, "Non Linear delay-bw map not supported but queried\n");
+ return r->default_ctrl;
+}
+
+static void
+mba_wrmsr(struct rdt_domain *d, struct msr_param *m, struct rdt_resource *r)
+{
+ int i;
+
+ for (i = m->low; i < m->high; i++) {
+ int idx = cbm_idx(r, i);
+
+ /*
+ * Write the delay value for mba.
+ */
+ wrmsrl(r->msr_base + idx, delay_bw_map(d->ctrl_val[i], r));
+ }
+}
+
+static void
+cqm_wrmsr(struct rdt_domain *d, struct msr_param *m, struct rdt_resource *r)
+{
+ int i;
+
+ for (i = m->low; i < m->high; i++) {
+ int idx = cbm_idx(r, i);
+
+ wrmsrl(r->msr_base + idx, d->ctrl_val[i]);
+ }
+}
+
void rdt_ctrl_update(void *arg)
{
struct msr_param *m = (struct msr_param *)arg;
@@ -291,6 +401,33 @@ static struct rdt_domain *rdt_find_domain(struct rdt_resource *r, int id,
return NULL;
}
+static int domain_setup_ctrlval(struct rdt_resource *r, struct rdt_domain *d)
+{
+ struct msr_param m;
+ u32 *dc;
+ int i;
+
+ dc = kmalloc_array(r->num_closid, sizeof(*d->ctrl_val), GFP_KERNEL);
+ if (!dc)
+ return -ENOMEM;
+
+ d->ctrl_val = dc;
+
+ /*
+ * Initialize the Control MSRs to having no control.
+ * For Cache Allocation: Set all bits in cbm
+ * For Memory Allocation: Set b/w requested to 100
+ */
+ for (i = 0; i < r->num_closid; i++, dc++)
+ *dc = r->default_ctrl;
+
+ m.low = 0;
+ m.high = r->num_closid;
+ r->msr_update(d, &m, r);
+
+ return 0;
+}
+
/*
* domain_add_cpu - Add a cpu to a resource's domain list.
*
@@ -306,7 +443,7 @@ static struct rdt_domain *rdt_find_domain(struct rdt_resource *r, int id,
*/
static void domain_add_cpu(int cpu, struct rdt_resource *r)
{
- int i, id = get_cache_id(cpu, r->cache_level);
+ int id = get_cache_id(cpu, r->cache_level);
struct list_head *add_pos = NULL;
struct rdt_domain *d;
@@ -327,19 +464,11 @@ static void domain_add_cpu(int cpu, struct rdt_resource *r)
d->id = id;
- d->ctrl_val = kmalloc_array(r->num_closid, sizeof(*d->ctrl_val), GFP_KERNEL);
- if (!d->ctrl_val) {
+ if (domain_setup_ctrlval(r, d)) {
kfree(d);
return;
}
- for (i = 0; i < r->num_closid; i++) {
- int idx = cbm_idx(r, i);
-
- d->ctrl_val[i] = r->default_ctrl;
- wrmsrl(r->msr_base + idx, d->ctrl_val[i]);
- }
-
cpumask_set_cpu(cpu, &d->cpu_mask);
list_add_tail(&d->list, add_pos);
}
--
1.9.1
Lot of data structures and functions are named after cache specific
resources(named after cbm, cache etc). In many cases other non cache
resources may need to share the same data structures/functions.
Generalize such naming to prepare to add more resources like memory
bandwidth.
Signed-off-by: Vikas Shivappa <[email protected]>
---
arch/x86/include/asm/intel_rdt.h | 22 +++++++++++-----------
arch/x86/kernel/cpu/intel_rdt.c | 28 ++++++++++++++--------------
arch/x86/kernel/cpu/intel_rdt_rdtgroup.c | 12 ++++++------
arch/x86/kernel/cpu/intel_rdt_schemata.c | 20 ++++++++++----------
4 files changed, 41 insertions(+), 41 deletions(-)
diff --git a/arch/x86/include/asm/intel_rdt.h b/arch/x86/include/asm/intel_rdt.h
index 3f31399..2add1a7 100644
--- a/arch/x86/include/asm/intel_rdt.h
+++ b/arch/x86/include/asm/intel_rdt.h
@@ -74,7 +74,7 @@ struct rftype {
* @capable: Is this feature available on this machine
* @name: Name to use in "schemata" file
* @num_closid: Number of CLOSIDs available
- * @max_cbm: Largest Cache Bit Mask allowed
+ * @default_ctrl: Specifies default cache cbm or mem b/w percent.
* @data_width: Character width of data when displaying
* @min_cbm_bits: Minimum number of consecutive bits to be set
* in a cache bit mask
@@ -92,7 +92,7 @@ struct rdt_resource {
int num_closid;
int cbm_len;
int min_cbm_bits;
- u32 max_cbm;
+ u32 default_ctrl;
int data_width;
struct list_head domains;
int msr_base;
@@ -106,17 +106,17 @@ struct rdt_resource {
* @list: all instances of this resource
* @id: unique id for this instance
* @cpu_mask: which cpus share this resource
- * @cbm: array of cache bit masks (indexed by CLOSID)
- * @new_cbm: new cbm value to be loaded
- * @have_new_cbm: did user provide new_cbm for this domain
+ * @ctrl_val: array of cache or mem ctrl values (indexed by CLOSID)
+ * @new_ctrl: new ctrl value to be loaded
+ * @have_new_ctrl: did user provide new_ctrl for this domain
*/
struct rdt_domain {
struct list_head list;
int id;
struct cpumask cpu_mask;
- u32 *cbm;
- u32 new_cbm;
- bool have_new_cbm;
+ u32 *ctrl_val;
+ u32 new_ctrl;
+ bool have_new_ctrl;
};
/**
@@ -167,8 +167,8 @@ enum {
unsigned int full;
};
-/* CPUID.(EAX=10H, ECX=ResID=1).EDX */
-union cpuid_0x10_1_edx {
+/* CPUID.(EAX=10H, ECX=ResID).EDX */
+union cpuid_0x10_x_edx {
struct {
unsigned int cos_max:16;
} split;
@@ -177,7 +177,7 @@ enum {
DECLARE_PER_CPU_READ_MOSTLY(int, cpu_closid);
-void rdt_cbm_update(void *arg);
+void rdt_ctrl_update(void *arg);
struct rdtgroup *rdtgroup_kn_lock_live(struct kernfs_node *kn);
void rdtgroup_kn_unlock(struct kernfs_node *kn);
ssize_t rdtgroup_schemata_write(struct kernfs_open_file *of,
diff --git a/arch/x86/kernel/cpu/intel_rdt.c b/arch/x86/kernel/cpu/intel_rdt.c
index 70a3307..6507e93 100644
--- a/arch/x86/kernel/cpu/intel_rdt.c
+++ b/arch/x86/kernel/cpu/intel_rdt.c
@@ -125,7 +125,7 @@ static inline bool cache_alloc_hsw_probe(void)
r->num_closid = 4;
r->cbm_len = 20;
- r->max_cbm = max_cbm;
+ r->default_ctrl = max_cbm;
r->min_cbm_bits = 2;
r->capable = true;
r->enabled = true;
@@ -136,16 +136,16 @@ static inline bool cache_alloc_hsw_probe(void)
return false;
}
-static void rdt_get_config(int idx, struct rdt_resource *r)
+static void rdt_get_cache_config(int idx, struct rdt_resource *r)
{
union cpuid_0x10_1_eax eax;
- union cpuid_0x10_1_edx edx;
+ union cpuid_0x10_x_edx edx;
u32 ebx, ecx;
cpuid_count(0x00000010, idx, &eax.full, &ebx, &ecx, &edx.full);
r->num_closid = edx.split.cos_max + 1;
r->cbm_len = eax.split.cbm_len + 1;
- r->max_cbm = BIT_MASK(eax.split.cbm_len + 1) - 1;
+ r->default_ctrl = BIT_MASK(eax.split.cbm_len + 1) - 1;
r->data_width = (r->cbm_len + 3) / 4;
r->capable = true;
r->enabled = true;
@@ -158,7 +158,7 @@ static void rdt_get_cdp_l3_config(int type)
r->num_closid = r_l3->num_closid / 2;
r->cbm_len = r_l3->cbm_len;
- r->max_cbm = r_l3->max_cbm;
+ r->default_ctrl = r_l3->default_ctrl;
r->data_width = (r->cbm_len + 3) / 4;
r->capable = true;
/*
@@ -199,7 +199,7 @@ static inline bool get_rdt_resources(void)
return false;
if (boot_cpu_has(X86_FEATURE_CAT_L3)) {
- rdt_get_config(1, &rdt_resources_all[RDT_RESOURCE_L3]);
+ rdt_get_cache_config(1, &rdt_resources_all[RDT_RESOURCE_L3]);
if (boot_cpu_has(X86_FEATURE_CDP_L3)) {
rdt_get_cdp_l3_config(RDT_RESOURCE_L3DATA);
rdt_get_cdp_l3_config(RDT_RESOURCE_L3CODE);
@@ -208,7 +208,7 @@ static inline bool get_rdt_resources(void)
}
if (boot_cpu_has(X86_FEATURE_CAT_L2)) {
/* CPUID 0x10.2 fields are same format at 0x10.1 */
- rdt_get_config(2, &rdt_resources_all[RDT_RESOURCE_L2]);
+ rdt_get_cache_config(2, &rdt_resources_all[RDT_RESOURCE_L2]);
ret = true;
}
@@ -230,7 +230,7 @@ static int get_cache_id(int cpu, int level)
return -1;
}
-void rdt_cbm_update(void *arg)
+void rdt_ctrl_update(void *arg)
{
struct msr_param *m = (struct msr_param *)arg;
struct rdt_resource *r = m->res;
@@ -251,7 +251,7 @@ void rdt_cbm_update(void *arg)
for (i = m->low; i < m->high; i++) {
int idx = cbm_idx(r, i);
- wrmsrl(r->msr_base + idx, d->cbm[i]);
+ wrmsrl(r->msr_base + idx, d->ctrl_val[i]);
}
}
@@ -324,8 +324,8 @@ static void domain_add_cpu(int cpu, struct rdt_resource *r)
d->id = id;
- d->cbm = kmalloc_array(r->num_closid, sizeof(*d->cbm), GFP_KERNEL);
- if (!d->cbm) {
+ d->ctrl_val = kmalloc_array(r->num_closid, sizeof(*d->ctrl_val), GFP_KERNEL);
+ if (!d->ctrl_val) {
kfree(d);
return;
}
@@ -333,8 +333,8 @@ static void domain_add_cpu(int cpu, struct rdt_resource *r)
for (i = 0; i < r->num_closid; i++) {
int idx = cbm_idx(r, i);
- d->cbm[i] = r->max_cbm;
- wrmsrl(r->msr_base + idx, d->cbm[i]);
+ d->ctrl_val[i] = r->default_ctrl;
+ wrmsrl(r->msr_base + idx, d->ctrl_val[i]);
}
cpumask_set_cpu(cpu, &d->cpu_mask);
@@ -354,7 +354,7 @@ static void domain_remove_cpu(int cpu, struct rdt_resource *r)
cpumask_clear_cpu(cpu, &d->cpu_mask);
if (cpumask_empty(&d->cpu_mask)) {
- kfree(d->cbm);
+ kfree(d->ctrl_val);
list_del(&d->list);
kfree(d);
}
diff --git a/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c b/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c
index 77e88c0..5ffe637 100644
--- a/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c
+++ b/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c
@@ -498,12 +498,12 @@ static int rdt_num_closids_show(struct kernfs_open_file *of,
return 0;
}
-static int rdt_cbm_mask_show(struct kernfs_open_file *of,
+static int rdt_default_ctrl_show(struct kernfs_open_file *of,
struct seq_file *seq, void *v)
{
struct rdt_resource *r = of->kn->parent->priv;
- seq_printf(seq, "%x\n", r->max_cbm);
+ seq_printf(seq, "%x\n", r->default_ctrl);
return 0;
}
@@ -530,7 +530,7 @@ static int rdt_min_cbm_bits_show(struct kernfs_open_file *of,
.name = "cbm_mask",
.mode = 0444,
.kf_ops = &rdtgroup_kf_single_ops,
- .seq_show = rdt_cbm_mask_show,
+ .seq_show = rdt_default_ctrl_show,
},
{
.name = "min_cbm_bits",
@@ -803,14 +803,14 @@ static int reset_all_ctrls(struct rdt_resource *r, u32 closid, u32 count)
cpumask_set_cpu(cpumask_any(&d->cpu_mask), cpu_mask);
for (i = closid; i < closid + count; i++)
- d->cbm[i] = r->max_cbm;
+ d->ctrl_val[i] = r->default_ctrl;
}
cpu = get_cpu();
/* Update CBM on this cpu if it's in cpu_mask. */
if (cpumask_test_cpu(cpu, cpu_mask))
- rdt_cbm_update(&msr_param);
+ rdt_ctrl_update(&msr_param);
/* Update CBM on all other cpus in cpu_mask. */
- smp_call_function_many(cpu_mask, rdt_cbm_update, &msr_param, 1);
+ smp_call_function_many(cpu_mask, rdt_ctrl_update, &msr_param, 1);
put_cpu();
free_cpumask_var(cpu_mask);
diff --git a/arch/x86/kernel/cpu/intel_rdt_schemata.c b/arch/x86/kernel/cpu/intel_rdt_schemata.c
index 8594db4..7695179 100644
--- a/arch/x86/kernel/cpu/intel_rdt_schemata.c
+++ b/arch/x86/kernel/cpu/intel_rdt_schemata.c
@@ -38,7 +38,7 @@ static bool cbm_validate(unsigned long var, struct rdt_resource *r)
{
unsigned long first_bit, zero_bit;
- if (var == 0 || var > r->max_cbm)
+ if (var == 0 || var > r->default_ctrl)
return false;
first_bit = find_first_bit(&var, r->cbm_len);
@@ -61,7 +61,7 @@ static int parse_cbm(char *buf, struct rdt_resource *r, struct rdt_domain *d)
unsigned long data;
int ret;
- if (d->have_new_cbm)
+ if (d->have_new_ctrl)
return -EINVAL;
ret = kstrtoul(buf, 16, &data);
@@ -69,8 +69,8 @@ static int parse_cbm(char *buf, struct rdt_resource *r, struct rdt_domain *d)
return ret;
if (!cbm_validate(data, r))
return -EINVAL;
- d->new_cbm = data;
- d->have_new_cbm = true;
+ d->new_ctrl = data;
+ d->have_new_ctrl = true;
return 0;
}
@@ -119,9 +119,9 @@ static int update_domains(struct rdt_resource *r, int closid)
msr_param.res = r;
list_for_each_entry(d, &r->domains, list) {
- if (d->have_new_cbm && d->new_cbm != d->cbm[closid]) {
+ if (d->have_new_ctrl && d->new_ctrl != d->ctrl_val[closid]) {
cpumask_set_cpu(cpumask_any(&d->cpu_mask), cpu_mask);
- d->cbm[closid] = d->new_cbm;
+ d->ctrl_val[closid] = d->new_ctrl;
}
}
if (cpumask_empty(cpu_mask))
@@ -129,9 +129,9 @@ static int update_domains(struct rdt_resource *r, int closid)
cpu = get_cpu();
/* Update CBM on this cpu if it's in cpu_mask. */
if (cpumask_test_cpu(cpu, cpu_mask))
- rdt_cbm_update(&msr_param);
+ rdt_ctrl_update(&msr_param);
/* Update CBM on other cpus. */
- smp_call_function_many(cpu_mask, rdt_cbm_update, &msr_param, 1);
+ smp_call_function_many(cpu_mask, rdt_ctrl_update, &msr_param, 1);
put_cpu();
done:
@@ -164,7 +164,7 @@ ssize_t rdtgroup_schemata_write(struct kernfs_open_file *of,
for_each_enabled_rdt_resource(r)
list_for_each_entry(dom, &r->domains, list)
- dom->have_new_cbm = false;
+ dom->have_new_ctrl = false;
while ((tok = strsep(&buf, "\n")) != NULL) {
resname = strsep(&tok, ":");
@@ -208,7 +208,7 @@ static void show_doms(struct seq_file *s, struct rdt_resource *r, int closid)
if (sep)
seq_puts(s, ";");
seq_printf(s, "%d=%0*x", dom->id, max_data_width,
- dom->cbm[closid]);
+ dom->ctrl_val[closid]);
sep = true;
}
seq_puts(s, "\n");
--
1.9.1
Detect MBA feature if CPUID.(EAX=10H, ECX=0):EBX.L2[bit 3] = 1.
Add supporting data structures to detect feature details which is done
in later patch using CPUID with EAX=10H, ECX= 3.
Signed-off-by: Vikas Shivappa <[email protected]>
---
arch/x86/include/asm/cpufeatures.h | 2 ++
arch/x86/include/asm/intel_rdt.h | 8 ++++++++
arch/x86/kernel/cpu/intel_rdt.c | 3 +++
arch/x86/kernel/cpu/scattered.c | 1 +
4 files changed, 14 insertions(+)
diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
index b04bb6d..25d7f52 100644
--- a/arch/x86/include/asm/cpufeatures.h
+++ b/arch/x86/include/asm/cpufeatures.h
@@ -201,6 +201,8 @@
#define X86_FEATURE_AVX512_4VNNIW (7*32+16) /* AVX-512 Neural Network Instructions */
#define X86_FEATURE_AVX512_4FMAPS (7*32+17) /* AVX-512 Multiply Accumulation Single precision */
+#define X86_FEATURE_MBA ( 7*32+18) /* Memory Bandwidth Allocation */
+
/* Virtualization flags: Linux defined, word 8 */
#define X86_FEATURE_TPR_SHADOW ( 8*32+ 0) /* Intel TPR Shadow */
#define X86_FEATURE_VNMI ( 8*32+ 1) /* Intel Virtual NMI */
diff --git a/arch/x86/include/asm/intel_rdt.h b/arch/x86/include/asm/intel_rdt.h
index 2add1a7..4c94f18 100644
--- a/arch/x86/include/asm/intel_rdt.h
+++ b/arch/x86/include/asm/intel_rdt.h
@@ -167,6 +167,14 @@ enum {
unsigned int full;
};
+/* CPUID.(EAX=10H, ECX=ResID=3).EAX */
+union cpuid_0x10_3_eax {
+ struct {
+ unsigned int max_delay:12;
+ } split;
+ unsigned int full;
+};
+
/* CPUID.(EAX=10H, ECX=ResID).EDX */
union cpuid_0x10_x_edx {
struct {
diff --git a/arch/x86/kernel/cpu/intel_rdt.c b/arch/x86/kernel/cpu/intel_rdt.c
index 6507e93..c4cf2e8 100644
--- a/arch/x86/kernel/cpu/intel_rdt.c
+++ b/arch/x86/kernel/cpu/intel_rdt.c
@@ -212,6 +212,9 @@ static inline bool get_rdt_resources(void)
ret = true;
}
+ if (boot_cpu_has(X86_FEATURE_MBA))
+ ret = true;
+
rdt_init_padding();
return ret;
diff --git a/arch/x86/kernel/cpu/scattered.c b/arch/x86/kernel/cpu/scattered.c
index d979406..23c2350 100644
--- a/arch/x86/kernel/cpu/scattered.c
+++ b/arch/x86/kernel/cpu/scattered.c
@@ -27,6 +27,7 @@ struct cpuid_bit {
{ X86_FEATURE_CAT_L3, CPUID_EBX, 1, 0x00000010, 0 },
{ X86_FEATURE_CAT_L2, CPUID_EBX, 2, 0x00000010, 0 },
{ X86_FEATURE_CDP_L3, CPUID_ECX, 2, 0x00000010, 1 },
+ { X86_FEATURE_MBA, CPUID_EBX, 3, 0x00000010, 0 },
{ X86_FEATURE_HW_PSTATE, CPUID_EDX, 7, 0x80000007, 0 },
{ X86_FEATURE_CPB, CPUID_EDX, 9, 0x80000007, 0 },
{ X86_FEATURE_PROC_FEEDBACK, CPUID_EDX, 11, 0x80000007, 0 },
--
1.9.1
On Mon, 3 Apr 2017, Tracy Smith wrote:
> Hi All,
>
> No JTAG available and need to understand why Linux 4.8.3 doesn't boot on a
> x86_64 corei7-64. Hangs at the typical "Starting kernel" location after
> the last message of the U-boot. The bootcmd is given below.
Do you see the issue when you apply MBA patches . It seems like you use 4.8
kernel but the mba patches are dependent on :
https://marc.info/?l=linux-kernel&m=149125583824700
which is applied on 4.11-rc4.
>
> See a fault FFS and a message indicating the image didn't load after
> failover.
>
> 1) How can I verify if the kernel image was loaded from uboot
> 2) What is this fault?
> 3) Has the bootargs or bootcmd changed between 4.1 and 4.8.3?
> 4) If the boot cmd/arg has changed, what should the boot cmd/arg be for
> 4.8.3 to boot on x86_64 corei7-64?
>
> Initial RAM disk at linear address 0x20000000, size 11638378 bytes
> Kernel command line: "BOOT_IMAGE=/imgx/bzImage LABEL=BOOT root=/dev/ram0
> imgmnt=/media/sda2 imgdir=imgx img=image.rootfs rootdelay=2 slub_debug=F
> console=ttyS1,115200 bootcount=1 bootcount_addr=0xa4000
> acpi_enforce_resources=lax pram_size=0x800000 pram_addr=10000000
> pram_loc=ddr crashkernel=128M memmap=0x800000$0x10000000 "
> EFI table at 683392c0, mmap 68339300, mmap size 4c0, version 1, descr. size
> 0x30
>
> Starting kernel ...
>
> Timer summary in microseconds:
> Mark Elapsed Stage
> 0 0 reset
> 1 1 board_init_r
> 105 104 board_init_f
> 10,180,048 10,179,943 id=64
> 10,221,985 41,937 id=65
> 10,356,645 134,660 main_loop
> 12,366,521 2,009,876 usb_start
> 18,747,284 6,380,763 start_kernel
> Accumulated time:
> 10,162,689 ahci
>
> On a 4.1.26-yocto-standard #1 SMP it boots with no issues. Basically same
> .config used in both cases except for anything deprecated between 4.1 and
> 4.8.3.
>
> root@:~# cat /proc/cmdline
> BOOT_IMAGE=/imgy/bzImage LABEL=BOOT root=/dev/ram0 imgmnt=/media/sda2
> imgdir=imgy img=image.rootfs rootdelay=2 slub_debug=F console=ttyS1,115200
> fault=FFS bootcount=3 bootcount_addr=0xa4000 acpi_enforce_resources=lax
> pram_size=0x800000 pram_addr=10000000 pram_loc=ddr crashkernel=128M
> memmap=0x800000$0x10000000
> root@CLX3001:~# cat /proc/consoles
> ttyS1 -W- (EC p a) 4:65
> netcon0 -W- (E )
>
> thx,
> Tray
>
Apologies, unrelated to MBA. Resent later with a changed subject line.
On Tue, Apr 4, 2017 at 1:50 PM, Shivappa Vikas <[email protected]> wrote:
>
>
>
> On Mon, 3 Apr 2017, Tracy Smith wrote:
>
>> Hi All,
>>
>> No JTAG available and need to understand why Linux 4.8.3 doesn't boot on a
>> x86_64 corei7-64. Hangs at the typical "Starting kernel" location after
>> the last message of the U-boot. The bootcmd is given below.
>
>
> Do you see the issue when you apply MBA patches . It seems like you use 4.8
> kernel but the mba patches are dependent on :
> https://marc.info/?l=linux-kernel&m=149125583824700
>
> which is applied on 4.11-rc4.
>
>
>>
>> See a fault FFS and a message indicating the image didn't load after
>> failover.
>>
>> 1) How can I verify if the kernel image was loaded from uboot
>> 2) What is this fault?
>> 3) Has the bootargs or bootcmd changed between 4.1 and 4.8.3?
>> 4) If the boot cmd/arg has changed, what should the boot cmd/arg be for
>> 4.8.3 to boot on x86_64 corei7-64?
>>
>> Initial RAM disk at linear address 0x20000000, size 11638378 bytes
>> Kernel command line: "BOOT_IMAGE=/imgx/bzImage LABEL=BOOT root=/dev/ram0
>> imgmnt=/media/sda2 imgdir=imgx img=image.rootfs rootdelay=2 slub_debug=F
>> console=ttyS1,115200 bootcount=1 bootcount_addr=0xa4000
>> acpi_enforce_resources=lax pram_size=0x800000 pram_addr=10000000
>> pram_loc=ddr crashkernel=128M memmap=0x800000$0x10000000 "
>> EFI table at 683392c0, mmap 68339300, mmap size 4c0, version 1, descr.
>> size
>> 0x30
>>
>> Starting kernel ...
>>
>> Timer summary in microseconds:
>> Mark Elapsed Stage
>> 0 0 reset
>> 1 1 board_init_r
>> 105 104 board_init_f
>> 10,180,048 10,179,943 id=64
>> 10,221,985 41,937 id=65
>> 10,356,645 134,660 main_loop
>> 12,366,521 2,009,876 usb_start
>> 18,747,284 6,380,763 start_kernel
>> Accumulated time:
>> 10,162,689 ahci
>>
>> On a 4.1.26-yocto-standard #1 SMP it boots with no issues. Basically same
>> .config used in both cases except for anything deprecated between 4.1 and
>> 4.8.3.
>>
>> root@:~# cat /proc/cmdline
>> BOOT_IMAGE=/imgy/bzImage LABEL=BOOT root=/dev/ram0 imgmnt=/media/sda2
>> imgdir=imgy img=image.rootfs rootdelay=2 slub_debug=F console=ttyS1,115200
>> fault=FFS bootcount=3 bootcount_addr=0xa4000 acpi_enforce_resources=lax
>> pram_size=0x800000 pram_addr=10000000 pram_loc=ddr crashkernel=128M
>> memmap=0x800000$0x10000000
>> root@CLX3001:~# cat /proc/consoles
>> ttyS1 -W- (EC p a) 4:65
>> netcon0 -W- (E )
>>
>> thx,
>> Tray
>>
>
--
Confidentiality notice: This e-mail message, including any
attachments, may contain legally privileged and/or confidential
information. If you are not the intended recipient(s), please
immediately notify the sender and delete this e-mail message.
On Mon, 3 Apr 2017, Vikas Shivappa wrote:
> +Cache resource(L3/L2) subdirectory contains the following files:
>
> -"num_closids": The number of CLOSIDs which are valid for this
> - resource. The kernel uses the smallest number of
> - CLOSIDs of all enabled resources as limit.
> +"num_closids": The number of CLOSIDs which are valid for this
> + resource. The kernel uses the smallest number of
> + CLOSIDs of all enabled resources as limit.
>
> -"cbm_mask": The bitmask which is valid for this resource. This
> - mask is equivalent to 100%.
> +"cbm_mask": The bitmask which is valid for this resource.
> + This mask is equivalent to 100%.
>
> -"min_cbm_bits": The minimum number of consecutive bits which must be
> - set when writing a mask.
> +"min_cbm_bits": The minimum number of consecutive bits which
> + must be set when writing a mask.
>
> +Memory bandwitdh(MB) subdirectory contains the following files:
This has a num_closids file as well, right?
Thanks,
tglx
On Mon, 3 Apr 2017, Vikas Shivappa wrote:
>
> /**
> + * struct rdt_domain - group of cpus sharing an RDT resource
> + * @list: all instances of this resource
> + * @id: unique id for this instance
> + * @cpu_mask: which cpus share this resource
> + * @ctrl_val: array of cache or mem ctrl values (indexed by CLOSID)
> + * @new_cbm: new cbm value to be loaded
> + * @have_new_cbm: did user provide new_cbm for this domain
The version which you removed below has the kernel-doc comments correct ....
> +/**
> * struct rdt_resource - attributes of an RDT resource
> * @enabled: Is this feature enabled on this machine
> * @capable: Is this feature available on this machine
> @@ -78,6 +109,16 @@ struct rftype {
> * @data_width: Character width of data when displaying
> * @min_cbm_bits: Minimum number of consecutive bits to be set
> * in a cache bit mask
> + * @msr_update: Function pointer to update QOS MSRs
> + * @max_delay: Max throttle delay. Delay is the hardware
> + * understandable value for memory bandwidth.
> + * @min_bw: Minimum memory bandwidth percentage user
> + * can request
> + * @bw_gran: Granularity at which the memory bandwidth
> + * is allocated
> + * @delay_linear: True if memory b/w delay is in linear scale
> + * @mb_map: Mapping of memory b/w percentage to
> + * memory b/w delay values
> * @domains: All domains for this resource
> * @msr_base: Base MSR address for CBMs
> * @cache_level: Which cache level defines scope of this domain
> @@ -94,6 +135,14 @@ struct rdt_resource {
> int min_cbm_bits;
> u32 default_ctrl;
> int data_width;
> + void (*msr_update) (struct rdt_domain *d, struct msr_param *m,
> + struct rdt_resource *r);
> + u32 max_delay;
> + u32 min_bw;
> + u32 bw_gran;
> + u32 delay_linear;
> + u32 *mb_map;
I don't know what other weird controls will be added over time, but we are
probably better off to have
struct cache_ctrl {
int cbm_len;
int min_cbm_bits;
};
struct mba_ctrl {
u32 max_delay;
u32 min_bw;
u32 bw_gran;
u32 delay_linear;
u32 *mb_map;
};
and in then in struct rdt_resource:
<common fields>
union {
struct cache_ctrl foo;
struct mba_ctrl bla;
} ctrl;
That avoids that rdt_resource becomes a hodgepodge of unrelated or even
contradicting fields.
Hmm?
Thanks,
tglx
On Wed, 5 Apr 2017, Thomas Gleixner wrote:
> On Mon, 3 Apr 2017, Vikas Shivappa wrote:
>> +Cache resource(L3/L2) subdirectory contains the following files:
>>
>> -"num_closids": The number of CLOSIDs which are valid for this
>> - resource. The kernel uses the smallest number of
>> - CLOSIDs of all enabled resources as limit.
>> +"num_closids": The number of CLOSIDs which are valid for this
>> + resource. The kernel uses the smallest number of
>> + CLOSIDs of all enabled resources as limit.
>>
>> -"cbm_mask": The bitmask which is valid for this resource. This
>> - mask is equivalent to 100%.
>> +"cbm_mask": The bitmask which is valid for this resource.
>> + This mask is equivalent to 100%.
>>
>> -"min_cbm_bits": The minimum number of consecutive bits which must be
>> - set when writing a mask.
>> +"min_cbm_bits": The minimum number of consecutive bits which
>> + must be set when writing a mask.
>>
>> +Memory bandwitdh(MB) subdirectory contains the following files:
>
> This has a num_closids file as well, right?
Yes. copy below - just indented same text
-"num_closids": The number of CLOSIDs which are valid for this
- resource. The kernel uses the smallest number of
- CLOSIDs of all enabled resources as limit.
+"num_closids": The number of CLOSIDs which are valid for this
+ resource. The kernel uses the smallest number of
+ CLOSIDs of all enabled resources as limit.
Thanks,
Vikas
>
> Thanks,
>
> tglx
>
On Wed, 5 Apr 2017, Thomas Gleixner wrote:
> On Mon, 3 Apr 2017, Vikas Shivappa wrote:
>>
>> /**
>> + * struct rdt_domain - group of cpus sharing an RDT resource
>> + * @list: all instances of this resource
>> + * @id: unique id for this instance
>> + * @cpu_mask: which cpus share this resource
>> + * @ctrl_val: array of cache or mem ctrl values (indexed by CLOSID)
>> + * @new_cbm: new cbm value to be loaded
>> + * @have_new_cbm: did user provide new_cbm for this domain
>
> The version which you removed below has the kernel-doc comments correct ....
Will fix
>
>> +/**
>> * struct rdt_resource - attributes of an RDT resource
>> * @enabled: Is this feature enabled on this machine
>> * @capable: Is this feature available on this machine
>> @@ -78,6 +109,16 @@ struct rftype {
>> * @data_width: Character width of data when displaying
>> * @min_cbm_bits: Minimum number of consecutive bits to be set
>> * in a cache bit mask
>> + * @msr_update: Function pointer to update QOS MSRs
>> + * @max_delay: Max throttle delay. Delay is the hardware
>> + * understandable value for memory bandwidth.
>> + * @min_bw: Minimum memory bandwidth percentage user
>> + * can request
>> + * @bw_gran: Granularity at which the memory bandwidth
>> + * is allocated
>> + * @delay_linear: True if memory b/w delay is in linear scale
>> + * @mb_map: Mapping of memory b/w percentage to
>> + * memory b/w delay values
>> * @domains: All domains for this resource
>> * @msr_base: Base MSR address for CBMs
>> * @cache_level: Which cache level defines scope of this domain
>> @@ -94,6 +135,14 @@ struct rdt_resource {
>> int min_cbm_bits;
>> u32 default_ctrl;
>> int data_width;
>> + void (*msr_update) (struct rdt_domain *d, struct msr_param *m,
>> + struct rdt_resource *r);
>> + u32 max_delay;
>> + u32 min_bw;
>> + u32 bw_gran;
>> + u32 delay_linear;
>> + u32 *mb_map;
>
> I don't know what other weird controls will be added over time, but we are
> probably better off to have
>
> struct cache_ctrl {
> int cbm_len;
> int min_cbm_bits;
> };
>
> struct mba_ctrl {
> u32 max_delay;
> u32 min_bw;
> u32 bw_gran;
> u32 delay_linear;
> u32 *mb_map;
> };
>
> and in then in struct rdt_resource:
>
> <common fields>
> union {
> struct cache_ctrl foo;
> struct mba_ctrl bla;
> } ctrl;
>
>
> That avoids that rdt_resource becomes a hodgepodge of unrelated or even
> contradicting fields.
>
> Hmm?
Ok, makes sense. Will fix. Thought of a union when i had added a couple fields
and given up but its grown a lot now.
Thanks,
Vikas
>
> Thanks,
>
> tglx
>