Sending another version of MBA patch series with changes to V3 version
as per Thomas feedback here:
https://marc.info/?l=linux-kernel&m=149125664024881
Changes:
- Fixed the wrong names in struct document for rdt_domain
- Changed the mba and cache ctrl values to have seperate structs and put
them in a union in rdt_resource structure.
patch applies on tip x86/cpus
[PATCH 1/8] Documentation, x86: Intel Memory bandwidth allocation
[PATCH 2/8] x86/intel_rdt/mba: Generalize the naming to get ready for
[PATCH 3/8] x86/intel_rdt/mba: Memory b/w allocation feature detect
[PATCH 4/8] x86/intel_rct/mba: Add MBA structures and initialize MBA
[PATCH 5/8] x86/intel_rdt: Prep to add info files for MBA
[PATCH 6/8] x86/intel_rdt/mba: Add info directory files for MBA
[PATCH 7/8] x86/intel_rdt: Prep to add schemata file for MBA
[PATCH 8/8] x86/intel_rdt/mba: Add schemata file support for MBA
Lot of data structures and functions are named after cache specific
resources(named after cbm, cache etc). In many cases other non cache
resources may need to share the same data structures/functions.
Generalize such naming to prepare to add more resources like memory
bandwidth.
Signed-off-by: Vikas Shivappa <[email protected]>
---
arch/x86/include/asm/intel_rdt.h | 22 +++++++++++-----------
arch/x86/kernel/cpu/intel_rdt.c | 28 ++++++++++++++--------------
arch/x86/kernel/cpu/intel_rdt_rdtgroup.c | 16 ++++++++--------
arch/x86/kernel/cpu/intel_rdt_schemata.c | 20 ++++++++++----------
4 files changed, 43 insertions(+), 43 deletions(-)
diff --git a/arch/x86/include/asm/intel_rdt.h b/arch/x86/include/asm/intel_rdt.h
index 3f31399..2add1a7 100644
--- a/arch/x86/include/asm/intel_rdt.h
+++ b/arch/x86/include/asm/intel_rdt.h
@@ -74,7 +74,7 @@ struct rftype {
* @capable: Is this feature available on this machine
* @name: Name to use in "schemata" file
* @num_closid: Number of CLOSIDs available
- * @max_cbm: Largest Cache Bit Mask allowed
+ * @default_ctrl: Specifies default cache cbm or mem b/w percent.
* @data_width: Character width of data when displaying
* @min_cbm_bits: Minimum number of consecutive bits to be set
* in a cache bit mask
@@ -92,7 +92,7 @@ struct rdt_resource {
int num_closid;
int cbm_len;
int min_cbm_bits;
- u32 max_cbm;
+ u32 default_ctrl;
int data_width;
struct list_head domains;
int msr_base;
@@ -106,17 +106,17 @@ struct rdt_resource {
* @list: all instances of this resource
* @id: unique id for this instance
* @cpu_mask: which cpus share this resource
- * @cbm: array of cache bit masks (indexed by CLOSID)
- * @new_cbm: new cbm value to be loaded
- * @have_new_cbm: did user provide new_cbm for this domain
+ * @ctrl_val: array of cache or mem ctrl values (indexed by CLOSID)
+ * @new_ctrl: new ctrl value to be loaded
+ * @have_new_ctrl: did user provide new_ctrl for this domain
*/
struct rdt_domain {
struct list_head list;
int id;
struct cpumask cpu_mask;
- u32 *cbm;
- u32 new_cbm;
- bool have_new_cbm;
+ u32 *ctrl_val;
+ u32 new_ctrl;
+ bool have_new_ctrl;
};
/**
@@ -167,8 +167,8 @@ enum {
unsigned int full;
};
-/* CPUID.(EAX=10H, ECX=ResID=1).EDX */
-union cpuid_0x10_1_edx {
+/* CPUID.(EAX=10H, ECX=ResID).EDX */
+union cpuid_0x10_x_edx {
struct {
unsigned int cos_max:16;
} split;
@@ -177,7 +177,7 @@ enum {
DECLARE_PER_CPU_READ_MOSTLY(int, cpu_closid);
-void rdt_cbm_update(void *arg);
+void rdt_ctrl_update(void *arg);
struct rdtgroup *rdtgroup_kn_lock_live(struct kernfs_node *kn);
void rdtgroup_kn_unlock(struct kernfs_node *kn);
ssize_t rdtgroup_schemata_write(struct kernfs_open_file *of,
diff --git a/arch/x86/kernel/cpu/intel_rdt.c b/arch/x86/kernel/cpu/intel_rdt.c
index 70a3307..6507e93 100644
--- a/arch/x86/kernel/cpu/intel_rdt.c
+++ b/arch/x86/kernel/cpu/intel_rdt.c
@@ -125,7 +125,7 @@ static inline bool cache_alloc_hsw_probe(void)
r->num_closid = 4;
r->cbm_len = 20;
- r->max_cbm = max_cbm;
+ r->default_ctrl = max_cbm;
r->min_cbm_bits = 2;
r->capable = true;
r->enabled = true;
@@ -136,16 +136,16 @@ static inline bool cache_alloc_hsw_probe(void)
return false;
}
-static void rdt_get_config(int idx, struct rdt_resource *r)
+static void rdt_get_cache_config(int idx, struct rdt_resource *r)
{
union cpuid_0x10_1_eax eax;
- union cpuid_0x10_1_edx edx;
+ union cpuid_0x10_x_edx edx;
u32 ebx, ecx;
cpuid_count(0x00000010, idx, &eax.full, &ebx, &ecx, &edx.full);
r->num_closid = edx.split.cos_max + 1;
r->cbm_len = eax.split.cbm_len + 1;
- r->max_cbm = BIT_MASK(eax.split.cbm_len + 1) - 1;
+ r->default_ctrl = BIT_MASK(eax.split.cbm_len + 1) - 1;
r->data_width = (r->cbm_len + 3) / 4;
r->capable = true;
r->enabled = true;
@@ -158,7 +158,7 @@ static void rdt_get_cdp_l3_config(int type)
r->num_closid = r_l3->num_closid / 2;
r->cbm_len = r_l3->cbm_len;
- r->max_cbm = r_l3->max_cbm;
+ r->default_ctrl = r_l3->default_ctrl;
r->data_width = (r->cbm_len + 3) / 4;
r->capable = true;
/*
@@ -199,7 +199,7 @@ static inline bool get_rdt_resources(void)
return false;
if (boot_cpu_has(X86_FEATURE_CAT_L3)) {
- rdt_get_config(1, &rdt_resources_all[RDT_RESOURCE_L3]);
+ rdt_get_cache_config(1, &rdt_resources_all[RDT_RESOURCE_L3]);
if (boot_cpu_has(X86_FEATURE_CDP_L3)) {
rdt_get_cdp_l3_config(RDT_RESOURCE_L3DATA);
rdt_get_cdp_l3_config(RDT_RESOURCE_L3CODE);
@@ -208,7 +208,7 @@ static inline bool get_rdt_resources(void)
}
if (boot_cpu_has(X86_FEATURE_CAT_L2)) {
/* CPUID 0x10.2 fields are same format at 0x10.1 */
- rdt_get_config(2, &rdt_resources_all[RDT_RESOURCE_L2]);
+ rdt_get_cache_config(2, &rdt_resources_all[RDT_RESOURCE_L2]);
ret = true;
}
@@ -230,7 +230,7 @@ static int get_cache_id(int cpu, int level)
return -1;
}
-void rdt_cbm_update(void *arg)
+void rdt_ctrl_update(void *arg)
{
struct msr_param *m = (struct msr_param *)arg;
struct rdt_resource *r = m->res;
@@ -251,7 +251,7 @@ void rdt_cbm_update(void *arg)
for (i = m->low; i < m->high; i++) {
int idx = cbm_idx(r, i);
- wrmsrl(r->msr_base + idx, d->cbm[i]);
+ wrmsrl(r->msr_base + idx, d->ctrl_val[i]);
}
}
@@ -324,8 +324,8 @@ static void domain_add_cpu(int cpu, struct rdt_resource *r)
d->id = id;
- d->cbm = kmalloc_array(r->num_closid, sizeof(*d->cbm), GFP_KERNEL);
- if (!d->cbm) {
+ d->ctrl_val = kmalloc_array(r->num_closid, sizeof(*d->ctrl_val), GFP_KERNEL);
+ if (!d->ctrl_val) {
kfree(d);
return;
}
@@ -333,8 +333,8 @@ static void domain_add_cpu(int cpu, struct rdt_resource *r)
for (i = 0; i < r->num_closid; i++) {
int idx = cbm_idx(r, i);
- d->cbm[i] = r->max_cbm;
- wrmsrl(r->msr_base + idx, d->cbm[i]);
+ d->ctrl_val[i] = r->default_ctrl;
+ wrmsrl(r->msr_base + idx, d->ctrl_val[i]);
}
cpumask_set_cpu(cpu, &d->cpu_mask);
@@ -354,7 +354,7 @@ static void domain_remove_cpu(int cpu, struct rdt_resource *r)
cpumask_clear_cpu(cpu, &d->cpu_mask);
if (cpumask_empty(&d->cpu_mask)) {
- kfree(d->cbm);
+ kfree(d->ctrl_val);
list_del(&d->list);
kfree(d);
}
diff --git a/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c b/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c
index c05509d..c4b6972 100644
--- a/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c
+++ b/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c
@@ -498,12 +498,12 @@ static int rdt_num_closids_show(struct kernfs_open_file *of,
return 0;
}
-static int rdt_cbm_mask_show(struct kernfs_open_file *of,
+static int rdt_default_ctrl_show(struct kernfs_open_file *of,
struct seq_file *seq, void *v)
{
struct rdt_resource *r = of->kn->parent->priv;
- seq_printf(seq, "%x\n", r->max_cbm);
+ seq_printf(seq, "%x\n", r->default_ctrl);
return 0;
}
@@ -530,7 +530,7 @@ static int rdt_min_cbm_bits_show(struct kernfs_open_file *of,
.name = "cbm_mask",
.mode = 0444,
.kf_ops = &rdtgroup_kf_single_ops,
- .seq_show = rdt_cbm_mask_show,
+ .seq_show = rdt_default_ctrl_show,
},
{
.name = "min_cbm_bits",
@@ -780,7 +780,7 @@ static struct dentry *rdt_mount(struct file_system_type *fs_type,
return dentry;
}
-static int reset_all_cbms(struct rdt_resource *r)
+static int reset_all_ctrls(struct rdt_resource *r)
{
struct msr_param msr_param;
cpumask_var_t cpu_mask;
@@ -803,14 +803,14 @@ static int reset_all_cbms(struct rdt_resource *r)
cpumask_set_cpu(cpumask_any(&d->cpu_mask), cpu_mask);
for (i = 0; i < r->num_closid; i++)
- d->cbm[i] = r->max_cbm;
+ d->ctrl_val[i] = r->default_ctrl;
}
cpu = get_cpu();
/* Update CBM on this cpu if it's in cpu_mask. */
if (cpumask_test_cpu(cpu, cpu_mask))
- rdt_cbm_update(&msr_param);
+ rdt_ctrl_update(&msr_param);
/* Update CBM on all other cpus in cpu_mask. */
- smp_call_function_many(cpu_mask, rdt_cbm_update, &msr_param, 1);
+ smp_call_function_many(cpu_mask, rdt_ctrl_update, &msr_param, 1);
put_cpu();
free_cpumask_var(cpu_mask);
@@ -896,7 +896,7 @@ static void rdt_kill_sb(struct super_block *sb)
/*Put everything back to default values. */
for_each_enabled_rdt_resource(r)
- reset_all_cbms(r);
+ reset_all_ctrls(r);
cdp_disable();
rmdir_all_sub();
static_branch_disable(&rdt_enable_key);
diff --git a/arch/x86/kernel/cpu/intel_rdt_schemata.c b/arch/x86/kernel/cpu/intel_rdt_schemata.c
index 8594db4..7695179 100644
--- a/arch/x86/kernel/cpu/intel_rdt_schemata.c
+++ b/arch/x86/kernel/cpu/intel_rdt_schemata.c
@@ -38,7 +38,7 @@ static bool cbm_validate(unsigned long var, struct rdt_resource *r)
{
unsigned long first_bit, zero_bit;
- if (var == 0 || var > r->max_cbm)
+ if (var == 0 || var > r->default_ctrl)
return false;
first_bit = find_first_bit(&var, r->cbm_len);
@@ -61,7 +61,7 @@ static int parse_cbm(char *buf, struct rdt_resource *r, struct rdt_domain *d)
unsigned long data;
int ret;
- if (d->have_new_cbm)
+ if (d->have_new_ctrl)
return -EINVAL;
ret = kstrtoul(buf, 16, &data);
@@ -69,8 +69,8 @@ static int parse_cbm(char *buf, struct rdt_resource *r, struct rdt_domain *d)
return ret;
if (!cbm_validate(data, r))
return -EINVAL;
- d->new_cbm = data;
- d->have_new_cbm = true;
+ d->new_ctrl = data;
+ d->have_new_ctrl = true;
return 0;
}
@@ -119,9 +119,9 @@ static int update_domains(struct rdt_resource *r, int closid)
msr_param.res = r;
list_for_each_entry(d, &r->domains, list) {
- if (d->have_new_cbm && d->new_cbm != d->cbm[closid]) {
+ if (d->have_new_ctrl && d->new_ctrl != d->ctrl_val[closid]) {
cpumask_set_cpu(cpumask_any(&d->cpu_mask), cpu_mask);
- d->cbm[closid] = d->new_cbm;
+ d->ctrl_val[closid] = d->new_ctrl;
}
}
if (cpumask_empty(cpu_mask))
@@ -129,9 +129,9 @@ static int update_domains(struct rdt_resource *r, int closid)
cpu = get_cpu();
/* Update CBM on this cpu if it's in cpu_mask. */
if (cpumask_test_cpu(cpu, cpu_mask))
- rdt_cbm_update(&msr_param);
+ rdt_ctrl_update(&msr_param);
/* Update CBM on other cpus. */
- smp_call_function_many(cpu_mask, rdt_cbm_update, &msr_param, 1);
+ smp_call_function_many(cpu_mask, rdt_ctrl_update, &msr_param, 1);
put_cpu();
done:
@@ -164,7 +164,7 @@ ssize_t rdtgroup_schemata_write(struct kernfs_open_file *of,
for_each_enabled_rdt_resource(r)
list_for_each_entry(dom, &r->domains, list)
- dom->have_new_cbm = false;
+ dom->have_new_ctrl = false;
while ((tok = strsep(&buf, "\n")) != NULL) {
resname = strsep(&tok, ":");
@@ -208,7 +208,7 @@ static void show_doms(struct seq_file *s, struct rdt_resource *r, int closid)
if (sep)
seq_puts(s, ";");
seq_printf(s, "%d=%0*x", dom->id, max_data_width,
- dom->cbm[closid]);
+ dom->ctrl_val[closid]);
sep = true;
}
seq_puts(s, "\n");
--
1.9.1
Update the 'intel_rdt_ui' documentation to have Memory bandwidth(b/w)
allocation interface usage.
Signed-off-by: Vikas Shivappa <[email protected]>
---
Documentation/x86/intel_rdt_ui.txt | 107 ++++++++++++++++++++++++++++++-------
1 file changed, 87 insertions(+), 20 deletions(-)
diff --git a/Documentation/x86/intel_rdt_ui.txt b/Documentation/x86/intel_rdt_ui.txt
index 3ea1984..f223a3b 100644
--- a/Documentation/x86/intel_rdt_ui.txt
+++ b/Documentation/x86/intel_rdt_ui.txt
@@ -4,6 +4,7 @@ Copyright (C) 2016 Intel Corporation
Fenghua Yu <[email protected]>
Tony Luck <[email protected]>
+Vikas Shivappa <[email protected]>
This feature is enabled by the CONFIG_INTEL_RDT_A Kconfig and the
X86 /proc/cpuinfo flag bits "rdt", "cat_l3" and "cdp_l3".
@@ -22,19 +23,34 @@ Info directory
The 'info' directory contains information about the enabled
resources. Each resource has its own subdirectory. The subdirectory
-names reflect the resource names. Each subdirectory contains the
-following files:
+names reflect the resource names.
+Cache resource(L3/L2) subdirectory contains the following files:
-"num_closids": The number of CLOSIDs which are valid for this
- resource. The kernel uses the smallest number of
- CLOSIDs of all enabled resources as limit.
+"num_closids": The number of CLOSIDs which are valid for this
+ resource. The kernel uses the smallest number of
+ CLOSIDs of all enabled resources as limit.
-"cbm_mask": The bitmask which is valid for this resource. This
- mask is equivalent to 100%.
+"cbm_mask": The bitmask which is valid for this resource.
+ This mask is equivalent to 100%.
-"min_cbm_bits": The minimum number of consecutive bits which must be
- set when writing a mask.
+"min_cbm_bits": The minimum number of consecutive bits which
+ must be set when writing a mask.
+Memory bandwitdh(MB) subdirectory contains the following files:
+
+"min_bandwidth": The minimum memory bandwidth percentage which
+ user can request.
+
+"bandwidth_gran": The granularity in which the memory bandwidth
+ percentage is allocated. The allocated
+ b/w percentage is rounded off to the next
+ control step available on the hardware. The
+ available bandwidth control steps are:
+ min_bandwidth + N * bandwidth_gran.
+
+"delay_linear": Indicates if the delay scale is linear or
+ non-linear. This field is purely informational
+ only.
Resource groups
---------------
@@ -107,6 +123,22 @@ and 0xA are not. On a system with a 20-bit mask each bit represents 5%
of the capacity of the cache. You could partition the cache into four
equal parts with masks: 0x1f, 0x3e0, 0x7c00, 0xf8000.
+Memory bandwidth(b/w) percentage
+--------------------------------
+For Memory b/w resource, user controls the resource by indicating the
+percentage of total memory b/w.
+
+The minimum bandwidth percentage value for each cpu model is predefined
+and can be looked up through "info/MB/min_bandwidth". The bandwidth
+granularity that is allocated is also dependent on the cpu model and can
+be looked up at "info/MB/bandwidth_gran". The available bandwidth
+control steps are: min_bw + N * bw_gran. Intermediate values are rounded
+to the next control step available on the hardware.
+
+The bandwidth throttling is a core specific mechanism on some of Intel
+SKUs. Using a high bandwidth and a low bandwidth setting on two threads
+sharing a core will result in both threads being throttled to use the
+low bandwidth.
L3 details (code and data prioritization disabled)
--------------------------------------------------
@@ -129,6 +161,13 @@ schemata format is always:
L2:<cache_id0>=<cbm>;<cache_id1>=<cbm>;...
+Memory b/w Allocation details
+-----------------------------
+
+Memory b/w domain is L3 cache.
+
+ MB:<cache_id0>=bandwidth0;<cache_id1>=bandwidth1;...
+
Reading/writing the schemata file
---------------------------------
Reading the schemata file will show the state of all resources
@@ -146,13 +185,14 @@ L3CODE:0=fffff;1=fffff;2=fffff;3=fffff
Example 1
---------
On a two socket machine (one L3 cache per socket) with just four bits
-for cache bit masks
+for cache bit masks, minimum b/w of 10% with a memory bandwidth
+granularity of 10%
# mount -t resctrl resctrl /sys/fs/resctrl
# cd /sys/fs/resctrl
# mkdir p0 p1
-# echo "L3:0=3;1=c" > /sys/fs/resctrl/p0/schemata
-# echo "L3:0=3;1=3" > /sys/fs/resctrl/p1/schemata
+# echo "L3:0=3;1=c\nMB:0=50;1=50" > /sys/fs/resctrl/p0/schemata
+# echo "L3:0=3;1=3\nMB:0=50;1=50" > /sys/fs/resctrl/p1/schemata
The default resource group is unmodified, so we have access to all parts
of all caches (its schemata file reads "L3:0=f;1=f").
@@ -161,6 +201,14 @@ Tasks that are under the control of group "p0" may only allocate from the
"lower" 50% on cache ID 0, and the "upper" 50% of cache ID 1.
Tasks in group "p1" use the "lower" 50% of cache on both sockets.
+Similarly, tasks that are under the control of group "p0" may use a
+maximum memory b/w of 50% on socket0 and 50% on socket 1.
+Tasks in group "p1" may also use 50% memory b/w on both sockets.
+Note that unlike cache masks, memory b/w cannot specify whether these
+allocations can overlap or not. The allocations specifies the maximum
+b/w that the group may be able to use and the system admin can configure
+the b/w accordingly.
+
Example 2
---------
Again two sockets, but this time with a more realistic 20-bit mask.
@@ -174,9 +222,10 @@ of L3 cache on socket 0.
# cd /sys/fs/resctrl
First we reset the schemata for the default group so that the "upper"
-50% of the L3 cache on socket 0 cannot be used by ordinary tasks:
+50% of the L3 cache on socket 0 and 50% of memory b/w cannot be used by
+ordinary tasks:
-# echo "L3:0=3ff;1=fffff" > schemata
+# echo "L3:0=3ff;1=fffff\nMB:0=50;1=100" > schemata
Next we make a resource group for our first real time task and give
it access to the "top" 25% of the cache on socket 0.
@@ -199,6 +248,20 @@ Ditto for the second real time task (with the remaining 25% of cache):
# echo 5678 > p1/tasks
# taskset -cp 2 5678
+For the same 2 socket system with memory b/w resource and CAT L3 the
+schemata would look like(Assume min_bandwidth 10 and bandwidth_gran is
+10):
+
+For our first real time task this would request 20% memory b/w on socket
+0.
+
+# echo -e "L3:0=f8000;1=fffff\nMB:0=20;1=100" > p0/schemata
+
+For our second real time task this would request an other 20% memory b/w
+on socket 0.
+
+# echo -e "L3:0=f8000;1=fffff\nMB:0=20;1=100" > p0/schemata
+
Example 3
---------
@@ -212,18 +275,22 @@ the tasks.
# cd /sys/fs/resctrl
First we reset the schemata for the default group so that the "upper"
-50% of the L3 cache on socket 0 cannot be used by ordinary tasks:
+50% of the L3 cache on socket 0, and 50% of memory bandwidth on socket 0
+cannot be used by ordinary tasks:
-# echo "L3:0=3ff" > schemata
+# echo "L3:0=3ff\nMB:0=50" > schemata
-Next we make a resource group for our real time cores and give
-it access to the "top" 50% of the cache on socket 0.
+Next we make a resource group for our real time cores and give it access
+to the "top" 50% of the cache on socket 0 and 50% of memory bandwidth on
+socket 0.
# mkdir p0
-# echo "L3:0=ffc00;" > p0/schemata
+# echo "L3:0=ffc00\nMB:0=50" > p0/schemata
Finally we move core 4-7 over to the new group and make sure that the
-kernel and the tasks running there get 50% of the cache.
+kernel and the tasks running there get 50% of the cache. They should
+also get 50% of memory bandwidth assuming that the cores 4-7 are SMT
+siblings and only the real time threads are scheduled on the cores 4-7.
# echo C0 > p0/cpus
--
1.9.1
Detect MBA feature if CPUID.(EAX=10H, ECX=0):EBX.L2[bit 3] = 1.
Add supporting data structures to detect feature details which is done
in later patch using CPUID with EAX=10H, ECX= 3.
Signed-off-by: Vikas Shivappa <[email protected]>
---
arch/x86/include/asm/cpufeatures.h | 2 ++
arch/x86/include/asm/intel_rdt.h | 8 ++++++++
arch/x86/kernel/cpu/intel_rdt.c | 3 +++
arch/x86/kernel/cpu/scattered.c | 1 +
4 files changed, 14 insertions(+)
diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
index b04bb6d..25d7f52 100644
--- a/arch/x86/include/asm/cpufeatures.h
+++ b/arch/x86/include/asm/cpufeatures.h
@@ -201,6 +201,8 @@
#define X86_FEATURE_AVX512_4VNNIW (7*32+16) /* AVX-512 Neural Network Instructions */
#define X86_FEATURE_AVX512_4FMAPS (7*32+17) /* AVX-512 Multiply Accumulation Single precision */
+#define X86_FEATURE_MBA ( 7*32+18) /* Memory Bandwidth Allocation */
+
/* Virtualization flags: Linux defined, word 8 */
#define X86_FEATURE_TPR_SHADOW ( 8*32+ 0) /* Intel TPR Shadow */
#define X86_FEATURE_VNMI ( 8*32+ 1) /* Intel Virtual NMI */
diff --git a/arch/x86/include/asm/intel_rdt.h b/arch/x86/include/asm/intel_rdt.h
index 2add1a7..4c94f18 100644
--- a/arch/x86/include/asm/intel_rdt.h
+++ b/arch/x86/include/asm/intel_rdt.h
@@ -167,6 +167,14 @@ enum {
unsigned int full;
};
+/* CPUID.(EAX=10H, ECX=ResID=3).EAX */
+union cpuid_0x10_3_eax {
+ struct {
+ unsigned int max_delay:12;
+ } split;
+ unsigned int full;
+};
+
/* CPUID.(EAX=10H, ECX=ResID).EDX */
union cpuid_0x10_x_edx {
struct {
diff --git a/arch/x86/kernel/cpu/intel_rdt.c b/arch/x86/kernel/cpu/intel_rdt.c
index 6507e93..c4cf2e8 100644
--- a/arch/x86/kernel/cpu/intel_rdt.c
+++ b/arch/x86/kernel/cpu/intel_rdt.c
@@ -212,6 +212,9 @@ static inline bool get_rdt_resources(void)
ret = true;
}
+ if (boot_cpu_has(X86_FEATURE_MBA))
+ ret = true;
+
rdt_init_padding();
return ret;
diff --git a/arch/x86/kernel/cpu/scattered.c b/arch/x86/kernel/cpu/scattered.c
index d979406..23c2350 100644
--- a/arch/x86/kernel/cpu/scattered.c
+++ b/arch/x86/kernel/cpu/scattered.c
@@ -27,6 +27,7 @@ struct cpuid_bit {
{ X86_FEATURE_CAT_L3, CPUID_EBX, 1, 0x00000010, 0 },
{ X86_FEATURE_CAT_L2, CPUID_EBX, 2, 0x00000010, 0 },
{ X86_FEATURE_CDP_L3, CPUID_ECX, 2, 0x00000010, 1 },
+ { X86_FEATURE_MBA, CPUID_EBX, 3, 0x00000010, 0 },
{ X86_FEATURE_HW_PSTATE, CPUID_EDX, 7, 0x80000007, 0 },
{ X86_FEATURE_CPB, CPUID_EDX, 9, 0x80000007, 0 },
{ X86_FEATURE_PROC_FEEDBACK, CPUID_EDX, 11, 0x80000007, 0 },
--
1.9.1
The MBA feature details like minimum bandwidth supported, b/w
granularity etc are obtained via executing CPUID with EAX=10H
,ECX=3.
Setup and initialize the MBA specific extensions to data structures like
global list of RDT resources, RDT resource structure and RDT domain
structure.
Signed-off-by: Vikas Shivappa <[email protected]>
---
arch/x86/include/asm/intel_rdt.h | 92 +++++++++++++++---------
arch/x86/kernel/cpu/intel_rdt.c | 151 ++++++++++++++++++++++++++++++++++++---
2 files changed, 199 insertions(+), 44 deletions(-)
diff --git a/arch/x86/include/asm/intel_rdt.h b/arch/x86/include/asm/intel_rdt.h
index 4c94f18..097134b 100644
--- a/arch/x86/include/asm/intel_rdt.h
+++ b/arch/x86/include/asm/intel_rdt.h
@@ -12,6 +12,7 @@
#define IA32_L3_QOS_CFG 0xc81
#define IA32_L3_CBM_BASE 0xc90
#define IA32_L2_CBM_BASE 0xd10
+#define IA32_MBA_THRTL_BASE 0xd50
#define L3_QOS_CDP_ENABLE 0x01ULL
@@ -69,39 +70,6 @@ struct rftype {
};
/**
- * struct rdt_resource - attributes of an RDT resource
- * @enabled: Is this feature enabled on this machine
- * @capable: Is this feature available on this machine
- * @name: Name to use in "schemata" file
- * @num_closid: Number of CLOSIDs available
- * @default_ctrl: Specifies default cache cbm or mem b/w percent.
- * @data_width: Character width of data when displaying
- * @min_cbm_bits: Minimum number of consecutive bits to be set
- * in a cache bit mask
- * @domains: All domains for this resource
- * @msr_base: Base MSR address for CBMs
- * @cache_level: Which cache level defines scope of this domain
- * @cbm_idx_multi: Multiplier of CBM index
- * @cbm_idx_offset: Offset of CBM index. CBM index is computed by:
- * closid * cbm_idx_multi + cbm_idx_offset
- */
-struct rdt_resource {
- bool enabled;
- bool capable;
- char *name;
- int num_closid;
- int cbm_len;
- int min_cbm_bits;
- u32 default_ctrl;
- int data_width;
- struct list_head domains;
- int msr_base;
- int cache_level;
- int cbm_idx_multi;
- int cbm_idx_offset;
-};
-
-/**
* struct rdt_domain - group of cpus sharing an RDT resource
* @list: all instances of this resource
* @id: unique id for this instance
@@ -131,6 +99,63 @@ struct msr_param {
int high;
};
+/**
+ * struct rdt_resource - attributes of an RDT resource
+ * @enabled: Is this feature enabled on this machine
+ * @capable: Is this feature available on this machine
+ * @name: Name to use in "schemata" file
+ * @num_closid: Number of CLOSIDs available
+ * @default_ctrl: Specifies default cache cbm or mem b/w percent.
+ * @data_width: Character width of data when displaying
+ * @msr_update: Function pointer to update QOS MSRs
+ * @domains: All domains for this resource
+ * @msr_base: Base MSR address for CBMs
+ * @cache_level: Which cache level defines scope of this domain
+ * @cbm_idx_multi: Multiplier of CBM index
+ * @cbm_idx_offset: Offset of CBM index. CBM index is computed by:
+ * closid * cbm_idx_multi + cbm_idx_offset
+ * @cbm_len Number of cbm bits
+ * @min_cbm_bits: Minimum number of consecutive bits to be set
+ * in a cache bit mask
+ * @max_delay: Max throttle delay. Delay is the hardware
+ * understandable value for memory bandwidth.
+ * @min_bw: Minimum memory bandwidth percentage user
+ * can request
+ * @bw_gran: Granularity at which the memory bandwidth
+ * is allocated
+ * @delay_linear: True if memory b/w delay is in linear scale
+ * @mb_map: Mapping of memory b/w percentage to
+ * memory b/w delay values
+ */
+struct rdt_resource {
+ bool enabled;
+ bool capable;
+ char *name;
+ int num_closid;
+ u32 default_ctrl;
+ int data_width;
+ void (*msr_update) (struct rdt_domain *d, struct msr_param *m,
+ struct rdt_resource *r);
+ struct list_head domains;
+ int msr_base;
+ int cache_level;
+ int cbm_idx_multi;
+ int cbm_idx_offset;
+ union {
+ struct { /*cache ctrls*/
+ int cbm_len;
+ int min_cbm_bits;
+ };
+ struct { /*mem ctrls*/
+ u32 max_delay;
+ u32 min_bw;
+ u32 bw_gran;
+ u32 delay_linear;
+ u32 *mb_map;
+ };
+ };
+};
+
extern struct mutex rdtgroup_mutex;
extern struct rdt_resource rdt_resources_all[];
@@ -144,6 +169,7 @@ enum {
RDT_RESOURCE_L3DATA,
RDT_RESOURCE_L3CODE,
RDT_RESOURCE_L2,
+ RDT_RESOURCE_MBA,
/* Must be the last */
RDT_NUM_RESOURCES,
diff --git a/arch/x86/kernel/cpu/intel_rdt.c b/arch/x86/kernel/cpu/intel_rdt.c
index c4cf2e8..b9f1cfc 100644
--- a/arch/x86/kernel/cpu/intel_rdt.c
+++ b/arch/x86/kernel/cpu/intel_rdt.c
@@ -32,6 +32,9 @@
#include <asm/intel-family.h>
#include <asm/intel_rdt.h>
+#define MAX_MBA_BW 100u
+#define MBA_IS_LINEAR 0x4
+
/* Mutex to protect rdtgroup access. */
DEFINE_MUTEX(rdtgroup_mutex);
@@ -45,11 +48,17 @@
*/
int max_name_width, max_data_width;
+static void
+mba_wrmsr(struct rdt_domain *d, struct msr_param *m, struct rdt_resource *r);
+static void
+cat_wrmsr(struct rdt_domain *d, struct msr_param *m, struct rdt_resource *r);
+
struct rdt_resource rdt_resources_all[] = {
{
.name = "L3",
.domains = domain_init(RDT_RESOURCE_L3),
.msr_base = IA32_L3_CBM_BASE,
+ .msr_update = cat_wrmsr,
.min_cbm_bits = 1,
.cache_level = 3,
.cbm_idx_multi = 1,
@@ -59,6 +68,7 @@ struct rdt_resource rdt_resources_all[] = {
.name = "L3DATA",
.domains = domain_init(RDT_RESOURCE_L3DATA),
.msr_base = IA32_L3_CBM_BASE,
+ .msr_update = cat_wrmsr,
.min_cbm_bits = 1,
.cache_level = 3,
.cbm_idx_multi = 2,
@@ -68,6 +78,7 @@ struct rdt_resource rdt_resources_all[] = {
.name = "L3CODE",
.domains = domain_init(RDT_RESOURCE_L3CODE),
.msr_base = IA32_L3_CBM_BASE,
+ .msr_update = cat_wrmsr,
.min_cbm_bits = 1,
.cache_level = 3,
.cbm_idx_multi = 2,
@@ -77,11 +88,21 @@ struct rdt_resource rdt_resources_all[] = {
.name = "L2",
.domains = domain_init(RDT_RESOURCE_L2),
.msr_base = IA32_L2_CBM_BASE,
+ .msr_update = cat_wrmsr,
.min_cbm_bits = 1,
.cache_level = 2,
.cbm_idx_multi = 1,
.cbm_idx_offset = 0
},
+ {
+ .name = "MB",
+ .domains = domain_init(RDT_RESOURCE_MBA),
+ .msr_base = IA32_MBA_THRTL_BASE,
+ .msr_update = mba_wrmsr,
+ .cache_level = 3,
+ .cbm_idx_multi = 1,
+ .cbm_idx_offset = 0
+ },
};
static int cbm_idx(struct rdt_resource *r, int closid)
@@ -136,6 +157,53 @@ static inline bool cache_alloc_hsw_probe(void)
return false;
}
+/*
+ * rdt_get_mb_table() - get a mapping of bandwidth(b/w) percentage values
+ * exposed to user interface and the h/w understandable delay values.
+ *
+ * The non-linear delay values have the granularity of power of two
+ * and also the h/w does not guarantee a curve for configured delay
+ * values vs. actual b/w enforced.
+ * Hence we need a mapping that is pre calibrated so the user can
+ * express the memory b/w as a percentage value.
+ */
+static inline bool rdt_get_mb_table(struct rdt_resource *r)
+{
+ /*
+ * There are no Intel SKUs as of now to support non-linear delay.
+ */
+ pr_info("MBA b/w map not implemented for cpu:%d, model:%d",
+ boot_cpu_data.x86, boot_cpu_data.x86_model);
+
+ return false;
+}
+
+static bool rdt_get_mem_config(struct rdt_resource *r)
+{
+ union cpuid_0x10_3_eax eax;
+ union cpuid_0x10_x_edx edx;
+ u32 ebx, ecx;
+
+ cpuid_count(0x00000010, 3, &eax.full, &ebx, &ecx, &edx.full);
+ r->num_closid = edx.split.cos_max + 1;
+ r->max_delay = eax.split.max_delay + 1;
+ r->default_ctrl = MAX_MBA_BW;
+ if (ecx & MBA_IS_LINEAR) {
+ r->delay_linear = true;
+ r->min_bw = MAX_MBA_BW - r->max_delay;
+ r->bw_gran = MAX_MBA_BW - r->max_delay;
+ } else {
+ if (!rdt_get_mb_table(r))
+ return false;
+ }
+ r->data_width = 3;
+
+ r->capable = true;
+ r->enabled = true;
+
+ return true;
+}
+
static void rdt_get_cache_config(int idx, struct rdt_resource *r)
{
union cpuid_0x10_1_eax eax;
@@ -212,7 +280,8 @@ static inline bool get_rdt_resources(void)
ret = true;
}
- if (boot_cpu_has(X86_FEATURE_MBA))
+ if (boot_cpu_has(X86_FEATURE_MBA) &&
+ rdt_get_mem_config(&rdt_resources_all[RDT_RESOURCE_MBA]))
ret = true;
rdt_init_padding();
@@ -233,6 +302,47 @@ static int get_cache_id(int cpu, int level)
return -1;
}
+/*
+ * Map the memory b/w percentage value to delay values
+ * that can be written to QOS_MSRs.
+ * There are currently no SKUs which support non linear delay values.
+ */
+static u32 delay_bw_map(unsigned long bw, struct rdt_resource *r)
+{
+ if (r->delay_linear)
+ return MAX_MBA_BW - bw;
+
+ WARN_ONCE(1, "Non Linear delay-bw map not supported but queried\n");
+ return r->default_ctrl;
+}
+
+static void
+mba_wrmsr(struct rdt_domain *d, struct msr_param *m, struct rdt_resource *r)
+{
+ int i;
+
+ for (i = m->low; i < m->high; i++) {
+ int idx = cbm_idx(r, i);
+
+ /*
+ * Write the delay value for mba.
+ */
+ wrmsrl(r->msr_base + idx, delay_bw_map(d->ctrl_val[i], r));
+ }
+}
+
+static void
+cat_wrmsr(struct rdt_domain *d, struct msr_param *m, struct rdt_resource *r)
+{
+ int i;
+
+ for (i = m->low; i < m->high; i++) {
+ int idx = cbm_idx(r, i);
+
+ wrmsrl(r->msr_base + idx, d->ctrl_val[i]);
+ }
+}
+
void rdt_ctrl_update(void *arg)
{
struct msr_param *m = (struct msr_param *)arg;
@@ -291,6 +401,33 @@ static struct rdt_domain *rdt_find_domain(struct rdt_resource *r, int id,
return NULL;
}
+static int domain_setup_ctrlval(struct rdt_resource *r, struct rdt_domain *d)
+{
+ struct msr_param m;
+ u32 *dc;
+ int i;
+
+ dc = kmalloc_array(r->num_closid, sizeof(*d->ctrl_val), GFP_KERNEL);
+ if (!dc)
+ return -ENOMEM;
+
+ d->ctrl_val = dc;
+
+ /*
+ * Initialize the Control MSRs to having no control.
+ * For Cache Allocation: Set all bits in cbm
+ * For Memory Allocation: Set b/w requested to 100
+ */
+ for (i = 0; i < r->num_closid; i++, dc++)
+ *dc = r->default_ctrl;
+
+ m.low = 0;
+ m.high = r->num_closid;
+ r->msr_update(d, &m, r);
+
+ return 0;
+}
+
/*
* domain_add_cpu - Add a cpu to a resource's domain list.
*
@@ -306,7 +443,7 @@ static struct rdt_domain *rdt_find_domain(struct rdt_resource *r, int id,
*/
static void domain_add_cpu(int cpu, struct rdt_resource *r)
{
- int i, id = get_cache_id(cpu, r->cache_level);
+ int id = get_cache_id(cpu, r->cache_level);
struct list_head *add_pos = NULL;
struct rdt_domain *d;
@@ -327,19 +464,11 @@ static void domain_add_cpu(int cpu, struct rdt_resource *r)
d->id = id;
- d->ctrl_val = kmalloc_array(r->num_closid, sizeof(*d->ctrl_val), GFP_KERNEL);
- if (!d->ctrl_val) {
+ if (domain_setup_ctrlval(r, d)) {
kfree(d);
return;
}
- for (i = 0; i < r->num_closid; i++) {
- int idx = cbm_idx(r, i);
-
- d->ctrl_val[i] = r->default_ctrl;
- wrmsrl(r->msr_base + idx, d->ctrl_val[i]);
- }
-
cpumask_set_cpu(cpu, &d->cpu_mask);
list_add_tail(&d->list, add_pos);
}
--
1.9.1
Each RDT resource has a separate directory with in the
"/sys/fs/resctrl/info" which has files to display information about the
resource. Currently this list of files is a static list of files
specific to cache resource.
As a preparatory patch to add MBA info files, extend this
implementation support dynamic list of files.
Signed-off-by: Vikas Shivappa <[email protected]>
---
arch/x86/include/asm/intel_rdt.h | 6 ++++++
arch/x86/kernel/cpu/intel_rdt.c | 1 +
arch/x86/kernel/cpu/intel_rdt_rdtgroup.c | 18 ++++++++++++++----
3 files changed, 21 insertions(+), 4 deletions(-)
diff --git a/arch/x86/include/asm/intel_rdt.h b/arch/x86/include/asm/intel_rdt.h
index 097134b..c030637 100644
--- a/arch/x86/include/asm/intel_rdt.h
+++ b/arch/x86/include/asm/intel_rdt.h
@@ -108,6 +108,8 @@ struct msr_param {
* @default_ctrl: Specifies default cache cbm or mem b/w percent.
* @data_width: Character width of data when displaying
* @msr_update: Function pointer to update QOS MSRs
+ * @info_files: resctrl info files for the resource
+ * @nr_info_files: Number of info files
* @domains: All domains for this resource
* @msr_base: Base MSR address for CBMs
* @cache_level: Which cache level defines scope of this domain
@@ -136,6 +138,8 @@ struct rdt_resource {
int data_width;
void (*msr_update) (struct rdt_domain *d, struct msr_param *m,
struct rdt_resource *r);
+ struct rftype *info_files;
+ int nr_info_files;
struct list_head domains;
int msr_base;
int cache_level;
@@ -156,6 +160,8 @@ struct rdt_resource {
};
};
+void rdt_get_cache_infofile(struct rdt_resource *r);
+
extern struct mutex rdtgroup_mutex;
extern struct rdt_resource rdt_resources_all[];
diff --git a/arch/x86/kernel/cpu/intel_rdt.c b/arch/x86/kernel/cpu/intel_rdt.c
index b9f1cfc..86142cc 100644
--- a/arch/x86/kernel/cpu/intel_rdt.c
+++ b/arch/x86/kernel/cpu/intel_rdt.c
@@ -215,6 +215,7 @@ static void rdt_get_cache_config(int idx, struct rdt_resource *r)
r->cbm_len = eax.split.cbm_len + 1;
r->default_ctrl = BIT_MASK(eax.split.cbm_len + 1) - 1;
r->data_width = (r->cbm_len + 3) / 4;
+ rdt_get_cache_infofile(r);
r->capable = true;
r->enabled = true;
}
diff --git a/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c b/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c
index c4b6972..a3a0faa 100644
--- a/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c
+++ b/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c
@@ -519,7 +519,7 @@ static int rdt_min_cbm_bits_show(struct kernfs_open_file *of,
}
/* rdtgroup information files for one cache resource. */
-static struct rftype res_info_files[] = {
+static struct rftype res_cache_info_files[] = {
{
.name = "num_closids",
.mode = 0444,
@@ -540,11 +540,18 @@ static int rdt_min_cbm_bits_show(struct kernfs_open_file *of,
},
};
+void rdt_get_cache_infofile(struct rdt_resource *r)
+{
+ r->info_files = res_cache_info_files;
+ r->nr_info_files = ARRAY_SIZE(res_cache_info_files);
+}
+
static int rdtgroup_create_info_dir(struct kernfs_node *parent_kn)
{
struct kernfs_node *kn_subdir;
+ struct rftype *res_info_files;
struct rdt_resource *r;
- int ret;
+ int ret, len;
/* create the directory */
kn_info = kernfs_create_dir(parent_kn, "info", parent_kn->mode, NULL);
@@ -563,8 +570,11 @@ static int rdtgroup_create_info_dir(struct kernfs_node *parent_kn)
ret = rdtgroup_kn_set_ugid(kn_subdir);
if (ret)
goto out_destroy;
- ret = rdtgroup_add_files(kn_subdir, res_info_files,
- ARRAY_SIZE(res_info_files));
+
+ res_info_files = r->info_files;
+ len = r->nr_info_files;
+
+ ret = rdtgroup_add_files(kn_subdir, res_info_files, len);
if (ret)
goto out_destroy;
kernfs_activate(kn_subdir);
--
1.9.1
The files in the info directory for MBA are as follows:
num_closids
The maximum number of CLOSids available for MBA
min_bandwidth
The minimum memory bandwidth percentage value
bandwidth_gran
The granularity of the bandwidth control in percent for the
particular CPU SKU. Intermediate values entered are rounded off
to the previous control step available. Available bandwidth
control steps are minimum_bandwidth + N * bandwidth_gran.
delay_linear
When set, the OS writes a linear percentage based value to the
control MSRs ranging from minimum_bandwidth to 100 percent.
This value is informational and has no influence on the values
written to the schemata files. The values written to the
schemata are always bandwidth percentage that is requested.
Signed-off-by: Vikas Shivappa <[email protected]>
---
arch/x86/include/asm/intel_rdt.h | 1 +
arch/x86/kernel/cpu/intel_rdt.c | 1 +
arch/x86/kernel/cpu/intel_rdt_rdtgroup.c | 64 ++++++++++++++++++++++++++++++++
3 files changed, 66 insertions(+)
diff --git a/arch/x86/include/asm/intel_rdt.h b/arch/x86/include/asm/intel_rdt.h
index c030637..32de56a 100644
--- a/arch/x86/include/asm/intel_rdt.h
+++ b/arch/x86/include/asm/intel_rdt.h
@@ -161,6 +161,7 @@ struct rdt_resource {
};
void rdt_get_cache_infofile(struct rdt_resource *r);
+void rdt_get_mba_infofile(struct rdt_resource *r);
extern struct mutex rdtgroup_mutex;
diff --git a/arch/x86/kernel/cpu/intel_rdt.c b/arch/x86/kernel/cpu/intel_rdt.c
index 86142cc..a4f56d9 100644
--- a/arch/x86/kernel/cpu/intel_rdt.c
+++ b/arch/x86/kernel/cpu/intel_rdt.c
@@ -197,6 +197,7 @@ static bool rdt_get_mem_config(struct rdt_resource *r)
return false;
}
r->data_width = 3;
+ rdt_get_mba_infofile(r);
r->capable = true;
r->enabled = true;
diff --git a/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c b/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c
index a3a0faa..d97ffc1 100644
--- a/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c
+++ b/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c
@@ -518,6 +518,36 @@ static int rdt_min_cbm_bits_show(struct kernfs_open_file *of,
return 0;
}
+static int rdt_min_bw_show(struct kernfs_open_file *of,
+ struct seq_file *seq, void *v)
+{
+ struct rdt_resource *r = of->kn->parent->priv;
+
+ seq_printf(seq, "%d\n", r->min_bw);
+
+ return 0;
+}
+
+static int rdt_bw_gran_show(struct kernfs_open_file *of,
+ struct seq_file *seq, void *v)
+{
+ struct rdt_resource *r = of->kn->parent->priv;
+
+ seq_printf(seq, "%d\n", r->bw_gran);
+
+ return 0;
+}
+
+static int rdt_delay_linear_show(struct kernfs_open_file *of,
+ struct seq_file *seq, void *v)
+{
+ struct rdt_resource *r = of->kn->parent->priv;
+
+ seq_printf(seq, "%d\n", r->delay_linear);
+
+ return 0;
+}
+
/* rdtgroup information files for one cache resource. */
static struct rftype res_cache_info_files[] = {
{
@@ -540,6 +570,40 @@ static int rdt_min_cbm_bits_show(struct kernfs_open_file *of,
},
};
+/* rdtgroup information files for memory bandwidth. */
+static struct rftype res_mba_info_files[] = {
+ {
+ .name = "num_closids",
+ .mode = 0444,
+ .kf_ops = &rdtgroup_kf_single_ops,
+ .seq_show = rdt_num_closids_show,
+ },
+ {
+ .name = "min_bandwidth",
+ .mode = 0444,
+ .kf_ops = &rdtgroup_kf_single_ops,
+ .seq_show = rdt_min_bw_show,
+ },
+ {
+ .name = "bandwidth_gran",
+ .mode = 0444,
+ .kf_ops = &rdtgroup_kf_single_ops,
+ .seq_show = rdt_bw_gran_show,
+ },
+ {
+ .name = "delay_linear",
+ .mode = 0444,
+ .kf_ops = &rdtgroup_kf_single_ops,
+ .seq_show = rdt_delay_linear_show,
+ },
+};
+
+void rdt_get_mba_infofile(struct rdt_resource *r)
+{
+ r->info_files = res_mba_info_files;
+ r->nr_info_files = ARRAY_SIZE(res_mba_info_files);
+}
+
void rdt_get_cache_infofile(struct rdt_resource *r)
{
r->info_files = res_cache_info_files;
--
1.9.1
User updates RDT resource controls by updating the schemata file. OS
then parses these updates and updates the corresponding QOS_MSRs.
However the parsing and MSR update functions are specific to cache
resource type and do not support memory resource type.
Define resource specific function pointers to dynamically choose such
functions in order to prepare to add MBA support.
Signed-off-by: Vikas Shivappa <[email protected]>
---
arch/x86/include/asm/intel_rdt.h | 7 +++++++
arch/x86/kernel/cpu/intel_rdt.c | 16 ++++++++++------
arch/x86/kernel/cpu/intel_rdt_schemata.c | 23 +++++++++++++----------
3 files changed, 30 insertions(+), 16 deletions(-)
diff --git a/arch/x86/include/asm/intel_rdt.h b/arch/x86/include/asm/intel_rdt.h
index 32de56a..63fa034 100644
--- a/arch/x86/include/asm/intel_rdt.h
+++ b/arch/x86/include/asm/intel_rdt.h
@@ -107,6 +107,9 @@ struct msr_param {
* @num_closid: Number of CLOSIDs available
* @default_ctrl: Specifies default cache cbm or mem b/w percent.
* @data_width: Character width of data when displaying
+ * @format_str: Per resource format string to show domain value
+ * @parse_ctrlval: Per resource function pointer to parse
+ * the ctrl values
* @msr_update: Function pointer to update QOS MSRs
* @info_files: resctrl info files for the resource
* @nr_info_files: Number of info files
@@ -136,6 +139,9 @@ struct rdt_resource {
int num_closid;
u32 default_ctrl;
int data_width;
+ const char *format_str;
+ int (*parse_ctrlval) (char *buf, struct rdt_resource *r,
+ struct rdt_domain *d);
void (*msr_update) (struct rdt_domain *d, struct msr_param *m,
struct rdt_resource *r);
struct rftype *info_files;
@@ -162,6 +168,7 @@ struct rdt_resource {
void rdt_get_cache_infofile(struct rdt_resource *r);
void rdt_get_mba_infofile(struct rdt_resource *r);
+int parse_cbm(char *buf, struct rdt_resource *r, struct rdt_domain *d);
extern struct mutex rdtgroup_mutex;
diff --git a/arch/x86/kernel/cpu/intel_rdt.c b/arch/x86/kernel/cpu/intel_rdt.c
index a4f56d9..695870a 100644
--- a/arch/x86/kernel/cpu/intel_rdt.c
+++ b/arch/x86/kernel/cpu/intel_rdt.c
@@ -58,7 +58,9 @@ struct rdt_resource rdt_resources_all[] = {
.name = "L3",
.domains = domain_init(RDT_RESOURCE_L3),
.msr_base = IA32_L3_CBM_BASE,
+ .parse_ctrlval = parse_cbm,
.msr_update = cat_wrmsr,
+ .format_str = "%d=%0*x",
.min_cbm_bits = 1,
.cache_level = 3,
.cbm_idx_multi = 1,
@@ -68,7 +70,9 @@ struct rdt_resource rdt_resources_all[] = {
.name = "L3DATA",
.domains = domain_init(RDT_RESOURCE_L3DATA),
.msr_base = IA32_L3_CBM_BASE,
+ .parse_ctrlval = parse_cbm,
.msr_update = cat_wrmsr,
+ .format_str = "%d=%0*x",
.min_cbm_bits = 1,
.cache_level = 3,
.cbm_idx_multi = 2,
@@ -78,7 +82,9 @@ struct rdt_resource rdt_resources_all[] = {
.name = "L3CODE",
.domains = domain_init(RDT_RESOURCE_L3CODE),
.msr_base = IA32_L3_CBM_BASE,
+ .parse_ctrlval = parse_cbm,
.msr_update = cat_wrmsr,
+ .format_str = "%d=%0*x",
.min_cbm_bits = 1,
.cache_level = 3,
.cbm_idx_multi = 2,
@@ -88,7 +94,9 @@ struct rdt_resource rdt_resources_all[] = {
.name = "L2",
.domains = domain_init(RDT_RESOURCE_L2),
.msr_base = IA32_L2_CBM_BASE,
+ .parse_ctrlval = parse_cbm,
.msr_update = cat_wrmsr,
+ .format_str = "%d=%0*x",
.min_cbm_bits = 1,
.cache_level = 2,
.cbm_idx_multi = 1,
@@ -349,7 +357,7 @@ void rdt_ctrl_update(void *arg)
{
struct msr_param *m = (struct msr_param *)arg;
struct rdt_resource *r = m->res;
- int i, cpu = smp_processor_id();
+ int cpu = smp_processor_id();
struct rdt_domain *d;
list_for_each_entry(d, &r->domains, list) {
@@ -363,11 +371,7 @@ void rdt_ctrl_update(void *arg)
return;
found:
- for (i = m->low; i < m->high; i++) {
- int idx = cbm_idx(r, i);
-
- wrmsrl(r->msr_base + idx, d->ctrl_val[i]);
- }
+ r->msr_update(d, m, r);
}
/*
diff --git a/arch/x86/kernel/cpu/intel_rdt_schemata.c b/arch/x86/kernel/cpu/intel_rdt_schemata.c
index 7695179..03f9a70 100644
--- a/arch/x86/kernel/cpu/intel_rdt_schemata.c
+++ b/arch/x86/kernel/cpu/intel_rdt_schemata.c
@@ -34,9 +34,14 @@
* are allowed (e.g. FFFFH, 0FF0H, 003CH, etc.).
* Additionally Haswell requires at least two bits set.
*/
-static bool cbm_validate(unsigned long var, struct rdt_resource *r)
+static bool cbm_validate(char *buf, unsigned long *data, struct rdt_resource *r)
{
- unsigned long first_bit, zero_bit;
+ unsigned long first_bit, zero_bit, var;
+ int ret;
+
+ ret = kstrtoul(buf, 16, &var);
+ if (ret)
+ return false;
if (var == 0 || var > r->default_ctrl)
return false;
@@ -49,6 +54,8 @@ static bool cbm_validate(unsigned long var, struct rdt_resource *r)
if ((zero_bit - first_bit) < r->min_cbm_bits)
return false;
+
+ *data = var;
return true;
}
@@ -56,18 +63,14 @@ static bool cbm_validate(unsigned long var, struct rdt_resource *r)
* Read one cache bit mask (hex). Check that it is valid for the current
* resource type.
*/
-static int parse_cbm(char *buf, struct rdt_resource *r, struct rdt_domain *d)
+int parse_cbm(char *buf, struct rdt_resource *r, struct rdt_domain *d)
{
unsigned long data;
- int ret;
if (d->have_new_ctrl)
return -EINVAL;
- ret = kstrtoul(buf, 16, &data);
- if (ret)
- return ret;
- if (!cbm_validate(data, r))
+ if(!cbm_validate(buf, &data, r))
return -EINVAL;
d->new_ctrl = data;
d->have_new_ctrl = true;
@@ -96,7 +99,7 @@ static int parse_line(char *line, struct rdt_resource *r)
return -EINVAL;
list_for_each_entry(d, &r->domains, list) {
if (d->id == dom_id) {
- if (parse_cbm(dom, r, d))
+ if (r->parse_ctrlval(dom, r, d))
return -EINVAL;
goto next;
}
@@ -207,7 +210,7 @@ static void show_doms(struct seq_file *s, struct rdt_resource *r, int closid)
list_for_each_entry(dom, &r->domains, list) {
if (sep)
seq_puts(s, ";");
- seq_printf(s, "%d=%0*x", dom->id, max_data_width,
+ seq_printf(s, r->format_str, dom->id, max_data_width,
dom->ctrl_val[closid]);
sep = true;
}
--
1.9.1
Add support to update the MBA bandwidth values for the domains. The MBA
bandwidth values are specified by updating the schemata.
We do the following to parse the bandwidth(bw) value from
schemata and update the PQOS_MSRS:
1. check the bw to satisfy the minimum and max bandwidth requirements.
2. To meet the granularity requirement intermediate values are rounded
to the next control step available on the hardware.
3. map the bw to delay values and write the delay values to
corresponding domain PQOS_MSRs which are indexed from 0xD50. For
linear scale the delay value = 100 - bw. Currently no Intel SKUs
support non-linear delay values.
Signed-off-by: Vikas Shivappa <[email protected]>
---
arch/x86/include/asm/intel_rdt.h | 1 +
arch/x86/kernel/cpu/intel_rdt.c | 2 ++
arch/x86/kernel/cpu/intel_rdt_schemata.c | 46 ++++++++++++++++++++++++++++++++
3 files changed, 49 insertions(+)
diff --git a/arch/x86/include/asm/intel_rdt.h b/arch/x86/include/asm/intel_rdt.h
index 63fa034..32fbb28 100644
--- a/arch/x86/include/asm/intel_rdt.h
+++ b/arch/x86/include/asm/intel_rdt.h
@@ -169,6 +169,7 @@ struct rdt_resource {
void rdt_get_cache_infofile(struct rdt_resource *r);
void rdt_get_mba_infofile(struct rdt_resource *r);
int parse_cbm(char *buf, struct rdt_resource *r, struct rdt_domain *d);
+int parse_bw(char *buf, struct rdt_resource *r, struct rdt_domain *d);
extern struct mutex rdtgroup_mutex;
diff --git a/arch/x86/kernel/cpu/intel_rdt.c b/arch/x86/kernel/cpu/intel_rdt.c
index 695870a..fbbe0c7 100644
--- a/arch/x86/kernel/cpu/intel_rdt.c
+++ b/arch/x86/kernel/cpu/intel_rdt.c
@@ -106,7 +106,9 @@ struct rdt_resource rdt_resources_all[] = {
.name = "MB",
.domains = domain_init(RDT_RESOURCE_MBA),
.msr_base = IA32_MBA_THRTL_BASE,
+ .parse_ctrlval = parse_bw,
.msr_update = mba_wrmsr,
+ .format_str = "%d=%*d",
.cache_level = 3,
.cbm_idx_multi = 1,
.cbm_idx_offset = 0
diff --git a/arch/x86/kernel/cpu/intel_rdt_schemata.c b/arch/x86/kernel/cpu/intel_rdt_schemata.c
index 03f9a70..9154695 100644
--- a/arch/x86/kernel/cpu/intel_rdt_schemata.c
+++ b/arch/x86/kernel/cpu/intel_rdt_schemata.c
@@ -29,6 +29,52 @@
#include <asm/intel_rdt.h>
/*
+ * Check whether MBA bandwidth percentage value is correct.
+ * The value is checked against the minimum and max bandwidth
+ * values specified by the hardware. The allocated b/w
+ * percentage is rounded off the next control step
+ * available on the hardware.
+ */
+static bool bw_validate(char *buf, unsigned long *data, struct rdt_resource *r)
+{
+ unsigned long bw, m;
+ int ret;
+
+ /*
+ * Only linear delay values is supported for current Intel SKUs.
+ */
+ if (!r->delay_linear)
+ return false;
+
+ ret = kstrtoul(buf, 10, &bw);
+ if (ret)
+ return false;
+
+ if (bw < r->min_bw || bw > r->default_ctrl)
+ return false;
+
+ m = (bw + r->bw_gran - 1) / r->bw_gran;
+ *data = m * r->bw_gran;
+
+ return true;
+}
+
+int parse_bw(char *buf, struct rdt_resource *r, struct rdt_domain *d)
+{
+ unsigned long data;
+
+ if (d->have_new_ctrl)
+ return -EINVAL;
+
+ if (!bw_validate(buf, &data, r))
+ return -EINVAL;
+ d->new_ctrl = data;
+ d->have_new_ctrl = true;
+
+ return 0;
+}
+
+/*
* Check whether a cache bit mask is valid. The SDM says:
* Please note that all (and only) contiguous '1' combinations
* are allowed (e.g. FFFFH, 0FF0H, 003CH, etc.).
--
1.9.1
Hello Thomas,
This series has minor changes with respect to V3 addressing all your comments.
Was wondering if there was any feedback or if we still have a chance for 4.12.
Thanks,
Vikas
On Fri, 7 Apr 2017, Vikas Shivappa wrote:
> Sending another version of MBA patch series with changes to V3 version
> as per Thomas feedback here:
> https://marc.info/?l=linux-kernel&m=149125664024881
>
> Changes:
> - Fixed the wrong names in struct document for rdt_domain
> - Changed the mba and cache ctrl values to have seperate structs and put
> them in a union in rdt_resource structure.
>
> patch applies on tip x86/cpus
>
> [PATCH 1/8] Documentation, x86: Intel Memory bandwidth allocation
> [PATCH 2/8] x86/intel_rdt/mba: Generalize the naming to get ready for
> [PATCH 3/8] x86/intel_rdt/mba: Memory b/w allocation feature detect
> [PATCH 4/8] x86/intel_rct/mba: Add MBA structures and initialize MBA
> [PATCH 5/8] x86/intel_rdt: Prep to add info files for MBA
> [PATCH 6/8] x86/intel_rdt/mba: Add info directory files for MBA
> [PATCH 7/8] x86/intel_rdt: Prep to add schemata file for MBA
> [PATCH 8/8] x86/intel_rdt/mba: Add schemata file support for MBA
>
On Wed, 12 Apr 2017, Shivappa Vikas wrote:
> This series has minor changes with respect to V3 addressing all your comments.
> Was wondering if there was any feedback or if we still have a chance for 4.12.
It's on my radar and should make it, unless there is some major hickup.
Thanks,
tglx
Commit-ID: a9cad3d4f046bbd8f096b78d220c8d7074c2e93f
Gitweb: http://git.kernel.org/tip/a9cad3d4f046bbd8f096b78d220c8d7074c2e93f
Author: Vikas Shivappa <[email protected]>
AuthorDate: Fri, 7 Apr 2017 17:33:50 -0700
Committer: Thomas Gleixner <[email protected]>
CommitDate: Fri, 14 Apr 2017 16:10:07 +0200
Documentation, x86: Intel Memory bandwidth allocation
Update the 'intel_rdt_ui' documentation to have Memory bandwidth(b/w)
allocation interface usage.
Signed-off-by: Vikas Shivappa <[email protected]>
Cc: [email protected]
Cc: [email protected]
Cc: [email protected]
Cc: [email protected]
Link: http://lkml.kernel.org/r/[email protected]
Signed-off-by: Thomas Gleixner <[email protected]>
---
Documentation/x86/intel_rdt_ui.txt | 107 ++++++++++++++++++++++++++++++-------
1 file changed, 87 insertions(+), 20 deletions(-)
diff --git a/Documentation/x86/intel_rdt_ui.txt b/Documentation/x86/intel_rdt_ui.txt
index a1ace91..0f6d847 100644
--- a/Documentation/x86/intel_rdt_ui.txt
+++ b/Documentation/x86/intel_rdt_ui.txt
@@ -4,6 +4,7 @@ Copyright (C) 2016 Intel Corporation
Fenghua Yu <[email protected]>
Tony Luck <[email protected]>
+Vikas Shivappa <[email protected]>
This feature is enabled by the CONFIG_INTEL_RDT_A Kconfig and the
X86 /proc/cpuinfo flag bits "rdt", "cat_l3" and "cdp_l3".
@@ -22,19 +23,34 @@ Info directory
The 'info' directory contains information about the enabled
resources. Each resource has its own subdirectory. The subdirectory
-names reflect the resource names. Each subdirectory contains the
-following files:
+names reflect the resource names.
+Cache resource(L3/L2) subdirectory contains the following files:
-"num_closids": The number of CLOSIDs which are valid for this
- resource. The kernel uses the smallest number of
- CLOSIDs of all enabled resources as limit.
+"num_closids": The number of CLOSIDs which are valid for this
+ resource. The kernel uses the smallest number of
+ CLOSIDs of all enabled resources as limit.
-"cbm_mask": The bitmask which is valid for this resource. This
- mask is equivalent to 100%.
+"cbm_mask": The bitmask which is valid for this resource.
+ This mask is equivalent to 100%.
-"min_cbm_bits": The minimum number of consecutive bits which must be
- set when writing a mask.
+"min_cbm_bits": The minimum number of consecutive bits which
+ must be set when writing a mask.
+Memory bandwitdh(MB) subdirectory contains the following files:
+
+"min_bandwidth": The minimum memory bandwidth percentage which
+ user can request.
+
+"bandwidth_gran": The granularity in which the memory bandwidth
+ percentage is allocated. The allocated
+ b/w percentage is rounded off to the next
+ control step available on the hardware. The
+ available bandwidth control steps are:
+ min_bandwidth + N * bandwidth_gran.
+
+"delay_linear": Indicates if the delay scale is linear or
+ non-linear. This field is purely informational
+ only.
Resource groups
---------------
@@ -110,6 +126,22 @@ and 0xA are not. On a system with a 20-bit mask each bit represents 5%
of the capacity of the cache. You could partition the cache into four
equal parts with masks: 0x1f, 0x3e0, 0x7c00, 0xf8000.
+Memory bandwidth(b/w) percentage
+--------------------------------
+For Memory b/w resource, user controls the resource by indicating the
+percentage of total memory b/w.
+
+The minimum bandwidth percentage value for each cpu model is predefined
+and can be looked up through "info/MB/min_bandwidth". The bandwidth
+granularity that is allocated is also dependent on the cpu model and can
+be looked up at "info/MB/bandwidth_gran". The available bandwidth
+control steps are: min_bw + N * bw_gran. Intermediate values are rounded
+to the next control step available on the hardware.
+
+The bandwidth throttling is a core specific mechanism on some of Intel
+SKUs. Using a high bandwidth and a low bandwidth setting on two threads
+sharing a core will result in both threads being throttled to use the
+low bandwidth.
L3 details (code and data prioritization disabled)
--------------------------------------------------
@@ -132,6 +164,13 @@ schemata format is always:
L2:<cache_id0>=<cbm>;<cache_id1>=<cbm>;...
+Memory b/w Allocation details
+-----------------------------
+
+Memory b/w domain is L3 cache.
+
+ MB:<cache_id0>=bandwidth0;<cache_id1>=bandwidth1;...
+
Reading/writing the schemata file
---------------------------------
Reading the schemata file will show the state of all resources
@@ -149,13 +188,14 @@ L3CODE:0=fffff;1=fffff;2=fffff;3=fffff
Example 1
---------
On a two socket machine (one L3 cache per socket) with just four bits
-for cache bit masks
+for cache bit masks, minimum b/w of 10% with a memory bandwidth
+granularity of 10%
# mount -t resctrl resctrl /sys/fs/resctrl
# cd /sys/fs/resctrl
# mkdir p0 p1
-# echo "L3:0=3;1=c" > /sys/fs/resctrl/p0/schemata
-# echo "L3:0=3;1=3" > /sys/fs/resctrl/p1/schemata
+# echo "L3:0=3;1=c\nMB:0=50;1=50" > /sys/fs/resctrl/p0/schemata
+# echo "L3:0=3;1=3\nMB:0=50;1=50" > /sys/fs/resctrl/p1/schemata
The default resource group is unmodified, so we have access to all parts
of all caches (its schemata file reads "L3:0=f;1=f").
@@ -164,6 +204,14 @@ Tasks that are under the control of group "p0" may only allocate from the
"lower" 50% on cache ID 0, and the "upper" 50% of cache ID 1.
Tasks in group "p1" use the "lower" 50% of cache on both sockets.
+Similarly, tasks that are under the control of group "p0" may use a
+maximum memory b/w of 50% on socket0 and 50% on socket 1.
+Tasks in group "p1" may also use 50% memory b/w on both sockets.
+Note that unlike cache masks, memory b/w cannot specify whether these
+allocations can overlap or not. The allocations specifies the maximum
+b/w that the group may be able to use and the system admin can configure
+the b/w accordingly.
+
Example 2
---------
Again two sockets, but this time with a more realistic 20-bit mask.
@@ -177,9 +225,10 @@ of L3 cache on socket 0.
# cd /sys/fs/resctrl
First we reset the schemata for the default group so that the "upper"
-50% of the L3 cache on socket 0 cannot be used by ordinary tasks:
+50% of the L3 cache on socket 0 and 50% of memory b/w cannot be used by
+ordinary tasks:
-# echo "L3:0=3ff;1=fffff" > schemata
+# echo "L3:0=3ff;1=fffff\nMB:0=50;1=100" > schemata
Next we make a resource group for our first real time task and give
it access to the "top" 25% of the cache on socket 0.
@@ -202,6 +251,20 @@ Ditto for the second real time task (with the remaining 25% of cache):
# echo 5678 > p1/tasks
# taskset -cp 2 5678
+For the same 2 socket system with memory b/w resource and CAT L3 the
+schemata would look like(Assume min_bandwidth 10 and bandwidth_gran is
+10):
+
+For our first real time task this would request 20% memory b/w on socket
+0.
+
+# echo -e "L3:0=f8000;1=fffff\nMB:0=20;1=100" > p0/schemata
+
+For our second real time task this would request an other 20% memory b/w
+on socket 0.
+
+# echo -e "L3:0=f8000;1=fffff\nMB:0=20;1=100" > p0/schemata
+
Example 3
---------
@@ -215,18 +278,22 @@ the tasks.
# cd /sys/fs/resctrl
First we reset the schemata for the default group so that the "upper"
-50% of the L3 cache on socket 0 cannot be used by ordinary tasks:
+50% of the L3 cache on socket 0, and 50% of memory bandwidth on socket 0
+cannot be used by ordinary tasks:
-# echo "L3:0=3ff" > schemata
+# echo "L3:0=3ff\nMB:0=50" > schemata
-Next we make a resource group for our real time cores and give
-it access to the "top" 50% of the cache on socket 0.
+Next we make a resource group for our real time cores and give it access
+to the "top" 50% of the cache on socket 0 and 50% of memory bandwidth on
+socket 0.
# mkdir p0
-# echo "L3:0=ffc00;" > p0/schemata
+# echo "L3:0=ffc00\nMB:0=50" > p0/schemata
Finally we move core 4-7 over to the new group and make sure that the
-kernel and the tasks running there get 50% of the cache.
+kernel and the tasks running there get 50% of the cache. They should
+also get 50% of memory bandwidth assuming that the cores 4-7 are SMT
+siblings and only the real time threads are scheduled on the cores 4-7.
# echo C0 > p0/cpus
Commit-ID: 2545e9f51ea860736c4dc1e90a44ed75e9c91e3b
Gitweb: http://git.kernel.org/tip/2545e9f51ea860736c4dc1e90a44ed75e9c91e3b
Author: Vikas Shivappa <[email protected]>
AuthorDate: Fri, 7 Apr 2017 17:33:51 -0700
Committer: Thomas Gleixner <[email protected]>
CommitDate: Fri, 14 Apr 2017 16:10:07 +0200
x86/intel_rdt: Cleanup namespace to support multiple resource types
Lot of data structures and functions are named after cache specific
resources(named after cbm, cache etc). In many cases other non cache
resources may need to share the same data structures/functions.
Generalize such naming to prepare to add more resources like memory
bandwidth.
Signed-off-by: Vikas Shivappa <[email protected]>
Cc: [email protected]
Cc: [email protected]
Cc: [email protected]
Cc: [email protected]
Link: http://lkml.kernel.org/r/[email protected]
Signed-off-by: Thomas Gleixner <[email protected]>
---
arch/x86/include/asm/intel_rdt.h | 22 +++++++++++-----------
arch/x86/kernel/cpu/intel_rdt.c | 28 ++++++++++++++--------------
arch/x86/kernel/cpu/intel_rdt_rdtgroup.c | 16 ++++++++--------
arch/x86/kernel/cpu/intel_rdt_schemata.c | 20 ++++++++++----------
4 files changed, 43 insertions(+), 43 deletions(-)
diff --git a/arch/x86/include/asm/intel_rdt.h b/arch/x86/include/asm/intel_rdt.h
index 611c823..55e0459 100644
--- a/arch/x86/include/asm/intel_rdt.h
+++ b/arch/x86/include/asm/intel_rdt.h
@@ -79,7 +79,7 @@ struct rftype {
* @capable: Is this feature available on this machine
* @name: Name to use in "schemata" file
* @num_closid: Number of CLOSIDs available
- * @max_cbm: Largest Cache Bit Mask allowed
+ * @default_ctrl: Specifies default cache cbm or mem b/w percent.
* @data_width: Character width of data when displaying
* @min_cbm_bits: Minimum number of consecutive bits to be set
* in a cache bit mask
@@ -97,7 +97,7 @@ struct rdt_resource {
int num_closid;
int cbm_len;
int min_cbm_bits;
- u32 max_cbm;
+ u32 default_ctrl;
int data_width;
struct list_head domains;
int msr_base;
@@ -111,17 +111,17 @@ struct rdt_resource {
* @list: all instances of this resource
* @id: unique id for this instance
* @cpu_mask: which cpus share this resource
- * @cbm: array of cache bit masks (indexed by CLOSID)
- * @new_cbm: new cbm value to be loaded
- * @have_new_cbm: did user provide new_cbm for this domain
+ * @ctrl_val: array of cache or mem ctrl values (indexed by CLOSID)
+ * @new_ctrl: new ctrl value to be loaded
+ * @have_new_ctrl: did user provide new_ctrl for this domain
*/
struct rdt_domain {
struct list_head list;
int id;
struct cpumask cpu_mask;
- u32 *cbm;
- u32 new_cbm;
- bool have_new_cbm;
+ u32 *ctrl_val;
+ u32 new_ctrl;
+ bool have_new_ctrl;
};
/**
@@ -172,8 +172,8 @@ union cpuid_0x10_1_eax {
unsigned int full;
};
-/* CPUID.(EAX=10H, ECX=ResID=1).EDX */
-union cpuid_0x10_1_edx {
+/* CPUID.(EAX=10H, ECX=ResID).EDX */
+union cpuid_0x10_x_edx {
struct {
unsigned int cos_max:16;
} split;
@@ -182,7 +182,7 @@ union cpuid_0x10_1_edx {
DECLARE_PER_CPU_READ_MOSTLY(int, cpu_closid);
-void rdt_cbm_update(void *arg);
+void rdt_ctrl_update(void *arg);
struct rdtgroup *rdtgroup_kn_lock_live(struct kernfs_node *kn);
void rdtgroup_kn_unlock(struct kernfs_node *kn);
ssize_t rdtgroup_schemata_write(struct kernfs_open_file *of,
diff --git a/arch/x86/kernel/cpu/intel_rdt.c b/arch/x86/kernel/cpu/intel_rdt.c
index d2e5f92..92d8431 100644
--- a/arch/x86/kernel/cpu/intel_rdt.c
+++ b/arch/x86/kernel/cpu/intel_rdt.c
@@ -125,7 +125,7 @@ static inline bool cache_alloc_hsw_probe(void)
r->num_closid = 4;
r->cbm_len = 20;
- r->max_cbm = max_cbm;
+ r->default_ctrl = max_cbm;
r->min_cbm_bits = 2;
r->capable = true;
r->enabled = true;
@@ -136,16 +136,16 @@ static inline bool cache_alloc_hsw_probe(void)
return false;
}
-static void rdt_get_config(int idx, struct rdt_resource *r)
+static void rdt_get_cache_config(int idx, struct rdt_resource *r)
{
union cpuid_0x10_1_eax eax;
- union cpuid_0x10_1_edx edx;
+ union cpuid_0x10_x_edx edx;
u32 ebx, ecx;
cpuid_count(0x00000010, idx, &eax.full, &ebx, &ecx, &edx.full);
r->num_closid = edx.split.cos_max + 1;
r->cbm_len = eax.split.cbm_len + 1;
- r->max_cbm = BIT_MASK(eax.split.cbm_len + 1) - 1;
+ r->default_ctrl = BIT_MASK(eax.split.cbm_len + 1) - 1;
r->data_width = (r->cbm_len + 3) / 4;
r->capable = true;
r->enabled = true;
@@ -158,7 +158,7 @@ static void rdt_get_cdp_l3_config(int type)
r->num_closid = r_l3->num_closid / 2;
r->cbm_len = r_l3->cbm_len;
- r->max_cbm = r_l3->max_cbm;
+ r->default_ctrl = r_l3->default_ctrl;
r->data_width = (r->cbm_len + 3) / 4;
r->capable = true;
/*
@@ -181,7 +181,7 @@ static int get_cache_id(int cpu, int level)
return -1;
}
-void rdt_cbm_update(void *arg)
+void rdt_ctrl_update(void *arg)
{
struct msr_param *m = (struct msr_param *)arg;
struct rdt_resource *r = m->res;
@@ -202,7 +202,7 @@ found:
for (i = m->low; i < m->high; i++) {
int idx = cbm_idx(r, i);
- wrmsrl(r->msr_base + idx, d->cbm[i]);
+ wrmsrl(r->msr_base + idx, d->ctrl_val[i]);
}
}
@@ -275,8 +275,8 @@ static void domain_add_cpu(int cpu, struct rdt_resource *r)
d->id = id;
- d->cbm = kmalloc_array(r->num_closid, sizeof(*d->cbm), GFP_KERNEL);
- if (!d->cbm) {
+ d->ctrl_val = kmalloc_array(r->num_closid, sizeof(*d->ctrl_val), GFP_KERNEL);
+ if (!d->ctrl_val) {
kfree(d);
return;
}
@@ -284,8 +284,8 @@ static void domain_add_cpu(int cpu, struct rdt_resource *r)
for (i = 0; i < r->num_closid; i++) {
int idx = cbm_idx(r, i);
- d->cbm[i] = r->max_cbm;
- wrmsrl(r->msr_base + idx, d->cbm[i]);
+ d->ctrl_val[i] = r->default_ctrl;
+ wrmsrl(r->msr_base + idx, d->ctrl_val[i]);
}
cpumask_set_cpu(cpu, &d->cpu_mask);
@@ -305,7 +305,7 @@ static void domain_remove_cpu(int cpu, struct rdt_resource *r)
cpumask_clear_cpu(cpu, &d->cpu_mask);
if (cpumask_empty(&d->cpu_mask)) {
- kfree(d->cbm);
+ kfree(d->ctrl_val);
list_del(&d->list);
kfree(d);
}
@@ -383,7 +383,7 @@ static __init bool get_rdt_resources(void)
return false;
if (boot_cpu_has(X86_FEATURE_CAT_L3)) {
- rdt_get_config(1, &rdt_resources_all[RDT_RESOURCE_L3]);
+ rdt_get_cache_config(1, &rdt_resources_all[RDT_RESOURCE_L3]);
if (boot_cpu_has(X86_FEATURE_CDP_L3)) {
rdt_get_cdp_l3_config(RDT_RESOURCE_L3DATA);
rdt_get_cdp_l3_config(RDT_RESOURCE_L3CODE);
@@ -392,7 +392,7 @@ static __init bool get_rdt_resources(void)
}
if (boot_cpu_has(X86_FEATURE_CAT_L2)) {
/* CPUID 0x10.2 fields are same format at 0x10.1 */
- rdt_get_config(2, &rdt_resources_all[RDT_RESOURCE_L2]);
+ rdt_get_cache_config(2, &rdt_resources_all[RDT_RESOURCE_L2]);
ret = true;
}
return ret;
diff --git a/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c b/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c
index 6870ebf..380ee9d 100644
--- a/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c
+++ b/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c
@@ -519,12 +519,12 @@ static int rdt_num_closids_show(struct kernfs_open_file *of,
return 0;
}
-static int rdt_cbm_mask_show(struct kernfs_open_file *of,
+static int rdt_default_ctrl_show(struct kernfs_open_file *of,
struct seq_file *seq, void *v)
{
struct rdt_resource *r = of->kn->parent->priv;
- seq_printf(seq, "%x\n", r->max_cbm);
+ seq_printf(seq, "%x\n", r->default_ctrl);
return 0;
}
@@ -551,7 +551,7 @@ static struct rftype res_info_files[] = {
.name = "cbm_mask",
.mode = 0444,
.kf_ops = &rdtgroup_kf_single_ops,
- .seq_show = rdt_cbm_mask_show,
+ .seq_show = rdt_default_ctrl_show,
},
{
.name = "min_cbm_bits",
@@ -801,7 +801,7 @@ out:
return dentry;
}
-static int reset_all_cbms(struct rdt_resource *r)
+static int reset_all_ctrls(struct rdt_resource *r)
{
struct msr_param msr_param;
cpumask_var_t cpu_mask;
@@ -824,14 +824,14 @@ static int reset_all_cbms(struct rdt_resource *r)
cpumask_set_cpu(cpumask_any(&d->cpu_mask), cpu_mask);
for (i = 0; i < r->num_closid; i++)
- d->cbm[i] = r->max_cbm;
+ d->ctrl_val[i] = r->default_ctrl;
}
cpu = get_cpu();
/* Update CBM on this cpu if it's in cpu_mask. */
if (cpumask_test_cpu(cpu, cpu_mask))
- rdt_cbm_update(&msr_param);
+ rdt_ctrl_update(&msr_param);
/* Update CBM on all other cpus in cpu_mask. */
- smp_call_function_many(cpu_mask, rdt_cbm_update, &msr_param, 1);
+ smp_call_function_many(cpu_mask, rdt_ctrl_update, &msr_param, 1);
put_cpu();
free_cpumask_var(cpu_mask);
@@ -917,7 +917,7 @@ static void rdt_kill_sb(struct super_block *sb)
/*Put everything back to default values. */
for_each_enabled_rdt_resource(r)
- reset_all_cbms(r);
+ reset_all_ctrls(r);
cdp_disable();
rmdir_all_sub();
static_branch_disable(&rdt_enable_key);
diff --git a/arch/x86/kernel/cpu/intel_rdt_schemata.c b/arch/x86/kernel/cpu/intel_rdt_schemata.c
index 8594db4..7695179 100644
--- a/arch/x86/kernel/cpu/intel_rdt_schemata.c
+++ b/arch/x86/kernel/cpu/intel_rdt_schemata.c
@@ -38,7 +38,7 @@ static bool cbm_validate(unsigned long var, struct rdt_resource *r)
{
unsigned long first_bit, zero_bit;
- if (var == 0 || var > r->max_cbm)
+ if (var == 0 || var > r->default_ctrl)
return false;
first_bit = find_first_bit(&var, r->cbm_len);
@@ -61,7 +61,7 @@ static int parse_cbm(char *buf, struct rdt_resource *r, struct rdt_domain *d)
unsigned long data;
int ret;
- if (d->have_new_cbm)
+ if (d->have_new_ctrl)
return -EINVAL;
ret = kstrtoul(buf, 16, &data);
@@ -69,8 +69,8 @@ static int parse_cbm(char *buf, struct rdt_resource *r, struct rdt_domain *d)
return ret;
if (!cbm_validate(data, r))
return -EINVAL;
- d->new_cbm = data;
- d->have_new_cbm = true;
+ d->new_ctrl = data;
+ d->have_new_ctrl = true;
return 0;
}
@@ -119,9 +119,9 @@ static int update_domains(struct rdt_resource *r, int closid)
msr_param.res = r;
list_for_each_entry(d, &r->domains, list) {
- if (d->have_new_cbm && d->new_cbm != d->cbm[closid]) {
+ if (d->have_new_ctrl && d->new_ctrl != d->ctrl_val[closid]) {
cpumask_set_cpu(cpumask_any(&d->cpu_mask), cpu_mask);
- d->cbm[closid] = d->new_cbm;
+ d->ctrl_val[closid] = d->new_ctrl;
}
}
if (cpumask_empty(cpu_mask))
@@ -129,9 +129,9 @@ static int update_domains(struct rdt_resource *r, int closid)
cpu = get_cpu();
/* Update CBM on this cpu if it's in cpu_mask. */
if (cpumask_test_cpu(cpu, cpu_mask))
- rdt_cbm_update(&msr_param);
+ rdt_ctrl_update(&msr_param);
/* Update CBM on other cpus. */
- smp_call_function_many(cpu_mask, rdt_cbm_update, &msr_param, 1);
+ smp_call_function_many(cpu_mask, rdt_ctrl_update, &msr_param, 1);
put_cpu();
done:
@@ -164,7 +164,7 @@ ssize_t rdtgroup_schemata_write(struct kernfs_open_file *of,
for_each_enabled_rdt_resource(r)
list_for_each_entry(dom, &r->domains, list)
- dom->have_new_cbm = false;
+ dom->have_new_ctrl = false;
while ((tok = strsep(&buf, "\n")) != NULL) {
resname = strsep(&tok, ":");
@@ -208,7 +208,7 @@ static void show_doms(struct seq_file *s, struct rdt_resource *r, int closid)
if (sep)
seq_puts(s, ";");
seq_printf(s, "%d=%0*x", dom->id, max_data_width,
- dom->cbm[closid]);
+ dom->ctrl_val[closid]);
sep = true;
}
seq_puts(s, "\n");
Commit-ID: ab66a33b032eb5b8186aeaf648127bce829c9efd
Gitweb: http://git.kernel.org/tip/ab66a33b032eb5b8186aeaf648127bce829c9efd
Author: Vikas Shivappa <[email protected]>
AuthorDate: Fri, 7 Apr 2017 17:33:52 -0700
Committer: Thomas Gleixner <[email protected]>
CommitDate: Fri, 14 Apr 2017 16:10:07 +0200
x86/intel_rdt/mba: Memory bandwith allocation feature detect
Detect MBA feature if CPUID.(EAX=10H, ECX=0):EBX.L2[bit 3] = 1.
Add supporting data structures to detect feature details which is done
in later patch using CPUID with EAX=10H, ECX= 3.
Signed-off-by: Vikas Shivappa <[email protected]>
Cc: [email protected]
Cc: [email protected]
Cc: [email protected]
Cc: [email protected]
Link: http://lkml.kernel.org/r/[email protected]
Signed-off-by: Thomas Gleixner <[email protected]>
---
arch/x86/include/asm/cpufeatures.h | 2 ++
arch/x86/include/asm/intel_rdt.h | 8 ++++++++
arch/x86/kernel/cpu/intel_rdt.c | 4 ++++
arch/x86/kernel/cpu/scattered.c | 1 +
4 files changed, 15 insertions(+)
diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
index b04bb6d..25d7f52 100644
--- a/arch/x86/include/asm/cpufeatures.h
+++ b/arch/x86/include/asm/cpufeatures.h
@@ -201,6 +201,8 @@
#define X86_FEATURE_AVX512_4VNNIW (7*32+16) /* AVX-512 Neural Network Instructions */
#define X86_FEATURE_AVX512_4FMAPS (7*32+17) /* AVX-512 Multiply Accumulation Single precision */
+#define X86_FEATURE_MBA ( 7*32+18) /* Memory Bandwidth Allocation */
+
/* Virtualization flags: Linux defined, word 8 */
#define X86_FEATURE_TPR_SHADOW ( 8*32+ 0) /* Intel TPR Shadow */
#define X86_FEATURE_VNMI ( 8*32+ 1) /* Intel Virtual NMI */
diff --git a/arch/x86/include/asm/intel_rdt.h b/arch/x86/include/asm/intel_rdt.h
index 51e4a1c..6295594 100644
--- a/arch/x86/include/asm/intel_rdt.h
+++ b/arch/x86/include/asm/intel_rdt.h
@@ -184,6 +184,14 @@ union cpuid_0x10_1_eax {
unsigned int full;
};
+/* CPUID.(EAX=10H, ECX=ResID=3).EAX */
+union cpuid_0x10_3_eax {
+ struct {
+ unsigned int max_delay:12;
+ } split;
+ unsigned int full;
+};
+
/* CPUID.(EAX=10H, ECX=ResID).EDX */
union cpuid_0x10_x_edx {
struct {
diff --git a/arch/x86/kernel/cpu/intel_rdt.c b/arch/x86/kernel/cpu/intel_rdt.c
index 8486abe..82eafd6 100644
--- a/arch/x86/kernel/cpu/intel_rdt.c
+++ b/arch/x86/kernel/cpu/intel_rdt.c
@@ -430,6 +430,10 @@ static __init bool get_rdt_resources(void)
rdt_get_cache_config(2, &rdt_resources_all[RDT_RESOURCE_L2]);
ret = true;
}
+
+ if (boot_cpu_has(X86_FEATURE_MBA))
+ ret = true;
+
return ret;
}
diff --git a/arch/x86/kernel/cpu/scattered.c b/arch/x86/kernel/cpu/scattered.c
index d979406..23c2350 100644
--- a/arch/x86/kernel/cpu/scattered.c
+++ b/arch/x86/kernel/cpu/scattered.c
@@ -27,6 +27,7 @@ static const struct cpuid_bit cpuid_bits[] = {
{ X86_FEATURE_CAT_L3, CPUID_EBX, 1, 0x00000010, 0 },
{ X86_FEATURE_CAT_L2, CPUID_EBX, 2, 0x00000010, 0 },
{ X86_FEATURE_CDP_L3, CPUID_ECX, 2, 0x00000010, 1 },
+ { X86_FEATURE_MBA, CPUID_EBX, 3, 0x00000010, 0 },
{ X86_FEATURE_HW_PSTATE, CPUID_EDX, 7, 0x80000007, 0 },
{ X86_FEATURE_CPB, CPUID_EDX, 9, 0x80000007, 0 },
{ X86_FEATURE_PROC_FEEDBACK, CPUID_EDX, 11, 0x80000007, 0 },
Commit-ID: 05b93417ce5b924c6652de19fdcc27439ab37c90
Gitweb: http://git.kernel.org/tip/05b93417ce5b924c6652de19fdcc27439ab37c90
Author: Vikas Shivappa <[email protected]>
AuthorDate: Fri, 7 Apr 2017 17:33:53 -0700
Committer: Thomas Gleixner <[email protected]>
CommitDate: Fri, 14 Apr 2017 16:10:08 +0200
x86/intel_rdt/mba: Add primary support for Memory Bandwidth Allocation (MBA)
The MBA feature details like minimum bandwidth supported, bandwidth
granularity etc are obtained via executing CPUID with EAX=10H ,ECX=3.
Setup and initialize the MBA specific extensions to data structures like
global list of RDT resources, RDT resource structure and RDT domain
structure.
[ tglx: Split out the seperate structure and the CBM related parts ]
Signed-off-by: Vikas Shivappa <[email protected]>
Cc: [email protected]
Cc: [email protected]
Cc: [email protected]
Cc: [email protected]
Link: http://lkml.kernel.org/r/[email protected]
Signed-off-by: Thomas Gleixner <[email protected]>
---
arch/x86/include/asm/intel_rdt.h | 24 ++++++++++-
arch/x86/kernel/cpu/intel_rdt.c | 89 +++++++++++++++++++++++++++++++++++++++-
2 files changed, 110 insertions(+), 3 deletions(-)
diff --git a/arch/x86/include/asm/intel_rdt.h b/arch/x86/include/asm/intel_rdt.h
index 6295594..0620fc9 100644
--- a/arch/x86/include/asm/intel_rdt.h
+++ b/arch/x86/include/asm/intel_rdt.h
@@ -12,6 +12,7 @@
#define IA32_L3_QOS_CFG 0xc81
#define IA32_L3_CBM_BASE 0xc90
#define IA32_L2_CBM_BASE 0xd10
+#define IA32_MBA_THRTL_BASE 0xd50
#define L3_QOS_CDP_ENABLE 0x01ULL
@@ -120,6 +121,23 @@ struct rdt_cache {
};
/**
+ * struct rdt_membw - Memory bandwidth allocation related data
+ * @max_delay: Max throttle delay. Delay is the hardware
+ * representation for memory bandwidth.
+ * @min_bw: Minimum memory bandwidth percentage user can request
+ * @bw_gran: Granularity at which the memory bandwidth is allocated
+ * @delay_linear: True if memory B/W delay is in linear scale
+ * @mb_map: Mapping of memory B/W percentage to memory B/W delay
+ */
+struct rdt_membw {
+ u32 max_delay;
+ u32 min_bw;
+ u32 bw_gran;
+ u32 delay_linear;
+ u32 *mb_map;
+};
+
+/**
* struct rdt_resource - attributes of an RDT resource
* @enabled: Is this feature enabled on this machine
* @capable: Is this feature available on this machine
@@ -145,7 +163,10 @@ struct rdt_resource {
struct rdt_resource *r);
int data_width;
struct list_head domains;
- struct rdt_cache cache;
+ union {
+ struct rdt_cache cache;
+ struct rdt_membw membw;
+ };
};
extern struct mutex rdtgroup_mutex;
@@ -161,6 +182,7 @@ enum {
RDT_RESOURCE_L3DATA,
RDT_RESOURCE_L3CODE,
RDT_RESOURCE_L2,
+ RDT_RESOURCE_MBA,
/* Must be the last */
RDT_NUM_RESOURCES,
diff --git a/arch/x86/kernel/cpu/intel_rdt.c b/arch/x86/kernel/cpu/intel_rdt.c
index 82eafd6..ae1aec1 100644
--- a/arch/x86/kernel/cpu/intel_rdt.c
+++ b/arch/x86/kernel/cpu/intel_rdt.c
@@ -32,6 +32,9 @@
#include <asm/intel-family.h>
#include <asm/intel_rdt.h>
+#define MAX_MBA_BW 100u
+#define MBA_IS_LINEAR 0x4
+
/* Mutex to protect rdtgroup access. */
DEFINE_MUTEX(rdtgroup_mutex);
@@ -44,6 +47,8 @@ DEFINE_PER_CPU_READ_MOSTLY(int, cpu_closid);
int max_name_width, max_data_width;
static void
+mba_wrmsr(struct rdt_domain *d, struct msr_param *m, struct rdt_resource *r);
+static void
cat_wrmsr(struct rdt_domain *d, struct msr_param *m, struct rdt_resource *r);
#define domain_init(id) LIST_HEAD_INIT(rdt_resources_all[id].domains)
@@ -97,6 +102,13 @@ struct rdt_resource rdt_resources_all[] = {
.cbm_idx_offset = 0,
},
},
+ {
+ .name = "MB",
+ .domains = domain_init(RDT_RESOURCE_MBA),
+ .msr_base = IA32_MBA_THRTL_BASE,
+ .msr_update = mba_wrmsr,
+ .cache_level = 3,
+ },
};
static unsigned int cbm_idx(struct rdt_resource *r, unsigned int closid)
@@ -151,6 +163,53 @@ static inline bool cache_alloc_hsw_probe(void)
return false;
}
+/*
+ * rdt_get_mb_table() - get a mapping of bandwidth(b/w) percentage values
+ * exposed to user interface and the h/w understandable delay values.
+ *
+ * The non-linear delay values have the granularity of power of two
+ * and also the h/w does not guarantee a curve for configured delay
+ * values vs. actual b/w enforced.
+ * Hence we need a mapping that is pre calibrated so the user can
+ * express the memory b/w as a percentage value.
+ */
+static inline bool rdt_get_mb_table(struct rdt_resource *r)
+{
+ /*
+ * There are no Intel SKUs as of now to support non-linear delay.
+ */
+ pr_info("MBA b/w map not implemented for cpu:%d, model:%d",
+ boot_cpu_data.x86, boot_cpu_data.x86_model);
+
+ return false;
+}
+
+static bool rdt_get_mem_config(struct rdt_resource *r)
+{
+ union cpuid_0x10_3_eax eax;
+ union cpuid_0x10_x_edx edx;
+ u32 ebx, ecx;
+
+ cpuid_count(0x00000010, 3, &eax.full, &ebx, &ecx, &edx.full);
+ r->num_closid = edx.split.cos_max + 1;
+ r->membw.max_delay = eax.split.max_delay + 1;
+ r->default_ctrl = MAX_MBA_BW;
+ if (ecx & MBA_IS_LINEAR) {
+ r->membw.delay_linear = true;
+ r->membw.min_bw = MAX_MBA_BW - r->membw.max_delay;
+ r->membw.bw_gran = MAX_MBA_BW - r->membw.max_delay;
+ } else {
+ if (!rdt_get_mb_table(r))
+ return false;
+ }
+ r->data_width = 3;
+
+ r->capable = true;
+ r->enabled = true;
+
+ return true;
+}
+
static void rdt_get_cache_config(int idx, struct rdt_resource *r)
{
union cpuid_0x10_1_eax eax;
@@ -196,6 +255,30 @@ static int get_cache_id(int cpu, int level)
return -1;
}
+/*
+ * Map the memory b/w percentage value to delay values
+ * that can be written to QOS_MSRs.
+ * There are currently no SKUs which support non linear delay values.
+ */
+static u32 delay_bw_map(unsigned long bw, struct rdt_resource *r)
+{
+ if (r->membw.delay_linear)
+ return MAX_MBA_BW - bw;
+
+ pr_warn_once("Non Linear delay-bw map not supported but queried\n");
+ return r->default_ctrl;
+}
+
+static void
+mba_wrmsr(struct rdt_domain *d, struct msr_param *m, struct rdt_resource *r)
+{
+ unsigned int i;
+
+ /* Write the delay values for mba. */
+ for (i = m->low; i < m->high; i++)
+ wrmsrl(r->msr_base + i, delay_bw_map(d->ctrl_val[i], r));
+}
+
static void
cat_wrmsr(struct rdt_domain *d, struct msr_param *m, struct rdt_resource *r)
{
@@ -431,8 +514,10 @@ static __init bool get_rdt_resources(void)
ret = true;
}
- if (boot_cpu_has(X86_FEATURE_MBA))
- ret = true;
+ if (boot_cpu_has(X86_FEATURE_MBA)) {
+ if (rdt_get_mem_config(&rdt_resources_all[RDT_RESOURCE_MBA]))
+ ret = true;
+ }
return ret;
}
Commit-ID: 6a507a6ad8a6955a7d57255377edcef576823749
Gitweb: http://git.kernel.org/tip/6a507a6ad8a6955a7d57255377edcef576823749
Author: Vikas Shivappa <[email protected]>
AuthorDate: Fri, 7 Apr 2017 17:33:54 -0700
Committer: Thomas Gleixner <[email protected]>
CommitDate: Fri, 14 Apr 2017 16:10:08 +0200
x86/intel_rdt: Make information files resource specific
Cache allocation and memory bandwidth allocation require different
information files in the resctrl/info directory, but the current
implementation does not allow to have files per resource.
Add the necessary fields to the resource struct and assign the files
dynamically depending on the resource type.
Signed-off-by: Vikas Shivappa <[email protected]>
Cc: [email protected]
Cc: [email protected]
Cc: [email protected]
Cc: [email protected]
Link: http://lkml.kernel.org/r/[email protected]
Signed-off-by: Thomas Gleixner <[email protected]>
---
arch/x86/include/asm/intel_rdt.h | 6 ++++++
arch/x86/kernel/cpu/intel_rdt.c | 1 +
arch/x86/kernel/cpu/intel_rdt_rdtgroup.c | 18 ++++++++++++++----
3 files changed, 21 insertions(+), 4 deletions(-)
diff --git a/arch/x86/include/asm/intel_rdt.h b/arch/x86/include/asm/intel_rdt.h
index 0620fc9..b0f0a60 100644
--- a/arch/x86/include/asm/intel_rdt.h
+++ b/arch/x86/include/asm/intel_rdt.h
@@ -150,6 +150,8 @@ struct rdt_membw {
* @data_width: Character width of data when displaying
* @domains: All domains for this resource
* @cache: Cache allocation related data
+ * @info_files: resctrl info files for the resource
+ * @nr_info_files: Number of info files
*/
struct rdt_resource {
bool enabled;
@@ -167,8 +169,12 @@ struct rdt_resource {
struct rdt_cache cache;
struct rdt_membw membw;
};
+ struct rftype *info_files;
+ int nr_info_files;
};
+void rdt_get_cache_infofile(struct rdt_resource *r);
+
extern struct mutex rdtgroup_mutex;
extern struct rdt_resource rdt_resources_all[];
diff --git a/arch/x86/kernel/cpu/intel_rdt.c b/arch/x86/kernel/cpu/intel_rdt.c
index ae1aec1..b387e04 100644
--- a/arch/x86/kernel/cpu/intel_rdt.c
+++ b/arch/x86/kernel/cpu/intel_rdt.c
@@ -221,6 +221,7 @@ static void rdt_get_cache_config(int idx, struct rdt_resource *r)
r->cache.cbm_len = eax.split.cbm_len + 1;
r->default_ctrl = BIT_MASK(eax.split.cbm_len + 1) - 1;
r->data_width = (r->cache.cbm_len + 3) / 4;
+ rdt_get_cache_infofile(r);
r->capable = true;
r->enabled = true;
}
diff --git a/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c b/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c
index 3ec230b..65d957b 100644
--- a/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c
+++ b/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c
@@ -540,7 +540,7 @@ static int rdt_min_cbm_bits_show(struct kernfs_open_file *of,
}
/* rdtgroup information files for one cache resource. */
-static struct rftype res_info_files[] = {
+static struct rftype res_cache_info_files[] = {
{
.name = "num_closids",
.mode = 0444,
@@ -561,11 +561,18 @@ static struct rftype res_info_files[] = {
},
};
+void rdt_get_cache_infofile(struct rdt_resource *r)
+{
+ r->info_files = res_cache_info_files;
+ r->nr_info_files = ARRAY_SIZE(res_cache_info_files);
+}
+
static int rdtgroup_create_info_dir(struct kernfs_node *parent_kn)
{
struct kernfs_node *kn_subdir;
+ struct rftype *res_info_files;
struct rdt_resource *r;
- int ret;
+ int ret, len;
/* create the directory */
kn_info = kernfs_create_dir(parent_kn, "info", parent_kn->mode, NULL);
@@ -584,8 +591,11 @@ static int rdtgroup_create_info_dir(struct kernfs_node *parent_kn)
ret = rdtgroup_kn_set_ugid(kn_subdir);
if (ret)
goto out_destroy;
- ret = rdtgroup_add_files(kn_subdir, res_info_files,
- ARRAY_SIZE(res_info_files));
+
+ res_info_files = r->info_files;
+ len = r->nr_info_files;
+
+ ret = rdtgroup_add_files(kn_subdir, res_info_files, len);
if (ret)
goto out_destroy;
kernfs_activate(kn_subdir);
Commit-ID: db69ef65636e6da135680d75d8646bd7e76136bc
Gitweb: http://git.kernel.org/tip/db69ef65636e6da135680d75d8646bd7e76136bc
Author: Vikas Shivappa <[email protected]>
AuthorDate: Fri, 7 Apr 2017 17:33:55 -0700
Committer: Thomas Gleixner <[email protected]>
CommitDate: Fri, 14 Apr 2017 16:10:08 +0200
x86/intel_rdt/mba: Add info directory files for Memory Bandwidth Allocation
The files in the info directory for MBA are as follows:
num_closids
The maximum number of CLOSids available for MBA
min_bandwidth
The minimum memory bandwidth percentage value
bandwidth_gran
The granularity of the bandwidth control in percent for the
particular CPU SKU. Intermediate values entered are rounded off
to the previous control step available. Available bandwidth
control steps are minimum_bandwidth + N * bandwidth_gran.
delay_linear
When set, the OS writes a linear percentage based value to the
control MSRs ranging from minimum_bandwidth to 100 percent.
This value is informational and has no influence on the values
written to the schemata files. The values written to the
schemata are always bandwidth percentage that is requested.
Signed-off-by: Vikas Shivappa <[email protected]>
Cc: [email protected]
Cc: [email protected]
Cc: [email protected]
Cc: [email protected]
Link: http://lkml.kernel.org/r/[email protected]
Signed-off-by: Thomas Gleixner <[email protected]>
---
arch/x86/include/asm/intel_rdt.h | 1 +
arch/x86/kernel/cpu/intel_rdt.c | 1 +
arch/x86/kernel/cpu/intel_rdt_rdtgroup.c | 62 ++++++++++++++++++++++++++++++--
3 files changed, 62 insertions(+), 2 deletions(-)
diff --git a/arch/x86/include/asm/intel_rdt.h b/arch/x86/include/asm/intel_rdt.h
index b0f0a60..167fe10 100644
--- a/arch/x86/include/asm/intel_rdt.h
+++ b/arch/x86/include/asm/intel_rdt.h
@@ -174,6 +174,7 @@ struct rdt_resource {
};
void rdt_get_cache_infofile(struct rdt_resource *r);
+void rdt_get_mba_infofile(struct rdt_resource *r);
extern struct mutex rdtgroup_mutex;
diff --git a/arch/x86/kernel/cpu/intel_rdt.c b/arch/x86/kernel/cpu/intel_rdt.c
index b387e04..438efef 100644
--- a/arch/x86/kernel/cpu/intel_rdt.c
+++ b/arch/x86/kernel/cpu/intel_rdt.c
@@ -203,6 +203,7 @@ static bool rdt_get_mem_config(struct rdt_resource *r)
return false;
}
r->data_width = 3;
+ rdt_get_mba_infofile(r);
r->capable = true;
r->enabled = true;
diff --git a/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c b/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c
index 65d957b..f5af0cc 100644
--- a/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c
+++ b/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c
@@ -515,7 +515,6 @@ static int rdt_num_closids_show(struct kernfs_open_file *of,
struct rdt_resource *r = of->kn->parent->priv;
seq_printf(seq, "%d\n", r->num_closid);
-
return 0;
}
@@ -525,7 +524,6 @@ static int rdt_default_ctrl_show(struct kernfs_open_file *of,
struct rdt_resource *r = of->kn->parent->priv;
seq_printf(seq, "%x\n", r->default_ctrl);
-
return 0;
}
@@ -535,7 +533,33 @@ static int rdt_min_cbm_bits_show(struct kernfs_open_file *of,
struct rdt_resource *r = of->kn->parent->priv;
seq_printf(seq, "%u\n", r->cache.min_cbm_bits);
+ return 0;
+}
+
+static int rdt_min_bw_show(struct kernfs_open_file *of,
+ struct seq_file *seq, void *v)
+{
+ struct rdt_resource *r = of->kn->parent->priv;
+ seq_printf(seq, "%u\n", r->membw.min_bw);
+ return 0;
+}
+
+static int rdt_bw_gran_show(struct kernfs_open_file *of,
+ struct seq_file *seq, void *v)
+{
+ struct rdt_resource *r = of->kn->parent->priv;
+
+ seq_printf(seq, "%u\n", r->membw.bw_gran);
+ return 0;
+}
+
+static int rdt_delay_linear_show(struct kernfs_open_file *of,
+ struct seq_file *seq, void *v)
+{
+ struct rdt_resource *r = of->kn->parent->priv;
+
+ seq_printf(seq, "%u\n", r->membw.delay_linear);
return 0;
}
@@ -561,6 +585,40 @@ static struct rftype res_cache_info_files[] = {
},
};
+/* rdtgroup information files for memory bandwidth. */
+static struct rftype res_mba_info_files[] = {
+ {
+ .name = "num_closids",
+ .mode = 0444,
+ .kf_ops = &rdtgroup_kf_single_ops,
+ .seq_show = rdt_num_closids_show,
+ },
+ {
+ .name = "min_bandwidth",
+ .mode = 0444,
+ .kf_ops = &rdtgroup_kf_single_ops,
+ .seq_show = rdt_min_bw_show,
+ },
+ {
+ .name = "bandwidth_gran",
+ .mode = 0444,
+ .kf_ops = &rdtgroup_kf_single_ops,
+ .seq_show = rdt_bw_gran_show,
+ },
+ {
+ .name = "delay_linear",
+ .mode = 0444,
+ .kf_ops = &rdtgroup_kf_single_ops,
+ .seq_show = rdt_delay_linear_show,
+ },
+};
+
+void rdt_get_mba_infofile(struct rdt_resource *r)
+{
+ r->info_files = res_mba_info_files;
+ r->nr_info_files = ARRAY_SIZE(res_mba_info_files);
+}
+
void rdt_get_cache_infofile(struct rdt_resource *r)
{
r->info_files = res_cache_info_files;
Commit-ID: c6ea67de52c29a8b45e5fc7569fc4336bfd557b0
Gitweb: http://git.kernel.org/tip/c6ea67de52c29a8b45e5fc7569fc4336bfd557b0
Author: Vikas Shivappa <[email protected]>
AuthorDate: Fri, 7 Apr 2017 17:33:56 -0700
Committer: Thomas Gleixner <[email protected]>
CommitDate: Fri, 14 Apr 2017 16:10:09 +0200
x86/intel_rdt: Make schemata file parsers resource specific
The schemata files are the user space interface to update resource
controls. The parser is hardwired to support only cache resources, which do
not fit the requirements of memory resources.
Add a function pointer for a parser to the struct rdt_resource and switch
the cache parsing over.
[ tglx: Massaged changelog ]
Signed-off-by: Vikas Shivappa <[email protected]>
Cc: [email protected]
Cc: [email protected]
Cc: [email protected]
Cc: [email protected]
Link: http://lkml.kernel.org/r/[email protected]
Signed-off-by: Thomas Gleixner <[email protected]>
---
arch/x86/include/asm/intel_rdt.h | 6 ++++++
arch/x86/kernel/cpu/intel_rdt.c | 8 ++++++++
arch/x86/kernel/cpu/intel_rdt_schemata.c | 31 +++++++++++++++++--------------
3 files changed, 31 insertions(+), 14 deletions(-)
diff --git a/arch/x86/include/asm/intel_rdt.h b/arch/x86/include/asm/intel_rdt.h
index 167fe10..4a90057 100644
--- a/arch/x86/include/asm/intel_rdt.h
+++ b/arch/x86/include/asm/intel_rdt.h
@@ -152,6 +152,8 @@ struct rdt_membw {
* @cache: Cache allocation related data
* @info_files: resctrl info files for the resource
* @nr_info_files: Number of info files
+ * @format_str: Per resource format string to show domain value
+ * @parse_ctrlval: Per resource function pointer to parse control values
*/
struct rdt_resource {
bool enabled;
@@ -171,10 +173,14 @@ struct rdt_resource {
};
struct rftype *info_files;
int nr_info_files;
+ const char *format_str;
+ int (*parse_ctrlval) (char *buf, struct rdt_resource *r,
+ struct rdt_domain *d);
};
void rdt_get_cache_infofile(struct rdt_resource *r);
void rdt_get_mba_infofile(struct rdt_resource *r);
+int parse_cbm(char *buf, struct rdt_resource *r, struct rdt_domain *d);
extern struct mutex rdtgroup_mutex;
diff --git a/arch/x86/kernel/cpu/intel_rdt.c b/arch/x86/kernel/cpu/intel_rdt.c
index 438efef..1e410ea 100644
--- a/arch/x86/kernel/cpu/intel_rdt.c
+++ b/arch/x86/kernel/cpu/intel_rdt.c
@@ -65,6 +65,8 @@ struct rdt_resource rdt_resources_all[] = {
.cbm_idx_mult = 1,
.cbm_idx_offset = 0,
},
+ .parse_ctrlval = parse_cbm,
+ .format_str = "%d=%0*x",
},
{
.name = "L3DATA",
@@ -77,6 +79,8 @@ struct rdt_resource rdt_resources_all[] = {
.cbm_idx_mult = 2,
.cbm_idx_offset = 0,
},
+ .parse_ctrlval = parse_cbm,
+ .format_str = "%d=%0*x",
},
{
.name = "L3CODE",
@@ -89,6 +93,8 @@ struct rdt_resource rdt_resources_all[] = {
.cbm_idx_mult = 2,
.cbm_idx_offset = 1,
},
+ .parse_ctrlval = parse_cbm,
+ .format_str = "%d=%0*x",
},
{
.name = "L2",
@@ -101,6 +107,8 @@ struct rdt_resource rdt_resources_all[] = {
.cbm_idx_mult = 1,
.cbm_idx_offset = 0,
},
+ .parse_ctrlval = parse_cbm,
+ .format_str = "%d=%0*x",
},
{
.name = "MB",
diff --git a/arch/x86/kernel/cpu/intel_rdt_schemata.c b/arch/x86/kernel/cpu/intel_rdt_schemata.c
index 5097ac6..c72c9cc 100644
--- a/arch/x86/kernel/cpu/intel_rdt_schemata.c
+++ b/arch/x86/kernel/cpu/intel_rdt_schemata.c
@@ -34,22 +34,29 @@
* are allowed (e.g. FFFFH, 0FF0H, 003CH, etc.).
* Additionally Haswell requires at least two bits set.
*/
-static bool cbm_validate(unsigned long var, struct rdt_resource *r)
+static bool cbm_validate(char *buf, unsigned long *data, struct rdt_resource *r)
{
- unsigned long first_bit, zero_bit;
+ unsigned long first_bit, zero_bit, val;
unsigned int cbm_len = r->cache.cbm_len;
+ int ret;
+
+ ret = kstrtoul(buf, 16, &val);
+ if (ret)
+ return false;
- if (var == 0 || var > r->default_ctrl)
+ if (val == 0 || val > r->default_ctrl)
return false;
- first_bit = find_first_bit(&var, cbm_len);
- zero_bit = find_next_zero_bit(&var, cbm_len, first_bit);
+ first_bit = find_first_bit(&val, cbm_len);
+ zero_bit = find_next_zero_bit(&val, cbm_len, first_bit);
- if (find_next_bit(&var, cbm_len, zero_bit) < cbm_len)
+ if (find_next_bit(&val, cbm_len, zero_bit) < cbm_len)
return false;
if ((zero_bit - first_bit) < r->cache.min_cbm_bits)
return false;
+
+ *data = val;
return true;
}
@@ -57,18 +64,14 @@ static bool cbm_validate(unsigned long var, struct rdt_resource *r)
* Read one cache bit mask (hex). Check that it is valid for the current
* resource type.
*/
-static int parse_cbm(char *buf, struct rdt_resource *r, struct rdt_domain *d)
+int parse_cbm(char *buf, struct rdt_resource *r, struct rdt_domain *d)
{
unsigned long data;
- int ret;
if (d->have_new_ctrl)
return -EINVAL;
- ret = kstrtoul(buf, 16, &data);
- if (ret)
- return ret;
- if (!cbm_validate(data, r))
+ if(!cbm_validate(buf, &data, r))
return -EINVAL;
d->new_ctrl = data;
d->have_new_ctrl = true;
@@ -97,7 +100,7 @@ next:
return -EINVAL;
list_for_each_entry(d, &r->domains, list) {
if (d->id == dom_id) {
- if (parse_cbm(dom, r, d))
+ if (r->parse_ctrlval(dom, r, d))
return -EINVAL;
goto next;
}
@@ -208,7 +211,7 @@ static void show_doms(struct seq_file *s, struct rdt_resource *r, int closid)
list_for_each_entry(dom, &r->domains, list) {
if (sep)
seq_puts(s, ";");
- seq_printf(s, "%d=%0*x", dom->id, max_data_width,
+ seq_printf(s, r->format_str, dom->id, max_data_width,
dom->ctrl_val[closid]);
sep = true;
}
Commit-ID: 64e8ed3d4a6dcd6139a869a3e760e625cb0d3022
Gitweb: http://git.kernel.org/tip/64e8ed3d4a6dcd6139a869a3e760e625cb0d3022
Author: Vikas Shivappa <[email protected]>
AuthorDate: Fri, 7 Apr 2017 17:33:57 -0700
Committer: Thomas Gleixner <[email protected]>
CommitDate: Fri, 14 Apr 2017 16:10:09 +0200
x86/intel_rdt/mba: Add schemata file support for MBA
Add support to update the MBA bandwidth values for the domains via the
schemata file.
- Verify that the bandwidth value is valid
- Round to the next control step depending on the bandwidth granularity of
the hardware
- Convert the bandwidth to delay values and write the delay values to
the corresponding domain PQOS_MSRs.
[ tglx: Massaged changelog ]
Signed-off-by: Vikas Shivappa <[email protected]>
Cc: [email protected]
Cc: [email protected]
Cc: [email protected]
Cc: [email protected]
Link: http://lkml.kernel.org/r/[email protected]
Signed-off-by: Thomas Gleixner <[email protected]>
---
arch/x86/include/asm/intel_rdt.h | 1 +
arch/x86/kernel/cpu/intel_rdt.c | 2 ++
arch/x86/kernel/cpu/intel_rdt_schemata.c | 43 ++++++++++++++++++++++++++++++++
3 files changed, 46 insertions(+)
diff --git a/arch/x86/include/asm/intel_rdt.h b/arch/x86/include/asm/intel_rdt.h
index 4a90057..bd184e1 100644
--- a/arch/x86/include/asm/intel_rdt.h
+++ b/arch/x86/include/asm/intel_rdt.h
@@ -181,6 +181,7 @@ struct rdt_resource {
void rdt_get_cache_infofile(struct rdt_resource *r);
void rdt_get_mba_infofile(struct rdt_resource *r);
int parse_cbm(char *buf, struct rdt_resource *r, struct rdt_domain *d);
+int parse_bw(char *buf, struct rdt_resource *r, struct rdt_domain *d);
extern struct mutex rdtgroup_mutex;
diff --git a/arch/x86/kernel/cpu/intel_rdt.c b/arch/x86/kernel/cpu/intel_rdt.c
index 1e410ea..731f70a 100644
--- a/arch/x86/kernel/cpu/intel_rdt.c
+++ b/arch/x86/kernel/cpu/intel_rdt.c
@@ -116,6 +116,8 @@ struct rdt_resource rdt_resources_all[] = {
.msr_base = IA32_MBA_THRTL_BASE,
.msr_update = mba_wrmsr,
.cache_level = 3,
+ .parse_ctrlval = parse_bw,
+ .format_str = "%d=%*d",
},
};
diff --git a/arch/x86/kernel/cpu/intel_rdt_schemata.c b/arch/x86/kernel/cpu/intel_rdt_schemata.c
index c72c9cc..9467a00 100644
--- a/arch/x86/kernel/cpu/intel_rdt_schemata.c
+++ b/arch/x86/kernel/cpu/intel_rdt_schemata.c
@@ -29,6 +29,49 @@
#include <asm/intel_rdt.h>
/*
+ * Check whether MBA bandwidth percentage value is correct. The value is
+ * checked against the minimum and max bandwidth values specified by the
+ * hardware. The allocated bandwidth percentage is rounded to the next
+ * control step available on the hardware.
+ */
+static bool bw_validate(char *buf, unsigned long *data, struct rdt_resource *r)
+{
+ unsigned long bw;
+ int ret;
+
+ /*
+ * Only linear delay values is supported for current Intel SKUs.
+ */
+ if (!r->membw.delay_linear)
+ return false;
+
+ ret = kstrtoul(buf, 10, &bw);
+ if (ret)
+ return false;
+
+ if (bw < r->membw.min_bw || bw > r->default_ctrl)
+ return false;
+
+ *data = roundup(bw, (unsigned long)r->membw.bw_gran);
+ return true;
+}
+
+int parse_bw(char *buf, struct rdt_resource *r, struct rdt_domain *d)
+{
+ unsigned long data;
+
+ if (d->have_new_ctrl)
+ return -EINVAL;
+
+ if (!bw_validate(buf, &data, r))
+ return -EINVAL;
+ d->new_ctrl = data;
+ d->have_new_ctrl = true;
+
+ return 0;
+}
+
+/*
* Check whether a cache bit mask is valid. The SDM says:
* Please note that all (and only) contiguous '1' combinations
* are allowed (e.g. FFFFH, 0FF0H, 003CH, etc.).
On Thu, 13 Apr 2017, Thomas Gleixner wrote:
> On Wed, 12 Apr 2017, Shivappa Vikas wrote:
> > This series has minor changes with respect to V3 addressing all your comments.
> > Was wondering if there was any feedback or if we still have a chance for 4.12.
>
> It's on my radar and should make it, unless there is some major hickup.
To be honest, I almost dropped it because as usual you cobbled it together
in a hurry just to get it out the door.
I asked for putting the CBM and MBA related data into seperate structs and
make a anon union of them in struct rdt_resource. Instead you went and made
it a anon union of anon structs, so you did not have to change anything
else in the code. What's the point of this? That's a completely useless
exercise and even worse than the data lump which was there before.
I also asked several times in the past to split preparatory stuff from new
stuff. No, the msr_update crap comes in one go. You introduce an update
function and instead of replacing _ALL_ loops you keep one and then fix it
up in some completely unrelated patch.
The ordering of the new struct members was also completely random along
with the kernel doc comments not being aligned.
As a bonus, you reimplemented roundup() open coded in the bandwidth
validation function.
Instead of wasting my time for another round of review and another delivery
of half baken crap, I fixed it up myself. The result is pushed out to
tip/x86/cpu.
Please do the following:
1) Verify that it still works as I have no hardware to test it. Once you
confirmed, it's going to show up in -next. So please do that ASAP,
i.e. yesterday.
2) Go through the patches one by one and compare it to your own to figure
out yourself how it should be done. Next time, I'm simply going to drop
such crap whether that makes it miss the merge window or not.
Yours grumpy
tglx
On Fri, 14 Apr 2017, Thomas Gleixner wrote:
> On Thu, 13 Apr 2017, Thomas Gleixner wrote:
>
>> On Wed, 12 Apr 2017, Shivappa Vikas wrote:
>>> This series has minor changes with respect to V3 addressing all your comments.
>>> Was wondering if there was any feedback or if we still have a chance for 4.12.
>>
>> It's on my radar and should make it, unless there is some major hickup.
>
> To be honest, I almost dropped it because as usual you cobbled it together
> in a hurry just to get it out the door.
>
> I asked for putting the CBM and MBA related data into seperate structs and
> make a anon union of them in struct rdt_resource. Instead you went and made
> it a anon union of anon structs, so you did not have to change anything
> else in the code. What's the point of this? That's a completely useless
> exercise and even worse than the data lump which was there before.
>
> I also asked several times in the past to split preparatory stuff from new
> stuff. No, the msr_update crap comes in one go. You introduce an update
> function and instead of replacing _ALL_ loops you keep one and then fix it
> up in some completely unrelated patch.
>
> The ordering of the new struct members was also completely random along
> with the kernel doc comments not being aligned.
>
> As a bonus, you reimplemented roundup() open coded in the bandwidth
> validation function.
>
> Instead of wasting my time for another round of review and another delivery
> of half baken crap, I fixed it up myself. The result is pushed out to
> tip/x86/cpu.
>
> Please do the following:
>
> 1) Verify that it still works as I have no hardware to test it. Once you
> confirmed, it's going to show up in -next. So please do that ASAP,
> i.e. yesterday.
>
> 2) Go through the patches one by one and compare it to your own to figure
> out yourself how it should be done. Next time, I'm simply going to drop
> such crap whether that makes it miss the merge window or not.
Ok doing the testing now. Will update soon.
Also will followup with the type of changes and implement the same convention in
the future patches.
Thanks,
Vikas
>
> Yours grumpy
>
> tglx
>
On Fri, 14 Apr 2017, Shivappa Vikas wrote:
>
>
> On Fri, 14 Apr 2017, Thomas Gleixner wrote:
>>
>> Please do the following:
>>
>> 1) Verify that it still works as I have no hardware to test it. Once you
>> confirmed, it's going to show up in -next. So please do that ASAP,
>> i.e. yesterday.
>>
>> 2) Go through the patches one by one and compare it to your own to figure
>> out yourself how it should be done. Next time, I'm simply going to drop
>> such crap whether that makes it miss the merge window or not.
>
> Ok doing the testing now. Will update soon.
> Also will followup with the type of changes and implement the same convention
> in the future patches.
All your changes are tested to be fine when used on the hardware and function
as before. Although we did discover some minor parsing issues (which existed in
the version i sent originally..) and will send a fix on top of
current tip/x86/cpu soon.
Thanks,
Vikas
>
> Thanks,
> Vikas
>
>>
>> Yours grumpy
>>
>> tglx
>>
>