2008-07-18 21:00:55

by Langsdorf, Mark

Subject: [PATCH 01/01] x86: L3 cache index disable for 2.6.26

New versions of AMD processors have support to disable parts
of their L3 caches if too many MCEs are generated by the
L3 cache.

This patch provides a /sysfs interface under the cache
hierarchy to display which cache indices are disabled
(if any) and to allow monitoring applications to disable a
cache index.

This patch does not set an automatic policy to disable
the L3 cache. Policy decisions would need to be made
by a RAS handler. This patch merely makes it easier to
see what indices are currently disabled.

Signed-off-by: Mark Langsdorf <[email protected]>

---

-Mark Langsdorf
Operating System Research Center
AMD
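
For illustration only, here is a minimal, hypothetical userspace sketch of
how a monitoring application might drive this interface. The sysfs path
(cpu0's index3 leaf) and the index value 0x7de are assumptions on my part;
the "<entry> <value>" hex format follows store_cache_disable() in the
patch below.

#include <stdio.h>

int main(void)
{
	/* Assumed path: index3 is the L3 leaf on AMD family 0x10. */
	const char *path =
		"/sys/devices/system/cpu/cpu0/cache/index3/cache_disable";
	char line[256];
	FILE *f;

	/* Show the current state of both index-disable entries. */
	f = fopen(path, "r");
	if (!f) {
		perror(path);
		return 1;
	}
	while (fgets(line, sizeof(line), f))
		fputs(line, stdout);
	fclose(f);

	/* Disable the (hypothetical) bad cache index 0x7de via entry 0. */
	f = fopen(path, "w");
	if (!f) {
		perror(path);
		return 1;
	}
	fprintf(f, "0 7de\n");
	fclose(f);
	return 0;
}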


diff -r 3836aaac6e15 arch/x86/kernel/cpu/intel_cacheinfo.c
--- a/arch/x86/kernel/cpu/intel_cacheinfo.c Tue Jul 15 14:55:36 2008 -0500
+++ b/arch/x86/kernel/cpu/intel_cacheinfo.c Thu Jul 17 16:18:40 2008 -0500
@@ -16,6 +16,7 @@

#include <asm/processor.h>
#include <asm/smp.h>
+#include <asm/k8.h>

#define LVL_1_INST 1
#define LVL_1_DATA 2
@@ -129,6 +130,7 @@ struct _cpuid4_info {
union _cpuid4_leaf_ebx ebx;
union _cpuid4_leaf_ecx ecx;
unsigned long size;
+ unsigned long can_disable;
cpumask_t shared_cpu_map; /* future?: only cpus/node is needed */
};

@@ -250,6 +252,13 @@ static void __cpuinit amd_cpuid4(int lea
(ebx->split.ways_of_associativity + 1) - 1;
}

+static void __cpuinit amd_check_l3_disable(int index, struct _cpuid4_info *this_leaf)
+{
+ if (index < 3)
+ return;
+ this_leaf->can_disable = 1;
+}
+
static int __cpuinit cpuid4_cache_lookup(int index, struct _cpuid4_info *this_leaf)
{
union _cpuid4_leaf_eax eax;
@@ -257,9 +266,12 @@ static int __cpuinit cpuid4_cache_lookup
union _cpuid4_leaf_ecx ecx;
unsigned edx;

- if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD)
+ if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) {
amd_cpuid4(index, &eax, &ebx, &ecx);
- else
+ if (boot_cpu_data.x86 >= 0x10)
+ amd_check_l3_disable(index, this_leaf);
+
+ } else
cpuid_count(4, index, &eax.full, &ebx.full, &ecx.full, &edx);
if (eax.split.type == CACHE_TYPE_NULL)
return -EIO; /* better error ? */
@@ -636,6 +648,61 @@ static ssize_t show_type(struct _cpuid4_
}
}

+#define to_object(k) container_of(k, struct _index_kobject, kobj)
+#define to_attr(a) container_of(a, struct _cache_attr, attr)
+
+static ssize_t show_cache_disable(struct _cpuid4_info *this_leaf, char *buf)
+{
+ struct pci_dev *dev;
+ if (this_leaf->can_disable) {
+ int i;
+ ssize_t ret = 0;
+ int node = cpu_to_node(first_cpu(this_leaf->shared_cpu_map));
+ dev = k8_northbridges[node];
+
+ for (i = 0; i < 2; i++) {
+ unsigned int reg;
+ pci_read_config_dword(dev, 0x1BC + i * 4, &reg);
+ ret += sprintf(buf, "%sEntry: %d\n", buf, i);
+ ret += sprintf(buf, "%sReads: %s\tNew Entries: %s\n",
+ buf,
+ reg & 0x80000000 ? "Disabled" : "Allowed",
+ reg & 0x40000000 ? "Disabled" : "Allowed");
+ ret += sprintf(buf, "%sSubCache: %x\tIndex: %x\n", buf,
+ (reg & 0x30000) >> 16, reg & 0xfff);
+
+ }
+ return ret;
+ }
+ return sprintf(buf, "Feature not enabled\n");
+}
+
+static ssize_t store_cache_disable(struct _cpuid4_info *this_leaf, const char *buf, size_t count)
+{
+ struct pci_dev *dev;
+ if (this_leaf->can_disable) {
+ /* write the MSR value */
+ unsigned int ret;
+ unsigned int index, val;
+ int node = cpu_to_node(first_cpu(this_leaf->shared_cpu_map));
+ dev = k8_northbridges[node];
+
+ if (strlen(buf) > 15)
+ return -EINVAL;
+ ret = sscanf(buf, "%x %x", &index, &val);
+ if (ret != 2)
+ return -EINVAL;
+ if (index > 1)
+ return -EINVAL;
+ val |= 0xc0000000;
+ pci_write_config_dword(dev, 0x1BC + index * 4, val & ~0x40000000);
+ wbinvd();
+ pci_write_config_dword(dev, 0x1BC + index * 4, val);
+ return 1;
+ }
+ return 0;
+}
+
struct _cache_attr {
struct attribute attr;
ssize_t (*show)(struct _cpuid4_info *, char *);
@@ -655,6 +722,8 @@ define_one_ro(size);
define_one_ro(size);
define_one_ro(shared_cpu_map);
define_one_ro(shared_cpu_list);
+
+static struct _cache_attr cache_disable = __ATTR(cache_disable, 0644, show_cache_disable, store_cache_disable);

static struct attribute * default_attrs[] = {
&type.attr,
@@ -666,11 +735,9 @@ static struct attribute * default_attrs[
&size.attr,
&shared_cpu_map.attr,
&shared_cpu_list.attr,
+ &cache_disable.attr,
NULL
};
-
-#define to_object(k) container_of(k, struct _index_kobject, kobj)
-#define to_attr(a) container_of(a, struct _cache_attr, attr)

static ssize_t show(struct kobject * kobj, struct attribute * attr, char * buf)
{
@@ -688,7 +755,15 @@ static ssize_t store(struct kobject * ko
static ssize_t store(struct kobject * kobj, struct attribute * attr,
const char * buf, size_t count)
{
- return 0;
+ struct _cache_attr *fattr = to_attr(attr);
+ struct _index_kobject *this_leaf = to_object(kobj);
+ ssize_t ret;
+
+ ret = fattr->store ?
+ fattr->store(CPUID4_INFO_IDX(this_leaf->cpu, this_leaf->index),
+ buf, count) :
+ 0;
+ return ret;
}

static struct sysfs_ops sysfs_ops = {
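
As an aside for readers following the register arithmetic in
show_cache_disable() above: the two L3 index-disable entries live at
offsets 0x1BC and 0x1C0 in the northbridge's PCI config space, and the
masks in the patch imply the field layout below. A stand-alone sketch of
the decoding (the macro names are mine, not the kernel's):

#include <stdio.h>

/* Field layout implied by the masks used in the patch. */
#define L3_DISABLE_READS	0x80000000u	/* bit 31: reads disabled */
#define L3_DISABLE_NEW		0x40000000u	/* bit 30: new entries disabled */
#define L3_SUBCACHE_MASK	0x00030000u	/* bits 17:16: subcache select */
#define L3_INDEX_MASK		0x00000fffu	/* bits 11:0: cache index */

static void decode_l3_disable(unsigned int reg)
{
	printf("Reads: %s\tNew Entries: %s\n",
	       reg & L3_DISABLE_READS ? "Disabled" : "Allowed",
	       reg & L3_DISABLE_NEW ? "Disabled" : "Allowed");
	printf("SubCache: %x\tIndex: %x\n",
	       (reg & L3_SUBCACHE_MASK) >> 16, reg & L3_INDEX_MASK);
}

int main(void)
{
	/* Example: both disable bits set, subcache 3, index 0x7de. */
	decode_l3_disable(0xc0030000u | 0x7de);
	return 0;
}

Note also the write sequence in store_cache_disable(): the register is
first written with bit 30 clear, the caches are flushed with wbinvd(), and
only then is the full value (with 0xc0000000 ORed in) written back.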

-------------------------------------------------------


2008-07-21 11:37:38

by Ingo Molnar

Subject: Re: [PATCH 01/01] x86: L3 cache index disable for 2.6.26


* Mark Langsdorf <[email protected]> wrote:

> New versions of AMD processors have support to disable parts of their
> L3 caches if too many MCEs are generated by the L3 cache.
>
> This patch provides a /sysfs interface under the cache hierarchy to
> display which cache indices are disabled (if any) and to allow
> monitoring applications to disable a cache index.
>
> This patch does not set an automatic policy to disable the L3 cache.
> Policy decisions would need to be made by a RAS handler. This patch
> merely makes it easier to see what indices are currently disabled.
>
> Signed-off-by: Mark Langsdorf <[email protected]>

applied to tip/x86/cpu, thanks Mark.

I've done some coding style fixes for the new functions you've
introduced, see that commit below.

Ingo

------------------->
commit 6cec0203a915739140c0a7fdf3498aa417123501
Author: Ingo Molnar <[email protected]>
Date: Mon Jul 21 13:34:21 2008 +0200

x86: L3 cache index disable for 2.6.26, cleanups

No change in functionality.

Signed-off-by: Ingo Molnar <[email protected]>
---
arch/x86/kernel/cpu/intel_cacheinfo.c | 115 +++++++++++++++++---------------
1 files changed, 61 insertions(+), 54 deletions(-)

diff --git a/arch/x86/kernel/cpu/intel_cacheinfo.c b/arch/x86/kernel/cpu/intel_cacheinfo.c
index 503c847..491892c 100644
--- a/arch/x86/kernel/cpu/intel_cacheinfo.c
+++ b/arch/x86/kernel/cpu/intel_cacheinfo.c
@@ -253,14 +253,16 @@ static void __cpuinit amd_cpuid4(int leaf, union _cpuid4_leaf_eax *eax,
(ebx->split.ways_of_associativity + 1) - 1;
}

-static void __cpuinit amd_check_l3_disable(int index, struct _cpuid4_info *this_leaf)
+static void __cpuinit
+amd_check_l3_disable(int index, struct _cpuid4_info *this_leaf)
{
if (index < 3)
return;
this_leaf->can_disable = 1;
}

-static int __cpuinit cpuid4_cache_lookup(int index, struct _cpuid4_info *this_leaf)
+static int
+__cpuinit cpuid4_cache_lookup(int index, struct _cpuid4_info *this_leaf)
{
union _cpuid4_leaf_eax eax;
union _cpuid4_leaf_ebx ebx;
@@ -271,19 +273,20 @@ static int __cpuinit cpuid4_cache_lookup(int index, struct _cpuid4_info *this_le
amd_cpuid4(index, &eax, &ebx, &ecx);
if (boot_cpu_data.x86 >= 0x10)
amd_check_l3_disable(index, this_leaf);
-
- } else
- cpuid_count(4, index, &eax.full, &ebx.full, &ecx.full, &edx);
+ } else {
+ cpuid_count(4, index, &eax.full, &ebx.full, &ecx.full, &edx);
+ }
+
if (eax.split.type == CACHE_TYPE_NULL)
return -EIO; /* better error ? */

this_leaf->eax = eax;
this_leaf->ebx = ebx;
this_leaf->ecx = ecx;
- this_leaf->size = (ecx.split.number_of_sets + 1) *
- (ebx.split.coherency_line_size + 1) *
- (ebx.split.physical_line_partition + 1) *
- (ebx.split.ways_of_associativity + 1);
+ this_leaf->size = (ecx.split.number_of_sets + 1) *
+ (ebx.split.coherency_line_size + 1) *
+ (ebx.split.physical_line_partition + 1) *
+ (ebx.split.ways_of_associativity + 1);
return 0;
}

@@ -649,59 +652,63 @@ static ssize_t show_type(struct _cpuid4_info *this_leaf, char *buf) {
}
}

-#define to_object(k) container_of(k, struct _index_kobject, kobj)
-#define to_attr(a) container_of(a, struct _cache_attr, attr)
+#define to_object(k) container_of(k, struct _index_kobject, kobj)
+#define to_attr(a) container_of(a, struct _cache_attr, attr)

static ssize_t show_cache_disable(struct _cpuid4_info *this_leaf, char *buf)
{
- struct pci_dev *dev;
- if (this_leaf->can_disable) {
- int i;
- ssize_t ret = 0;
- int node = cpu_to_node(first_cpu(this_leaf->shared_cpu_map));
- dev = k8_northbridges[node];
-
- for (i = 0; i < 2; i++) {
- unsigned int reg;
- pci_read_config_dword(dev, 0x1BC + i * 4, &reg);
- ret += sprintf(buf, "%sEntry: %d\n", buf, i);
- ret += sprintf(buf, "%sReads: %s\tNew Entries: %s\n",
- buf,
- reg & 0x80000000 ? "Disabled" : "Allowed",
- reg & 0x40000000 ? "Disabled" : "Allowed");
- ret += sprintf(buf, "%sSubCache: %x\tIndex: %x\n", buf,
- (reg & 0x30000) >> 16, reg & 0xfff);
+ int node = cpu_to_node(first_cpu(this_leaf->shared_cpu_map));
+ struct pci_dev *dev = k8_northbridges[node];
+ ssize_t ret = 0;
+ int i;

- }
- return ret;
+ if (!this_leaf->can_disable)
+ return sprintf(buf, "Feature not enabled\n");
+
+ for (i = 0; i < 2; i++) {
+ unsigned int reg;
+
+ pci_read_config_dword(dev, 0x1BC + i * 4, &reg);
+
+ ret += sprintf(buf, "%sEntry: %d\n", buf, i);
+ ret += sprintf(buf, "%sReads: %s\tNew Entries: %s\n",
+ buf,
+ reg & 0x80000000 ? "Disabled" : "Allowed",
+ reg & 0x40000000 ? "Disabled" : "Allowed");
+ ret += sprintf(buf, "%sSubCache: %x\tIndex: %x\n",
+ buf, (reg & 0x30000) >> 16, reg & 0xfff);
}
- return sprintf(buf, "Feature not enabled\n");
+ return ret;
}

-static ssize_t store_cache_disable(struct _cpuid4_info *this_leaf, const char *buf, size_t count)
+static ssize_t
+store_cache_disable(struct _cpuid4_info *this_leaf, const char *buf,
+ size_t count)
{
- struct pci_dev *dev;
- if (this_leaf->can_disable) {
- /* write the MSR value */
- unsigned int ret;
- unsigned int index, val;
- int node = cpu_to_node(first_cpu(this_leaf->shared_cpu_map));
- dev = k8_northbridges[node];
-
- if (strlen(buf) > 15)
- return -EINVAL;
- ret = sscanf(buf, "%x %x", &index, &val);
- if (ret != 2)
- return -EINVAL;
- if (index > 1)
- return -EINVAL;
- val |= 0xc0000000;
- pci_write_config_dword(dev, 0x1BC + index * 4, val & ~0x40000000);
- wbinvd();
- pci_write_config_dword(dev, 0x1BC + index * 4, val);
- return 1;
- }
- return 0;
+ int node = cpu_to_node(first_cpu(this_leaf->shared_cpu_map));
+ struct pci_dev *dev = k8_northbridges[node];
+ unsigned int ret, index, val;
+
+ if (!this_leaf->can_disable)
+ return 0;
+
+ /* write the MSR value */
+
+ if (strlen(buf) > 15)
+ return -EINVAL;
+
+ ret = sscanf(buf, "%x %x", &index, &val);
+ if (ret != 2)
+ return -EINVAL;
+ if (index > 1)
+ return -EINVAL;
+
+ val |= 0xc0000000;
+ pci_write_config_dword(dev, 0x1BC + index * 4, val & ~0x40000000);
+ wbinvd();
+ pci_write_config_dword(dev, 0x1BC + index * 4, val);
+
+ return 1;
}

struct _cache_attr {

2008-07-21 12:48:46

by Ingo Molnar

Subject: Re: [PATCH 01/01] x86: L3 cache index disable for 2.6.26


* Ingo Molnar <[email protected]> wrote:

> > This patch provides a /sysfs interface under the cache hierarchy to
> > display which cache indices are disabled (if any) and to allow
> > monitoring applications to disable a cache index.
> >
> > This patch does not set an automatic policy to disable the L3 cache.
> > Policy decisions would need to be made by a RAS handler. This patch
> > merely makes it easier to see what indices are currently disabled.
> >
> > Signed-off-by: Mark Langsdorf <[email protected]>
>
> applied to tip/x86/cpu, thanks Mark.
>
> I've done some coding style fixes for the new functions you've
> introduced, see that commit below.

-tip testing found the following build failure:

arch/x86/kernel/built-in.o: In function `show_cache_disable':
intel_cacheinfo.c:(.text+0xbbf2): undefined reference to `k8_northbridges'
arch/x86/kernel/built-in.o: In function `store_cache_disable':
intel_cacheinfo.c:(.text+0xbd91): undefined reference to `k8_northbridges'

please send a delta fix patch against the tip/x86/cpu branch:

http://people.redhat.com/mingo/tip.git/README

which has your patch plus the cleanup applied.

Ingo


Attachments:
(No filename) (1.13 kB)
config (56.85 kB)

2008-07-22 18:02:27

by Langsdorf, Mark

Subject: Re: [PATCH 01/01] x86: L3 cache index disable for 2.6.26

On Monday 21 July 2008, Ingo Molnar wrote:
> > applied to tip/x86/cpu, thanks Mark.
> >
> > I've done some coding style fixes for the new functions you've
> > introduced, see that commit below.
>
> -tip testing found the following build failure:
>
> arch/x86/kernel/built-in.o: In function `show_cache_disable':
> intel_cacheinfo.c:(.text+0xbbf2): undefined reference to `k8_northbridges'
> arch/x86/kernel/built-in.o: In function `store_cache_disable':
> intel_cacheinfo.c:(.text+0xbd91): undefined reference to `k8_northbridges'
>
> please send a delta fix patch against the tip/x86/cpu branch:
>
> http://people.redhat.com/mingo/tip.git/README
>
> which has your patch plus the cleanup applied.

delta fix patch follows. It removes the dependency on k8_northbridges.

-Mark Langsdorf
Operating System Research Center
AMD

Signed-off-by: Mark Langsdorf <[email protected]>

diff --git a/arch/x86/kernel/cpu/intel_cacheinfo.c b/arch/x86/kernel/cpu/intel_cacheinfo.c
index 491892c..08ee65a 100644
--- a/arch/x86/kernel/cpu/intel_cacheinfo.c
+++ b/arch/x86/kernel/cpu/intel_cacheinfo.c
@@ -13,10 +13,10 @@
#include <linux/compiler.h>
#include <linux/cpu.h>
#include <linux/sched.h>
+#include <linux/pci.h>

#include <asm/processor.h>
#include <asm/smp.h>
-#include <asm/k8.h>

#define LVL_1_INST 1
#define LVL_1_DATA 2
@@ -135,6 +135,12 @@ struct _cpuid4_info {
cpumask_t shared_cpu_map; /* future?: only cpus/node is needed */
};

+static struct pci_device_id k8_nb_id[] = {
+ { PCI_DEVICE(PCI_VENDOR_ID_AMD, 0x1103) },
+ { PCI_DEVICE(PCI_VENDOR_ID_AMD, 0x1203) },
+ {}
+};
+
unsigned short num_cache_leaves;

/* AMD doesn't have CPUID4. Emulate it here to report the same
@@ -655,16 +661,39 @@ static ssize_t show_type(struct _cpuid4_info *this_leaf, char *buf) {
#define to_object(k) container_of(k, struct _index_kobject, kobj)
#define to_attr(a) container_of(a, struct _cache_attr, attr)

+static struct pci_dev *get_k8_northbridge(int node)
+{
+ struct pci_dev *dev = NULL;
+ int i;
+
+ for (i = 0; i <= node; i++) {
+ do {
+ dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev);
+ if (!dev)
+ break;
+ } while (!pci_match_id(&k8_nb_id[0], dev));
+ if (!dev)
+ break;
+ }
+ return dev;
+}
+
static ssize_t show_cache_disable(struct _cpuid4_info *this_leaf, char *buf)
{
int node = cpu_to_node(first_cpu(this_leaf->shared_cpu_map));
- struct pci_dev *dev = k8_northbridges[node];
+ struct pci_dev *dev = NULL;
ssize_t ret = 0;
int i;

if (!this_leaf->can_disable)
return sprintf(buf, "Feature not enabled\n");

+ dev = get_k8_northbridge(node);
+ if (!dev) {
+ printk(KERN_ERR "Attempting AMD northbridge operation on a system with no northbridge\n");
+ return -EINVAL;
+ }
+
for (i = 0; i < 2; i++) {
unsigned int reg;

@@ -686,14 +715,12 @@ store_cache_disable(struct _cpuid4_info *this_leaf, const char *buf,
size_t count)
{
int node = cpu_to_node(first_cpu(this_leaf->shared_cpu_map));
- struct pci_dev *dev = k8_northbridges[node];
+ struct pci_dev *dev = NULL;
unsigned int ret, index, val;

if (!this_leaf->can_disable)
return 0;

- /* write the MSR value */
-
if (strlen(buf) > 15)
return -EINVAL;

@@ -704,6 +731,12 @@ store_cache_disable(struct _cpuid4_info *this_leaf, const char *buf,
return -EINVAL;

val |= 0xc0000000;
+ dev = get_k8_northbridge(node);
+ if (!dev) {
+ printk(KERN_ERR "Attempting AMD northbridge operation on a system with no northbridge\n");
+ return -EINVAL;
+ }
+
pci_write_config_dword(dev, 0x1BC + index * 4, val & ~0x40000000);
wbinvd();
pci_write_config_dword(dev, 0x1BC + index * 4, val);

2008-07-28 14:24:18

by Ingo Molnar

Subject: Re: [PATCH 01/01] x86: L3 cache index disable for 2.6.26


* Mark Langsdorf <[email protected]> wrote:

> On Monday 21 July 2008, Ingo Molnar wrote:
> > > applied to tip/x86/cpu, thanks Mark.
> > >
> > > I've done some coding style fixes for the new functions you've
> > > introduced, see that commit below.
> >
> > -tip testing found the following build failure:
> >
> > arch/x86/kernel/built-in.o: In function `show_cache_disable':
> > intel_cacheinfo.c:(.text+0xbbf2): undefined reference to `k8_northbridges'
> > arch/x86/kernel/built-in.o: In function `store_cache_disable':
> > intel_cacheinfo.c:(.text+0xbd91): undefined reference to `k8_northbridges'
> >
> > please send a delta fix patch against the tip/x86/cpu branch:
> >
> > http://people.redhat.com/mingo/tip.git/README
> >
> > which has your patch plus the cleanup applied.
>
> delta fix patch follows. It removes the dependency on k8_northbridges.

applied, thanks. I fixed up some whitespace damage, see below. That file
could use a thorough cleanup ...

Ingo

---------------------->
Subject: re: x86 l3 cache index disable for 2 6 26 fix
From: Ingo Molnar <[email protected]>
Date: Mon Jul 28 16:20:08 CEST 2008

Signed-off-by: Ingo Molnar <[email protected]>
---
arch/x86/kernel/cpu/intel_cacheinfo.c | 39 +++++++++++++++++-----------------
1 file changed, 20 insertions(+), 19 deletions(-)

Index: tip/arch/x86/kernel/cpu/intel_cacheinfo.c
===================================================================
--- tip.orig/arch/x86/kernel/cpu/intel_cacheinfo.c
+++ tip/arch/x86/kernel/cpu/intel_cacheinfo.c
@@ -1,8 +1,8 @@
/*
- * Routines to indentify caches on Intel CPU.
+ * Routines to indentify caches on Intel CPU.
*
- * Changes:
- * Venkatesh Pallipadi : Adding cache identification through cpuid(4)
+ * Changes:
+ * Venkatesh Pallipadi : Adding cache identification through cpuid(4)
* Ashok Raj <[email protected]>: Work with CPU hotplug infrastructure.
* Andi Kleen / Andreas Herrmann : CPUID4 emulation on AMD.
*/
@@ -136,9 +136,9 @@ struct _cpuid4_info {
};

static struct pci_device_id k8_nb_id[] = {
- { PCI_DEVICE(PCI_VENDOR_ID_AMD, 0x1103) },
- { PCI_DEVICE(PCI_VENDOR_ID_AMD, 0x1203) },
- {}
+ { PCI_DEVICE(PCI_VENDOR_ID_AMD, 0x1103) },
+ { PCI_DEVICE(PCI_VENDOR_ID_AMD, 0x1203) },
+ {}
};

unsigned short num_cache_leaves;
@@ -190,9 +190,10 @@ static unsigned short assocs[] __cpuinit
static unsigned char levels[] __cpuinitdata = { 1, 1, 2, 3 };
static unsigned char types[] __cpuinitdata = { 1, 2, 3, 3 };

-static void __cpuinit amd_cpuid4(int leaf, union _cpuid4_leaf_eax *eax,
- union _cpuid4_leaf_ebx *ebx,
- union _cpuid4_leaf_ecx *ecx)
+static void __cpuinit
+amd_cpuid4(int leaf, union _cpuid4_leaf_eax *eax,
+ union _cpuid4_leaf_ebx *ebx,
+ union _cpuid4_leaf_ecx *ecx)
{
unsigned dummy;
unsigned line_size, lines_per_tag, assoc, size_in_kb;
@@ -264,7 +265,7 @@ amd_check_l3_disable(int index, struct _
{
if (index < 3)
return;
- this_leaf->can_disable = 1;
+ this_leaf->can_disable = 1;
}

static int
@@ -474,7 +475,7 @@ unsigned int __cpuinit init_intel_cachei

/* pointer to _cpuid4_info array (for each cache leaf) */
static DEFINE_PER_CPU(struct _cpuid4_info *, cpuid4_info);
-#define CPUID4_INFO_IDX(x, y) (&((per_cpu(cpuid4_info, x))[y]))
+#define CPUID4_INFO_IDX(x, y) (&((per_cpu(cpuid4_info, x))[y]))

#ifdef CONFIG_SMP
static void __cpuinit cache_shared_cpu_map_setup(unsigned int cpu, int index)
@@ -511,7 +512,7 @@ static void __cpuinit cache_remove_share

this_leaf = CPUID4_INFO_IDX(cpu, index);
for_each_cpu_mask(sibling, this_leaf->shared_cpu_map) {
- sibling_leaf = CPUID4_INFO_IDX(sibling, index);
+ sibling_leaf = CPUID4_INFO_IDX(sibling, index);
cpu_clear(cpu, sibling_leaf->shared_cpu_map);
}
}
@@ -593,7 +594,7 @@ struct _index_kobject {

/* pointer to array of kobjects for cpuX/cache/indexY */
static DEFINE_PER_CPU(struct _index_kobject *, index_kobject);
-#define INDEX_KOBJECT_PTR(x, y) (&((per_cpu(index_kobject, x))[y]))
+#define INDEX_KOBJECT_PTR(x, y) (&((per_cpu(index_kobject, x))[y]))

#define show_one_plus(file_name, object, val) \
static ssize_t show_##file_name \
@@ -675,7 +676,7 @@ static struct pci_dev *get_k8_northbridg
if (!dev)
break;
}
- return dev;
+ return dev;
}

static ssize_t show_cache_disable(struct _cpuid4_info *this_leaf, char *buf)
@@ -736,7 +737,7 @@ store_cache_disable(struct _cpuid4_info
printk(KERN_ERR "Attempting AMD northbridge operation on a system with no northbridge\n");
return -EINVAL;
}
-
+
pci_write_config_dword(dev, 0x1BC + index * 4, val & ~0x40000000);
wbinvd();
pci_write_config_dword(dev, 0x1BC + index * 4, val);
@@ -789,7 +790,7 @@ static ssize_t show(struct kobject * kob
ret = fattr->show ?
fattr->show(CPUID4_INFO_IDX(this_leaf->cpu, this_leaf->index),
buf) :
- 0;
+ 0;
return ret;
}

@@ -800,9 +801,9 @@ static ssize_t store(struct kobject * ko
struct _index_kobject *this_leaf = to_object(kobj);
ssize_t ret;

- ret = fattr->store ?
- fattr->store(CPUID4_INFO_IDX(this_leaf->cpu, this_leaf->index),
- buf, count) :
+ ret = fattr->store ?
+ fattr->store(CPUID4_INFO_IDX(this_leaf->cpu, this_leaf->index),
+ buf, count) :
0;
return ret;
}

2008-07-28 14:49:48

by Ingo Molnar

Subject: Re: [PATCH 01/01] x86: L3 cache index disable for 2.6.26


* Ingo Molnar <[email protected]> wrote:

> > > please send a delta fix patch against the tip/x86/cpu branch:
> > >
> > > http://people.redhat.com/mingo/tip.git/README
> > >
> > > which has your patch plus the cleanup applied.
> >
> > delta fix patch follows. It removes the dependency on k8_northbridges.
>
> applied, thanks. I fixed up some whitespace damage, see below. That
> file could use a thorough cleanup ...

it still doesn't work on !PCI:

arch/x86/kernel/cpu/intel_cacheinfo.c: In function 'get_k8_northbridge':
arch/x86/kernel/cpu/intel_cacheinfo.c:675: error: implicit declaration of function 'pci_match_id'
make[2]: *** [arch/x86/kernel/cpu/intel_cacheinfo.o] Error 1

config and fix attached.

Ingo

-------------->
From 7571a249c8d8e577c1811ab12a06bc106ab466b7 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <[email protected]>
Date: Mon, 28 Jul 2008 16:45:49 +0200
Subject: [PATCH] x86: L3 cache index disable for 2.6.26, fix #2

fix !PCI build failure:

arch/x86/kernel/cpu/intel_cacheinfo.c: In function 'get_k8_northbridge':
arch/x86/kernel/cpu/intel_cacheinfo.c:675: error: implicit declaration of function 'pci_match_id'

Signed-off-by: Ingo Molnar <[email protected]>
---
arch/x86/kernel/cpu/intel_cacheinfo.c | 9 +++++++++
1 files changed, 9 insertions(+), 0 deletions(-)

diff --git a/arch/x86/kernel/cpu/intel_cacheinfo.c b/arch/x86/kernel/cpu/intel_cacheinfo.c
index 0ea539f..3f46afb 100644
--- a/arch/x86/kernel/cpu/intel_cacheinfo.c
+++ b/arch/x86/kernel/cpu/intel_cacheinfo.c
@@ -135,11 +135,13 @@ struct _cpuid4_info {
cpumask_t shared_cpu_map; /* future?: only cpus/node is needed */
};

+#ifdef CONFIG_PCI
static struct pci_device_id k8_nb_id[] = {
{ PCI_DEVICE(PCI_VENDOR_ID_AMD, 0x1103) },
{ PCI_DEVICE(PCI_VENDOR_ID_AMD, 0x1203) },
{}
};
+#endif

unsigned short num_cache_leaves;

@@ -662,6 +664,7 @@ static ssize_t show_type(struct _cpuid4_info *this_leaf, char *buf) {
#define to_object(k) container_of(k, struct _index_kobject, kobj)
#define to_attr(a) container_of(a, struct _cache_attr, attr)

+#ifdef CONFIG_PCI
static struct pci_dev *get_k8_northbridge(int node)
{
struct pci_dev *dev = NULL;
@@ -678,6 +681,12 @@ static struct pci_dev *get_k8_northbridge(int node)
}
return dev;
}
+#else
+static struct pci_dev *get_k8_northbridge(int node)
+{
+ return NULL;
+}
+#endif

static ssize_t show_cache_disable(struct _cpuid4_info *this_leaf, char *buf)
{


Attachments:
(No filename) (2.37 kB)
config (41.79 kB)

2008-07-28 14:57:33

by Langsdorf, Mark

Subject: RE: [PATCH 01/01] x86: L3 cache index disable for 2.6.26

> > > > please send a delta fix patch against the tip/x86/cpu branch:
> > > >
> > > > http://people.redhat.com/mingo/tip.git/README
> > > >
> > > > which has your patch plus the cleanup applied.
> > >
> > > delta fix patch follows. It removes the dependency on
> k8_northbridges.
> >
> > applied, thanks. I fixed up some whitespace damage, see below. That
> > file could use a thorough cleanup ...
>
> it still doesnt work, on !PCI:
>
> arch/x86/kernel/cpu/intel_cacheinfo.c: In function
> 'get_k8_northbridge':
> arch/x86/kernel/cpu/intel_cacheinfo.c:675: error: implicit
> declaration of function 'pci_match_id'
> make[2]: *** [arch/x86/kernel/cpu/intel_cacheinfo.o] Error 1
>
> config and fix attached.

Thanks!

-Mark Langsdorf
Operating System Research Center
AMD

2008-08-08 22:13:01

by Pavel Machek

Subject: Re: [PATCH 01/01] x86: L3 cache index disable for 2.6.26

Hi!

> New versions of AMD processors have support to disable parts
> of their L3 caches if too many MCEs are generated by the
> L3 cache.
>
> This patch provides a /sysfs interface under the cache
> hierarchy to display which cache indices are disabled
> (if any) and to allow monitoring applications to disable a
> cache index.
...

> +static ssize_t show_cache_disable(struct _cpuid4_info *this_leaf, char *buf)
> +{
> + struct pci_dev *dev;
> + if (this_leaf->can_disable) {
> + int i;
> + ssize_t ret = 0;
> + int node = cpu_to_node(first_cpu(this_leaf->shared_cpu_map));
> + dev = k8_northbridges[node];
> +
> + for (i = 0; i < 2; i++) {
> + unsigned int reg;
> + pci_read_config_dword(dev, 0x1BC + i * 4, &reg);
> + ret += sprintf(buf, "%sEntry: %d\n", buf, i);
> + ret += sprintf(buf, "%sReads: %s\tNew Entries: %s\n",
> + buf,
> + reg & 0x80000000 ? "Disabled" : "Allowed",
> + reg & 0x40000000 ? "Disabled" : "Allowed");
> + ret += sprintf(buf, "%sSubCache: %x\tIndex: %x\n", buf,
> + (reg & 0x30000) >> 16, reg & 0xfff);
> +
> + }
> + return ret;
> + }
> + return sprintf(buf, "Feature not enabled\n");
> +}

I think there's a one-value-per-file rule in sysfs...

I guess it is better to return -EOPNOTSUP (or something) instead of
English text explaining that...

No, really, what you created is impossible to parse -- /proc like
nightmare.

--
(english) http://www.livejournal.com/~pavelmachek
(cesky, pictures) http://atrey.karlin.mff.cuni.cz/~pavel/picture/horses/blog.html

2008-08-12 15:59:29

by Langsdorf, Mark

Subject: Re: [PATCH 01/01][retry 1] x86: L3 cache index disable for 2.6.26

On Friday 08 August 2008, Pavel Machek wrote:

> I think there's one-value-per-file rule in sysfs...
>
> I guess it is better to return -EOPNOTSUP (or something) instead of
> english text explaining that...
>
> No, really, what you created is impossible to parse -- /proc like
> nightmare.

Okay, this is a simpler version that includes most of Ingo's
clean-ups and style changes. It only displays the two
cache index values. Is this acceptable?



New versions of AMD processors have support to disable parts
of their L3 caches if too many MCEs are generated by the
L3 cache.

This patch provides a /sysfs interface under the cache
hierarchy to display which cache indices are disabled
(if any) and to ALLOW monitoring applications to disable a
cache index.

This patch does not set an automatic policy to disable
the L3 cache. Policy decisions would need to be made
by a RAS handler. This patch merely makes it easier to
see what indices are currently disabled.

Signed-off-by: Mark Langsdorf <[email protected]>

diff -r f3f819497a68 arch/x86/kernel/cpu/intel_cacheinfo.c
--- a/arch/x86/kernel/cpu/intel_cacheinfo.c Thu Aug 07 04:24:53 2008 -0500
+++ b/arch/x86/kernel/cpu/intel_cacheinfo.c Tue Aug 12 05:29:37 2008 -0500
@@ -130,6 +130,7 @@ struct _cpuid4_info {
union _cpuid4_leaf_ebx ebx;
union _cpuid4_leaf_ecx ecx;
unsigned long size;
+ unsigned long can_disable;
cpumask_t shared_cpu_map; /* future?: only cpus/node is needed */
};

@@ -251,6 +252,14 @@ static void __cpuinit amd_cpuid4(int lea
(ebx->split.ways_of_associativity + 1) - 1;
}

+static void __cpuinit
+amd_check_l3_disable(int index, struct _cpuid4_info *this_leaf)
+{
+ if (index < 3)
+ return;
+ this_leaf->can_disable = 1;
+}
+
static int __cpuinit cpuid4_cache_lookup(int index, struct _cpuid4_info *this_leaf)
{
union _cpuid4_leaf_eax eax;
@@ -258,10 +267,13 @@ static int __cpuinit cpuid4_cache_lookup
union _cpuid4_leaf_ecx ecx;
unsigned edx;

- if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD)
+ if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) {
amd_cpuid4(index, &eax, &ebx, &ecx);
- else
- cpuid_count(4, index, &eax.full, &ebx.full, &ecx.full, &edx);
+ if (boot_cpu_data.x86 >= 0x10)
+ amd_check_l3_disable(index, this_leaf);
+ } else {
+ cpuid_count(4, index, &eax.full, &ebx.full, &ecx.full, &edx);
+ }
if (eax.split.type == CACHE_TYPE_NULL)
return -EIO; /* better error ? */

@@ -269,9 +281,9 @@ static int __cpuinit cpuid4_cache_lookup
this_leaf->ebx = ebx;
this_leaf->ecx = ecx;
this_leaf->size = (ecx.split.number_of_sets + 1) *
- (ebx.split.coherency_line_size + 1) *
- (ebx.split.physical_line_partition + 1) *
- (ebx.split.ways_of_associativity + 1);
+ (ebx.split.coherency_line_size + 1) *
+ (ebx.split.physical_line_partition + 1) *
+ (ebx.split.ways_of_associativity + 1);
return 0;
}

@@ -574,6 +586,9 @@ static DEFINE_PER_CPU(struct _index_kobj
static DEFINE_PER_CPU(struct _index_kobject *, index_kobject);
#define INDEX_KOBJECT_PTR(x, y) (&((per_cpu(index_kobject, x))[y]))

+#define to_object(k) container_of(k, struct _index_kobject, kobj)
+#define to_attr(a) container_of(a, struct _cache_attr, attr)
+
#define show_one_plus(file_name, object, val) \
static ssize_t show_##file_name \
(struct _cpuid4_info *this_leaf, char *buf) \
@@ -618,6 +633,83 @@ static inline ssize_t show_shared_cpu_li
static inline ssize_t show_shared_cpu_list(struct _cpuid4_info *leaf, char *buf)
{
return show_shared_cpu_map_func(leaf, 1, buf);
+}
+
+#if defined(CONFIG_PCI) && defined(CONFIG_K8_NB)
+#include <linux/pci.h>
+#include <asm/k8.h>
+static struct pci_dev *get_k8_northbridge(int node)
+{
+ return k8_northbridges[node];
+}
+#else
+static inline int pci_write_config_dword(struct pci_dev *dev, int where,
+ u32 val)
+{
+ return 0;
+}
+
+static inline int pci_read_config_dword(struct pci_dev *dev, int where,
+ u32 *val)
+{
+ return 0;
+}
+
+static struct pci_dev *get_k8_northbridge(int node)
+{
+ return NULL;
+}
+#endif
+
+static ssize_t show_cache_disable(struct _cpuid4_info *this_leaf, char *buf)
+{
+ int node = cpu_to_node(first_cpu(this_leaf->shared_cpu_map));
+ struct pci_dev *dev = get_k8_northbridge(node);
+ ssize_t ret = 0;
+ int i;
+
+ if (!this_leaf->can_disable)
+ return sprintf(buf, "-1");
+
+ for (i = 0; i < 2; i++) {
+ unsigned int reg = 0;
+
+ pci_read_config_dword(dev, 0x1BC + i * 4, &reg);
+
+ ret += sprintf(buf, "%s %x\t", buf, reg);
+ }
+ ret += sprintf(buf,"%s\n", buf);
+
+ return ret;
+}
+
+static ssize_t
+store_cache_disable(struct _cpuid4_info *this_leaf, const char *buf,
+ size_t count)
+{
+ int node = cpu_to_node(first_cpu(this_leaf->shared_cpu_map));
+ struct pci_dev *dev = get_k8_northbridge(node);
+ ssize_t ret = 0;
+ unsigned int index, val;
+
+ if (!this_leaf->can_disable)
+ return 0;
+
+ if (strlen(buf) > 15)
+ return -EINVAL;
+
+ ret = sscanf(buf, "%x %x\n", &index, &val);
+ if (ret != 2)
+ return -EINVAL;
+ if (index > 1)
+ return -EINVAL;
+
+ val |= 0xc0000000;
+ pci_write_config_dword(dev, 0x1BC + index * 4, val & ~0x40000000);
+ wbinvd();
+ pci_write_config_dword(dev, 0x1BC + index * 4, val);
+
+ return strlen(buf);
}

static ssize_t show_type(struct _cpuid4_info *this_leaf, char *buf) {
@@ -657,6 +749,8 @@ define_one_ro(shared_cpu_map);
define_one_ro(shared_cpu_map);
define_one_ro(shared_cpu_list);

+static struct _cache_attr cache_disable = __ATTR(cache_disable, 0644, show_cache_disable, store_cache_disable);
+
static struct attribute * default_attrs[] = {
&type.attr,
&level.attr,
@@ -667,11 +761,9 @@ static struct attribute * default_attrs[
&size.attr,
&shared_cpu_map.attr,
&shared_cpu_list.attr,
+ &cache_disable.attr,
NULL
};
-
-#define to_object(k) container_of(k, struct _index_kobject, kobj)
-#define to_attr(a) container_of(a, struct _cache_attr, attr)

static ssize_t show(struct kobject * kobj, struct attribute * attr, char * buf)
{
@@ -689,7 +781,15 @@ static ssize_t store(struct kobject * ko
static ssize_t store(struct kobject * kobj, struct attribute * attr,
const char * buf, size_t count)
{
- return 0;
+ struct _cache_attr *fattr = to_attr(attr);
+ struct _index_kobject *this_leaf = to_object(kobj);
+ ssize_t ret;
+
+ ret = fattr->store ?
+ fattr->store(CPUID4_INFO_IDX(this_leaf->cpu, this_leaf->index),
+ buf, count) :
+ 0;
+ return ret;
}

static struct sysfs_ops sysfs_ops = {

2008-08-12 21:55:56

by Pavel Machek

Subject: Re: [PATCH 01/01][retry 1] x86: L3 cache index disable for 2.6.26

Hi!

> > I think there's one-value-per-file rule in sysfs...
> >
> > I guess it is better to return -EOPNOTSUP (or something) instead of
> > english text explaining that...
> >
> > No, really, what you created is impossible to parse -- /proc like
> > nightmare.
>
> Okay, this is a simpler version that includes most of Ingo's
> clean-ups and style changes. It only displays the two
> cache index values. Is this acceptable?

Not sure, let's ask Greg. And it probably should have a few lines
in Documentation going with it, so we know a new interface is added and
how it looks.

> +static ssize_t show_cache_disable(struct _cpuid4_info *this_leaf, char *buf)
> +{
> + int node = cpu_to_node(first_cpu(this_leaf->shared_cpu_map));
> + struct pci_dev *dev = get_k8_northbridge(node);
> + ssize_t ret = 0;
> + int i;
> +
> + if (!this_leaf->can_disable)
> + return sprintf(buf, "-1");

This should return -ERRNO, right?

> + for (i = 0; i < 2; i++) {
> + unsigned int reg = 0;
> +
> + pci_read_config_dword(dev, 0x1BC + i * 4, &reg);
> +
> + ret += sprintf(buf, "%s %x\t", buf, reg);
> + }
> + ret += sprintf(buf,"%s\n", buf);

So you print "buf" a few times? Why? And you use both \t and \n as delimiter...

--
(english) http://www.livejournal.com/~pavelmachek
(cesky, pictures) http://atrey.karlin.mff.cuni.cz/~pavel/picture/horses/blog.html

2008-08-12 22:02:17

by Langsdorf, Mark

Subject: RE: [PATCH 01/01][retry 1] x86: L3 cache index disable for 2.6.26

> > Okay, this is a simpler version that includes most of Ingo's
> > clean-ups and style changes. It only displays the two
> > cache index values. Is this acceptable?
>
> Not sure, lets ask greg. And it probably should have few lines
> in Documentation going with it, so we know new interface is added and
> how it looks.

Okay, I'll add that and resubmit tomorrow.

> > +static ssize_t show_cache_disable(struct _cpuid4_info
> *this_leaf, char *buf)
> > +{
> > + int node =
> cpu_to_node(first_cpu(this_leaf->shared_cpu_map));
> > + struct pci_dev *dev = get_k8_northbridge(node);
> > + ssize_t ret = 0;
> > + int i;
> > +
> > + if (!this_leaf->can_disable)
> > + return sprintf(buf, "-1");
>
> This should return -ERRNO, right?

Right, thanks.

> > + for (i = 0; i < 2; i++) {
> > + unsigned int reg = 0;
> > +
> > + pci_read_config_dword(dev, 0x1BC + i * 4, &reg);
> > +
> > + ret += sprintf(buf, "%s %x\t", buf, reg);
> > + }
> > + ret += sprintf(buf,"%s\n", buf);
>
> So you print "buf" few times? Why? And you use both \t and \n
> as deliminer...

I'm printing the values of the two config registers into
the string buffer, separated by tabs, and terminated by
an EOL. Is there a prefered way to do that instead of
what I have?

-Mark Langsdorf
Operating System Research Center
AMD

2008-08-12 22:06:19

by Pavel Machek

Subject: Re: [PATCH 01/01][retry 1] x86: L3 cache index disable for 2.6.26


> > > + for (i = 0; i < 2; i++) {
> > > + unsigned int reg = 0;
> > > +
> > > + pci_read_config_dword(dev, 0x1BC + i * 4, &reg);
> > > +
> > > + ret += sprintf(buf, "%s %x\t", buf, reg);
> > > + }
> > > + ret += sprintf(buf,"%s\n", buf);
> >
> > So you print "buf" few times? Why? And you use both \t and \n
> > as deliminer...
>
> I'm printing the values of the two config registers into
> the string buffer, separated by tabs, and terminated by
> an EOL. Is there a prefered way to do that instead of
> what I have?

Hmm, I misparsed that.

Yes, we have some helpers for sysfs writing... SEQ_printf(), IIRC.

Is this even valid C?

ret += sprintf(buf, "%s %x\t", buf, reg);

You are printing into buffer you are passing as argument. That seems
fragile.
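
The usual idiom is to advance the destination pointer instead of handing
buf to sprintf() as its own source argument. A plain userspace sketch of
that pattern (sizes are illustrative; kernel code would bound against
PAGE_SIZE and check for truncation):

#include <stdio.h>

/* Append by offset instead of sprintf(buf, "%s...", buf, ...). */
static int show_two_regs(char *buf, size_t size,
			 unsigned int reg0, unsigned int reg1)
{
	int ret = 0;

	ret += snprintf(buf + ret, size - ret, "%x\t", reg0);
	ret += snprintf(buf + ret, size - ret, "%x\n", reg1);
	return ret;
}

int main(void)
{
	char buf[64];	/* large enough for both fields here */
	int n = show_two_regs(buf, sizeof(buf), 0xc0000123, 0xc0000456);

	fwrite(buf, 1, n, stdout);
	return 0;
}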

Pavel
--
(english) http://www.livejournal.com/~pavelmachek
(cesky, pictures) http://atrey.karlin.mff.cuni.cz/~pavel/picture/horses/blog.html

2008-08-12 22:56:18

by Greg KH

Subject: Re: [PATCH 01/01][retry 1] x86: L3 cache index disable for 2.6.26

On Tue, Aug 12, 2008 at 11:56:59PM +0200, Pavel Machek wrote:
> Hi!
>
> > > I think there's one-value-per-file rule in sysfs...
> > >
> > > I guess it is better to return -EOPNOTSUP (or something) instead of
> > > english text explaining that...
> > >
> > > No, really, what you created is impossible to parse -- /proc like
> > > nightmare.
> >
> > Okay, this is a simpler version that includes most of Ingo's
> > clean-ups and style changes. It only displays the two
> > cache index values. Is this acceptable?
>
> Not sure, lets ask greg. And it probably should have few lines
> in Documentation going with it, so we know new interface is added and
> how it looks.

ALL new sysfs files require an entry in Documentation/ABI/ showing what
the file is for and how to use it. See the README file in that
directory for the format to use.

> > +static ssize_t show_cache_disable(struct _cpuid4_info *this_leaf, char *buf)
> > +{
> > + int node = cpu_to_node(first_cpu(this_leaf->shared_cpu_map));
> > + struct pci_dev *dev = get_k8_northbridge(node);
> > + ssize_t ret = 0;
> > + int i;
> > +
> > + if (!this_leaf->can_disable)
> > + return sprintf(buf, "-1");
>
> This should return -ERRNO, right?
>
> > + for (i = 0; i < 2; i++) {
> > + unsigned int reg = 0;
> > +
> > + pci_read_config_dword(dev, 0x1BC + i * 4, &reg);
> > +
> > + ret += sprintf(buf, "%s %x\t", buf, reg);
> > + }
> > + ret += sprintf(buf,"%s\n", buf);
>
> So you print "buf" few times? Why? And you use both \t and \n as deliminer...

Why are you printing more than one value per sysfs file? That's almost
never allowed.

thanks,

greg k-h

2008-08-12 22:56:31

by Greg KH

Subject: Re: [PATCH 01/01][retry 1] x86: L3 cache index disable for 2.6.26

On Tue, Aug 12, 2008 at 05:01:46PM -0500, Langsdorf, Mark wrote:
> > > + for (i = 0; i < 2; i++) {
> > > + unsigned int reg = 0;
> > > +
> > > + pci_read_config_dword(dev, 0x1BC + i * 4, &reg);
> > > +
> > > + ret += sprintf(buf, "%s %x\t", buf, reg);
> > > + }
> > > + ret += sprintf(buf,"%s\n", buf);
> >
> > So you print "buf" few times? Why? And you use both \t and \n
> > as deliminer...
>
> I'm printing the values of the two config registers into
> the string buffer, separated by tabs, and terminated by
> an EOL. Is there a prefered way to do that instead of
> what I have?

Yes, two different files, one for each config register.

thanks,

greg k-h

2008-08-13 19:57:44

by Langsdorf, Mark

Subject: [PATCH 01/01][retry 2] x86: L3 cache index disable for 2.6.26

New versions of AMD processors have support to disable parts
of their L3 caches if too many MCEs are generated by the
L3 cache.

This patch provides a /sysfs interface under the cache
hierarchy to display which cache indices are disabled
(if any) and to allow monitoring applications to disable a
cache index.

This patch does not set an automatic policy to disable
the L3 cache. Policy decisions would need to be made
by a RAS handler. This patch merely makes it easier to
see what indices are currently disabled.

Signed-off-by: Mark Langsdorf <[email protected]>

diff -r e683983d4dd0 Documentation/ABI/testing/sysfs-devices-cache_disable
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/Documentation/ABI/testing/sysfs-devices-cache_disable Wed Aug 13 09:06:52 2008 -0500
@@ -0,0 +1,18 @@
+What: /sys/devices/system/cpu/cpu*/cache/index*/cache_disable_X
+Date: August 2008
+KernelVersion: 2.6.27
+Contact: [email protected]
+Description: These files exist in every cpu's cache index directories.
+ There are currently 2 cache_disable_# files in each
+ directory. Reading from these files on a supported
+ processor will return that cache disable index value
+ for that processor and node. Writing to one of these
+ files will cause the specificed cache index to be disable.
+
+ Currently, only AMD Family 10h Processors support cache index
+ disable, and only for their L3 caches. See the BIOS and
+ Kernel Developer's Guide at
+ http://www.amd.com/us-en/assets/content_type/white_papers_and_tech_docs/31116.PDF
+ for formatting information and other details on the
+ cache index disable.
+Users: [email protected]
diff -r e683983d4dd0 arch/x86/kernel/cpu/intel_cacheinfo.c
--- a/arch/x86/kernel/cpu/intel_cacheinfo.c Tue Aug 12 08:46:38 2008 -0500
+++ b/arch/x86/kernel/cpu/intel_cacheinfo.c Wed Aug 13 08:47:59 2008 -0500
@@ -130,6 +130,7 @@ struct _cpuid4_info {
union _cpuid4_leaf_ebx ebx;
union _cpuid4_leaf_ecx ecx;
unsigned long size;
+ unsigned long can_disable;
cpumask_t shared_cpu_map; /* future?: only cpus/node is needed */
};

@@ -251,6 +252,14 @@ static void __cpuinit amd_cpuid4(int lea
(ebx->split.ways_of_associativity + 1) - 1;
}

+static void __cpuinit
+amd_check_l3_disable(int index, struct _cpuid4_info *this_leaf)
+{
+ if (index < 3)
+ return;
+ this_leaf->can_disable = 1;
+}
+
static int __cpuinit cpuid4_cache_lookup(int index, struct _cpuid4_info *this_leaf)
{
union _cpuid4_leaf_eax eax;
@@ -258,10 +267,13 @@ static int __cpuinit cpuid4_cache_lookup
union _cpuid4_leaf_ecx ecx;
unsigned edx;

- if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD)
+ if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) {
amd_cpuid4(index, &eax, &ebx, &ecx);
- else
- cpuid_count(4, index, &eax.full, &ebx.full, &ecx.full, &edx);
+ if (boot_cpu_data.x86 >= 0x10)
+ amd_check_l3_disable(index, this_leaf);
+ } else {
+ cpuid_count(4, index, &eax.full, &ebx.full, &ecx.full, &edx);
+ }
if (eax.split.type == CACHE_TYPE_NULL)
return -EIO; /* better error ? */

@@ -269,9 +281,9 @@ static int __cpuinit cpuid4_cache_lookup
this_leaf->ebx = ebx;
this_leaf->ecx = ecx;
this_leaf->size = (ecx.split.number_of_sets + 1) *
- (ebx.split.coherency_line_size + 1) *
- (ebx.split.physical_line_partition + 1) *
- (ebx.split.ways_of_associativity + 1);
+ (ebx.split.coherency_line_size + 1) *
+ (ebx.split.physical_line_partition + 1) *
+ (ebx.split.ways_of_associativity + 1);
return 0;
}

@@ -574,6 +586,9 @@ static DEFINE_PER_CPU(struct _index_kobj
static DEFINE_PER_CPU(struct _index_kobject *, index_kobject);
#define INDEX_KOBJECT_PTR(x, y) (&((per_cpu(index_kobject, x))[y]))

+#define to_object(k) container_of(k, struct _index_kobject, kobj)
+#define to_attr(a) container_of(a, struct _cache_attr, attr)
+
#define show_one_plus(file_name, object, val) \
static ssize_t show_##file_name \
(struct _cpuid4_info *this_leaf, char *buf) \
@@ -619,6 +634,92 @@ static inline ssize_t show_shared_cpu_li
{
return show_shared_cpu_map_func(leaf, 1, buf);
}
+
+#if defined(CONFIG_PCI) && defined(CONFIG_K8_NB)
+#include <linux/pci.h>
+#include <asm/k8.h>
+static struct pci_dev *get_k8_northbridge(int node)
+{
+ return k8_northbridges[node];
+}
+#else
+static inline int pci_write_config_dword(struct pci_dev *dev, int where,
+ u32 val)
+{
+ return 0;
+}
+
+static inline int pci_read_config_dword(struct pci_dev *dev, int where,
+ u32 *val)
+{
+ return 0;
+}
+
+static struct pci_dev *get_k8_northbridge(int node)
+{
+ return NULL;
+}
+#endif
+
+static ssize_t show_cache_disable(struct _cpuid4_info *this_leaf, char *buf,
+ unsigned int index)
+{
+ int node = cpu_to_node(first_cpu(this_leaf->shared_cpu_map));
+ struct pci_dev *dev = get_k8_northbridge(node);
+ unsigned int reg = 0;
+
+ if (!this_leaf->can_disable)
+ return 0;
+
+ pci_read_config_dword(dev, 0x1BC + index * 4, &reg);
+ return sprintf(buf, "%x\n", reg);
+}
+
+#define SHOW_CACHE_DISABLE(index) \
+static ssize_t \
+show_cache_disable_##index(struct _cpuid4_info *this_leaf, char *buf) \
+{ \
+ return show_cache_disable(this_leaf, buf, index); \
+}
+
+static ssize_t
+store_cache_disable(struct _cpuid4_info *this_leaf, const char *buf,
+ size_t count, unsigned int index)
+{
+ int node = cpu_to_node(first_cpu(this_leaf->shared_cpu_map));
+ struct pci_dev *dev = get_k8_northbridge(node);
+ ssize_t ret = 0;
+ unsigned int val;
+
+ if (!this_leaf->can_disable)
+ return 0;
+
+ if (strlen(buf) > 10)
+ return -EINVAL;
+
+ ret = sscanf(buf, "%x\n", &val);
+ if (ret != 1)
+ return -EINVAL;
+
+ val |= 0xc0000000;
+ pci_write_config_dword(dev, 0x1BC + index * 4, val & ~0x40000000);
+ wbinvd();
+ pci_write_config_dword(dev, 0x1BC + index * 4, val);
+ return strlen(buf);
+}
+
+#define STORE_CACHE_DISABLE(index) \
+static ssize_t \
+store_cache_disable_##index(struct _cpuid4_info *this_leaf, \
+ const char *buf, size_t count) \
+{ \
+ return store_cache_disable(this_leaf, buf, count, index); \
+}
+
+SHOW_CACHE_DISABLE(0)
+STORE_CACHE_DISABLE(0)
+SHOW_CACHE_DISABLE(1)
+STORE_CACHE_DISABLE(1)

static ssize_t show_type(struct _cpuid4_info *this_leaf, char *buf) {
switch(this_leaf->eax.split.type) {
@@ -657,6 +758,10 @@ define_one_ro(shared_cpu_map);
define_one_ro(shared_cpu_map);
define_one_ro(shared_cpu_list);

+static struct _cache_attr cache_disable_0 = __ATTR(cache_disable_0, 0644, show_cache_disable_0, store_cache_disable_0);
+static struct _cache_attr cache_disable_1 = __ATTR(cache_disable_1, 0644, show_cache_disable_1, store_cache_disable_1);
+
+
static struct attribute * default_attrs[] = {
&type.attr,
&level.attr,
@@ -667,11 +772,10 @@ static struct attribute * default_attrs[
&size.attr,
&shared_cpu_map.attr,
&shared_cpu_list.attr,
+ &cache_disable_0.attr,
+ &cache_disable_1.attr,
NULL
};
-
-#define to_object(k) container_of(k, struct _index_kobject, kobj)
-#define to_attr(a) container_of(a, struct _cache_attr, attr)

static ssize_t show(struct kobject * kobj, struct attribute * attr, char * buf)
{
@@ -689,7 +793,15 @@ static ssize_t store(struct kobject * ko
static ssize_t store(struct kobject * kobj, struct attribute * attr,
const char * buf, size_t count)
{
- return 0;
+ struct _cache_attr *fattr = to_attr(attr);
+ struct _index_kobject *this_leaf = to_object(kobj);
+ ssize_t ret;
+
+ ret = fattr->store ?
+ fattr->store(CPUID4_INFO_IDX(this_leaf->cpu, this_leaf->index),
+ buf, count) :
+ 0;
+ return ret;
}

static struct sysfs_ops sysfs_ops = {

2008-08-13 20:37:27

by Pavel Machek

Subject: Re: [PATCH 01/01][retry 2] x86: L3 cache index disable for 2.6.26

Hi!
> diff -r e683983d4dd0 Documentation/ABI/testing/sysfs-devices-cache_disable
> --- /dev/null Thu Jan 01 00:00:00 1970 +0000
> +++ b/Documentation/ABI/testing/sysfs-devices-cache_disable Wed Aug 13 09:06:52 2008 -0500
> @@ -0,0 +1,18 @@
> +What: /sys/devices/system/cpu/cpu*/cache/index*/cache_disable_X
> +Date: August 2008
> +KernelVersion: 2.6.27
> +Contact: [email protected]
> +Description: These files exist in every cpu's cache index directories.
> + There are currently 2 cache_disable_# files in each
> + directory. Reading from these files on a supported
> + processor will return that cache disable index value
> + for that processor and node. Writing to one of these
> + files will cause the specificed cache index to be disable.

disabled.

> +#if defined(CONFIG_PCI) && defined(CONFIG_K8_NB)
> +#include <linux/pci.h>
> +#include <asm/k8.h>
> +static struct pci_dev *get_k8_northbridge(int node)
> +{
> + return k8_northbridges[node];
> +}
> +#else
> +static inline int pci_write_config_dword(struct pci_dev *dev, int where,
> + u32 val)
> +{
> + return 0;
> +}

Spaces vs. tabs problem visible here. ...

> +static ssize_t show_cache_disable(struct _cpuid4_info *this_leaf, char *buf,
> + unsigned int index)
> +{
> + int node = cpu_to_node(first_cpu(this_leaf->shared_cpu_map));
> + struct pci_dev *dev = get_k8_northbridge(node);
> + unsigned int reg = 0;
> +
> + if (!this_leaf->can_disable)
> + return 0;
> +
> + pci_read_config_dword(dev, 0x1BC + index * 4, &reg);
> + return sprintf(buf, "%x\n", reg);
> +}

...and getting serious here.


> +#define SHOW_CACHE_DISABLE(index) \
> +static ssize_t \
> +show_cache_disable_##index(struct _cpuid4_info *this_leaf, char *buf) \
> +{ \
> + return show_cache_disable(this_leaf, buf, index); \
> +}
> +
> +static ssize_t
> +store_cache_disable(struct _cpuid4_info *this_leaf, const char *buf,
> + size_t count, unsigned int index)
> +{
> + int node = cpu_to_node(first_cpu(this_leaf->shared_cpu_map));
> + struct pci_dev *dev = get_k8_northbridge(node);
> + ssize_t ret = 0;
> + unsigned int val;
> +
> + if (!this_leaf->can_disable)
> + return 0;

So if someone asks you to disable part of cache you can't disable, you
return success?

> + if (strlen(buf) > 10)
> + return -EINVAL;
> +
> + ret = sscanf(buf, "%x\n", &val);
> + if (ret != 1)
> + return -EINVAL;
....
> + return strlen(buf);

Is this safe to do? What if buf is not null-terminated?

return count?

Actually, the sscanf is problematic, too, right?


Pavel
--
(english) http://www.livejournal.com/~pavelmachek
(cesky, pictures) http://atrey.karlin.mff.cuni.cz/~pavel/picture/horses/blog.html

2008-08-13 23:55:34

by Greg KH

Subject: Re: [PATCH 01/01][retry 2] x86: L3 cache index disable for 2.6.26

On Wed, Aug 13, 2008 at 03:02:42PM -0500, Mark Langsdorf wrote:
> +#if defined(CONFIG_PCI) && defined(CONFIG_K8_NB)
> +#include <linux/pci.h>

Not needed to be hidden behind a #if.

> +#include <asm/k8.h>

#includes go at the top of files.

> +static struct pci_dev *get_k8_northbridge(int node)
> +{
> + return k8_northbridges[node];
> +}
> +#else
> +static inline int pci_write_config_dword(struct pci_dev *dev, int where,
> + u32 val)

Don't redefine these common functions, pci.h will handle this properly
if for some reason CONFIG_PCI is not enabled.

thanks,

greg k-h

2008-08-14 13:38:38

by Langsdorf, Mark

Subject: Re: [PATCH 01/01][retry 3] x86: L3 cache index disable for 2.6.26

New versions of AMD processors have support to disable parts
of their L3 caches if too many MCEs are generated by the
L3 cache.

This patch provides a /sysfs interface under the cache
hierarchy to display which cache indices are disabled
(if any) and to allow monitoring applications to disable a
cache index.

This patch does not set an automatic policy to disable
the L3 cache. Policy decisions would need to be made
by a RAS handler. This patch merely makes it easier to
see what indices are currently disabled.

Signed-off-by: Mark Langsdorf <[email protected]>


diff -r e683983d4dd0 Documentation/ABI/testing/sysfs-devices-cache_disable
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/Documentation/ABI/testing/sysfs-devices-cache_disable Thu Aug 14 02:54:30 2008 -0500
@@ -0,0 +1,18 @@
+What: /sys/devices/system/cpu/cpu*/cache/index*/cache_disable_X
+Date: August 2008
+KernelVersion: 2.6.27
+Contact: [email protected]
+Description: These files exist in every cpu's cache index directories.
+ There are currently 2 cache_disable_# files in each
+ directory. Reading from these files on a supported
+ processor will return that cache disable index value
+ for that processor and node. Writing to one of these
+ files will cause the specified cache index to be disabled.
+
+ Currently, only AMD Family 10h Processors support cache index
+ disable, and only for their L3 caches. See the BIOS and
+ Kernel Developer's Guide at
+ http://www.amd.com/us-en/assets/content_type/white_papers_and_tech_docs/31116.PDF
+ for formatting information and other details on the
+ cache index disable.
+Users: [email protected]
diff -r e683983d4dd0 arch/x86/kernel/cpu/intel_cacheinfo.c
--- a/arch/x86/kernel/cpu/intel_cacheinfo.c Tue Aug 12 08:46:38 2008 -0500
+++ b/arch/x86/kernel/cpu/intel_cacheinfo.c Thu Aug 14 02:54:00 2008 -0500
@@ -16,6 +16,9 @@

#include <asm/processor.h>
#include <asm/smp.h>
+
+#include <linux/pci.h>
+#include <asm/k8.h>

#define LVL_1_INST 1
#define LVL_1_DATA 2
@@ -130,6 +133,7 @@ struct _cpuid4_info {
union _cpuid4_leaf_ebx ebx;
union _cpuid4_leaf_ecx ecx;
unsigned long size;
+ unsigned long can_disable;
cpumask_t shared_cpu_map; /* future?: only cpus/node is needed */
};

@@ -251,6 +255,14 @@ static void __cpuinit amd_cpuid4(int lea
(ebx->split.ways_of_associativity + 1) - 1;
}

+static void __cpuinit
+amd_check_l3_disable(int index, struct _cpuid4_info *this_leaf)
+{
+ if (index < 3)
+ return;
+ this_leaf->can_disable = 1;
+}
+
static int __cpuinit cpuid4_cache_lookup(int index, struct _cpuid4_info *this_leaf)
{
union _cpuid4_leaf_eax eax;
@@ -258,10 +270,13 @@ static int __cpuinit cpuid4_cache_lookup
union _cpuid4_leaf_ecx ecx;
unsigned edx;

- if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD)
+ if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) {
amd_cpuid4(index, &eax, &ebx, &ecx);
- else
- cpuid_count(4, index, &eax.full, &ebx.full, &ecx.full, &edx);
+ if (boot_cpu_data.x86 >= 0x10)
+ amd_check_l3_disable(index, this_leaf);
+ } else {
+ cpuid_count(4, index, &eax.full, &ebx.full, &ecx.full, &edx);
+ }
if (eax.split.type == CACHE_TYPE_NULL)
return -EIO; /* better error ? */

@@ -269,9 +284,9 @@ static int __cpuinit cpuid4_cache_lookup
this_leaf->ebx = ebx;
this_leaf->ecx = ecx;
this_leaf->size = (ecx.split.number_of_sets + 1) *
- (ebx.split.coherency_line_size + 1) *
- (ebx.split.physical_line_partition + 1) *
- (ebx.split.ways_of_associativity + 1);
+ (ebx.split.coherency_line_size + 1) *
+ (ebx.split.physical_line_partition + 1) *
+ (ebx.split.ways_of_associativity + 1);
return 0;
}

@@ -574,6 +589,9 @@ static DEFINE_PER_CPU(struct _index_kobj
static DEFINE_PER_CPU(struct _index_kobject *, index_kobject);
#define INDEX_KOBJECT_PTR(x, y) (&((per_cpu(index_kobject, x))[y]))

+#define to_object(k) container_of(k, struct _index_kobject, kobj)
+#define to_attr(a) container_of(a, struct _cache_attr, attr)
+
#define show_one_plus(file_name, object, val) \
static ssize_t show_##file_name \
(struct _cpuid4_info *this_leaf, char *buf) \
@@ -619,6 +637,78 @@ static inline ssize_t show_shared_cpu_li
{
return show_shared_cpu_map_func(leaf, 1, buf);
}
+
+#if defined(CONFIG_PCI) && defined(CONFIG_K8_NB)
+static struct pci_dev *get_k8_northbridge(int node)
+{
+ return k8_northbridges[node];
+}
+#else
+static struct pci_dev *get_k8_northbridge(int node)
+{
+ return NULL;
+}
+#endif
+
+static ssize_t show_cache_disable(struct _cpuid4_info *this_leaf, char *buf,
+ unsigned int index)
+{
+ int node = cpu_to_node(first_cpu(this_leaf->shared_cpu_map));
+ struct pci_dev *dev = get_k8_northbridge(node);
+ unsigned int reg = 0;
+
+ if (!this_leaf->can_disable)
+ return -EINVAL;
+
+ pci_read_config_dword(dev, 0x1BC + index * 4, &reg);
+ return sprintf(buf, "%x\n", reg);
+}
+
+#define SHOW_CACHE_DISABLE(index) \
+static ssize_t \
+show_cache_disable_##index(struct _cpuid4_info *this_leaf, char *buf) \
+{ \
+ return show_cache_disable(this_leaf, buf, index); \
+}
+
+static ssize_t
+store_cache_disable(struct _cpuid4_info *this_leaf, const char *buf,
+ size_t count, unsigned int index)
+{
+ int node = cpu_to_node(first_cpu(this_leaf->shared_cpu_map));
+ struct pci_dev *dev = get_k8_northbridge(node);
+ ssize_t ret = 0;
+ unsigned int val;
+
+ if (!this_leaf->can_disable)
+ return -EINVAL;
+
+ if (strlen(buf) > 10)
+ return -EINVAL;
+
+ ret = sscanf(buf, "%x", &val);
+ if (ret != 1)
+ return -EINVAL;
+
+ val |= 0xc0000000;
+ pci_write_config_dword(dev, 0x1BC + index * 4, val & ~0x40000000);
+ wbinvd();
+ pci_write_config_dword(dev, 0x1BC + index * 4, val);
+ return ret ? ret : count;
+}
+
+#define STORE_CACHE_DISABLE(index) \
+static ssize_t \
+store_cache_disable_##index(struct _cpuid4_info *this_leaf, \
+ const char *buf, size_t count) \
+{ \
+ return store_cache_disable(this_leaf, buf, count, index); \
+}
+
+SHOW_CACHE_DISABLE(0)
+STORE_CACHE_DISABLE(0)
+SHOW_CACHE_DISABLE(1)
+STORE_CACHE_DISABLE(1)

static ssize_t show_type(struct _cpuid4_info *this_leaf, char *buf) {
switch(this_leaf->eax.split.type) {
@@ -657,6 +747,10 @@ define_one_ro(shared_cpu_map);
define_one_ro(shared_cpu_map);
define_one_ro(shared_cpu_list);

+static struct _cache_attr cache_disable_0 = __ATTR(cache_disable_0, 0644, show_cache_disable_0, store_cache_disable_0);
+static struct _cache_attr cache_disable_1 = __ATTR(cache_disable_1, 0644, show_cache_disable_1, store_cache_disable_1);
+
+
static struct attribute * default_attrs[] = {
&type.attr,
&level.attr,
@@ -667,11 +761,10 @@ static struct attribute * default_attrs[
&size.attr,
&shared_cpu_map.attr,
&shared_cpu_list.attr,
+ &cache_disable_0.attr,
+ &cache_disable_1.attr,
NULL
};
-
-#define to_object(k) container_of(k, struct _index_kobject, kobj)
-#define to_attr(a) container_of(a, struct _cache_attr, attr)

static ssize_t show(struct kobject * kobj, struct attribute * attr, char * buf)
{
@@ -689,7 +782,15 @@ static ssize_t store(struct kobject * ko
static ssize_t store(struct kobject * kobj, struct attribute * attr,
const char * buf, size_t count)
{
- return 0;
+ struct _cache_attr *fattr = to_attr(attr);
+ struct _index_kobject *this_leaf = to_object(kobj);
+ ssize_t ret;
+
+ ret = fattr->store ?
+ fattr->store(CPUID4_INFO_IDX(this_leaf->cpu, this_leaf->index),
+ buf, count) :
+ 0;
+ return ret;
}

static struct sysfs_ops sysfs_ops = {

2008-08-14 13:59:31

by Pavel Machek

[permalink] [raw]
Subject: Re: [PATCH 01/01][retry 3] x86: L3 cache index disable for 2.6.26

Hi!

> New versions of AMD processors have support to disable parts
> of their L3 caches if too many MCEs are generated by the
> L3 cache.
>
> This patch provides a /sysfs interface under the cache
> hierarchy to display which cache indices are disabled
> (if any) and to allow monitoring applications to disable a
> cache index.
>
> This patch does not set an automatic policy to disable
> the L3 cache. Policy decisions would need to be made
> by a RAS handler. This patch merely makes it easier to
> see what indices are currently disabled.
>
> Signed-off-by: Mark Langsdorf <[email protected]>
>
>
> diff -r e683983d4dd0 Documentation/ABI/testing/sysfs-devices-cache_disable
> --- /dev/null Thu Jan 01 00:00:00 1970 +0000
> +++ b/Documentation/ABI/testing/sysfs-devices-cache_disable Thu Aug 14 02:54:30 2008 -0500
> @@ -0,0 +1,18 @@
> +What: /sys/devices/system/cpu/cpu*/cache/index*/cache_disable_X
> +Date: August 2008
> +KernelVersion: 2.6.27
> +Contact: [email protected]
> +Description: These files exist in every cpu's cache index directories.
> + There are currently 2 cache_disable_# files in each
> + directory. Reading from these files on a supported
> + processor will return that cache disable index value
> + for that processor and node. Writing to one of these
> + files will cause the specified cache index to be disabled.
> +
> + Currently, only AMD Family 10h Processors support cache index
> + disable, and only for their L3 caches. See the BIOS and
> + Kernel Developer's Guide at
> + http://www.amd.com/us-en/assets/content_type/white_papers_and_tech_docs/31116.PDF
> + for formatting information and other details on the
> + cache index disable.
> +Users: [email protected]
> diff -r e683983d4dd0 arch/x86/kernel/cpu/intel_cacheinfo.c
> --- a/arch/x86/kernel/cpu/intel_cacheinfo.c Tue Aug 12 08:46:38 2008 -0500
> +++ b/arch/x86/kernel/cpu/intel_cacheinfo.c Thu Aug 14 02:54:00 2008 -0500
> @@ -16,6 +16,9 @@
>
> #include <asm/processor.h>
> #include <asm/smp.h>
> +
> +#include <linux/pci.h>
> +#include <asm/k8.h>
>
> #define LVL_1_INST 1
> #define LVL_1_DATA 2
> @@ -130,6 +133,7 @@ struct _cpuid4_info {
> union _cpuid4_leaf_ebx ebx;
> union _cpuid4_leaf_ecx ecx;
> unsigned long size;
> + unsigned long can_disable;
> cpumask_t shared_cpu_map; /* future?: only cpus/node is needed */
> };
>
> @@ -251,6 +255,14 @@ static void __cpuinit amd_cpuid4(int lea
> (ebx->split.ways_of_associativity + 1) - 1;
> }
>
> +static void __cpuinit
> +amd_check_l3_disable(int index, struct _cpuid4_info *this_leaf)
> +{
> + if (index < 3)
> + return;
> + this_leaf->can_disable = 1;
> +}
> +
> static int __cpuinit cpuid4_cache_lookup(int index, struct _cpuid4_info *this_leaf)
> {
> union _cpuid4_leaf_eax eax;
> @@ -258,10 +270,13 @@ static int __cpuinit cpuid4_cache_lookup
> union _cpuid4_leaf_ecx ecx;
> unsigned edx;
>
> - if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD)
> + if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) {
> amd_cpuid4(index, &eax, &ebx, &ecx);
> - else
> - cpuid_count(4, index, &eax.full, &ebx.full, &ecx.full, &edx);
> + if (boot_cpu_data.x86 >= 0x10)
> + amd_check_l3_disable(index, this_leaf);
> + } else {
> + cpuid_count(4, index, &eax.full, &ebx.full, &ecx.full, &edx);
> + }
> if (eax.split.type == CACHE_TYPE_NULL)
> return -EIO; /* better error ? */
>
> @@ -269,9 +284,9 @@ static int __cpuinit cpuid4_cache_lookup
> this_leaf->ebx = ebx;
> this_leaf->ecx = ecx;
> this_leaf->size = (ecx.split.number_of_sets + 1) *
> - (ebx.split.coherency_line_size + 1) *
> - (ebx.split.physical_line_partition + 1) *
> - (ebx.split.ways_of_associativity + 1);
> + (ebx.split.coherency_line_size + 1) *
> + (ebx.split.physical_line_partition + 1) *
> + (ebx.split.ways_of_associativity + 1);
> return 0;
> }
>
> @@ -574,6 +589,9 @@ static DEFINE_PER_CPU(struct _index_kobj
> static DEFINE_PER_CPU(struct _index_kobject *, index_kobject);

> +static ssize_t
> +store_cache_disable(struct _cpuid4_info *this_leaf, const char *buf,
> + size_t count, unsigned int index)
> +{
> + int node = cpu_to_node(first_cpu(this_leaf->shared_cpu_map));
> + struct pci_dev *dev = get_k8_northbridge(node);
> + ssize_t ret = 0;
> + unsigned int val;
> +
> + if (!this_leaf->can_disable)
> + return -EINVAL;
> +
> + if (strlen(buf) > 10)
> + return -EINVAL;
> +
> + ret = sscanf(buf, "%x", &val);
> + if (ret != 1)
> + return -EINVAL;

Is it okay to strlen() on user-supplied data? Do they have to be
null-terminated? What about sscanf?

> + val |= 0xc0000000;
> + pci_write_config_dword(dev, 0x1BC + index * 4, val & ~0x40000000);
> + wbinvd();
> + pci_write_config_dword(dev, 0x1BC + index * 4, val);

Should it do if capable() test before doing such stuff?

--
(english) http://www.livejournal.com/~pavelmachek
(cesky, pictures) http://atrey.karlin.mff.cuni.cz/~pavel/picture/horses/blog.html

2008-08-14 14:02:57

by Langsdorf, Mark

[permalink] [raw]
Subject: RE: [PATCH 01/01][retry 3] x86: L3 cache index disable for 2.6.26

> > +
> > + ret = sscanf(buf, "%x", &val);
> > + if (ret != 1)
> > + return -EINVAL;
>
> Is it okay to strlen() on user-supplied data? Do they have to be
> null-terminated? What about sscanf?

The strlen can go - it's no longer necessary.

If sscanf isn't safe in this context, there's a lot of code
in drivers/cpufreq/cpufreq.c that I cribbed from that needs
to be changed, too.
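
(Aside on the safety question: the sysfs core copies each write into a
freshly zeroed page and caps the copy below PAGE_SIZE before calling a
store handler, so the buffer should already arrive null-terminated and
sscanf() cannot run past it. A minimal sketch of the pattern, with a
hypothetical handler name:

static ssize_t example_store(struct _cpuid4_info *this_leaf,
			     const char *buf, size_t count)
{
	unsigned int val;

	/* buf comes from the sysfs core already null-terminated,
	 * so parsing it with sscanf() is bounded */
	if (sscanf(buf, "%x", &val) != 1)
		return -EINVAL;

	/* act on val, then report the whole write as consumed */
	return count;
}

On that reading the strlen() guard can indeed simply go.)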

> > + val |= 0xc0000000;
> > + pci_write_config_dword(dev, 0x1BC + index * 4, val
> & ~0x40000000);
> > + wbinvd();
> > + pci_write_config_dword(dev, 0x1BC + index * 4, val);
>
> Should it do if capable() test before doing such stuff?

I don't think so. If it got this far, it is an AMD processor
from family 0x10 or later, so it has wbinvd().

-Mark Langsdorf
Operating System Research Center
AMD

2008-08-14 14:06:51

by Greg KH

[permalink] [raw]
Subject: Re: [PATCH 01/01][retry 3] x86: L3 cache index disable for 2.6.26

On Thu, Aug 14, 2008 at 08:43:43AM -0500, Mark Langsdorf wrote:
> +#if defined(CONFIG_PCI) && defined(CONFIG_K8_NB)
> +static struct pci_dev *get_k8_northbridge(int node)
> +{
> + return k8_northbridges[node];
> +}
> +#else
> +static struct pci_dev *get_k8_northbridge(int node)
> +{
> + return NULL;
> +}
> +#endif

This should go into the header file and not the .c file, right?

It's best to keep #ifdef things in .h files.

Also, you can drop the CONFIG_K8_NB test if you do that as well, as it's
already in your .h file :)
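
(A sketch of that rearrangement, with the stub moved into
include/asm-x86/k8.h; the header location and guard layout here are
assumptions rather than the patch Mark eventually sent:

/* in include/asm-x86/k8.h, near the k8_northbridges declaration */
#ifdef CONFIG_PCI
static inline struct pci_dev *get_k8_northbridge(int node)
{
	return k8_northbridges[node];
}
#else
static inline struct pci_dev *get_k8_northbridge(int node)
{
	return NULL;
}
#endif

The .c file is then left with a single unconditional call site and no
#ifdef of its own.)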

> +static ssize_t show_cache_disable(struct _cpuid4_info *this_leaf, char *buf,
> + unsigned int index)
> +{
> + int node = cpu_to_node(first_cpu(this_leaf->shared_cpu_map));
> + struct pci_dev *dev = get_k8_northbridge(node);
> + unsigned int reg = 0;
> +
> + if (!this_leaf->can_disable)

Trailing space; did you run this through scripts/checkpatch.pl?
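
(For reference, the usual invocation is along the lines of

	scripts/checkpatch.pl 0001-l3-cache-index-disable.patch

with the patch file name being a stand-in; among other style problems,
checkpatch flags trailing whitespace like the line quoted above.)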

thanks,

greg k-h

2008-08-14 14:18:47

by Langsdorf, Mark

[permalink] [raw]
Subject: Re: [PATCH 01/01][retry 4] x86: L3 cache index disable for 2.6.26

New versions of AMD processors have support to disable parts
of their L3 caches if too many MCEs are generated by the
L3 cache.

This patch provides a /sysfs interface under the cache
hierarchy to display which cache indices are disabled
(if any) and to allow monitoring applications to disable a
cache index.

This patch does not set an automatic policy to disable
the L3 cache. Policy decisions would need to be made
by a RAS handler. This patch merely makes it easier to
see what indices are currently disabled.

Signed-off-by: Mark Langsdorf <[email protected]>

diff -r e683983d4dd0 Documentation/ABI/testing/sysfs-devices-cache_disable
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/Documentation/ABI/testing/sysfs-devices-cache_disable Thu Aug 14 03:49:19 2008 -0500
@@ -0,0 +1,18 @@
+What: /sys/devices/system/cpu/cpu*/cache/index*/cache_disable_X
+Date: August 2008
+KernelVersion: 2.6.27
+Contact: [email protected]
+Description: These files exist in every cpu's cache index directories.
+ There are currently 2 cache_disable_# files in each
+ directory. Reading from these files on a supported
+ processor will return that cache disable index value
+ for that processor and node. Writing to one of these
+ files will cause the specified cache index to be disabled.
+
+ Currently, only AMD Family 10h Processors support cache index
+ disable, and only for their L3 caches. See the BIOS and
+ Kernel Developer's Guide at
+ http://www.amd.com/us-en/assets/content_type/white_papers_and_tech_docs/31116.PDF
+ for formatting information and other details on the
+ cache index disable.
+Users: [email protected]
diff -r e683983d4dd0 arch/x86/kernel/cpu/intel_cacheinfo.c
--- a/arch/x86/kernel/cpu/intel_cacheinfo.c Tue Aug 12 08:46:38 2008 -0500
+++ b/arch/x86/kernel/cpu/intel_cacheinfo.c Thu Aug 14 03:59:45 2008 -0500
@@ -16,6 +16,9 @@

#include <asm/processor.h>
#include <asm/smp.h>
+
+#include <linux/pci.h>
+#include <asm/k8.h>

#define LVL_1_INST 1
#define LVL_1_DATA 2
@@ -130,6 +133,7 @@ struct _cpuid4_info {
union _cpuid4_leaf_ebx ebx;
union _cpuid4_leaf_ecx ecx;
unsigned long size;
+ unsigned long can_disable;
cpumask_t shared_cpu_map; /* future?: only cpus/node is needed */
};

@@ -251,6 +255,14 @@ static void __cpuinit amd_cpuid4(int lea
(ebx->split.ways_of_associativity + 1) - 1;
}

+static void __cpuinit
+amd_check_l3_disable(int index, struct _cpuid4_info *this_leaf)
+{
+ if (index < 3)
+ return;
+ this_leaf->can_disable = 1;
+}
+
static int __cpuinit cpuid4_cache_lookup(int index, struct _cpuid4_info *this_leaf)
{
union _cpuid4_leaf_eax eax;
@@ -258,10 +270,13 @@ static int __cpuinit cpuid4_cache_lookup
union _cpuid4_leaf_ecx ecx;
unsigned edx;

- if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD)
+ if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) {
amd_cpuid4(index, &eax, &ebx, &ecx);
- else
- cpuid_count(4, index, &eax.full, &ebx.full, &ecx.full, &edx);
+ if (boot_cpu_data.x86 >= 0x10)
+ amd_check_l3_disable(index, this_leaf);
+ } else {
+ cpuid_count(4, index, &eax.full, &ebx.full, &ecx.full, &edx);
+ }
if (eax.split.type == CACHE_TYPE_NULL)
return -EIO; /* better error ? */

@@ -269,9 +284,9 @@ static int __cpuinit cpuid4_cache_lookup
this_leaf->ebx = ebx;
this_leaf->ecx = ecx;
this_leaf->size = (ecx.split.number_of_sets + 1) *
- (ebx.split.coherency_line_size + 1) *
- (ebx.split.physical_line_partition + 1) *
- (ebx.split.ways_of_associativity + 1);
+ (ebx.split.coherency_line_size + 1) *
+ (ebx.split.physical_line_partition + 1) *
+ (ebx.split.ways_of_associativity + 1);
return 0;
}

@@ -574,6 +589,9 @@ static DEFINE_PER_CPU(struct _index_kobj
static DEFINE_PER_CPU(struct _index_kobject *, index_kobject);
#define INDEX_KOBJECT_PTR(x, y) (&((per_cpu(index_kobject, x))[y]))

+#define to_object(k) container_of(k, struct _index_kobject, kobj)
+#define to_attr(a) container_of(a, struct _cache_attr, attr)
+
#define show_one_plus(file_name, object, val) \
static ssize_t show_##file_name \
(struct _cpuid4_info *this_leaf, char *buf) \
@@ -619,6 +637,63 @@ static inline ssize_t show_shared_cpu_li
{
return show_shared_cpu_map_func(leaf, 1, buf);
}
+
+static ssize_t show_cache_disable(struct _cpuid4_info *this_leaf, char *buf,
+ unsigned int index)
+{
+ int node = cpu_to_node(first_cpu(this_leaf->shared_cpu_map));
+ struct pci_dev *dev = get_k8_northbridge(node);
+ unsigned int reg = 0;
+
+ if (!this_leaf->can_disable)
+ return -EINVAL;
+
+ pci_read_config_dword(dev, 0x1BC + index * 4, &reg);
+ return sprintf(buf, "%x\n", reg);
+}
+
+#define SHOW_CACHE_DISABLE(index) \
+static ssize_t \
+show_cache_disable_##index(struct _cpuid4_info *this_leaf, char *buf) \
+{ \
+ return show_cache_disable(this_leaf, buf, index); \
+}
+
+static ssize_t
+store_cache_disable(struct _cpuid4_info *this_leaf, const char *buf,
+ size_t count, unsigned int index)
+{
+ int node = cpu_to_node(first_cpu(this_leaf->shared_cpu_map));
+ struct pci_dev *dev = get_k8_northbridge(node);
+ ssize_t ret = 0;
+ unsigned int val;
+
+ if (!this_leaf->can_disable)
+ return -EINVAL;
+
+ ret = sscanf(buf, "%x", &val);
+ if (ret != 1)
+ return -EINVAL;
+
+ val |= 0xc0000000;
+ pci_write_config_dword(dev, 0x1BC + index * 4, val & ~0x40000000);
+ wbinvd();
+ pci_write_config_dword(dev, 0x1BC + index * 4, val);
+ return count;
+}
+
+#define STORE_CACHE_DISABLE(index) \
+static ssize_t \
+store_cache_disable_##index(struct _cpuid4_info *this_leaf, \
+ const char *buf, size_t count) \
+{ \
+ return store_cache_disable(this_leaf, buf, count, index); \
+}
+
+SHOW_CACHE_DISABLE(0)
+STORE_CACHE_DISABLE(0)
+SHOW_CACHE_DISABLE(1)
+STORE_CACHE_DISABLE(1)

static ssize_t show_type(struct _cpuid4_info *this_leaf, char *buf) {
switch(this_leaf->eax.split.type) {
@@ -657,6 +732,12 @@ define_one_ro(shared_cpu_map);
define_one_ro(shared_cpu_map);
define_one_ro(shared_cpu_list);

+static struct _cache_attr cache_disable_0 = __ATTR(cache_disable_0, 0644,
+ show_cache_disable_0, store_cache_disable_0);
+static struct _cache_attr cache_disable_1 = __ATTR(cache_disable_1, 0644,
+ show_cache_disable_1, store_cache_disable_1);
+
+
static struct attribute * default_attrs[] = {
&type.attr,
&level.attr,
@@ -667,11 +748,10 @@ static struct attribute * default_attrs[
&size.attr,
&shared_cpu_map.attr,
&shared_cpu_list.attr,
+ &cache_disable_0.attr,
+ &cache_disable_1.attr,
NULL
};
-
-#define to_object(k) container_of(k, struct _index_kobject, kobj)
-#define to_attr(a) container_of(a, struct _cache_attr, attr)

static ssize_t show(struct kobject * kobj, struct attribute * attr, char * buf)
{
@@ -689,7 +769,15 @@ static ssize_t store(struct kobject * ko
static ssize_t store(struct kobject * kobj, struct attribute * attr,
const char * buf, size_t count)
{
- return 0;
+ struct _cache_attr *fattr = to_attr(attr);
+ struct _index_kobject *this_leaf = to_object(kobj);
+ ssize_t ret;
+
+ ret = fattr->store ?
+ fattr->store(CPUID4_INFO_IDX(this_leaf->cpu, this_leaf->index),
+ buf, count) :
+ 0;
+ return ret;
}

static struct sysfs_ops sysfs_ops = {

2008-08-14 15:45:44

by Pavel Machek

[permalink] [raw]
Subject: Re: [PATCH 01/01][retry 3] x86: L3 cache index disable for 2.6.26

Hi!

> > > + val |= 0xc0000000;
> > > + pci_write_config_dword(dev, 0x1BC + index * 4, val
> > & ~0x40000000);
> > > + wbinvd();
> > > + pci_write_config_dword(dev, 0x1BC + index * 4, val);
> >
> > Should it do if capable() test before doing such stuff?
>
> I don't think so. If it got this far, it is an AMD processor
> from family 0x10 or later, so it has wbinvd().

No, I meant we should check for permissions. Filesystem permissions
are traditionally not enough for stuff like this; if
(capable(CAP_ADMIN)) or something?
Pavel
--
(english) http://www.livejournal.com/~pavelmachek
(cesky, pictures) http://atrey.karlin.mff.cuni.cz/~pavel/picture/horses/blog.html

2008-08-14 16:41:57

by Langsdorf, Mark

[permalink] [raw]
Subject: RE: [PATCH 01/01][retry 3] x86: L3 cache index disable for 2.6.26

> > > > + val |= 0xc0000000;
> > > > + pci_write_config_dword(dev, 0x1BC + index * 4, val
> > > & ~0x40000000);
> > > > + wbinvd();
> > > > + pci_write_config_dword(dev, 0x1BC + index * 4, val);
> > >
> > > Should it do if capable() test before doing such stuff?
> >
> > I don't think so. If it got this far, it is an AMD processor
> > from family 0x10 or later, so it has wbinvd().
>
> No, I meant we should check for permissions. Filesystem permissions
> are traditionally not enough for stuff like this; if
> (capable(CAP_ADMIN)) or something?

I see what you mean. I'll add a check against capable(CAP_SYS_ADMIN).

-Mark Langsdorf
Operating System Research Center
AMD

2008-08-14 16:43:37

by Langsdorf, Mark

[permalink] [raw]
Subject: Re: [PATCH 01/01][retry 5] x86: L3 cache index disable for 2.6.26

New versions of AMD processors have support to disable parts
of their L3 caches if too many MCEs are generated by the
L3 cache.

This patch provides a /sysfs interface under the cache
hierarchy to display which cache indices are disabled
(if any) and to allow monitoring applications to disable a
cache index.

This patch does not set an automatic policy to disable
the L3 cache. Policy decisions would need to be made
by a RAS handler. This patch merely makes it easier to
see what indices are currently disabled.

Signed-off-by: Mark Langsdorf <[email protected]>

diff -r e683983d4dd0 Documentation/ABI/testing/sysfs-devices-cache_disable
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/Documentation/ABI/testing/sysfs-devices-cache_disable Thu Aug 14 03:49:19 2008 -0500
@@ -0,0 +1,18 @@
+What: /sys/devices/system/cpu/cpu*/cache/index*/cache_disable_X
+Date: August 2008
+KernelVersion: 2.6.27
+Contact: [email protected]
+Description: These files exist in every cpu's cache index directories.
+ There are currently 2 cache_disable_# files in each
+ directory. Reading from these files on a supported
+ processor will return that cache disable index value
+ for that processor and node. Writing to one of these
+ files will cause the specified cache index to be disabled.
+
+ Currently, only AMD Family 10h Processors support cache index
+ disable, and only for their L3 caches. See the BIOS and
+ Kernel Developer's Guide at
+ http://www.amd.com/us-en/assets/content_type/white_papers_and_tech_docs/31116.PDF
+ for formatting information and other details on the
+ cache index disable.
+Users: [email protected]
diff -r e683983d4dd0 arch/x86/kernel/cpu/intel_cacheinfo.c
--- a/arch/x86/kernel/cpu/intel_cacheinfo.c Tue Aug 12 08:46:38 2008 -0500
+++ b/arch/x86/kernel/cpu/intel_cacheinfo.c Thu Aug 14 06:23:13 2008 -0500
@@ -16,6 +16,9 @@

#include <asm/processor.h>
#include <asm/smp.h>
+
+#include <linux/pci.h>
+#include <asm/k8.h>

#define LVL_1_INST 1
#define LVL_1_DATA 2
@@ -130,6 +133,7 @@ struct _cpuid4_info {
union _cpuid4_leaf_ebx ebx;
union _cpuid4_leaf_ecx ecx;
unsigned long size;
+ unsigned long can_disable;
cpumask_t shared_cpu_map; /* future?: only cpus/node is needed */
};

@@ -251,6 +255,14 @@ static void __cpuinit amd_cpuid4(int lea
(ebx->split.ways_of_associativity + 1) - 1;
}

+static void __cpuinit
+amd_check_l3_disable(int index, struct _cpuid4_info *this_leaf)
+{
+ if (index < 3)
+ return;
+ this_leaf->can_disable = 1;
+}
+
static int __cpuinit cpuid4_cache_lookup(int index, struct _cpuid4_info *this_leaf)
{
union _cpuid4_leaf_eax eax;
@@ -258,10 +270,13 @@ static int __cpuinit cpuid4_cache_lookup
union _cpuid4_leaf_ecx ecx;
unsigned edx;

- if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD)
+ if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) {
amd_cpuid4(index, &eax, &ebx, &ecx);
- else
- cpuid_count(4, index, &eax.full, &ebx.full, &ecx.full, &edx);
+ if (boot_cpu_data.x86 >= 0x10)
+ amd_check_l3_disable(index, this_leaf);
+ } else {
+ cpuid_count(4, index, &eax.full, &ebx.full, &ecx.full, &edx);
+ }
if (eax.split.type == CACHE_TYPE_NULL)
return -EIO; /* better error ? */

@@ -269,9 +284,9 @@ static int __cpuinit cpuid4_cache_lookup
this_leaf->ebx = ebx;
this_leaf->ecx = ecx;
this_leaf->size = (ecx.split.number_of_sets + 1) *
- (ebx.split.coherency_line_size + 1) *
- (ebx.split.physical_line_partition + 1) *
- (ebx.split.ways_of_associativity + 1);
+ (ebx.split.coherency_line_size + 1) *
+ (ebx.split.physical_line_partition + 1) *
+ (ebx.split.ways_of_associativity + 1);
return 0;
}

@@ -574,6 +589,9 @@ static DEFINE_PER_CPU(struct _index_kobj
static DEFINE_PER_CPU(struct _index_kobject *, index_kobject);
#define INDEX_KOBJECT_PTR(x, y) (&((per_cpu(index_kobject, x))[y]))

+#define to_object(k) container_of(k, struct _index_kobject, kobj)
+#define to_attr(a) container_of(a, struct _cache_attr, attr)
+
#define show_one_plus(file_name, object, val) \
static ssize_t show_##file_name \
(struct _cpuid4_info *this_leaf, char *buf) \
@@ -619,6 +637,66 @@ static inline ssize_t show_shared_cpu_li
{
return show_shared_cpu_map_func(leaf, 1, buf);
}
+
+static ssize_t show_cache_disable(struct _cpuid4_info *this_leaf, char *buf,
+ unsigned int index)
+{
+ int node = cpu_to_node(first_cpu(this_leaf->shared_cpu_map));
+ struct pci_dev *dev = get_k8_northbridge(node);
+ unsigned int reg = 0;
+
+ if (!this_leaf->can_disable)
+ return -EINVAL;
+
+ pci_read_config_dword(dev, 0x1BC + index * 4, &reg);
+ return sprintf(buf, "%x\n", reg);
+}
+
+#define SHOW_CACHE_DISABLE(index) \
+static ssize_t \
+show_cache_disable_##index(struct _cpuid4_info *this_leaf, char *buf) \
+{ \
+ return show_cache_disable(this_leaf, buf, index); \
+}
+
+static ssize_t
+store_cache_disable(struct _cpuid4_info *this_leaf, const char *buf,
+ size_t count, unsigned int index)
+{
+ int node = cpu_to_node(first_cpu(this_leaf->shared_cpu_map));
+ struct pci_dev *dev = get_k8_northbridge(node);
+ ssize_t ret = 0;
+ unsigned int val;
+
+ if (!this_leaf->can_disable)
+ return -EINVAL;
+
+ ret = sscanf(buf, "%x", &val);
+ if (ret != 1)
+ return -EINVAL;
+
+ if (!capable(CAP_SYS_ADMIN))
+ return -EPERM;
+
+ val |= 0xc0000000;
+ pci_write_config_dword(dev, 0x1BC + index * 4, val & ~0x40000000);
+ wbinvd();
+ pci_write_config_dword(dev, 0x1BC + index * 4, val);
+ return count;
+}
+
+#define STORE_CACHE_DISABLE(index) \
+static ssize_t \
+store_cache_disable_##index(struct _cpuid4_info *this_leaf, \
+ const char *buf, size_t count) \
+{ \
+ return store_cache_disable(this_leaf, buf, count, index); \
+}
+
+SHOW_CACHE_DISABLE(0)
+STORE_CACHE_DISABLE(0)
+SHOW_CACHE_DISABLE(1)
+STORE_CACHE_DISABLE(1)

static ssize_t show_type(struct _cpuid4_info *this_leaf, char *buf) {
switch(this_leaf->eax.split.type) {
@@ -657,6 +735,12 @@ define_one_ro(shared_cpu_map);
define_one_ro(shared_cpu_map);
define_one_ro(shared_cpu_list);

+static struct _cache_attr cache_disable_0 = __ATTR(cache_disable_0, 0644,
+ show_cache_disable_0, store_cache_disable_0);
+static struct _cache_attr cache_disable_1 = __ATTR(cache_disable_1, 0644,
+ show_cache_disable_1, store_cache_disable_1);
+
+
static struct attribute * default_attrs[] = {
&type.attr,
&level.attr,
@@ -667,11 +751,10 @@ static struct attribute * default_attrs[
&size.attr,
&shared_cpu_map.attr,
&shared_cpu_list.attr,
+ &cache_disable_0.attr,
+ &cache_disable_1.attr,
NULL
};
-
-#define to_object(k) container_of(k, struct _index_kobject, kobj)
-#define to_attr(a) container_of(a, struct _cache_attr, attr)

static ssize_t show(struct kobject * kobj, struct attribute * attr, char * buf)
{
@@ -689,7 +772,15 @@ static ssize_t store(struct kobject * ko
static ssize_t store(struct kobject * kobj, struct attribute * attr,
const char * buf, size_t count)
{
- return 0;
+ struct _cache_attr *fattr = to_attr(attr);
+ struct _index_kobject *this_leaf = to_object(kobj);
+ ssize_t ret;
+
+ ret = fattr->store ?
+ fattr->store(CPUID4_INFO_IDX(this_leaf->cpu, this_leaf->index),
+ buf, count) :
+ 0;
+ return ret;
}

static struct sysfs_ops sysfs_ops = {

2008-08-14 17:13:22

by Greg KH

[permalink] [raw]
Subject: Re: [PATCH 01/01][retry 5] x86: L3 cache index disable for 2.6.26

On Thu, Aug 14, 2008 at 11:48:37AM -0500, Mark Langsdorf wrote:
> New versions of AMD processors have support to disable parts
> of their L3 caches if too many MCEs are generated by the
> L3 cache.
>
> This patch provides a /sysfs interface under the cache
> hierarchy to display which cache indices are disabled
> (if any) and to allow monitoring applications to disable a
> cache index.
>
> This patch does not set an automatic policy to disable
> the L3 cache. Policy decisions would need to be made
> by a RAS handler. This patch merely makes it easier to
> see what indices are currently disabled.
>
> Signed-off-by: Mark Langsdorf <[email protected]>
>
> diff -r e683983d4dd0 Documentation/ABI/testing/sysfs-devices-cache_disable
> --- /dev/null Thu Jan 01 00:00:00 1970 +0000
> +++ b/Documentation/ABI/testing/sysfs-devices-cache_disable Thu Aug 14 03:49:19 2008 -0500
> @@ -0,0 +1,18 @@
> +What: /sys/devices/system/cpu/cpu*/cache/index*/cache_disable_X
> +Date: August 2008
> +KernelVersion: 2.6.27
> +Contact: [email protected]
> +Description: These files exist in every cpu's cache index directories.
> + There are currently 2 cache_disable_# files in each
> + directory. Reading from these files on a supported
> + processor will return that cache disable index value
> + for that processor and node. Writing to one of these
> + files will cause the specified cache index to be disabled.
> +
> + Currently, only AMD Family 10h Processors support cache index
> + disable, and only for their L3 caches. See the BIOS and
> + Kernel Developer's Guide at
> + http://www.amd.com/us-en/assets/content_type/white_papers_and_tech_docs/31116.PDF
> + for formatting information and other details on the
> + cache index disable.
> +Users: [email protected]
> diff -r e683983d4dd0 arch/x86/kernel/cpu/intel_cacheinfo.c
> --- a/arch/x86/kernel/cpu/intel_cacheinfo.c Tue Aug 12 08:46:38 2008 -0500
> +++ b/arch/x86/kernel/cpu/intel_cacheinfo.c Thu Aug 14 06:23:13 2008 -0500
> @@ -16,6 +16,9 @@
>
> #include <asm/processor.h>
> #include <asm/smp.h>
> +
> +#include <linux/pci.h>
> +#include <asm/k8.h>
>
> #define LVL_1_INST 1
> #define LVL_1_DATA 2
> @@ -130,6 +133,7 @@ struct _cpuid4_info {
> union _cpuid4_leaf_ebx ebx;
> union _cpuid4_leaf_ecx ecx;
> unsigned long size;
> + unsigned long can_disable;

Why use an unsigned long for a single bit value? bool perhaps?

thanks,

greg k-h

2008-08-14 18:27:48

by Langsdorf, Mark

[permalink] [raw]
Subject: Re: [PATCH 01/01][retry 5] x86: L3 cache index disable for 2.6.26

New versions of AMD processors have support to disable parts
of their L3 caches if too many MCEs are generated by the
L3 cache.

This patch provides a /sysfs interface under the cache
hierarchy to display which cache indices are disabled
(if any) and to allow monitoring applications to disable a
cache index.

This patch does not set an automatic policy to disable
the L3 cache. Policy decisions would need to be made
by a RAS handler. This patch merely makes it easier to
see what indices are currently disabled.

Signed-off-by: Mark Langsdorf <[email protected]>
diff -r e683983d4dd0 Documentation/ABI/testing/sysfs-devices-cache_disable
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/Documentation/ABI/testing/sysfs-devices-cache_disable Thu Aug 14 03:49:19 2008 -0500
@@ -0,0 +1,18 @@
+What: /sys/devices/system/cpu/cpu*/cache/index*/cache_disable_X
+Date: August 2008
+KernelVersion: 2.6.27
+Contact: [email protected]
+Description: These files exist in every cpu's cache index directories.
+ There are currently 2 cache_disable_# files in each
+ directory. Reading from these files on a supported
+ processor will return that cache disable index value
+ for that processor and node. Writing to one of these
+ files will cause the specified cache index to be disabled.
+
+ Currently, only AMD Family 10h Processors support cache index
+ disable, and only for their L3 caches. See the BIOS and
+ Kernel Developer's Guide at
+ http://www.amd.com/us-en/assets/content_type/white_papers_and_tech_docs/31116.PDF
+ for formatting information and other details on the
+ cache index disable.
+Users: [email protected]
diff -r e683983d4dd0 arch/x86/kernel/cpu/intel_cacheinfo.c
--- a/arch/x86/kernel/cpu/intel_cacheinfo.c Tue Aug 12 08:46:38 2008 -0500
+++ b/arch/x86/kernel/cpu/intel_cacheinfo.c Thu Aug 14 08:06:12 2008 -0500
@@ -16,6 +16,9 @@

#include <asm/processor.h>
#include <asm/smp.h>
+
+#include <linux/pci.h>
+#include <asm/k8.h>

#define LVL_1_INST 1
#define LVL_1_DATA 2
@@ -130,6 +133,7 @@ struct _cpuid4_info {
union _cpuid4_leaf_ebx ebx;
union _cpuid4_leaf_ecx ecx;
unsigned long size;
+ bool can_disable;
cpumask_t shared_cpu_map; /* future?: only cpus/node is needed */
};

@@ -251,6 +255,14 @@ static void __cpuinit amd_cpuid4(int lea
(ebx->split.ways_of_associativity + 1) - 1;
}

+static void __cpuinit
+amd_check_l3_disable(int index, struct _cpuid4_info *this_leaf)
+{
+ if (index < 3)
+ return;
+ this_leaf->can_disable = 1;
+}
+
static int __cpuinit cpuid4_cache_lookup(int index, struct _cpuid4_info *this_leaf)
{
union _cpuid4_leaf_eax eax;
@@ -258,10 +270,13 @@ static int __cpuinit cpuid4_cache_lookup
union _cpuid4_leaf_ecx ecx;
unsigned edx;

- if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD)
+ if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) {
amd_cpuid4(index, &eax, &ebx, &ecx);
- else
- cpuid_count(4, index, &eax.full, &ebx.full, &ecx.full, &edx);
+ if (boot_cpu_data.x86 >= 0x10)
+ amd_check_l3_disable(index, this_leaf);
+ } else {
+ cpuid_count(4, index, &eax.full, &ebx.full, &ecx.full, &edx);
+ }
if (eax.split.type == CACHE_TYPE_NULL)
return -EIO; /* better error ? */

@@ -269,9 +284,9 @@ static int __cpuinit cpuid4_cache_lookup
this_leaf->ebx = ebx;
this_leaf->ecx = ecx;
this_leaf->size = (ecx.split.number_of_sets + 1) *
- (ebx.split.coherency_line_size + 1) *
- (ebx.split.physical_line_partition + 1) *
- (ebx.split.ways_of_associativity + 1);
+ (ebx.split.coherency_line_size + 1) *
+ (ebx.split.physical_line_partition + 1) *
+ (ebx.split.ways_of_associativity + 1);
return 0;
}

@@ -574,6 +589,9 @@ static DEFINE_PER_CPU(struct _index_kobj
static DEFINE_PER_CPU(struct _index_kobject *, index_kobject);
#define INDEX_KOBJECT_PTR(x, y) (&((per_cpu(index_kobject, x))[y]))

+#define to_object(k) container_of(k, struct _index_kobject, kobj)
+#define to_attr(a) container_of(a, struct _cache_attr, attr)
+
#define show_one_plus(file_name, object, val) \
static ssize_t show_##file_name \
(struct _cpuid4_info *this_leaf, char *buf) \
@@ -619,6 +637,66 @@ static inline ssize_t show_shared_cpu_li
{
return show_shared_cpu_map_func(leaf, 1, buf);
}
+
+static ssize_t show_cache_disable(struct _cpuid4_info *this_leaf, char *buf,
+ unsigned int index)
+{
+ int node = cpu_to_node(first_cpu(this_leaf->shared_cpu_map));
+ struct pci_dev *dev = get_k8_northbridge(node);
+ unsigned int reg = 0;
+
+ if (!this_leaf->can_disable)
+ return -EINVAL;
+
+ pci_read_config_dword(dev, 0x1BC + index * 4, &reg);
+ return sprintf(buf, "%x\n", reg);
+}
+
+#define SHOW_CACHE_DISABLE(index) \
+static ssize_t \
+show_cache_disable_##index(struct _cpuid4_info *this_leaf, char *buf) \
+{ \
+ return show_cache_disable(this_leaf, buf, index); \
+}
+
+static ssize_t
+store_cache_disable(struct _cpuid4_info *this_leaf, const char *buf,
+ size_t count, unsigned int index)
+{
+ int node = cpu_to_node(first_cpu(this_leaf->shared_cpu_map));
+ struct pci_dev *dev = get_k8_northbridge(node);
+ ssize_t ret = 0;
+ unsigned int val;
+
+ if (!this_leaf->can_disable)
+ return -EINVAL;
+
+ ret = sscanf(buf, "%x", &val);
+ if (ret != 1)
+ return -EINVAL;
+
+ if (!capable(CAP_SYS_ADMIN))
+ return -EPERM;
+
+ val |= 0xc0000000;
+ pci_write_config_dword(dev, 0x1BC + index * 4, val & ~0x40000000);
+ wbinvd();
+ pci_write_config_dword(dev, 0x1BC + index * 4, val);
+ return count;
+}
+
+#define STORE_CACHE_DISABLE(index) \
+static ssize_t \
+store_cache_disable_##index(struct _cpuid4_info *this_leaf, \
+ const char *buf, size_t count) \
+{ \
+ return store_cache_disable(this_leaf, buf, count, index); \
+}
+
+SHOW_CACHE_DISABLE(0)
+STORE_CACHE_DISABLE(0)
+SHOW_CACHE_DISABLE(1)
+STORE_CACHE_DISABLE(1)

static ssize_t show_type(struct _cpuid4_info *this_leaf, char *buf) {
switch(this_leaf->eax.split.type) {
@@ -657,6 +735,12 @@ define_one_ro(shared_cpu_map);
define_one_ro(shared_cpu_map);
define_one_ro(shared_cpu_list);

+static struct _cache_attr cache_disable_0 = __ATTR(cache_disable_0, 0644,
+ show_cache_disable_0, store_cache_disable_0);
+static struct _cache_attr cache_disable_1 = __ATTR(cache_disable_1, 0644,
+ show_cache_disable_1, store_cache_disable_1);
+
+
static struct attribute * default_attrs[] = {
&type.attr,
&level.attr,
@@ -667,11 +751,10 @@ static struct attribute * default_attrs[
&size.attr,
&shared_cpu_map.attr,
&shared_cpu_list.attr,
+ &cache_disable_0.attr,
+ &cache_disable_1.attr,
NULL
};
-
-#define to_object(k) container_of(k, struct _index_kobject, kobj)
-#define to_attr(a) container_of(a, struct _cache_attr, attr)

static ssize_t show(struct kobject * kobj, struct attribute * attr, char * buf)
{
@@ -689,7 +772,15 @@ static ssize_t store(struct kobject * ko
static ssize_t store(struct kobject * kobj, struct attribute * attr,
const char * buf, size_t count)
{
- return 0;
+ struct _cache_attr *fattr = to_attr(attr);
+ struct _index_kobject *this_leaf = to_object(kobj);
+ ssize_t ret;
+
+ ret = fattr->store ?
+ fattr->store(CPUID4_INFO_IDX(this_leaf->cpu, this_leaf->index),
+ buf, count) :
+ 0;
+ return ret;
}

static struct sysfs_ops sysfs_ops = {

2008-08-15 16:42:39

by Ingo Molnar

[permalink] [raw]
Subject: Re: [PATCH 01/01][retry 5] x86: L3 cache index disable for 2.6.26


* Mark Langsdorf <[email protected]> wrote:

> New versions of AMD processors have support to disable parts
> of their L3 caches if too many MCEs are generated by the
> L3 cache.
>
> This patch provides a /sysfs interface under the cache
> hierarchy to display which cache indices are disabled
> (if any) and to allow monitoring applications to disable a
> cache index.
>
> This patch does not set an automatic policy to disable
> the L3 cache. Policy decisions would need to be made
> by a RAS handler. This patch merely makes it easier to
> see what indices are currently disabled.

hm, looks good - but since i've got an earlier version of this included
in -tip already:

commit a24e8d36f5fc047dac9af6200322ed393f2e3175
Author: Mark Langsdorf <[email protected]>
Date: Tue Jul 22 13:06:02 2008 -0500

x86: L3 cache index disable for 2.6.26

could you please send a delta patch against tip/master?

http://people.redhat.com/mingo/tip.git/README

Thanks,

Ingo

2008-08-15 19:21:42

by Langsdorf, Mark

[permalink] [raw]
Subject: RE: [PATCH 01/01][retry 5] x86: L3 cache index disable for 2.6.26

> > This patch does not set an automatic policy to disable
> > the L3 cache. Policy decisions would need to be made
> > by a RAS handler. This patch merely makes it easier to
> > see what indices are currently disabled.
>
> hm, looks good - but since i've got an earlier version of
> this included
> in -tip already:
>
> commit a24e8d36f5fc047dac9af6200322ed393f2e3175
> Author: Mark Langsdorf <[email protected]>
> Date: Tue Jul 22 13:06:02 2008 -0500
>
> x86: L3 cache index disable for 2.6.26
>
> could you please send a delta patch against tip/master?
>
> http://people.redhat.com/mingo/tip.git/README

tip/master looks like it has version 5 of my patch
in it already. Am I missing something?

-Mark Langsdorf
Operating System Research Center
AMD

2008-08-15 19:57:50

by Ingo Molnar

[permalink] [raw]
Subject: Re: [PATCH 01/01][retry 5] x86: L3 cache index disable for 2.6.26


* Langsdorf, Mark <[email protected]> wrote:

> > > This patch does not set an automatic policy to disable
> > > the L3 cache. Policy decisions would need to be made
> > > by a RAS handler. This patch merely makes it easier to
> > > see what indices are currently disabled.
> >
> > hm, looks good - but since i've got an earlier version of
> > this included
> > in -tip already:
> >
> > commit a24e8d36f5fc047dac9af6200322ed393f2e3175
> > Author: Mark Langsdorf <[email protected]>
> > Date: Tue Jul 22 13:06:02 2008 -0500
> >
> > x86: L3 cache index disable for 2.6.26
> >
> > could you please send a delta patch against tip/master?
> >
> > http://people.redhat.com/mingo/tip.git/README
>
> tip/master looks like it has version 5 of my patch
> in it already. Am I missing something?

ok, that's good - that's the latest, right?

Ingo

2008-08-15 20:02:32

by Langsdorf, Mark

[permalink] [raw]
Subject: RE: [PATCH 01/01][retry 5] x86: L3 cache index disable for 2.6.26

> > > Author: Mark Langsdorf <[email protected]>
> > > Date: Tue Jul 22 13:06:02 2008 -0500
> > >
> > > x86: L3 cache index disable for 2.6.26
> > >
> > > could you please send a delta patch against tip/master?
> > >
> > > http://people.redhat.com/mingo/tip.git/README
> >
> > tip/master looks like it has version 5 of my patch
> > in it already. Am I missing something?
>
> ok, that's good - that's the latest, right?

Yes. A single inline function with asm instructions and a
documented clflush check in asm/processor.h, and the call to
wbinvd_halt() in both process_32.c and process_64.c. It looks
like we're done here.

-Mark Langsdorf
Operating System Research Center
AMD
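
(For readers without tip at hand: the wbinvd_halt() described above is,
reconstructed from Mark's summary, roughly the following inline; treat
the exact asm as approximate rather than the committed version:

static inline void wbinvd_halt(void)
{
	mb();
	/* the documented check: use clflush support as the test for
	 * whether issuing wbinvd here is legal on this CPU */
	if (cpu_has_clflush)
		asm volatile("cli; wbinvd; 1: hlt; jmp 1b" : : : "memory");
	else
		while (1)
			halt();
}

That is: with interrupts off, write back and invalidate the caches,
then halt the CPU in a loop.)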