2010-12-20 17:14:19

by Hans Rosenfeld

Subject: [PATCH 0/4] x86, amd: family 0x15 L3 cache features

This patch set applies to tip/x86/amd-nb f658bcfb. It enables L3 cache
index disable and adds support for L3 cache partitioning on family 0x15
CPUs.

Andreas Herrmann (1):
x86, amd: Normalize compute unit IDs on multi-node processors

Hans Rosenfeld (3):
x86, amd: Enable L3 cache index disable on family 0x15
x86, amd: Extend AMD northbridge caching code to support "Link
Control" devices
x86, amd: Support L3 Cache Partitioning on AMD family 0x15 CPUs

 arch/x86/include/asm/amd_nb.h         |    4 ++
 arch/x86/kernel/amd_nb.c              |   69 ++++++++++++++++++++++++++++++-
 arch/x86/kernel/cpu/amd.c             |    8 +++-
 arch/x86/kernel/cpu/intel_cacheinfo.c |   73 +++++++++++++++++++++++++++-----
 arch/x86/kernel/smpboot.c             |    1 +
 include/linux/pci_ids.h               |    1 +
 6 files changed, 140 insertions(+), 16 deletions(-)


2010-12-20 17:14:06

by Hans Rosenfeld

Subject: [PATCH 4/4] x86, amd: Support L3 Cache Partitioning on AMD family 0x15 CPUs

L3 Cache Partitioning allows selecting which of the 4 L3 subcaches can
be used for evictions by the L2 cache of each compute unit. By writing a
4-bit hexadecimal mask into the sysfs file
/sys/devices/system/cpu/cpuX/cache/index3/subcaches, the user can set
the enabled subcaches for a CPU. The settings are read from and written
directly to the hardware, so two CPUs belonging to the same compute
unit can never have contradictory settings. Writing always overwrites
any previous setting for the compute unit.
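
As an illustration only (this is not part of the patch; "cpu0" and the
0x3 mask are arbitrary examples, and the write requires CAP_SYS_ADMIN),
reading and setting the mask from user space could look like this:

#include <stdio.h>

int main(void)
{
        const char *path =
                "/sys/devices/system/cpu/cpu0/cache/index3/subcaches";
        unsigned int mask;
        FILE *f;

        /* Read the current subcache mask of cpu0's compute unit. */
        f = fopen(path, "r");
        if (!f || fscanf(f, "%x", &mask) != 1) {
                perror(path);
                return 1;
        }
        fclose(f);
        printf("current subcache mask: 0x%x\n", mask);

        /* Restrict the compute unit to subcaches 0 and 1 (mask 0x3). */
        f = fopen(path, "w");
        if (!f) {
                perror(path);
                return 1;
        }
        fprintf(f, "3\n");
        fclose(f);
        return 0;
}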

Signed-off-by: Hans Rosenfeld <[email protected]>
---
 arch/x86/include/asm/amd_nb.h         |    3 +
 arch/x86/kernel/amd_nb.c              |   55 +++++++++++++++++++++++++
 arch/x86/kernel/cpu/intel_cacheinfo.c |   73 +++++++++++++++++++++++++++-----
 3 files changed, 119 insertions(+), 12 deletions(-)

diff --git a/arch/x86/include/asm/amd_nb.h b/arch/x86/include/asm/amd_nb.h
index b830c7f..23e299f 100644
--- a/arch/x86/include/asm/amd_nb.h
+++ b/arch/x86/include/asm/amd_nb.h
@@ -12,6 +12,8 @@ extern void amd_flush_garts(void);
extern int amd_get_nodes(struct bootnode *nodes);
extern int amd_numa_init(unsigned long start_pfn, unsigned long end_pfn);
extern int amd_scan_nodes(void);
+extern int amd_get_subcaches(int);
+extern int amd_set_subcaches(int, int);

struct amd_northbridge {
        struct pci_dev *misc;
@@ -27,6 +29,7 @@ extern struct amd_northbridge_info amd_northbridges;

#define AMD_NB_GART                     0x1
#define AMD_NB_L3_INDEX_DISABLE         0x2
+#define AMD_NB_L3_PARTITIONING          0x4

#ifdef CONFIG_AMD_NB

diff --git a/arch/x86/kernel/amd_nb.c b/arch/x86/kernel/amd_nb.c
index aecf12f..4f69b86 100644
--- a/arch/x86/kernel/amd_nb.c
+++ b/arch/x86/kernel/amd_nb.c
@@ -88,6 +88,10 @@ int amd_cache_northbridges(void)
        if (boot_cpu_data.x86 == 0x15)
                amd_northbridges.flags |= AMD_NB_L3_INDEX_DISABLE;

+       /* L3 cache partitioning is supported on family 0x15 */
+       if (boot_cpu_data.x86 == 0x15)
+               amd_northbridges.flags |= AMD_NB_L3_PARTITIONING;
+
        return 0;
}
EXPORT_SYMBOL_GPL(amd_cache_northbridges);
@@ -105,6 +109,57 @@ int __init early_is_amd_nb(u32 device)
return 0;
}

+int amd_get_subcaches(int cpu)
+{
+       struct pci_dev *link = node_to_amd_nb(amd_get_nb_id(cpu))->link;
+       unsigned int mask;
+
+       if (!amd_nb_has_feature(AMD_NB_L3_PARTITIONING))
+               return 0;
+
+       pci_read_config_dword(link, 0x1d4, &mask);
+
+       return (mask >> (4 * cpu_data(cpu).compute_unit_id)) & 0xf;
+}
+
+int amd_set_subcaches(int cpu, int mask)
+{
+       static unsigned int reset, ban;
+       unsigned int reg;
+       struct amd_northbridge *nb = node_to_amd_nb(amd_get_nb_id(cpu));
+
+       if (!amd_nb_has_feature(AMD_NB_L3_PARTITIONING) || mask > 0xf)
+               return -EINVAL;
+
+       /* if necessary, collect reset state of L3 partitioning and BAN mode */
+       if (reset == 0) {
+               pci_read_config_dword(nb->link, 0x1d4, &reset);
+               pci_read_config_dword(nb->misc, 0x1b8, &ban);
+               ban &= 0x180000;
+       }
+
+       /* deactivate BAN mode if any subcaches are to be disabled */
+       if (mask != 0xf) {
+               pci_read_config_dword(nb->misc, 0x1b8, &reg);
+               pci_write_config_dword(nb->misc, 0x1b8, reg & ~0x180000);
+       }
+
+       mask <<= 4 * cpu_data(cpu).compute_unit_id;
+       mask |= (0xf ^ (1 << cpu_data(cpu).compute_unit_id)) << 26;
+
+       pci_write_config_dword(nb->link, 0x1d4, mask);
+
+       /* reset BAN mode if L3 partitioning returned to reset state */
+       pci_read_config_dword(nb->link, 0x1d4, &reg);
+       if (reg == reset) {
+               pci_read_config_dword(nb->misc, 0x1b8, &reg);
+               reg &= ~0x180000;
+               pci_write_config_dword(nb->misc, 0x1b8, reg | ban);
+       }
+
+       return 0;
+}
+
int amd_cache_gart(void)
{
int i;
diff --git a/arch/x86/kernel/cpu/intel_cacheinfo.c b/arch/x86/kernel/cpu/intel_cacheinfo.c
index 9ecf81f..557a927 100644
--- a/arch/x86/kernel/cpu/intel_cacheinfo.c
+++ b/arch/x86/kernel/cpu/intel_cacheinfo.c
@@ -301,8 +301,9 @@ amd_cpuid4(int leaf, union _cpuid4_leaf_eax *eax,

struct _cache_attr {
        struct attribute attr;
-       ssize_t (*show)(struct _cpuid4_info *, char *);
-       ssize_t (*store)(struct _cpuid4_info *, const char *, size_t count);
+       ssize_t (*show)(struct _cpuid4_info *, char *, unsigned int);
+       ssize_t (*store)(struct _cpuid4_info *, const char *, size_t count,
+                        unsigned int);
};

#ifdef CONFIG_AMD_NB
@@ -397,7 +398,8 @@ static ssize_t show_cache_disable(struct _cpuid4_info *this_leaf, char *buf,

#define SHOW_CACHE_DISABLE(slot) \
static ssize_t \
-show_cache_disable_##slot(struct _cpuid4_info *this_leaf, char *buf) \
+show_cache_disable_##slot(struct _cpuid4_info *this_leaf, char *buf, \
+ unsigned int cpu) \
{ \
return show_cache_disable(this_leaf, buf, slot); \
}
@@ -509,7 +511,8 @@ static ssize_t store_cache_disable(struct _cpuid4_info *this_leaf,
#define STORE_CACHE_DISABLE(slot) \
static ssize_t \
store_cache_disable_##slot(struct _cpuid4_info *this_leaf, \
- const char *buf, size_t count) \
+ const char *buf, size_t count, \
+ unsigned int cpu) \
{ \
return store_cache_disable(this_leaf, buf, count, slot); \
}
@@ -521,6 +524,41 @@ static struct _cache_attr cache_disable_0 = __ATTR(cache_disable_0, 0644,
static struct _cache_attr cache_disable_1 = __ATTR(cache_disable_1, 0644,
show_cache_disable_1, store_cache_disable_1);

+static ssize_t
+show_subcaches(struct _cpuid4_info *this_leaf, char *buf, unsigned int cpu)
+{
+       if (!this_leaf->l3 ||
+           !amd_nb_has_feature(AMD_NB_L3_PARTITIONING))
+               return -EINVAL;
+
+       return sprintf(buf, "%x\n", amd_get_subcaches(cpu));
+}
+
+static ssize_t
+store_subcaches(struct _cpuid4_info *this_leaf, const char *buf, size_t count,
+               unsigned int cpu)
+{
+       unsigned long val;
+
+       if (!capable(CAP_SYS_ADMIN))
+               return -EPERM;
+
+       if (!this_leaf->l3 ||
+           !amd_nb_has_feature(AMD_NB_L3_PARTITIONING))
+               return -EINVAL;
+
+       if (strict_strtoul(buf, 16, &val) < 0)
+               return -EINVAL;
+
+       if (amd_set_subcaches(cpu, val))
+               return -EINVAL;
+
+       return count;
+}
+
+static struct _cache_attr subcaches =
+       __ATTR(subcaches, 0644, show_subcaches, store_subcaches);
+
#else /* CONFIG_AMD_NB */
#define amd_init_l3_cache(x, y)
#endif /* CONFIG_AMD_NB */
@@ -867,8 +905,8 @@ static DEFINE_PER_CPU(struct _index_kobject *, ici_index_kobject);
#define INDEX_KOBJECT_PTR(x, y) (&((per_cpu(ici_index_kobject, x))[y]))

#define show_one_plus(file_name, object, val) \
-static ssize_t show_##file_name \
- (struct _cpuid4_info *this_leaf, char *buf) \
+static ssize_t show_##file_name(struct _cpuid4_info *this_leaf, char *buf, \
+ unsigned int cpu) \
{ \
return sprintf(buf, "%lu\n", (unsigned long)this_leaf->object + val); \
}
@@ -879,7 +917,8 @@ show_one_plus(physical_line_partition, ebx.split.physical_line_partition, 1);
show_one_plus(ways_of_associativity, ebx.split.ways_of_associativity, 1);
show_one_plus(number_of_sets, ecx.split.number_of_sets, 1);

-static ssize_t show_size(struct _cpuid4_info *this_leaf, char *buf)
+static ssize_t show_size(struct _cpuid4_info *this_leaf, char *buf,
+ unsigned int cpu)
{
return sprintf(buf, "%luK\n", this_leaf->size / 1024);
}
@@ -903,17 +942,20 @@ static ssize_t show_shared_cpu_map_func(struct _cpuid4_info *this_leaf,
return n;
}

-static inline ssize_t show_shared_cpu_map(struct _cpuid4_info *leaf, char *buf)
+static inline ssize_t show_shared_cpu_map(struct _cpuid4_info *leaf, char *buf,
+ unsigned int cpu)
{
return show_shared_cpu_map_func(leaf, 0, buf);
}

-static inline ssize_t show_shared_cpu_list(struct _cpuid4_info *leaf, char *buf)
+static inline ssize_t show_shared_cpu_list(struct _cpuid4_info *leaf, char *buf,
+ unsigned int cpu)
{
return show_shared_cpu_map_func(leaf, 1, buf);
}

-static ssize_t show_type(struct _cpuid4_info *this_leaf, char *buf)
+static ssize_t show_type(struct _cpuid4_info *this_leaf, char *buf,
+ unsigned int cpu)
{
switch (this_leaf->eax.split.type) {
case CACHE_TYPE_DATA:
@@ -971,6 +1013,9 @@ static struct attribute ** __cpuinit amd_l3_attrs(void)
        if (amd_nb_has_feature(AMD_NB_L3_INDEX_DISABLE))
                n += 2;

+       if (amd_nb_has_feature(AMD_NB_L3_PARTITIONING))
+               n += 1;
+
        attrs = kzalloc(n * sizeof (struct attribute *), GFP_KERNEL);
        if (attrs == NULL)
                return attrs = default_attrs;
@@ -983,6 +1028,10 @@ static struct attribute ** __cpuinit amd_l3_attrs(void)
                attrs[n++] = &cache_disable_1.attr;
        }

+       if (amd_nb_has_feature(AMD_NB_L3_PARTITIONING)) {
+               attrs[n++] = &subcaches.attr;
+       }
+
        return attrs;
}
#endif
@@ -995,7 +1044,7 @@ static ssize_t show(struct kobject *kobj, struct attribute *attr, char *buf)

ret = fattr->show ?
fattr->show(CPUID4_INFO_IDX(this_leaf->cpu, this_leaf->index),
- buf) :
+ buf, this_leaf->cpu) :
0;
return ret;
}
@@ -1009,7 +1058,7 @@ static ssize_t store(struct kobject *kobj, struct attribute *attr,

ret = fattr->store ?
fattr->store(CPUID4_INFO_IDX(this_leaf->cpu, this_leaf->index),
- buf, count) :
+ buf, count, this_leaf->cpu) :
0;
return ret;
}
--
1.5.6.5

2010-12-20 17:14:13

by Hans Rosenfeld

Subject: [PATCH 3/4] x86, amd: Extend AMD northbridge caching code to support "Link Control" devices

"Link Control" devices (NB function 4) will be used by L3 cache
partitioning on family 0x15.
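
For context, a cached link device is meant to be used just like the
existing misc device. A minimal sketch of a typical access (mirroring
how the following patch reads the L3 partitioning register at 0x1d4;
the helper name read_link_reg is made up for illustration):

/* Sketch: read a config register from the "Link Control" function of
 * the northbridge that a given CPU belongs to. Assumes
 * amd_cache_northbridges() has run and found a link device.
 */
static u32 read_link_reg(int cpu, int offset)
{
        struct amd_northbridge *nb = node_to_amd_nb(amd_get_nb_id(cpu));
        u32 val = 0;

        if (nb && nb->link)
                pci_read_config_dword(nb->link, offset, &val);
        return val;
}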

Signed-off-by: Hans Rosenfeld <[email protected]>
---
 arch/x86/include/asm/amd_nb.h |    1 +
 arch/x86/kernel/amd_nb.c      |   11 +++++++++--
 include/linux/pci_ids.h       |    1 +
 3 files changed, 11 insertions(+), 2 deletions(-)

diff --git a/arch/x86/include/asm/amd_nb.h b/arch/x86/include/asm/amd_nb.h
index 6aee50d..b830c7f 100644
--- a/arch/x86/include/asm/amd_nb.h
+++ b/arch/x86/include/asm/amd_nb.h
@@ -15,6 +15,7 @@ extern int amd_scan_nodes(void);

struct amd_northbridge {
        struct pci_dev *misc;
+       struct pci_dev *link;
};

struct amd_northbridge_info {
diff --git a/arch/x86/kernel/amd_nb.c b/arch/x86/kernel/amd_nb.c
index 4ce87c2..aecf12f 100644
--- a/arch/x86/kernel/amd_nb.c
+++ b/arch/x86/kernel/amd_nb.c
@@ -20,6 +20,11 @@ struct pci_device_id amd_nb_misc_ids[] = {
};
EXPORT_SYMBOL(amd_nb_misc_ids);

+static struct pci_device_id amd_nb_link_ids[] = {
+       { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_15H_NB_LINK) },
+       {}
+};
+
struct amd_northbridge_info amd_northbridges;
EXPORT_SYMBOL(amd_northbridges);

@@ -38,7 +43,7 @@ int amd_cache_northbridges(void)
{
        int i = 0;
        struct amd_northbridge *nb;
-       struct pci_dev *misc;
+       struct pci_dev *misc, *link;

        if (amd_nb_num())
                return 0;
@@ -57,10 +62,12 @@ int amd_cache_northbridges(void)
        amd_northbridges.nb = nb;
        amd_northbridges.num = i;

-       misc = NULL;
+       link = misc = NULL;
        for (i = 0; i != amd_nb_num(); i++) {
                node_to_amd_nb(i)->misc = misc =
                        next_northbridge(misc, amd_nb_misc_ids);
+               node_to_amd_nb(i)->link = link =
+                       next_northbridge(link, amd_nb_link_ids);
        }

/* some CPU families (e.g. family 0x11) do not support GART */
diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h
index c6bcfe9..dd8be13 100644
--- a/include/linux/pci_ids.h
+++ b/include/linux/pci_ids.h
@@ -518,6 +518,7 @@
#define PCI_DEVICE_ID_AMD_11H_NB_MISC 0x1303
#define PCI_DEVICE_ID_AMD_11H_NB_LINK 0x1304
#define PCI_DEVICE_ID_AMD_15H_NB_MISC 0x1603
+#define PCI_DEVICE_ID_AMD_15H_NB_LINK 0x1604
#define PCI_DEVICE_ID_AMD_LANCE 0x2000
#define PCI_DEVICE_ID_AMD_LANCE_HOME 0x2001
#define PCI_DEVICE_ID_AMD_SCSI 0x2020
--
1.5.6.5

2010-12-20 17:14:25

by Hans Rosenfeld

Subject: [PATCH 2/4] x86, amd: Enable L3 cache index disable on family 0x15

AMD family 0x15 CPUs support L3 cache index disable, so enable it on
them.

Signed-off-by: Hans Rosenfeld <[email protected]>
---
 arch/x86/kernel/amd_nb.c |    3 +++
 1 files changed, 3 insertions(+), 0 deletions(-)

diff --git a/arch/x86/kernel/amd_nb.c b/arch/x86/kernel/amd_nb.c
index affacb5..4ce87c2 100644
--- a/arch/x86/kernel/amd_nb.c
+++ b/arch/x86/kernel/amd_nb.c
@@ -78,6 +78,9 @@ int amd_cache_northbridges(void)
                    boot_cpu_data.x86_mask >= 0x1))
                amd_northbridges.flags |= AMD_NB_L3_INDEX_DISABLE;

+       if (boot_cpu_data.x86 == 0x15)
+               amd_northbridges.flags |= AMD_NB_L3_INDEX_DISABLE;
+
        return 0;
}
EXPORT_SYMBOL_GPL(amd_cache_northbridges);
--
1.5.6.5

2010-12-20 17:29:11

by Hans Rosenfeld

Subject: [PATCH 1/4] x86, amd: Normalize compute unit IDs on multi-node processors

From: Andreas Herrmann <[email protected]>

On multi-node CPUs we don't need the socket-wide compute unit ID but
the node-wide compute unit ID. Thus we need to normalize the value.
This is similar to what we do with cpu_core_id.

A compute unit is then identified by physical_package_id, node_id, and
compute_unit_id.
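
A worked example may help (hypothetical topology, numbers chosen for
illustration only): on a 2-node package with 16 cores and 2 cores per
compute unit, cores_per_node is 8 and cus_per_node is 4, so socket-wide
compute unit IDs 0..7 map to node-wide IDs 0..3. A sketch of the
arithmetic:

#include <stdio.h>

int main(void)
{
        /* Hypothetical multi-node family 0x15 package. */
        unsigned int x86_max_cores = 16;        /* cores per package   */
        unsigned int nodes = 2;                 /* nodes per package   */
        unsigned int cores_per_cu = 2;          /* cores per comp unit */

        unsigned int cores_per_node = x86_max_cores / nodes;        /* 8 */
        unsigned int cus_per_node = cores_per_node / cores_per_cu;  /* 4 */

        /* Socket-wide IDs normalized to node-wide ranges: */
        unsigned int compute_unit_id = 6 % cus_per_node;     /* 6 -> 2  */
        unsigned int cpu_core_id = 13 % cores_per_node;      /* 13 -> 5 */

        printf("cu_id=%u core_id=%u\n", compute_unit_id, cpu_core_id);
        return 0;
}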

Signed-off-by: Andreas Herrmann <[email protected]>
---
 arch/x86/kernel/cpu/amd.c |    8 ++++++--
 arch/x86/kernel/smpboot.c |    1 +
 2 files changed, 7 insertions(+), 2 deletions(-)

diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index 9e093f8..9645604 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -261,7 +261,7 @@ static int __cpuinit nearby_node(int apicid)
#ifdef CONFIG_X86_HT
static void __cpuinit amd_get_topology(struct cpuinfo_x86 *c)
{
-       u32 nodes;
+       u32 nodes, cores_per_cu;
        u8 node_id;
        int cpu = smp_processor_id();

@@ -276,6 +276,7 @@ static void __cpuinit amd_get_topology(struct cpuinfo_x86 *c)
                /* get compute unit information */
                smp_num_siblings = ((ebx >> 8) & 3) + 1;
                c->compute_unit_id = ebx & 0xff;
+               cores_per_cu = ((ebx >> 8) & 3) + 1;
        } else if (cpu_has(c, X86_FEATURE_NODEID_MSR)) {
                u64 value;

@@ -288,15 +289,18 @@ static void __cpuinit amd_get_topology(struct cpuinfo_x86 *c)
        /* fixup multi-node processor information */
        if (nodes > 1) {
                u32 cores_per_node;
+               u32 cus_per_node;

                set_cpu_cap(c, X86_FEATURE_AMD_DCM);
                cores_per_node = c->x86_max_cores / nodes;
+               cus_per_node = cores_per_node / cores_per_cu;

                /* store NodeID, use llc_shared_map to store sibling info */
                per_cpu(cpu_llc_id, cpu) = node_id;

                /* core id to be in range from 0 to (cores_per_node - 1) */
-               c->cpu_core_id = c->cpu_core_id % cores_per_node;
+               c->cpu_core_id %= cores_per_node;
+               c->compute_unit_id %= cus_per_node;
        }
}
#endif
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 083e99d..3bc0435 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -417,6 +417,7 @@ void __cpuinit set_cpu_sibling_map(int cpu)

                if (cpu_has(c, X86_FEATURE_TOPOEXT)) {
                        if (c->phys_proc_id == o->phys_proc_id &&
+                           per_cpu(cpu_llc_id, cpu) == per_cpu(cpu_llc_id, i) &&
                            c->compute_unit_id == o->compute_unit_id)
                                link_thread_siblings(cpu, i);
                } else if (c->phys_proc_id == o->phys_proc_id &&
--
1.5.6.5

2011-02-01 15:15:11

by Hans Rosenfeld

Subject: Re: [PATCH 4/4] x86, amd: Support L3 Cache Partitioning on AMD family 0x15 CPUs

On Thu, Jan 27, 2011 at 07:47:56AM -0500, Ingo Molnar wrote:
> > The problem is that cpuinfo_x86.compute_unit_id etc. don't exist unless
> > CONFIG_SMP is enabled. I don't think there is any reason why this should
> > be that way, but changing this just for this particular L3 feature seems
> > too intrusive. Do you really want me to do that?
>
> All the CONFIG_X86_HT #ifdefs in arch/x86/kernel/cpu/amd.c look pretty ugly too -
> and it's not really a properly modularized solution.
>
> We generally want to unify the SMP and UP kernels as much as possible. 'CONFIG_SMP'
> is not really a property of the hardware, it's a property of the software.
>
> If some topology information should be excluded then it can already be done by
> turning off CONFIG_CPU_SUP_AMD under CONFIG_EXPERT.

I see several solutions to resolve this issue:

1. Remove #ifdef CONFIG_SMP around compute_unit_id in struct cpuinfo_x86
and then use my original patch. This would work without introducing
new #ifdef ugliness with the L3 cache partitioning, but it would
increase #ifdef ugliness in struct cpuinfo_x86. Also, in non-SMP
kernels compute_unit_id would just happen to be initialized to 0, and
no other code would use it. L3 cache partitioning would be the first
SMP-specific feature to be available in non-SMP kernels (see the
sketch at the end of this mail).

2. Same as #1, but remove CONFIG_SMP completely from struct cpuinfo_x86.
This would mean less #ifdef ugliness there, but then we would have a
bunch of unused fields in there in non-SMP kernels, which would also
just be initialized to 0. I don't think that would be correct for
booted_cores, but as it is unused I don't see an immediate problem
with that. Of course, this is also neither correct nor less ugly.

3. Same as #2, but also rework all code using those fields to be usable
on non-SMP kernels. This would be essentially a rework of all that
CONFIG_SMP stuff, and I think that's too much to ask for just for a
little extra L3 feature.

Maybe I'm missing something here, but I don't see how this could be
done cleanly in any other way at this time.

Of course, you could just take the modified patch I sent you. That would
be ugly, but not more so than the existing code. If this is not
acceptable, please tell me which of the other two ugly solutions you
would prefer.
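
For reference, option 1 would amount to something like this (an
abridged sketch of struct cpuinfo_x86 with surrounding fields elided,
not a patch):

struct cpuinfo_x86 {
        /* ... existing fields ... */

        /* Compute unit id; always present, reads as 0 on UP kernels */
        u8                      compute_unit_id;

#ifdef CONFIG_SMP
        /* ... remaining SMP-only topology fields, e.g. booted_cores ... */
#endif
        /* ... */
};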


Hans


--
%SYSTEM-F-ANARCHISM, The operating system has been overthrown

2011-02-04 22:08:07

by Andrew Morton

Subject: Re: [PATCH 1/4] x86, amd: Normalize compute unit IDs on multi-node processors

On Mon, 24 Jan 2011 16:05:40 +0100
Hans Rosenfeld <[email protected]> wrote:

> From: Andreas Herrmann <[email protected]>
>
> On multi-node CPUs we don't need the socket-wide compute unit ID but
> the node-wide compute unit ID. Thus we need to normalize the value.
> This is similar to what we do with cpu_core_id.
>
> A compute unit is then identified by physical_package_id, node_id, and
> compute_unit_id.
>
> ...
>
> --- a/arch/x86/kernel/cpu/amd.c
> +++ b/arch/x86/kernel/cpu/amd.c
> @@ -261,7 +261,7 @@ static int __cpuinit nearby_node(int apicid)
> #ifdef CONFIG_X86_HT
> static void __cpuinit amd_get_topology(struct cpuinfo_x86 *c)
> {
> -       u32 nodes;
> +       u32 nodes, cores_per_cu;
>         u8 node_id;
>         int cpu = smp_processor_id();
>
> @@ -276,6 +276,7 @@ static void __cpuinit amd_get_topology(struct cpuinfo_x86 *c)
>                 /* get compute unit information */
>                 smp_num_siblings = ((ebx >> 8) & 3) + 1;
>                 c->compute_unit_id = ebx & 0xff;
> +               cores_per_cu = ((ebx >> 8) & 3) + 1;
>         } else if (cpu_has(c, X86_FEATURE_NODEID_MSR)) {
>                 u64 value;
>
> @@ -288,15 +289,18 @@ static void __cpuinit amd_get_topology(struct cpuinfo_x86 *c)
>         /* fixup multi-node processor information */
>         if (nodes > 1) {
>                 u32 cores_per_node;
> +               u32 cus_per_node;
>
>                 set_cpu_cap(c, X86_FEATURE_AMD_DCM);
>                 cores_per_node = c->x86_max_cores / nodes;
> +               cus_per_node = cores_per_node / cores_per_cu;
>
>                 /* store NodeID, use llc_shared_map to store sibling info */
>                 per_cpu(cpu_llc_id, cpu) = node_id;
>
>                 /* core id to be in range from 0 to (cores_per_node - 1) */
> -               c->cpu_core_id = c->cpu_core_id % cores_per_node;
> +               c->cpu_core_id %= cores_per_node;
> +               c->compute_unit_id %= cus_per_node;
>         }
> }
> #endif

arch/x86/kernel/cpu/amd.c: In function 'init_amd':
arch/x86/kernel/cpu/amd.c:268: warning: 'cores_per_cu' may be used uninitialized in this function

The code looks buggy to me.
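
The warning is real: cores_per_cu is assigned only in the TOPOEXT
branch, so a multi-node CPU taking the NODEID_MSR path would reach the
division with an uninitialized value. A minimal sketch of one possible
fix (illustrative; not necessarily the change that was eventually
merged) is to give the variable a safe default:

        u32 nodes, cores_per_cu = 1;    /* default when TOPOEXT is absent */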