2002-01-24 20:55:31

by Martin Wilck

Subject: [PATCH]: Fix MTRR handling on HT CPUs (improved)


On Intel processors with "Jackson" technology (also HT, HyperThreading),
two logical processors share certain registers, in particular the MTRR
registers. The Linux way of manipulating these registers may lead to
inconsistent MTRR settings or total disabling of cache.

The patch fixes this. It is against 2.4.17 and applies cleanly to 2.5.2.

Compared with the similar patch I sent to LK on 2002-01-22, this
patch also accounts for the possibility that one of the two logical CPUs
sharing an MTRR may be offline. It is cleaner, too.

I strongly suspected somebody else must have hit this problem before, but
intensive research turned up nothing. Also my first post on LK
received no "hey, that's old stuff" answer. So here I go.

I suspect there may be more hazards of this sort hidden in the
kernel code when it comes to HT technology.

Regards,
Martin Wilck

--- include/asm-i386/processor.h.orig Wed Jan 23 16:02:43 2002
+++ include/asm-i386/processor.h Wed Jan 23 16:02:51 2002
@@ -90,6 +90,7 @@
#define cpu_has_xmm (test_bit(X86_FEATURE_XMM, boot_cpu_data.x86_capability))
#define cpu_has_fpu (test_bit(X86_FEATURE_FPU, boot_cpu_data.x86_capability))
#define cpu_has_apic (test_bit(X86_FEATURE_APIC, boot_cpu_data.x86_capability))
+#define cpu_has_ht (test_bit(X86_FEATURE_HT, boot_cpu_data.x86_capability))

extern char ignore_irq13;

--- arch/i386/kernel/mtrr.c.orig Wed Jan 23 16:02:43 2002
+++ arch/i386/kernel/mtrr.c Wed Jan 23 16:02:51 2002
@@ -378,7 +378,7 @@
static int arr3_protected;

/* Put the processor into a state where MTRRs can be safely set */
-static void set_mtrr_prepare (struct set_mtrr_context *ctxt)
+static void set_mtrr_prepare (struct set_mtrr_context *ctxt, int doit)
{
unsigned long tmp;

@@ -406,7 +406,8 @@
if ( mtrr_if == MTRR_IF_INTEL ) {
/* Disable MTRRs, and set the default type to uncached */
rdmsr (MTRRdefType_MSR, ctxt->deftype_lo, ctxt->deftype_hi);
- wrmsr (MTRRdefType_MSR, ctxt->deftype_lo & 0xf300UL, ctxt->deftype_hi);
+ if (doit)
+ wrmsr (MTRRdefType_MSR, ctxt->deftype_lo & 0xf300UL, ctxt->deftype_hi);
} else {
/* Cyrix ARRs - everything else were excluded at the top */
tmp = getCx86 (CX86_CCR3);
@@ -416,7 +417,7 @@
} /* End Function set_mtrr_prepare */

/* Restore the processor after a set_mtrr_prepare */
-static void set_mtrr_done (struct set_mtrr_context *ctxt)
+static void set_mtrr_done (struct set_mtrr_context *ctxt, int doit)
{
if ( mtrr_if != MTRR_IF_INTEL && mtrr_if != MTRR_IF_CYRIX_ARR ) {
__restore_flags (ctxt->flags);
@@ -427,7 +428,7 @@
wbinvd();

/* Restore MTRRdefType */
- if ( mtrr_if == MTRR_IF_INTEL ) {
+ if ( mtrr_if == MTRR_IF_INTEL && doit ) {
/* Intel (P6) standard MTRRs */
wrmsr (MTRRdefType_MSR, ctxt->deftype_lo, ctxt->deftype_hi);
} else {
@@ -674,7 +675,7 @@
{
struct set_mtrr_context ctxt;

- if (do_safe) set_mtrr_prepare (&ctxt);
+ if (do_safe) set_mtrr_prepare (&ctxt, 1);
if (size == 0)
{
/* The invalid bit is kept in the mask, so we simply clear the
@@ -688,7 +689,7 @@
wrmsr (MTRRphysMask_MSR (reg), -size << PAGE_SHIFT | 0x800,
(-size & size_and_mask) >> (32 - PAGE_SHIFT));
}
- if (do_safe) set_mtrr_done (&ctxt);
+ if (do_safe) set_mtrr_done (&ctxt, 1);
} /* End Function intel_set_mtrr_up */

static void cyrix_set_arr_up (unsigned int reg, unsigned long base,
@@ -726,13 +727,13 @@
}
}

- if (do_safe) set_mtrr_prepare (&ctxt);
+ if (do_safe) set_mtrr_prepare (&ctxt, 1);
base <<= PAGE_SHIFT;
setCx86(arr, ((unsigned char *) &base)[3]);
setCx86(arr+1, ((unsigned char *) &base)[2]);
setCx86(arr+2, (((unsigned char *) &base)[1]) | arr_size);
setCx86(CX86_RCR_BASE + reg, arr_type);
- if (do_safe) set_mtrr_done (&ctxt);
+ if (do_safe) set_mtrr_done (&ctxt, 1);
} /* End Function cyrix_set_arr_up */

static void amd_set_mtrr_up (unsigned int reg, unsigned long base,
@@ -750,7 +751,7 @@
u32 regs[2];
struct set_mtrr_context ctxt;

- if (do_safe) set_mtrr_prepare (&ctxt);
+ if (do_safe) set_mtrr_prepare (&ctxt, 1);
/*
* Low is MTRR0 , High MTRR 1
*/
@@ -777,7 +778,7 @@
*/
wbinvd();
wrmsr (MSR_K6_UWCCR, regs[0], regs[1]);
- if (do_safe) set_mtrr_done (&ctxt);
+ if (do_safe) set_mtrr_done (&ctxt, 1);
} /* End Function amd_set_mtrr_up */


@@ -788,7 +789,7 @@
struct set_mtrr_context ctxt;
unsigned long low, high;

- if (do_safe) set_mtrr_prepare( &ctxt );
+ if (do_safe) set_mtrr_prepare( &ctxt, 1 );
if (size == 0)
{
/* Disable */
@@ -810,7 +811,7 @@
centaur_mcr[reg].high = high;
centaur_mcr[reg].low = low;
wrmsr (MSR_IDT_MCR0 + reg, low, high);
- if (do_safe) set_mtrr_done( &ctxt );
+ if (do_safe) set_mtrr_done( &ctxt, 1 );
} /* End Function centaur_set_mtrr_up */

static void (*set_mtrr_up) (unsigned int reg, unsigned long base,
@@ -996,6 +997,33 @@
mtrr_type smp_type;
};

+static int may_manipulate_mtrr (int cpu)
+{
+ /*
+ MTRR may be modified
+ 1. If no HT technology present,
+ 2. If the CPU has an even APIC id,
+ 3. If the CPU has an odd APIC id but the corresponding logical
+ CPU with even APIC id is offline.
+ Otherwise, the CPU should leave the MTRRs untouched.
+ */
+
+ int apicid;
+
+ if (!cpu_has_ht)
+ return 1;
+
+ apicid = x86_cpu_to_apicid[cpu];
+ if ((apicid & 1U) == 0)
+ return 1;
+
+ if (x86_apicid_to_cpu[apicid & ~1U] == -1)
+ return 1;
+
+ return 0;
+}
+
+
static void ipi_handler (void *info)
/* [SUMMARY] Synchronisation handler. Executed by "other" CPUs.
[RETURNS] Nothing.
@@ -1003,19 +1031,21 @@
{
struct set_mtrr_data *data = info;
struct set_mtrr_context ctxt;
-
- set_mtrr_prepare (&ctxt);
+ int doit = may_manipulate_mtrr (smp_processor_id());
+
+ set_mtrr_prepare (&ctxt, doit);
/* Notify master that I've flushed and disabled my cache */
atomic_dec (&undone_count);
while (wait_barrier_execute) barrier ();
/* The master has cleared me to execute */
- (*set_mtrr_up) (data->smp_reg, data->smp_base, data->smp_size,
- data->smp_type, FALSE);
+ if (doit)
+ (*set_mtrr_up) (data->smp_reg, data->smp_base, data->smp_size,
+ data->smp_type, FALSE);
/* Notify master CPU that I've executed the function */
atomic_dec (&undone_count);
/* Wait for master to clear me to enable cache and return */
while (wait_barrier_cache_enable) barrier ();
- set_mtrr_done (&ctxt);
+ set_mtrr_done (&ctxt, doit);
} /* End Function ipi_handler */

static void set_mtrr_smp (unsigned int reg, unsigned long base,
@@ -1023,6 +1053,7 @@
{
struct set_mtrr_data data;
struct set_mtrr_context ctxt;
+ int doit = may_manipulate_mtrr (smp_processor_id());

data.smp_reg = reg;
data.smp_base = base;
@@ -1035,20 +1066,21 @@
if (smp_call_function (ipi_handler, &data, 1, 0) != 0)
panic ("mtrr: timed out waiting for other CPUs\n");
/* Flush and disable the local CPU's cache */
- set_mtrr_prepare (&ctxt);
+ set_mtrr_prepare (&ctxt, doit);
/* Wait for all other CPUs to flush and disable their caches */
while (atomic_read (&undone_count) > 0) barrier ();
/* Set up for completion wait and then release other CPUs to change MTRRs*/
atomic_set (&undone_count, smp_num_cpus - 1);
wait_barrier_execute = FALSE;
- (*set_mtrr_up) (reg, base, size, type, FALSE);
+ if (doit)
+ (*set_mtrr_up) (reg, base, size, type, FALSE);
/* Now wait for other CPUs to complete the function */
while (atomic_read (&undone_count) > 0) barrier ();
/* Now all CPUs should have finished the function. Release the barrier to
allow them to re-enable their caches and return from their interrupt,
then enable the local cache and return */
wait_barrier_cache_enable = FALSE;
- set_mtrr_done (&ctxt);
+ set_mtrr_done (&ctxt, doit);
} /* End Function set_mtrr_smp */


@@ -1889,7 +1921,7 @@
struct set_mtrr_context ctxt;
int i;

- set_mtrr_prepare (&ctxt); /* flush cache and enable MAPEN */
+ set_mtrr_prepare (&ctxt, 1); /* flush cache and enable MAPEN */

/* the CCRs are not contiguous */
for(i=0; i<4; i++) setCx86(CX86_CCR0 + i, ccr_state[i]);
@@ -1898,7 +1930,7 @@
cyrix_set_arr_up(i,
arr_state[i].base, arr_state[i].size, arr_state[i].type, FALSE);

- set_mtrr_done (&ctxt); /* flush cache and disable MAPEN */
+ set_mtrr_done (&ctxt, 1); /* flush cache and disable MAPEN */
} /* End Function cyrix_arr_init_secondary */

#endif
@@ -1926,7 +1958,7 @@
int i;
#endif

- set_mtrr_prepare (&ctxt); /* flush cache and enable MAPEN */
+ set_mtrr_prepare (&ctxt, 1); /* flush cache and enable MAPEN */

/* Save all CCRs locally */
ccr[0] = getCx86 (CX86_CCR0);
@@ -1975,7 +2007,7 @@
&arr_state[i].base, &arr_state[i].size, &arr_state[i].type);
#endif

- set_mtrr_done (&ctxt); /* flush cache and disable MAPEN */
+ set_mtrr_done (&ctxt, 1); /* flush cache and disable MAPEN */

if ( ccrc[5] ) printk ("mtrr: ARR usage was not enabled, enabled manually\n");
if ( ccrc[3] ) printk ("mtrr: ARR3 cannot be changed\n");
@@ -2075,14 +2107,14 @@
{
struct set_mtrr_context ctxt;

- set_mtrr_prepare (&ctxt);
+ set_mtrr_prepare (&ctxt, 1);

if(boot_cpu_data.x86_model==4)
centaur_mcr0_init();
else if(boot_cpu_data.x86_model==8 || boot_cpu_data.x86_model == 9)
centaur_mcr1_init();

- set_mtrr_done (&ctxt);
+ set_mtrr_done (&ctxt, 1);
} /* End Function centaur_mcr_init */

static int __init mtrr_setup(void)
@@ -2192,9 +2224,9 @@
/* Note that this is not ideal, since the cache is only flushed/disabled
for this CPU while the MTRRs are changed, but changing this requires
more invasive changes to the way the kernel boots */
- set_mtrr_prepare (&ctxt);
+ set_mtrr_prepare (&ctxt, 1);
mask = set_mtrr_state (&smp_mtrr_state, &ctxt);
- set_mtrr_done (&ctxt);
+ set_mtrr_done (&ctxt, 1);
/* Use the atomic bitops to update the global mask */
for (count = 0; count < sizeof mask * 8; ++count)
{





2002-01-24 21:36:21

by Timothy Covell

Subject: Re: [PATCH]: Fix MTRR handling on HT CPUs (improved)

On Thursday 24 January 2002 14:57, Martin Wilck wrote:
> On Intel processors with "Jackson" technology (also HT, HyperThreading),
> two logical processors share certain registers, in particular the MTRR
> registers. The Linux way of manipulating these registers may lead to
> inconsistent MTRR settings or total disabling of cache.

I thought that Intel was purposely downplaying HT support on the
processors due to performance issues, and that M$ likewise didn't
support it. Is this another case of Linux being out in front of things?
Are you at a point where you can benchmark it and recommend when
to enable it or not?

--
[email protected].

2002-01-24 21:46:42

by Linus Torvalds

Subject: Re: [PATCH]: Fix MTRR handling on HT CPUs (improved)


On Thu, 24 Jan 2002, Martin Wilck wrote:
>
> I strongly suspected somebody else must have hit this problem before, but
> intensive research turned up nothing. Also my first post on LK
> received no "hey, that's old stuff" answer. So here I go.

There is a patch from Asit Mallik floating around, which I've applied in
my tree. Most of us mere mortals can't test it yet, of course.

Linus

2002-01-24 22:30:43

by Dave Jones

Subject: Re: [PATCH]: Fix MTRR handling on HT CPUs (improved)

On Thu, 24 Jan 2002, Linus Torvalds wrote:

> > I strongly suspected somebody else must have hit this problem before, but
> > intensive research turned up nothing. Also my first post on LK
> > received no "hey, that's old stuff" answer. So here I go.
> There is a patch from Asit Mallik floating around, which I've applied in
> my tree. Most of us mere mortals can't test it yet, of course.

There are at least three versions of that patch by different people.
Marcelo pointed me to one from someone at Intel. I suspect they're all
largely the same though.

--
| Dave Jones. http://www.codemonkey.org.uk
| SuSE Labs

2002-01-25 09:35:11

by Martin Wilck

Subject: Re: [PATCH]: Fix MTRR handling on HT CPUs (improved)

On Thu, 24 Jan 2002, Linus Torvalds wrote:

> There is a patch from Asit Mallik floating around, which I've applied in
> my tree. Most of us mere mortals can't test it yet, of course.

The patch looks good. I'll test it presently. If you don't hear anything
from me later today, it means it was successfully tested.

Martin

--
Martin Wilck Phone: +49 5251 8 15113
Fujitsu Siemens Computers Fax: +49 5251 8 20409
Heinz-Nixdorf-Ring 1 mailto:[email protected]
D-33106 Paderborn http://www.fujitsu-siemens.com/primergy





2002-01-25 11:03:17

by Martin Wilck

Subject: Re: [PATCH]: Fix MTRR handling on HT CPUs (improved)


The patch from Intel works fine.
I recommend its inclusion into the official kernels.

--
Martin Wilck Phone: +49 5251 8 15113
Fujitsu Siemens Computers Fax: +49 5251 8 20409
Heinz-Nixdorf-Ring 1 mailto:[email protected]
D-33106 Paderborn http://www.fujitsu-siemens.com/primergy





2002-01-25 17:56:38

by Alan

Subject: Re: [PATCH]: Fix MTRR handling on HT CPUs (improved)

> > intensive research turned up nothing. Also my first post on LK
> > received no "hey, that's old stuff" answer. So here I go.
>
> There is a patch from Asit Mallik floating around, which I've applied in
> my tree. Most of us mere mortals can't test it yet, of course.

2.4 has a nice clean 2 line patch in to cover this, or should have done,
providing it didn't get lost in the merge.

2002-01-25 18:08:01

by Alan

Subject: Re: [PATCH]: Fix MTRR handling on HT CPUs (improved)

> I strongly suspected somebody else must have hit this problem before, but
> intensive research turned up nothing. Also my first post on LK
> received no "hey, that's old stuff" answer. So here I go.

A tiny patch was posted about 4-6 months ago

The patch is total overkill. Just remove the error reporting if the right
firmware was already loaded. You've written a fixup wrapper around a
rather nonsensical error check for an existing non-error.

The same will occur with CPU hot plugging in the future as well as ACPI
power saving sometimes and in those cases the patch you posted doesn't
help anyway.

Alan

2002-01-25 18:19:42

by Dave Jones

Subject: Re: [PATCH]: Fix MTRR handling on HT CPUs (improved)

On Fri, Jan 25, 2002 at 06:08:34PM +0000, Alan Cox wrote:
> 2.4 has a nice clean 2 line patch in to cover this, or should have done,
> providing it didn't get lost in the merge.

The microcode driver fix was ~2 lines. The MTRR ones I've seen
have been a little more involved. Unless you saw something I
didn't..

--
| Dave Jones. http://www.codemonkey.org.uk
| SuSE Labs

2002-01-25 18:30:33

by Linus Torvalds

Subject: Re: [PATCH]: Fix MTRR handling on HT CPUs (improved)


On Fri, 25 Jan 2002, Alan Cox wrote:
>
> > I strongly suspected somebody else must have hit this problem before, but
> > intensive research turned up nothing. Also my first post on LK
> > received no "hey, that's old stuff" answer. So here I go.
>
> A tiny patch was posted about 4-6 months ago
>
> The patch is total overkill. Just remove the error reporting if the right
> firmware was already loaded. You've written a fixup wrapper around a
> rather nonsensical error check for an existing non-error.

You are making the same mistake I did when I saw the patch.

This is the _MTRR_ setting, not the microcode loading.

They both had the same issues with HT - and the microcode fix was indeed
just to make sure that the microcode hadn't already been loaded (together
with some locking).

The Intel MTRR patch is similar - add some locking, and add some logic to
just not do it on the right CPU (you're _not_ supposed to read to see if
you are writing the same value: MTRR's can at least in theory have
side-effects, so it's not the same check as for the microcode update).

Linus

2002-01-25 18:42:33

by Alan

Subject: Re: [PATCH]: Fix MTRR handling on HT CPUs (improved)

As Dave pointed out I was mixing them

> just not do it on the right CPU (you're _not_ supposed to read to see if
> you are writing the same value: MTRR's can at least in theory have
> side-effects, so it's not the same check as for the microcode update).

So why not just set it twice - surely that is harmless ? Why add complex
code ?

2002-01-25 18:52:19

by Arjan van de Ven

Subject: Re: [PATCH]: Fix MTRR handling on HT CPUs (improved)

In article <[email protected]> you wrote:
> As Dave pointed out I was mixing them

>> just not do it on the right CPU (you're _not_ supposed to read to see if
>> you are writing the same value: MTRR's can at least in theory have
>> side-effects, so it's not the same check as for the microcode update).

> So why not just set it twice - surely that is harmless ? Why add complex
> code ?

mtrr code does

"read value" -> store in variable
"set uncached"
<do lots of fiddling>
"write stored value"

on all cpus precisely in parallel.

Now... on HT the second logical CPU will store the value the first one just
set to uncached. And worse: it will RESTORE that into the final MTRR.
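
To make the race concrete, here is a minimal userspace sketch (not kernel
code): two threads stand in for the two logical CPUs sharing one MTRR-like
register, and the names and constants are made up for illustration. Because
nothing orders one sibling's save against the other sibling's "set uncached"
write, the second thread can save the uncached value and restore it for good.

#include <pthread.h>
#include <stdio.h>
#include <unistd.h>

#define DEFTYPE_WB       0x0c06   /* illustrative "caching on" value    */
#define DEFTYPE_UNCACHED 0x0000   /* illustrative "everything uncached" */

static volatile unsigned shared_mtrr = DEFTYPE_WB;  /* one shared register */

static void *logical_cpu(void *arg)
{
	unsigned saved;

	(void) arg;
	saved = shared_mtrr;             /* "read value" -> store in variable */
	shared_mtrr = DEFTYPE_UNCACHED;  /* "set uncached"                    */

	usleep(1000);                    /* <do lots of fiddling>             */

	shared_mtrr = saved;             /* "write stored value"              */
	return NULL;
}

int main(void)
{
	pthread_t t1, t2;

	pthread_create(&t1, NULL, logical_cpu, NULL);
	pthread_create(&t2, NULL, logical_cpu, NULL);
	pthread_join(t1, NULL);
	pthread_join(t2, NULL);

	/* If either thread saved 0x0000, "uncached" is restored for good. */
	printf("final MTRR default type: %#x (expected %#x)\n",
	       (unsigned) shared_mtrr, (unsigned) DEFTYPE_WB);
	return 0;
}

Build with "gcc -pthread"; with the usleep() in the middle, one thread will
usually save the uncached value, which is exactly the failure mode described
above.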

2002-01-25 18:58:42

by Dave Jones

Subject: Re: [PATCH]: Fix MTRR handling on HT CPUs (improved)

On Fri, 25 Jan 2002, Alan Cox wrote:

> > just not do it on the right CPU (you're _not_ supposed to read to see if
> > you are writing the same value: MTRR's can at least in theory have
> > side-effects, so it's not the same check as for the microcode update).
> So why not just set it twice - surely that is harmless ? Why add complex
> code ?

I wondered the same thing about the microcode changes.
Since for the common case (i.e., non-HT) it now has the side-effect of
doing this..

microcode: CPU0 updated from revision 7 to 7, date-10151999

which looks odd in comparison to the old "already current" msg.

--
| Dave Jones. http://www.codemonkey.org.uk
| SuSE Labs

2002-01-25 20:56:11

by Linus Torvalds

Subject: Re: [PATCH]: Fix MTRR handling on HT CPUs (improved)


On Fri, 25 Jan 2002, Alan Cox wrote:
> As Dave pointed out I was mixing them
>
> > just not do it on the right CPU (you're _not_ supposed to read to see if
> > you are writing the same value: MTRR's can at least in theory have
> > side-effects, so it's not the same check as for the microcode update).
>
> So why not just set it twice - surely that is harmless ? Why add complex
> code ?

At the _least_ you have to serialize the thing, which is most of what the
patch actually does. Writing the MTRR's in parallel from two different
cores at the same time is just obviously bogus, the same way it is
obviously bogus to try to update the microcode at the same time.

Linus

2002-01-28 08:12:20

by Martin Wilck

Subject: Re: [PATCH]: Fix MTRR handling on HT CPUs (improved)

> > So why not just set it twice - surely that is harmless ? Why add complex
> > code ?
>
> At the _least_ you have to serialize the thing, which is most of what the
> patch actually does.

I suppose you're talking about the Intel patch, not mine, which is
obviously out. I admit it was too complex.

The problem with the original code is that it combines reading the status
of the MTRR default register and disabling the cache in a single step that
is performed simultaneously by all CPUs.
The patch from Intel inserts synchronization between these two steps,
thereby preventing the second CPU from reading a "default" status that in
reality is the "cache disabled" status the first CPU has just set.
This suffices to fix the problem.
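
For illustration only, here is the same userspace sketch as above with the
kind of synchronization described: a barrier between "save the old value"
and "set uncached" guarantees that every thread holds the original value
before any thread clobbers the shared register. Names and constants are
made up; this is a sketch of the idea, not the actual Intel patch.

#include <pthread.h>
#include <stdio.h>
#include <unistd.h>

#define NCPUS            2
#define DEFTYPE_WB       0x0c06
#define DEFTYPE_UNCACHED 0x0000

static volatile unsigned shared_mtrr = DEFTYPE_WB;
static pthread_barrier_t saved_barrier;

static void *logical_cpu(void *arg)
{
	unsigned saved;

	(void) arg;
	saved = shared_mtrr;                  /* step 1: everyone saves first    */

	pthread_barrier_wait(&saved_barrier); /* nobody may write until all      */
	                                      /* threads hold the original value */

	shared_mtrr = DEFTYPE_UNCACHED;       /* step 2: disable caching         */
	usleep(1000);                         /* ... reprogram the MTRRs ...     */
	shared_mtrr = saved;                  /* restore the step-1 value        */
	return NULL;
}

int main(void)
{
	pthread_t t[NCPUS];
	int i;

	pthread_barrier_init(&saved_barrier, NULL, NCPUS);
	for (i = 0; i < NCPUS; i++)
		pthread_create(&t[i], NULL, logical_cpu, NULL);
	for (i = 0; i < NCPUS; i++)
		pthread_join(t[i], NULL);

	/* Every thread saved DEFTYPE_WB, so the final restore is always correct. */
	printf("final MTRR default type: %#x (expected %#x)\n",
	       (unsigned) shared_mtrr, (unsigned) DEFTYPE_WB);
	return 0;
}

This only fixes which value gets restored; as Linus notes above, the writes
themselves still have to be serialized (or confined to one sibling), which
is what the rest of the patch deals with.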

Martin

--
Martin Wilck Phone: +49 5251 8 15113
Fujitsu Siemens Computers Fax: +49 5251 8 20409
Heinz-Nixdorf-Ring 1 mailto:[email protected]
D-33106 Paderborn http://www.fujitsu-siemens.com/primergy





2002-02-13 11:40:38

by Martin Wilck

Subject: SMT, again (was: Re: [PATCH]: Fix MTRR handling on HT CPUs)


I just found out that Intel specifies that on SMT-enabled
("Jackson") systems the BIOS MP tables list only the physical CPUs.
Logical CPUs will only be available through the ACPI tables.

See http://www.intel.com/technology/hyperthread/platform_nexgen/,
in particular sld014.htm, sld021.htm.

This will basically obsolete the patch we were discussing, if BIOS
manufacturers comply with that spec, because Linux will only see the
2 physical CPUs. (The problem we discovered was caused by our BIOS not
complying with the spec.)

Of course, it would also mean that Linux will only run on ~70% of the CPU
power that Win2k/XP systems will have available on such systems.

All my attempts to get ACPI running on our SMT-enabled system have failed
so far (I'm working on a bug report on that for linux-acpi).

A possible workaround would be the "processor affinity algorithm"
sketched in sld021.htm, but it may be unreliable because it overrides
BIOS settings (BIOS-disabled CPUs) that are available only through ACPI.

Sorry to bother you if you knew this already.

Martin

--
Martin Wilck Phone: +49 5251 8 15113
Fujitsu Siemens Computers Fax: +49 5251 8 20409
Heinz-Nixdorf-Ring 1 mailto:[email protected]
D-33106 Paderborn http://www.fujitsu-siemens.com/primergy





2002-02-13 12:02:24

by Alan

Subject: Re: SMT, again (was: Re: [PATCH]: Fix MTRR handling on HT CPUs)

> I just found out that Intel specifies that on SMT-enabled
> ("Jackson") systems the BIOS MP tables list only the physical CPUs.
> Logical CPUs will only be available through the ACPI tables.

Yep. That's why the miniacpi scanning stuff is in the current kernel.

> All my attempts to get ACPI running on our SMT-enabled system have failed
> so far (I'm working on a bug report on that for linux-acpi).

You don't need ACPI itself. See arch/i386/kernel/acpitable.c. Intel contributed
patches to handle this stuff already.

Alan