Note, this code is identical in 2.4 so this fix applies there as well.
A user reported that the x87 "tag word" value reported by PTRACE_GETFPREGS
did not match what the "fnsave" instruction stores for the same FPU state.
It turns out to be a bug in the conversion from fxsave format to fnsave
format. (This can also bite interrupted FPU state restored by a signal
handler.) The tag bits (in both formats) are stored in physical register
order, though the register contents are stored in x87 register stack order.
This is barely mentioned in the processor manuals, and easy to overlook.
It's even more confusing when you read the AMD64 manuals, which erroneously
claim that fxsave stores the register contents in physical order as well.
Fortunately, only the manuals differ and all the chips actually agree.
Signed-off-by: Roland McGrath <[email protected]>
--- linux-2.6/arch/i386/kernel/i387.c
+++ linux-2.6/arch/i386/kernel/i387.c
@@ -111,6 +111,7 @@ static inline unsigned short twd_i387_to
static inline unsigned long twd_fxsr_to_i387( struct i387_fxsave_struct *fxsave )
{
struct _fpxreg *st = NULL;
+ const unsigned int tos = (fxsave->swd >> 11) & 7;
unsigned long twd = (unsigned long) fxsave->twd;
unsigned long tag;
unsigned long ret = 0xffff0000u;
@@ -120,7 +121,10 @@ static inline unsigned long twd_fxsr_to_
for ( i = 0 ; i < 8 ; i++ ) {
if ( twd & 0x1 ) {
- st = (struct _fpxreg *) FPREG_ADDR( fxsave, i );
+ /* The tag bits are saved in physical order,
+ but the registers are saved in stack order. */
+ st = (struct _fpxreg *) FPREG_ADDR(fxsave,
+ (i + 8 - tos) & 7);
switch ( st->exponent & 0x7fff ) {
case 0x7fff:
--- linux-2.6/arch/x86_64/ia32/fpu32.c
+++ linux-2.6/arch/x86_64/ia32/fpu32.c
@@ -28,6 +28,7 @@ static inline unsigned short twd_i387_to
static inline unsigned long twd_fxsr_to_i387(struct i387_fxsave_struct *fxsave)
{
struct _fpxreg *st = NULL;
+ const unsigned int tos = (fxsave->swd >> 11) & 7;
unsigned long twd = (unsigned long) fxsave->twd;
unsigned long tag;
unsigned long ret = 0xffff0000;
@@ -37,7 +38,10 @@ static inline unsigned long twd_fxsr_to_
for (i = 0 ; i < 8 ; i++) {
if (twd & 0x1) {
- st = (struct _fpxreg *) FPREG_ADDR( fxsave, i );
+ /* The tag bits are saved in physical order,
+ but the registers are saved in stack order. */
+ st = (struct _fpxreg *) FPREG_ADDR(fxsave,
+ (i + 8 - tos) & 7);
switch (st->exponent & 0x7fff) {
case 0x7fff:
On Sun, 16 Jan 2005, Roland McGrath wrote:
>
> Note, this code is identical in 2.4 so this fix applies there as well.
Uhh.. "(i + 8 - tos) & 7" makes no sense.
It's not only mathematically the same as "(i - tos) & 7", but I don't even
see how you got to the "+ 8" in the first place..
Also, we're not using C++, so we don't do the "const" part of
const unsigned int tos = ...
thing. So suggested version with trivial fixes appended (technically, you
don't need the "& 7" on the tos calculation either, but hey, at least that
one I see why it's there, and agree with - the compiler may well be able
to optimize both of these things away, so keeping the source in its
"logical" format makes sense). Does this still work for you?
> Fortunately, only the manuals differ and all the chips actually agree.
Hmm.
What happens if it was in MMX mode? If I remember correctly, different
CPU's do different things for MMX (some rotate them so that "mmx0" is
always the same as "st(0)", some don't, and/or have separate hw registers
altogether for it)? I realize we don't probably care, I'm just wondering
if somebody knows...
Linus
----
===== arch/i386/kernel/i387.c 1.22 vs edited =====
--- 1.22/arch/i386/kernel/i387.c 2004-10-28 00:39:55 -07:00
+++ edited/arch/i386/kernel/i387.c 2005-01-16 19:38:28 -08:00
@@ -111,16 +111,17 @@
static inline unsigned long twd_fxsr_to_i387( struct i387_fxsave_struct *fxsave )
{
struct _fpxreg *st = NULL;
+ unsigned int tos = (fxsave->swd >> 11) & 7;
unsigned long twd = (unsigned long) fxsave->twd;
unsigned long tag;
unsigned long ret = 0xffff0000u;
int i;
-#define FPREG_ADDR(f, n) ((char *)&(f)->st_space + (n) * 16);
+#define FPREG_ADDR(f, n) ((void *)&(f)->st_space + (n) * 16);
for ( i = 0 ; i < 8 ; i++ ) {
if ( twd & 0x1 ) {
- st = (struct _fpxreg *) FPREG_ADDR( fxsave, i );
+ st = FPREG_ADDR( fxsave, (i - tos) & 7 );
switch ( st->exponent & 0x7fff ) {
case 0x7fff:
===== arch/x86_64/ia32/fpu32.c 1.8 vs edited =====
--- 1.8/arch/x86_64/ia32/fpu32.c 2004-05-30 12:49:35 -07:00
+++ edited/arch/x86_64/ia32/fpu32.c 2005-01-16 19:39:20 -08:00
@@ -28,16 +28,17 @@
static inline unsigned long twd_fxsr_to_i387(struct i387_fxsave_struct *fxsave)
{
struct _fpxreg *st = NULL;
+ unsigned long tos = (fxsave->swd >> 11) & 7;
unsigned long twd = (unsigned long) fxsave->twd;
unsigned long tag;
unsigned long ret = 0xffff0000;
int i;
-#define FPREG_ADDR(f, n) ((char *)&(f)->st_space + (n) * 16);
+#define FPREG_ADDR(f, n) ((void *)&(f)->st_space + (n) * 16);
for (i = 0 ; i < 8 ; i++) {
if (twd & 0x1) {
- st = (struct _fpxreg *) FPREG_ADDR( fxsave, i );
+ st = FPREG_ADDR( fxsave, (i - tos) & 7 );
switch (st->exponent & 0x7fff) {
case 0x7fff:
> Also, we're not using C++, so we don't do the "const" part of
Or C89, apparently.
> What happens if it was in MMX mode?
I don't really know anything about that stuff, but what the manuals say is
that using any MMX instruction resets the tag bits and clears the TOS field.
So the patch doesn't change anything for that case (tos == 0).
> If I remember correctly, different CPU's do different things for MMX
> (some rotate them so that "mmx0" is always the same as "st(0)", some
> don't, and/or have separate hw registers altogether for it)? I realize we
> don't probably care, I'm just wondering if somebody knows...
I don't really know, but I have two flavors of manual in front of me.
I suspect you remember incorrectly, or perhaps in the past there was
something like that. AFAICT, it's always the case that MMX clears the TOS
bits and mmx0 = physical fpr0, which also = st(0) since TOS = 0.
Thanks,
Roland