I need to use MMX and/or SSE registers in my code.
I have looked at arch/i386/lib/mmx.c, and
kernel_fpu_begin();
muck_with_MMX_regs();
kernel_fpu_end();
seems to be the way to go.
My code is in a .S file, so I did the equivalent
(see <------), but the routine crashed and burned
as soon as it reached kernel_fpu_begin.
Commenting out KERNEL_FPU_BEGIN made the code work
(I was able to boot with root=/dev/fd0),
but it isn't usable that way (it will trash userspace
FP/MMX/SSE registers).
Does it have anything to do with the fact that I'm
using this code from csum_partial[_copy_generic]?
--
vda
...
# KERNEL_FPU_BEGIN: expands to a bare `call kernel_fpu_begin`.
# Nothing is saved or restored around the call, so at the expansion site
# every register the callee is permitted to clobber must be treated as
# dead afterwards.
#define KERNEL_FPU_BEGIN \
call kernel_fpu_begin
# KERNEL_FPU_END(r): r is a 32-bit scratch register and is overwritten.
# Reads %cr0 into r, ORs in mask 8 (bit 3, the TS / Task Switched flag of
# CR0 on x86) and writes the result back to %cr0. The orl also changes
# the arithmetic flags.
# NOTE(review): this open-codes the CR0 update rather than calling a
# kernel_fpu_end helper - confirm it matches what the C side does.
#define KERNEL_FPU_END(r) \
movl %cr0, r ;\
orl $8, r ;\
movl r, %cr0
...
# "big chunks" loop
# Fragment of a larger routine; register setup is outside this excerpt.
# As used below: %esi is the read pointer, %edi the store pointer, %eax
# the running sum (accumulated with adcl) and %ecx the iteration count
# consumed by the `loop` instruction.
# PREFETCH, SRC, DST and ITER_SZ are defined outside this excerpt.
PREFETCH((%esi)) # Prefetch _each_ cacheline
PREFETCH(32(%esi))
PREFETCH(64(%esi))
PREFETCH(64+32(%esi))
PREFETCH(128(%esi))
PREFETCH(128+32(%esi))
PREFETCH(192(%esi))
PREFETCH(192+32(%esi))
# NOTE(review): the macro below is a bare `call` into C code. The C calling
# convention lets the callee clobber caller-saved registers, and nothing
# here saves or restores %eax (running sum) or %ecx (loop count), both of
# which are live across this point.
# NOTE(review): this path may also be reached from interrupt context,
# where kernel_fpu_begin must not be used - confirm against the callers.
KERNEL_FPU_BEGIN <--------------
# Start the add-with-carry chain with CF = 0.
clc
# ROUND(x,r): load 8 bytes from x(%esi) into r, add both 32-bit halves of
# the same source location into %eax with carry, then store r to x(%edi)
# using the non-temporal movntq.
# NOTE(review): SRC()/DST() presumably attach fault-handling annotations
# to the load and the store - verify against their definitions.
#define ROUND(x,r) \
SRC( movq x(%esi), r ); \
adcl x(%esi), %eax ; \
adcl x+4(%esi), %eax ; \
DST( movntq r, x(%edi) );
10:
PREFETCH(256(%esi))
# NOTE(review): ROUND is declared with two parameters but this first use
# passes only one; presumably offset 0 with %mm0 was intended - confirm.
ROUND(%mm0)
ROUND(8,%mm0)
ROUND(16,%mm0)
ROUND(24,%mm0)
# lea and loop do not modify the flags, so CF from the last adcl carries
# into the first adcl of the next iteration.
# NOTE(review): the four steps above cover offsets 0..31, so ITER_SZ is
# presumably 32 - confirm.
lea ITER_SZ(%esi), %esi
lea ITER_SZ(%edi), %edi
loop 10b
# Fold the final carry into the sum.
adcl $0, %eax
# Fence the weakly-ordered movntq stores before leaving the FPU section.
sfence
# Sets CR0 bit 3 again, using %ebx as the scratch register (overwritten).
KERNEL_FPU_END(%ebx) <------------
> Does it have anything to do with the fact I'm
> using this code from csum_partial[_copy_generic] ?
You cannot use kernel_fpu_begin in interrupt context,
and the csum copy functions can be called from interrupt context
in some cases (e.g. the TCP retransmit timer). You need to check for
in_interrupt() too.
I think the problem in your case is that you don't save/restore
your registers properly around the function call. The C calling
convention allows the callee to clobber some of them (on i386:
%eax, %ecx and %edx), and you don't seem to handle that.
-Andi