2005-02-07 06:24:10

by Linus Torvalds

[permalink] [raw]
Subject: out-of-line x86 "put_user()" implementation


I was looking at some of the code we generate, and happened to notice that
we have this strange situation where the x86 "get_user()" macros generate
out-of-line code to do all the address verification etc, but the
"put_user()" ones do not, and do everything inline.

I also noticed that (probably as a result of this), our "put_user()" on
old i386 machines does not do the full magic manual page-following. Which
means that copy-on-write doesn't necessarily work right due to the broken
paging hw on the original 386 core.

I didn't fix the second part, but at least making things out-of-line makes
it possible. And making "put_user()" be out-of-line seemed quite doable.

I no longer use x86 as my main machine, so this patch is totally untested.
I've compiled it to see that things look somewhat sane, but that doesn't
mean much. If I forgot some register or screwed something else up, this
will result in a totally nonworking kernel, but I thought that maybe
somebody else would be interested in looking at whether this (a) works,
(b) migth even shrink the kernel and (c) might make us able to DTRT wrt
the page table following crud (old i386 cores may be hard to find these
days, so maybe people don't care).

Linus

----
# This is a BitKeeper generated diff -Nru style patch.
#
# ChangeSet
# 2005/02/06 22:10:04-08:00 [email protected]
# x86: make "put_user()" be out-of-line
#
# It's really too big to be inlined.
#
# arch/i386/lib/putuser.S
# 2005/02/06 22:09:53-08:00 [email protected] +87 -0
#
# include/asm-i386/uaccess.h
# 2005/02/06 22:09:53-08:00 [email protected] +27 -3
# x86: make "put_user()" be out-of-line
#
# It's really too big to be inlined.
#
# arch/i386/lib/putuser.S
# 2005/02/06 22:09:53-08:00 [email protected] +0 -0
# BitKeeper file /home/torvalds/v2.6/linux/arch/i386/lib/putuser.S
#
# arch/i386/lib/Makefile
# 2005/02/06 22:09:53-08:00 [email protected] +1 -1
# x86: make "put_user()" be out-of-line
#
# It's really too big to be inlined.
#
# arch/i386/kernel/i386_ksyms.c
# 2005/02/06 22:09:53-08:00 [email protected] +5 -0
# x86: make "put_user()" be out-of-line
#
# It's really too big to be inlined.
#
diff -Nru a/arch/i386/kernel/i386_ksyms.c b/arch/i386/kernel/i386_ksyms.c
--- a/arch/i386/kernel/i386_ksyms.c 2005-02-06 22:12:01 -08:00
+++ b/arch/i386/kernel/i386_ksyms.c 2005-02-06 22:12:01 -08:00
@@ -97,6 +97,11 @@
EXPORT_SYMBOL(__get_user_2);
EXPORT_SYMBOL(__get_user_4);

+EXPORT_SYMBOL(__put_user_1);
+EXPORT_SYMBOL(__put_user_2);
+EXPORT_SYMBOL(__put_user_4);
+EXPORT_SYMBOL(__put_user_8);
+
EXPORT_SYMBOL(strpbrk);
EXPORT_SYMBOL(strstr);

diff -Nru a/arch/i386/lib/Makefile b/arch/i386/lib/Makefile
--- a/arch/i386/lib/Makefile 2005-02-06 22:12:01 -08:00
+++ b/arch/i386/lib/Makefile 2005-02-06 22:12:01 -08:00
@@ -3,7 +3,7 @@
#


-lib-y = checksum.o delay.o usercopy.o getuser.o memcpy.o strstr.o \
+lib-y = checksum.o delay.o usercopy.o getuser.o putuser.o memcpy.o strstr.o \
bitops.o

lib-$(CONFIG_X86_USE_3DNOW) += mmx.o
diff -Nru a/arch/i386/lib/putuser.S b/arch/i386/lib/putuser.S
--- /dev/null Wed Dec 31 16:00:00 196900
+++ b/arch/i386/lib/putuser.S 2005-02-06 22:12:01 -08:00
@@ -0,0 +1,87 @@
+/*
+ * __put_user functions.
+ *
+ * (C) Copyright 2005 Linus Torvalds
+ *
+ * These functions have a non-standard call interface
+ * to make them more efficient, especially as they
+ * return an error value in addition to the "real"
+ * return value.
+ */
+#include <asm/thread_info.h>
+
+
+/*
+ * __put_user_X
+ *
+ * Inputs: %eax[:%edx] contains the data
+ * %ecx contains the address
+ *
+ * Outputs: %eax is error code (0 or -EFAULT)
+ * %ecx is corrupted
+ *
+ * These functions should not modify any other registers,
+ * as they get called from within inline assembly.
+ */
+
+#define ENTER pushl %ecx ; pushl %ebx ; GET_THREAD_INFO(%ebx)
+#define EXIT popl %ebx ; popl %ecx ; ret
+
+.text
+.align 4
+.globl __put_user_1
+__put_user_1:
+ ENTER
+ cmpl TI_addr_limit(%ebx),%ecx
+ jae bad_put_user
+1: movb %al,(%ecx)
+ xorl %eax,%eax
+ EXIT
+
+.align 4
+.globl __put_user_2
+__put_user_2:
+ ENTER
+ addl $1,%ecx
+ jc bad_put_user
+ cmpl TI_addr_limit(%ebx),%ecx
+ jae bad_put_user
+2: movw %ax,-1(%ecx)
+ xorl %eax,%eax
+ EXIT
+
+.align 4
+.globl __put_user_4
+__put_user_4:
+ ENTER
+ addl $3,%ecx
+ jc bad_put_user
+ cmpl TI_addr_limit(%ebx),%ecx
+ jae bad_put_user
+3: movl %eax,-3(%ecx)
+ xorl %eax,%eax
+ EXIT
+
+.align 4
+.globl __put_user_8
+__put_user_8:
+ ENTER
+ addl $7,%ecx
+ jc bad_put_user
+ cmpl TI_addr_limit(%ebx),%ecx
+ jae bad_put_user
+3: movl %eax,-7(%ecx)
+4: movl %edx,-3(%ecx)
+ xorl %eax,%eax
+ EXIT
+
+bad_put_user:
+ movl $-14,%eax
+ EXIT
+
+.section __ex_table,"a"
+ .long 1b,bad_put_user
+ .long 2b,bad_put_user
+ .long 3b,bad_put_user
+ .long 4b,bad_put_user
+.previous
diff -Nru a/include/asm-i386/uaccess.h b/include/asm-i386/uaccess.h
--- a/include/asm-i386/uaccess.h 2005-02-06 22:12:01 -08:00
+++ b/include/asm-i386/uaccess.h 2005-02-06 22:12:01 -08:00
@@ -185,6 +185,21 @@

extern void __put_user_bad(void);

+/*
+ * Strange magic calling convention: pointer in %ecx,
+ * value in %eax(:%edx), return value in %eax, no clobbers.
+ */
+extern void __put_user_1(void);
+extern void __put_user_2(void);
+extern void __put_user_4(void);
+extern void __put_user_8(void);
+
+#define __put_user_1(x, ptr) __asm__ __volatile__("call __put_user_1":"=a" (__ret_pu):"0" ((typeof(*(ptr)))(x)), "c" (ptr))
+#define __put_user_2(x, ptr) __asm__ __volatile__("call __put_user_2":"=a" (__ret_pu):"0" ((typeof(*(ptr)))(x)), "c" (ptr))
+#define __put_user_4(x, ptr) __asm__ __volatile__("call __put_user_4":"=a" (__ret_pu):"0" ((typeof(*(ptr)))(x)), "c" (ptr))
+#define __put_user_8(x, ptr) __asm__ __volatile__("call __put_user_8":"=A" (__ret_pu):"0" ((typeof(*(ptr)))(x)), "c" (ptr))
+#define __put_user_X(x, ptr) __asm__ __volatile__("call __put_user_X":"=a" (__ret_pu):"c" (ptr))
+
/**
* put_user: - Write a simple value into user space.
* @x: Value to copy to user space.
@@ -201,9 +216,18 @@
*
* Returns zero on success, or -EFAULT on error.
*/
-#define put_user(x,ptr) \
- __put_user_check((__typeof__(*(ptr)))(x),(ptr),sizeof(*(ptr)))
-
+#define put_user(x,ptr) \
+({ int __ret_pu; \
+ __chk_user_ptr(ptr); \
+ switch(sizeof(*(ptr))) { \
+ case 1: __put_user_1(x, ptr); break; \
+ case 2: __put_user_2(x, ptr); break; \
+ case 4: __put_user_4(x, ptr); break; \
+ case 8: __put_user_8(x, ptr); break; \
+ default:__put_user_X(x, ptr); break; \
+ } \
+ __ret_pu; \
+})

/**
* __get_user: - Get a simple variable from user space, with less checking.


2005-02-07 11:44:29

by Ingo Molnar

[permalink] [raw]
Subject: Re: out-of-line x86 "put_user()" implementation


* Linus Torvalds <[email protected]> wrote:

> I no longer use x86 as my main machine, so this patch is totally
> untested. I've compiled it to see that things look somewhat sane, but
> that doesn't mean much. If I forgot some register or screwed something
> else up, this will result in a totally nonworking kernel, but I
> thought that maybe somebody else would be interested in looking at
> whether this (a) works, (b) migth even shrink the kernel and (c) might
> make us able to DTRT wrt the page table following crud (old i386 cores
> may be hard to find these days, so maybe people don't care).

boots fine and shrinks the image size quite noticeably:

[Nr] Name Type Addr Off Size
[ 1] .text PROGBITS c0100000 001000 2771a9 [vmlinux-orig]
[ 1] .text PROGBITS c0100000 001000 2742dd [vmlinux-patched]

that's 11980 bytes off a 2585001 bytes .text, a 0.5% size reduction.
This patch we want ...

Ingo

2005-02-08 01:20:44

by Linus Torvalds

[permalink] [raw]
Subject: Re: out-of-line x86 "put_user()" implementation



On Mon, 7 Feb 2005, Ingo Molnar wrote:
>
> boots fine and shrinks the image size quite noticeably:
>
> [Nr] Name Type Addr Off Size
> [ 1] .text PROGBITS c0100000 001000 2771a9 [vmlinux-orig]
> [ 1] .text PROGBITS c0100000 001000 2742dd [vmlinux-patched]
>
> that's 11980 bytes off a 2585001 bytes .text, a 0.5% size reduction.
> This patch we want ...

Goodie. Here's a slightly more recent version, where I cleaned up the
assembly code (no need to save %ecx if we just update %ebx instead, which
makes the code a bit more readable too - and doing it this way should
hopefully make it easier for an out-of-order CPU to start the memops
earlier too. Who knows..)

I'm not going to put this into 2.6.11, since I worry about compiler
interactions, but the more people who test it anyway, the better.

Linus

-----
# This is a BitKeeper generated diff -Nru style patch.
#
# ChangeSet
# 2005/02/07 08:14:28-08:00 [email protected]
# x86: make "put_user()" be out-of-line
#
# It's really too big to be inlined.
#
# Ingo tests and reports: this shrinks his kernel text size by
# about 12kB (roughly 0.5%)
#
# arch/i386/lib/putuser.S
# 2005/02/07 08:14:17-08:00 [email protected] +86 -0
#
# include/asm-i386/uaccess.h
# 2005/02/07 08:14:17-08:00 [email protected] +27 -3
# x86: make "put_user()" be out-of-line
#
# arch/i386/lib/putuser.S
# 2005/02/07 08:14:17-08:00 [email protected] +0 -0
# BitKeeper file /home/torvalds/v2.6/linux/arch/i386/lib/putuser.S
#
# arch/i386/lib/Makefile
# 2005/02/07 08:14:17-08:00 [email protected] +1 -1
# x86: make "put_user()" be out-of-line
#
# arch/i386/kernel/i386_ksyms.c
# 2005/02/07 08:14:17-08:00 [email protected] +5 -0
# x86: make "put_user()" be out-of-line
#
diff -Nru a/arch/i386/kernel/i386_ksyms.c b/arch/i386/kernel/i386_ksyms.c
--- a/arch/i386/kernel/i386_ksyms.c 2005-02-07 17:16:32 -08:00
+++ b/arch/i386/kernel/i386_ksyms.c 2005-02-07 17:16:32 -08:00
@@ -97,6 +97,11 @@
EXPORT_SYMBOL(__get_user_2);
EXPORT_SYMBOL(__get_user_4);

+EXPORT_SYMBOL(__put_user_1);
+EXPORT_SYMBOL(__put_user_2);
+EXPORT_SYMBOL(__put_user_4);
+EXPORT_SYMBOL(__put_user_8);
+
EXPORT_SYMBOL(strpbrk);
EXPORT_SYMBOL(strstr);

diff -Nru a/arch/i386/lib/Makefile b/arch/i386/lib/Makefile
--- a/arch/i386/lib/Makefile 2005-02-07 17:16:32 -08:00
+++ b/arch/i386/lib/Makefile 2005-02-07 17:16:32 -08:00
@@ -3,7 +3,7 @@
#


-lib-y = checksum.o delay.o usercopy.o getuser.o memcpy.o strstr.o \
+lib-y = checksum.o delay.o usercopy.o getuser.o putuser.o memcpy.o strstr.o \
bitops.o

lib-$(CONFIG_X86_USE_3DNOW) += mmx.o
diff -Nru a/arch/i386/lib/putuser.S b/arch/i386/lib/putuser.S
--- /dev/null Wed Dec 31 16:00:00 196900
+++ b/arch/i386/lib/putuser.S 2005-02-07 17:16:32 -08:00
@@ -0,0 +1,86 @@
+/*
+ * __put_user functions.
+ *
+ * (C) Copyright 2005 Linus Torvalds
+ *
+ * These functions have a non-standard call interface
+ * to make them more efficient, especially as they
+ * return an error value in addition to the "real"
+ * return value.
+ */
+#include <asm/thread_info.h>
+
+
+/*
+ * __put_user_X
+ *
+ * Inputs: %eax[:%edx] contains the data
+ * %ecx contains the address
+ *
+ * Outputs: %eax is error code (0 or -EFAULT)
+ *
+ * These functions should not modify any other registers,
+ * as they get called from within inline assembly.
+ */
+
+#define ENTER pushl %ebx ; GET_THREAD_INFO(%ebx)
+#define EXIT popl %ebx ; ret
+
+.text
+.align 4
+.globl __put_user_1
+__put_user_1:
+ ENTER
+ cmpl TI_addr_limit(%ebx),%ecx
+ jae bad_put_user
+1: movb %al,(%ecx)
+ xorl %eax,%eax
+ EXIT
+
+.align 4
+.globl __put_user_2
+__put_user_2:
+ ENTER
+ movl TI_addr_limit(%ebx),%ebx
+ subl $1,%ebx
+ cmpl %ebx,%ecx
+ jae bad_put_user
+2: movw %ax,(%ecx)
+ xorl %eax,%eax
+ EXIT
+
+.align 4
+.globl __put_user_4
+__put_user_4:
+ ENTER
+ movl TI_addr_limit(%ebx),%ebx
+ subl $3,%ebx
+ cmpl %ebx,%ecx
+ jae bad_put_user
+3: movl %eax,(%ecx)
+ xorl %eax,%eax
+ EXIT
+
+.align 4
+.globl __put_user_8
+__put_user_8:
+ ENTER
+ movl TI_addr_limit(%ebx),%ebx
+ subl $7,%ebx
+ cmpl %ebx,%ecx
+ jae bad_put_user
+3: movl %eax,(%ecx)
+4: movl %edx,4(%ecx)
+ xorl %eax,%eax
+ EXIT
+
+bad_put_user:
+ movl $-14,%eax
+ EXIT
+
+.section __ex_table,"a"
+ .long 1b,bad_put_user
+ .long 2b,bad_put_user
+ .long 3b,bad_put_user
+ .long 4b,bad_put_user
+.previous
diff -Nru a/include/asm-i386/uaccess.h b/include/asm-i386/uaccess.h
--- a/include/asm-i386/uaccess.h 2005-02-07 17:16:32 -08:00
+++ b/include/asm-i386/uaccess.h 2005-02-07 17:16:32 -08:00
@@ -185,6 +185,21 @@

extern void __put_user_bad(void);

+/*
+ * Strange magic calling convention: pointer in %ecx,
+ * value in %eax(:%edx), return value in %eax, no clobbers.
+ */
+extern void __put_user_1(void);
+extern void __put_user_2(void);
+extern void __put_user_4(void);
+extern void __put_user_8(void);
+
+#define __put_user_1(x, ptr) __asm__ __volatile__("call __put_user_1":"=a" (__ret_pu):"0" ((typeof(*(ptr)))(x)), "c" (ptr))
+#define __put_user_2(x, ptr) __asm__ __volatile__("call __put_user_2":"=a" (__ret_pu):"0" ((typeof(*(ptr)))(x)), "c" (ptr))
+#define __put_user_4(x, ptr) __asm__ __volatile__("call __put_user_4":"=a" (__ret_pu):"0" ((typeof(*(ptr)))(x)), "c" (ptr))
+#define __put_user_8(x, ptr) __asm__ __volatile__("call __put_user_8":"=A" (__ret_pu):"0" ((typeof(*(ptr)))(x)), "c" (ptr))
+#define __put_user_X(x, ptr) __asm__ __volatile__("call __put_user_X":"=a" (__ret_pu):"c" (ptr))
+
/**
* put_user: - Write a simple value into user space.
* @x: Value to copy to user space.
@@ -201,9 +216,18 @@
*
* Returns zero on success, or -EFAULT on error.
*/
-#define put_user(x,ptr) \
- __put_user_check((__typeof__(*(ptr)))(x),(ptr),sizeof(*(ptr)))
-
+#define put_user(x,ptr) \
+({ int __ret_pu; \
+ __chk_user_ptr(ptr); \
+ switch(sizeof(*(ptr))) { \
+ case 1: __put_user_1(x, ptr); break; \
+ case 2: __put_user_2(x, ptr); break; \
+ case 4: __put_user_4(x, ptr); break; \
+ case 8: __put_user_8(x, ptr); break; \
+ default:__put_user_X(x, ptr); break; \
+ } \
+ __ret_pu; \
+})

/**
* __get_user: - Get a simple variable from user space, with less checking.

2005-02-08 18:59:40

by Pavel Machek

[permalink] [raw]
Subject: Re: out-of-line x86 "put_user()" implementation

Hi!

> > boots fine and shrinks the image size quite noticeably:
> >
> > [Nr] Name Type Addr Off Size
> > [ 1] .text PROGBITS c0100000 001000 2771a9 [vmlinux-orig]
> > [ 1] .text PROGBITS c0100000 001000 2742dd [vmlinux-patched]
> >
> > that's 11980 bytes off a 2585001 bytes .text, a 0.5% size reduction.
> > This patch we want ...
>
> Goodie. Here's a slightly more recent version, where I cleaned up the
> assembly code (no need to save %ecx if we just update %ebx instead, which
> makes the code a bit more readable too - and doing it this way should
> hopefully make it easier for an out-of-order CPU to start the memops
> earlier too. Who knows..)
>
> I'm not going to put this into 2.6.11, since I worry about compiler
> interactions, but the more people who test it anyway, the better.

Put it in -mm tree :-)))
Pavel

--
People were complaining that M$ turns users into beta-testers...
...jr ghea gurz vagb qrirybcref, naq gurl frrz gb yvxr vg gung jnl!

2005-02-08 19:36:38

by Valdis Klētnieks

[permalink] [raw]
Subject: Re: out-of-line x86 "put_user()" implementation

On Mon, 07 Feb 2005 17:20:06 PST, Linus Torvalds said:

> I'm not going to put this into 2.6.11, since I worry about compiler
> interactions, but the more people who test it anyway, the better.

Well, since I'm a known glutton for punishment. ;)

a 2.6.11-rc3-RT tree I had handy from last night shows about a 0.5% size reduction:
text data bss dec hex filename
3033417 680824 436188 4150429 3f549d vmlinux.before
3017443 681068 436188 4134699 3f172b vmlinux

Am running a 2.6.11-rc3-mm1-RT kernel with it applied, and have probably 40-50
hours of uptime on it with no noticed issues on a Dell laptop. Sorry, don't
have any comparative performance stats...


Attachments:
(No filename) (226.00 B)

2005-02-09 01:26:15

by Richard Henderson

[permalink] [raw]
Subject: Re: out-of-line x86 "put_user()" implementation

On Mon, Feb 07, 2005 at 05:20:06PM -0800, Linus Torvalds wrote:
> +3: movl %eax,(%ecx)
...
> +3: movl %eax,(%ecx)
> +4: movl %edx,4(%ecx)
...
> + .long 3b,bad_put_user
> + .long 4b,bad_put_user

The first 3 gets lost.


r~

2005-02-09 01:28:31

by Richard Henderson

[permalink] [raw]
Subject: Re: out-of-line x86 "put_user()" implementation

On Mon, Feb 07, 2005 at 05:20:06PM -0800, Linus Torvalds wrote:
> +#define __put_user_8(x, ptr) __asm__ __volatile__("call __put_user_8":"=A" (__ret_pu):"0" ((typeof(*(ptr)))(x)), "c" (ptr))

This is not constrained enough. The compiler could choose to put the
return value in edx. You want

__asm__ __volatile__("call __put_user_8":"=a" (__ret_pu)
: "A" ((typeof(*(ptr)))(x)), "c" (ptr))


r~

2005-02-09 02:09:33

by Linus Torvalds

[permalink] [raw]
Subject: Re: out-of-line x86 "put_user()" implementation



On Tue, 8 Feb 2005, Richard Henderson wrote:
>
> The first 3 gets lost.

Thanks. So here's v3 (which also removes the now stale __put_user_check()
macro).

Andrew - do you want to put it in -mm?

Linus

---
# This is a BitKeeper generated diff -Nru style patch.
#
# ChangeSet
# 2005/02/08 18:05:45-08:00 [email protected]
# x86: make "put_user()" be out-of-line
#
# It's really too big to be inlined.
#
# Ingo tests and reports: this shrinks his kernel text size by
# about 12kB (roughly 0.5%)
#
# arch/i386/lib/putuser.S
# 2005/02/08 18:05:32-08:00 [email protected] +87 -0
#
# include/asm-i386/uaccess.h
# 2005/02/08 18:05:32-08:00 [email protected] +27 -13
# x86: make "put_user()" be out-of-line
#
# arch/i386/lib/putuser.S
# 2005/02/08 18:05:32-08:00 [email protected] +0 -0
# BitKeeper file /home/torvalds/v2.6/linux/arch/i386/lib/putuser.S
#
# arch/i386/lib/Makefile
# 2005/02/08 18:05:32-08:00 [email protected] +1 -1
# x86: make "put_user()" be out-of-line
#
# arch/i386/kernel/i386_ksyms.c
# 2005/02/08 18:05:32-08:00 [email protected] +5 -0
# x86: make "put_user()" be out-of-line
#
diff -Nru a/arch/i386/kernel/i386_ksyms.c b/arch/i386/kernel/i386_ksyms.c
--- a/arch/i386/kernel/i386_ksyms.c 2005-02-08 18:06:14 -08:00
+++ b/arch/i386/kernel/i386_ksyms.c 2005-02-08 18:06:14 -08:00
@@ -97,6 +97,11 @@
EXPORT_SYMBOL(__get_user_2);
EXPORT_SYMBOL(__get_user_4);

+EXPORT_SYMBOL(__put_user_1);
+EXPORT_SYMBOL(__put_user_2);
+EXPORT_SYMBOL(__put_user_4);
+EXPORT_SYMBOL(__put_user_8);
+
EXPORT_SYMBOL(strpbrk);
EXPORT_SYMBOL(strstr);

diff -Nru a/arch/i386/lib/Makefile b/arch/i386/lib/Makefile
--- a/arch/i386/lib/Makefile 2005-02-08 18:06:14 -08:00
+++ b/arch/i386/lib/Makefile 2005-02-08 18:06:14 -08:00
@@ -3,7 +3,7 @@
#


-lib-y = checksum.o delay.o usercopy.o getuser.o memcpy.o strstr.o \
+lib-y = checksum.o delay.o usercopy.o getuser.o putuser.o memcpy.o strstr.o \
bitops.o

lib-$(CONFIG_X86_USE_3DNOW) += mmx.o
diff -Nru a/arch/i386/lib/putuser.S b/arch/i386/lib/putuser.S
--- /dev/null Wed Dec 31 16:00:00 196900
+++ b/arch/i386/lib/putuser.S 2005-02-08 18:06:14 -08:00
@@ -0,0 +1,87 @@
+/*
+ * __put_user functions.
+ *
+ * (C) Copyright 2005 Linus Torvalds
+ *
+ * These functions have a non-standard call interface
+ * to make them more efficient, especially as they
+ * return an error value in addition to the "real"
+ * return value.
+ */
+#include <asm/thread_info.h>
+
+
+/*
+ * __put_user_X
+ *
+ * Inputs: %eax[:%edx] contains the data
+ * %ecx contains the address
+ *
+ * Outputs: %eax is error code (0 or -EFAULT)
+ *
+ * These functions should not modify any other registers,
+ * as they get called from within inline assembly.
+ */
+
+#define ENTER pushl %ebx ; GET_THREAD_INFO(%ebx)
+#define EXIT popl %ebx ; ret
+
+.text
+.align 4
+.globl __put_user_1
+__put_user_1:
+ ENTER
+ cmpl TI_addr_limit(%ebx),%ecx
+ jae bad_put_user
+1: movb %al,(%ecx)
+ xorl %eax,%eax
+ EXIT
+
+.align 4
+.globl __put_user_2
+__put_user_2:
+ ENTER
+ movl TI_addr_limit(%ebx),%ebx
+ subl $1,%ebx
+ cmpl %ebx,%ecx
+ jae bad_put_user
+2: movw %ax,(%ecx)
+ xorl %eax,%eax
+ EXIT
+
+.align 4
+.globl __put_user_4
+__put_user_4:
+ ENTER
+ movl TI_addr_limit(%ebx),%ebx
+ subl $3,%ebx
+ cmpl %ebx,%ecx
+ jae bad_put_user
+3: movl %eax,(%ecx)
+ xorl %eax,%eax
+ EXIT
+
+.align 4
+.globl __put_user_8
+__put_user_8:
+ ENTER
+ movl TI_addr_limit(%ebx),%ebx
+ subl $7,%ebx
+ cmpl %ebx,%ecx
+ jae bad_put_user
+4: movl %eax,(%ecx)
+5: movl %edx,4(%ecx)
+ xorl %eax,%eax
+ EXIT
+
+bad_put_user:
+ movl $-14,%eax
+ EXIT
+
+.section __ex_table,"a"
+ .long 1b,bad_put_user
+ .long 2b,bad_put_user
+ .long 3b,bad_put_user
+ .long 4b,bad_put_user
+ .long 5b,bad_put_user
+.previous
diff -Nru a/include/asm-i386/uaccess.h b/include/asm-i386/uaccess.h
--- a/include/asm-i386/uaccess.h 2005-02-08 18:06:14 -08:00
+++ b/include/asm-i386/uaccess.h 2005-02-08 18:06:14 -08:00
@@ -185,6 +185,21 @@

extern void __put_user_bad(void);

+/*
+ * Strange magic calling convention: pointer in %ecx,
+ * value in %eax(:%edx), return value in %eax, no clobbers.
+ */
+extern void __put_user_1(void);
+extern void __put_user_2(void);
+extern void __put_user_4(void);
+extern void __put_user_8(void);
+
+#define __put_user_1(x, ptr) __asm__ __volatile__("call __put_user_1":"=a" (__ret_pu):"0" ((typeof(*(ptr)))(x)), "c" (ptr))
+#define __put_user_2(x, ptr) __asm__ __volatile__("call __put_user_2":"=a" (__ret_pu):"0" ((typeof(*(ptr)))(x)), "c" (ptr))
+#define __put_user_4(x, ptr) __asm__ __volatile__("call __put_user_4":"=a" (__ret_pu):"0" ((typeof(*(ptr)))(x)), "c" (ptr))
+#define __put_user_8(x, ptr) __asm__ __volatile__("call __put_user_8":"=A" (__ret_pu):"0" ((typeof(*(ptr)))(x)), "c" (ptr))
+#define __put_user_X(x, ptr) __asm__ __volatile__("call __put_user_X":"=a" (__ret_pu):"c" (ptr))
+
/**
* put_user: - Write a simple value into user space.
* @x: Value to copy to user space.
@@ -201,9 +216,18 @@
*
* Returns zero on success, or -EFAULT on error.
*/
-#define put_user(x,ptr) \
- __put_user_check((__typeof__(*(ptr)))(x),(ptr),sizeof(*(ptr)))
-
+#define put_user(x,ptr) \
+({ int __ret_pu; \
+ __chk_user_ptr(ptr); \
+ switch(sizeof(*(ptr))) { \
+ case 1: __put_user_1(x, ptr); break; \
+ case 2: __put_user_2(x, ptr); break; \
+ case 4: __put_user_4(x, ptr); break; \
+ case 8: __put_user_8(x, ptr); break; \
+ default:__put_user_X(x, ptr); break; \
+ } \
+ __ret_pu; \
+})

/**
* __get_user: - Get a simple variable from user space, with less checking.
@@ -258,16 +282,6 @@
__pu_err; \
})

-
-#define __put_user_check(x,ptr,size) \
-({ \
- long __pu_err = -EFAULT; \
- __typeof__(*(ptr)) __user *__pu_addr = (ptr); \
- might_sleep(); \
- if (access_ok(VERIFY_WRITE,__pu_addr,size)) \
- __put_user_size((x),__pu_addr,(size),__pu_err,-EFAULT); \
- __pu_err; \
-})

#define __put_user_u64(x, addr, err) \
__asm__ __volatile__( \

2005-02-09 02:16:26

by Linus Torvalds

[permalink] [raw]
Subject: Re: out-of-line x86 "put_user()" implementation



On Tue, 8 Feb 2005, Richard Henderson wrote:
>
> On Mon, Feb 07, 2005 at 05:20:06PM -0800, Linus Torvalds wrote:
> > +#define __put_user_8(x, ptr) __asm__ __volatile__("call __put_user_8":"=A" (__ret_pu):"0" ((typeof(*(ptr)))(x)), "c" (ptr))
>
> This is not constrained enough. The compiler could choose to put the
> return value in edx. You want
>
> __asm__ __volatile__("call __put_user_8":"=a" (__ret_pu)
> : "A" ((typeof(*(ptr)))(x)), "c" (ptr))

Hmm.. I always thought "A" was the _pairing_ of %eax/%edx, not "eax or
edx"?

IOW, as far as I know, I'm telling the compiler that the asm is writing a
64-bit value into %eax:%edx, and that that __ret_pu gets that 64-bit value
assigned to it (and truncated, at that point). No?

Note that he code that actually writes to the register is the assembly
code, and that one always writes to %eax. So the compiler doesn't get to
"put the return value" anywhere. It gets told where it is.

I'd happily use your version, but I thought that some versions of gcc
require that input output registers cannot overlap, and would refuse to do
that thing? But if you tell me differently..

Linus

2005-02-09 02:25:50

by Andrew Morton

[permalink] [raw]
Subject: Re: out-of-line x86 "put_user()" implementation

Linus Torvalds <[email protected]> wrote:
>
> Andrew - do you want to put it in -mm?

I'll take patches from anyone ;)

2005-02-09 02:31:07

by Linus Torvalds

[permalink] [raw]
Subject: Re: out-of-line x86 "put_user()" implementation



On Tue, 8 Feb 2005, Linus Torvalds wrote:
>
> Hmm.. I always thought "A" was the _pairing_ of %eax/%edx, not "eax or
> edx"?

Ahh. Some testing shows that gcc really seems to think of it as "eax or
edx", ie you can do

asm("uglee %0 %1": :"a" (1), "A" (2));

and it will output

movl $1, %eax
movl $2, %edx
uglee %eax %edx

without complaining.

Live and learn.

That brings up another issue: if I don't care which registers a 64-bit
value goes into, can I get the "low reg" and "high reg" names some way? In
this case I'm constrained, and do want eactly %eax:%edx, but in some other
cases I might not care, and wouldn't mind having a

asm("addl %2,%L0 ; adcl $0,%H0"
:"=r" (ll_result)
:"r" (increment), "0" (ll_input));

to add a 32-bit number to a 64-bit one (this is totally made up, since gcc
can probably generate this sequence itself, but you get the idea -
allowing the asm to treat the low/high parts of the 64-bit entity
separately, without forcing them into one specific pair).

v4 with your asm constrain fix appended.

Linus

-----
# This is a BitKeeper generated diff -Nru style patch.
#
# ChangeSet
# 2005/02/08 18:18:46-08:00 [email protected]
# x86: make "put_user()" be out-of-line
#
# It's really too big to be inlined.
#
# Ingo tests and reports: this shrinks his kernel text size by
# about 12kB (roughly 0.5%).
#
# Updated on input from Richard Henderson on how the "A" constraint
# works on x86.
#
# arch/i386/lib/putuser.S
# 2005/02/08 18:18:35-08:00 [email protected] +87 -0
#
# include/asm-i386/uaccess.h
# 2005/02/08 18:18:35-08:00 [email protected] +27 -13
# x86: make "put_user()" be out-of-line
#
# arch/i386/lib/putuser.S
# 2005/02/08 18:18:35-08:00 [email protected] +0 -0
# BitKeeper file /home/torvalds/v2.6/linux/arch/i386/lib/putuser.S
#
# arch/i386/lib/Makefile
# 2005/02/08 18:18:34-08:00 [email protected] +1 -1
# x86: make "put_user()" be out-of-line
#
# arch/i386/kernel/i386_ksyms.c
# 2005/02/08 18:18:34-08:00 [email protected] +5 -0
# x86: make "put_user()" be out-of-line
#
diff -Nru a/arch/i386/kernel/i386_ksyms.c b/arch/i386/kernel/i386_ksyms.c
--- a/arch/i386/kernel/i386_ksyms.c 2005-02-08 18:25:22 -08:00
+++ b/arch/i386/kernel/i386_ksyms.c 2005-02-08 18:25:22 -08:00
@@ -97,6 +97,11 @@
EXPORT_SYMBOL(__get_user_2);
EXPORT_SYMBOL(__get_user_4);

+EXPORT_SYMBOL(__put_user_1);
+EXPORT_SYMBOL(__put_user_2);
+EXPORT_SYMBOL(__put_user_4);
+EXPORT_SYMBOL(__put_user_8);
+
EXPORT_SYMBOL(strpbrk);
EXPORT_SYMBOL(strstr);

diff -Nru a/arch/i386/lib/Makefile b/arch/i386/lib/Makefile
--- a/arch/i386/lib/Makefile 2005-02-08 18:25:22 -08:00
+++ b/arch/i386/lib/Makefile 2005-02-08 18:25:22 -08:00
@@ -3,7 +3,7 @@
#


-lib-y = checksum.o delay.o usercopy.o getuser.o memcpy.o strstr.o \
+lib-y = checksum.o delay.o usercopy.o getuser.o putuser.o memcpy.o strstr.o \
bitops.o

lib-$(CONFIG_X86_USE_3DNOW) += mmx.o
diff -Nru a/arch/i386/lib/putuser.S b/arch/i386/lib/putuser.S
--- /dev/null Wed Dec 31 16:00:00 196900
+++ b/arch/i386/lib/putuser.S 2005-02-08 18:25:22 -08:00
@@ -0,0 +1,87 @@
+/*
+ * __put_user functions.
+ *
+ * (C) Copyright 2005 Linus Torvalds
+ *
+ * These functions have a non-standard call interface
+ * to make them more efficient, especially as they
+ * return an error value in addition to the "real"
+ * return value.
+ */
+#include <asm/thread_info.h>
+
+
+/*
+ * __put_user_X
+ *
+ * Inputs: %eax[:%edx] contains the data
+ * %ecx contains the address
+ *
+ * Outputs: %eax is error code (0 or -EFAULT)
+ *
+ * These functions should not modify any other registers,
+ * as they get called from within inline assembly.
+ */
+
+#define ENTER pushl %ebx ; GET_THREAD_INFO(%ebx)
+#define EXIT popl %ebx ; ret
+
+.text
+.align 4
+.globl __put_user_1
+__put_user_1:
+ ENTER
+ cmpl TI_addr_limit(%ebx),%ecx
+ jae bad_put_user
+1: movb %al,(%ecx)
+ xorl %eax,%eax
+ EXIT
+
+.align 4
+.globl __put_user_2
+__put_user_2:
+ ENTER
+ movl TI_addr_limit(%ebx),%ebx
+ subl $1,%ebx
+ cmpl %ebx,%ecx
+ jae bad_put_user
+2: movw %ax,(%ecx)
+ xorl %eax,%eax
+ EXIT
+
+.align 4
+.globl __put_user_4
+__put_user_4:
+ ENTER
+ movl TI_addr_limit(%ebx),%ebx
+ subl $3,%ebx
+ cmpl %ebx,%ecx
+ jae bad_put_user
+3: movl %eax,(%ecx)
+ xorl %eax,%eax
+ EXIT
+
+.align 4
+.globl __put_user_8
+__put_user_8:
+ ENTER
+ movl TI_addr_limit(%ebx),%ebx
+ subl $7,%ebx
+ cmpl %ebx,%ecx
+ jae bad_put_user
+4: movl %eax,(%ecx)
+5: movl %edx,4(%ecx)
+ xorl %eax,%eax
+ EXIT
+
+bad_put_user:
+ movl $-14,%eax
+ EXIT
+
+.section __ex_table,"a"
+ .long 1b,bad_put_user
+ .long 2b,bad_put_user
+ .long 3b,bad_put_user
+ .long 4b,bad_put_user
+ .long 5b,bad_put_user
+.previous
diff -Nru a/include/asm-i386/uaccess.h b/include/asm-i386/uaccess.h
--- a/include/asm-i386/uaccess.h 2005-02-08 18:25:22 -08:00
+++ b/include/asm-i386/uaccess.h 2005-02-08 18:25:22 -08:00
@@ -185,6 +185,21 @@

extern void __put_user_bad(void);

+/*
+ * Strange magic calling convention: pointer in %ecx,
+ * value in %eax(:%edx), return value in %eax, no clobbers.
+ */
+extern void __put_user_1(void);
+extern void __put_user_2(void);
+extern void __put_user_4(void);
+extern void __put_user_8(void);
+
+#define __put_user_1(x, ptr) __asm__ __volatile__("call __put_user_1":"=a" (__ret_pu):"0" ((typeof(*(ptr)))(x)), "c" (ptr))
+#define __put_user_2(x, ptr) __asm__ __volatile__("call __put_user_2":"=a" (__ret_pu):"0" ((typeof(*(ptr)))(x)), "c" (ptr))
+#define __put_user_4(x, ptr) __asm__ __volatile__("call __put_user_4":"=a" (__ret_pu):"0" ((typeof(*(ptr)))(x)), "c" (ptr))
+#define __put_user_8(x, ptr) __asm__ __volatile__("call __put_user_8":"=a" (__ret_pu):"A" ((typeof(*(ptr)))(x)), "c" (ptr))
+#define __put_user_X(x, ptr) __asm__ __volatile__("call __put_user_X":"=a" (__ret_pu):"c" (ptr))
+
/**
* put_user: - Write a simple value into user space.
* @x: Value to copy to user space.
@@ -201,9 +216,18 @@
*
* Returns zero on success, or -EFAULT on error.
*/
-#define put_user(x,ptr) \
- __put_user_check((__typeof__(*(ptr)))(x),(ptr),sizeof(*(ptr)))
-
+#define put_user(x,ptr) \
+({ int __ret_pu; \
+ __chk_user_ptr(ptr); \
+ switch(sizeof(*(ptr))) { \
+ case 1: __put_user_1(x, ptr); break; \
+ case 2: __put_user_2(x, ptr); break; \
+ case 4: __put_user_4(x, ptr); break; \
+ case 8: __put_user_8(x, ptr); break; \
+ default:__put_user_X(x, ptr); break; \
+ } \
+ __ret_pu; \
+})

/**
* __get_user: - Get a simple variable from user space, with less checking.
@@ -258,16 +282,6 @@
__pu_err; \
})

-
-#define __put_user_check(x,ptr,size) \
-({ \
- long __pu_err = -EFAULT; \
- __typeof__(*(ptr)) __user *__pu_addr = (ptr); \
- might_sleep(); \
- if (access_ok(VERIFY_WRITE,__pu_addr,size)) \
- __put_user_size((x),__pu_addr,(size),__pu_err,-EFAULT); \
- __pu_err; \
-})

#define __put_user_u64(x, addr, err) \
__asm__ __volatile__( \

2005-02-09 02:33:00

by Linus Torvalds

[permalink] [raw]
Subject: Re: out-of-line x86 "put_user()" implementation



On Tue, 8 Feb 2005, Andrew Morton wrote:
>
> I'll take patches from anyone ;)

You'll never live it down. Once you get a name for being easy, you'll
always be known as Andrew "patch-ho" Morton.

Linus

2005-02-09 02:34:25

by Richard Henderson

[permalink] [raw]
Subject: Re: out-of-line x86 "put_user()" implementation

On Tue, Feb 08, 2005 at 06:27:08PM -0800, Linus Torvalds wrote:
> That brings up another issue: if I don't care which registers a 64-bit
> value goes into, can I get the "low reg" and "high reg" names some way?

Nope. We never needed one in the i386 backend itself, so we never
added anything like that.


r~

2005-02-09 02:38:49

by Richard Henderson

[permalink] [raw]
Subject: Re: out-of-line x86 "put_user()" implementation

On Tue, Feb 08, 2005 at 06:16:15PM -0800, Linus Torvalds wrote:
> I'd happily use your version, but I thought that some versions of gcc
> require that input output registers cannot overlap, and would refuse to do
> that thing? But if you tell me differently..

No, you're thinking of

asm("" : "=a" (x) : : "eax")

where the clobber overlaps the inputs or outputs.


r~

2005-02-11 22:15:35

by H. Peter Anvin

[permalink] [raw]
Subject: Re: out-of-line x86 "put_user()" implementation

Followup to: <[email protected]>
By author: Richard Henderson <[email protected]>
In newsgroup: linux.dev.kernel
>
> On Tue, Feb 08, 2005 at 06:27:08PM -0800, Linus Torvalds wrote:
> > That brings up another issue: if I don't care which registers a 64-bit
> > value goes into, can I get the "low reg" and "high reg" names some way?
>
> Nope. We never needed one in the i386 backend itself, so we never
> added anything like that.
>

I would really like to see that feature. I've missed it more than a
few times.

-hpa

2005-02-12 01:59:42

by Chuck Ebbert

[permalink] [raw]
Subject: Re: out-of-line x86 "put_user()" implementation

On Tue, 8 Feb 2005 at 18:27:08 -0800, Linus Torvalds wrote:

> +/*
> + * Strange magic calling convention: pointer in %ecx,
> + * value in %eax(:%edx), return value in %eax, no clobbers.
> + */
> +extern void __put_user_1(void);
> +extern void __put_user_2(void);
> +extern void __put_user_4(void);
> +extern void __put_user_8(void);
> +
> +#define __put_user_1(x, ptr) __asm__ __volatile__("call __put_user_1":"=a" (__ret_pu):"0" ((typeof(*(ptr)))(x)), "c" (ptr))
> +#define __put_user_2(x, ptr) __asm__ __volatile__("call __put_user_2":"=a" (__ret_pu):"0" ((typeof(*(ptr)))(x)), "c" (ptr))
> +#define __put_user_4(x, ptr) __asm__ __volatile__("call __put_user_4":"=a" (__ret_pu):"0" ((typeof(*(ptr)))(x)), "c" (ptr))
> +#define __put_user_8(x, ptr) __asm__ __volatile__("call __put_user_8":"=a" (__ret_pu):"A" ((typeof(*(ptr)))(x)), "c" (ptr))
> +#define __put_user_X(x, ptr) __asm__ __volatile__("call __put_user_X":"=a" (__ret_pu):"c" (ptr))
> +

Should "cc" be on the clobber list since all the called functions alter EFLAGS?

And in any case is it too much to ask for an 80-column limit? ;)

#define __put_user_1(x, ptr) \
__asm__ __volatile__( \
"call __put_user_1" \
: "=a" (__ret_pu) \
: "0" ((typeof(*(ptr)))(x)), "c" (ptr) \
: "cc")
#define __put_user_2(x, ptr) \
__asm__ __volatile__( \
"call __put_user_2" \
: "=a" (__ret_pu) \
: "0" ((typeof(*(ptr)))(x)), "c" (ptr) \
: "cc")
#define __put_user_4(x, ptr) \
__asm__ __volatile__( \
"call __put_user_4" \
: "=a" (__ret_pu) \
: "0" ((typeof(*(ptr)))(x)), "c" (ptr) \
: "cc")
#define __put_user_8(x, ptr) \
__asm__ __volatile__( \
"call __put_user_8" \
: "=a" (__ret_pu) \
: "A" ((typeof(*(ptr)))(x)), "c" (ptr) \
: "cc")
#define __put_user_X(x, ptr) \
__asm__ __volatile__( \
"call __put_user_X" \
: "=a" (__ret_pu) \
: "c" (ptr) \
: "cc")

--
Chuck

2005-02-12 02:04:45

by Linus Torvalds

[permalink] [raw]
Subject: Re: out-of-line x86 "put_user()" implementation



On Fri, 11 Feb 2005, Chuck Ebbert wrote:
>
> And in any case is it too much to ask for an 80-column limit? ;)

Yes. Dammit, especially for something like this, the long-line version is
just _so_ much more readable. Compare my and your version wrt being able
to tell what the differences between the four different cases are.

In the single-long-line version, the differences are trivially visible. In
the "prettified" version (aka "I'm still living in the 60's, and proud of
it" version), it's impossible to pick out the differences.

If you don't like long lines, use a helper #define for the common part or
something.

Linus

2005-03-12 14:56:27

by Coywolf Qi Hunt

[permalink] [raw]
Subject: Re: out-of-line x86 "put_user()" implementation

On Sun, 6 Feb 2005 22:23:51 -0800 (PST), Linus Torvalds
<[email protected]> wrote:
>
> I was looking at some of the code we generate, and happened to notice that
> we have this strange situation where the x86 "get_user()" macros generate
> out-of-line code to do all the address verification etc, but the
> "put_user()" ones do not, and do everything inline.
>
> I also noticed that (probably as a result of this), our "put_user()" on
> old i386 machines does not do the full magic manual page-following. Which
> means that copy-on-write doesn't necessarily work right due to the broken
> paging hw on the original 386 core.

I've noticed that "put_user()" ones do generate out-of-line code in 2.2.x and
later got dropped out in patch-2.3.99-pre9.
( perhaps by http://www.uwsg.iu.edu/hypermail/linux/kernel/0005.2/0487.html )

At that time "put_user is broken for i386 machines (security) - sem
stuff may be wrong too " was put on the job list. Not sure what bug it
was.


--coywolf

>
> I didn't fix the second part, but at least making things out-of-line makes
> it possible. And making "put_user()" be out-of-line seemed quite doable.
>
> I no longer use x86 as my main machine, so this patch is totally untested.
> I've compiled it to see that things look somewhat sane, but that doesn't
> mean much. If I forgot some register or screwed something else up, this
> will result in a totally nonworking kernel, but I thought that maybe
> somebody else would be interested in looking at whether this (a) works,
> (b) migth even shrink the kernel and (c) might make us able to DTRT wrt
> the page table following crud (old i386 cores may be hard to find these
> days, so maybe people don't care).
>
> Linus
>
> ----
> # This is a BitKeeper generated diff -Nru style patch.
> #
> # ChangeSet
> # 2005/02/06 22:10:04-08:00 [email protected]
> # x86: make "put_user()" be out-of-line
> #
> # It's really too big to be inlined.
> #
> # arch/i386/lib/putuser.S
> # 2005/02/06 22:09:53-08:00 [email protected] +87 -0
> #
> # include/asm-i386/uaccess.h
> # 2005/02/06 22:09:53-08:00 [email protected] +27 -3
> # x86: make "put_user()" be out-of-line
> #
> # It's really too big to be inlined.
> #
> # arch/i386/lib/putuser.S
> # 2005/02/06 22:09:53-08:00 [email protected] +0 -0
> # BitKeeper file /home/torvalds/v2.6/linux/arch/i386/lib/putuser.S
> #
> # arch/i386/lib/Makefile
> # 2005/02/06 22:09:53-08:00 [email protected] +1 -1
> # x86: make "put_user()" be out-of-line
> #
> # It's really too big to be inlined.
> #
> # arch/i386/kernel/i386_ksyms.c
> # 2005/02/06 22:09:53-08:00 [email protected] +5 -0
> # x86: make "put_user()" be out-of-line
> #
> # It's really too big to be inlined.
> #
> diff -Nru a/arch/i386/kernel/i386_ksyms.c b/arch/i386/kernel/i386_ksyms.c
> --- a/arch/i386/kernel/i386_ksyms.c 2005-02-06 22:12:01 -08:00
> +++ b/arch/i386/kernel/i386_ksyms.c 2005-02-06 22:12:01 -08:00
> @@ -97,6 +97,11 @@
> EXPORT_SYMBOL(__get_user_2);
> EXPORT_SYMBOL(__get_user_4);
>
> +EXPORT_SYMBOL(__put_user_1);
> +EXPORT_SYMBOL(__put_user_2);
> +EXPORT_SYMBOL(__put_user_4);
> +EXPORT_SYMBOL(__put_user_8);
> +
> EXPORT_SYMBOL(strpbrk);
> EXPORT_SYMBOL(strstr);
>
> diff -Nru a/arch/i386/lib/Makefile b/arch/i386/lib/Makefile
> --- a/arch/i386/lib/Makefile 2005-02-06 22:12:01 -08:00
> +++ b/arch/i386/lib/Makefile 2005-02-06 22:12:01 -08:00
> @@ -3,7 +3,7 @@
> #
>
> -lib-y = checksum.o delay.o usercopy.o getuser.o memcpy.o strstr.o \
> +lib-y = checksum.o delay.o usercopy.o getuser.o putuser.o memcpy.o strstr.o \
> bitops.o
>
> lib-$(CONFIG_X86_USE_3DNOW) += mmx.o
> diff -Nru a/arch/i386/lib/putuser.S b/arch/i386/lib/putuser.S
> --- /dev/null Wed Dec 31 16:00:00 196900
> +++ b/arch/i386/lib/putuser.S 2005-02-06 22:12:01 -08:00
> @@ -0,0 +1,87 @@
> +/*
> + * __put_user functions.
> + *
> + * (C) Copyright 2005 Linus Torvalds
> + *
> + * These functions have a non-standard call interface
> + * to make them more efficient, especially as they
> + * return an error value in addition to the "real"
> + * return value.
> + */
> +#include <asm/thread_info.h>
> +
> +
> +/*
> + * __put_user_X
> + *
> + * Inputs: %eax[:%edx] contains the data
> + * %ecx contains the address
> + *
> + * Outputs: %eax is error code (0 or -EFAULT)
> + * %ecx is corrupted
> + *
> + * These functions should not modify any other registers,
> + * as they get called from within inline assembly.
> + */
> +
> +#define ENTER pushl %ecx ; pushl %ebx ; GET_THREAD_INFO(%ebx)
> +#define EXIT popl %ebx ; popl %ecx ; ret
> +
> +.text
> +.align 4
> +.globl __put_user_1
> +__put_user_1:
> + ENTER
> + cmpl TI_addr_limit(%ebx),%ecx
> + jae bad_put_user
> +1: movb %al,(%ecx)
> + xorl %eax,%eax
> + EXIT
> +
> +.align 4
> +.globl __put_user_2
> +__put_user_2:
> + ENTER
> + addl $1,%ecx
> + jc bad_put_user
> + cmpl TI_addr_limit(%ebx),%ecx
> + jae bad_put_user
> +2: movw %ax,-1(%ecx)
> + xorl %eax,%eax
> + EXIT
> +
> +.align 4
> +.globl __put_user_4
> +__put_user_4:
> + ENTER
> + addl $3,%ecx
> + jc bad_put_user
> + cmpl TI_addr_limit(%ebx),%ecx
> + jae bad_put_user
> +3: movl %eax,-3(%ecx)
> + xorl %eax,%eax
> + EXIT
> +
> +.align 4
> +.globl __put_user_8
> +__put_user_8:
> + ENTER
> + addl $7,%ecx
> + jc bad_put_user
> + cmpl TI_addr_limit(%ebx),%ecx
> + jae bad_put_user
> +3: movl %eax,-7(%ecx)
> +4: movl %edx,-3(%ecx)
> + xorl %eax,%eax
> + EXIT
> +
> +bad_put_user:
> + movl $-14,%eax
> + EXIT
> +
> +.section __ex_table,"a"
> + .long 1b,bad_put_user
> + .long 2b,bad_put_user
> + .long 3b,bad_put_user
> + .long 4b,bad_put_user
> +.previous
> diff -Nru a/include/asm-i386/uaccess.h b/include/asm-i386/uaccess.h
> --- a/include/asm-i386/uaccess.h 2005-02-06 22:12:01 -08:00
> +++ b/include/asm-i386/uaccess.h 2005-02-06 22:12:01 -08:00
> @@ -185,6 +185,21 @@
>
> extern void __put_user_bad(void);
>
> +/*
> + * Strange magic calling convention: pointer in %ecx,
> + * value in %eax(:%edx), return value in %eax, no clobbers.
> + */
> +extern void __put_user_1(void);
> +extern void __put_user_2(void);
> +extern void __put_user_4(void);
> +extern void __put_user_8(void);
> +
> +#define __put_user_1(x, ptr) __asm__ __volatile__("call __put_user_1":"=a" (__ret_pu):"0" ((typeof(*(ptr)))(x)), "c" (ptr))
> +#define __put_user_2(x, ptr) __asm__ __volatile__("call __put_user_2":"=a" (__ret_pu):"0" ((typeof(*(ptr)))(x)), "c" (ptr))
> +#define __put_user_4(x, ptr) __asm__ __volatile__("call __put_user_4":"=a" (__ret_pu):"0" ((typeof(*(ptr)))(x)), "c" (ptr))
> +#define __put_user_8(x, ptr) __asm__ __volatile__("call __put_user_8":"=A" (__ret_pu):"0" ((typeof(*(ptr)))(x)), "c" (ptr))
> +#define __put_user_X(x, ptr) __asm__ __volatile__("call __put_user_X":"=a" (__ret_pu):"c" (ptr))
> +
> /**
> * put_user: - Write a simple value into user space.
> * @x: Value to copy to user space.
> @@ -201,9 +216,18 @@
> *
> * Returns zero on success, or -EFAULT on error.
> */
> -#define put_user(x,ptr) \
> - __put_user_check((__typeof__(*(ptr)))(x),(ptr),sizeof(*(ptr)))
> -
> +#define put_user(x,ptr) \
> +({ int __ret_pu; \
> + __chk_user_ptr(ptr); \
> + switch(sizeof(*(ptr))) { \
> + case 1: __put_user_1(x, ptr); break; \
> + case 2: __put_user_2(x, ptr); break; \
> + case 4: __put_user_4(x, ptr); break; \
> + case 8: __put_user_8(x, ptr); break; \
> + default:__put_user_X(x, ptr); break; \
> + } \
> + __ret_pu; \
> +})
>
> /**
> * __get_user: - Get a simple variable from user space, with less checking.
> -
> To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
> the body of a message to [email protected]
> More majordomo info at http://vger.kernel.org/majordomo-info.html
> Please read the FAQ at http://www.tux.org/lkml/
>


--
Coywolf Qi Hunt
Homepage http://sosdg.org/~coywolf/