2008-10-26 16:42:37

by Ulrich Drepper

[permalink] [raw]
Subject: [PATCH] reintroduce accept4

This patch reintroduces accept4, replacing paccept. It's easy to see that
the patch only removes code and then redirects existing code away from the
removed functions. Since the paccept code sans signal handling was never
in question I think there is no reason to quarantine the patch first.

I've updated the test program which now looks as follows:

#include <fcntl.h>
#include <pthread.h>
#include <stdio.h>
#include <unistd.h>
#include <netinet/in.h>
#include <sys/socket.h>
#include <sys/syscall.h>

#ifdef __x86_64__
#define __NR_accept4 288
#define SOCK_CLOEXEC O_CLOEXEC
#elif __i386__
#define SYS_ACCEPT4 18
#define USE_SOCKETCALL 1
#define SOCK_CLOEXEC O_CLOEXEC
#else
#error "define syscall numbers for this architecture"
#endif

#define PORT 57392

static pthread_barrier_t b;

static void *
tf (void *arg)
{
pthread_barrier_wait (&b);
int s = socket (AF_INET, SOCK_STREAM, 0);
struct sockaddr_in sin;
sin.sin_family = AF_INET;
sin.sin_addr.s_addr = htonl (INADDR_LOOPBACK);
sin.sin_port = htons (PORT);
connect (s, (const struct sockaddr *) &sin, sizeof (sin));
close (s);
pthread_barrier_wait (&b);

pthread_barrier_wait (&b);
s = socket (AF_INET, SOCK_STREAM, 0);
sin.sin_family = AF_INET;
sin.sin_addr.s_addr = htonl (INADDR_LOOPBACK);
sin.sin_port = htons (PORT + 1);
connect (s, (const struct sockaddr *) &sin, sizeof (sin));
close (s);
return NULL;
}

int
main (void)
{
alarm (5);

int status = 0;

pthread_barrier_init (&b, NULL, 2);

pthread_t th;
if (pthread_create (&th, NULL, tf, NULL) != 0)
{
puts ("pthread_create failed");
status = 1;
}
else
{
int s = socket (AF_INET, SOCK_STREAM, 0);
int fl = 1;
setsockopt(s, SOL_SOCKET, SO_REUSEADDR, &fl, sizeof (fl));
struct sockaddr_in sin;
sin.sin_family = AF_INET;
sin.sin_addr.s_addr = htonl (INADDR_LOOPBACK);
sin.sin_port = htons (PORT);
bind (s, (struct sockaddr *) &sin, sizeof (sin));
listen (s, SOMAXCONN);

pthread_barrier_wait (&b);

int s2 = accept (s, NULL, 0);
if (s2 < 0)
{
puts ("accept failed");
status = 1;
}
else
{
int fl = fcntl (s2, F_GETFD);
if ((fl & FD_CLOEXEC) != 0)
{
puts ("accept did set CLOEXEC");
status = 1;
}

close (s2);
}

close (s);

pthread_barrier_wait (&b);

sin.sin_family = AF_INET;
sin.sin_addr.s_addr = htonl (INADDR_LOOPBACK);
sin.sin_port = htons (PORT + 1);
s = socket (AF_INET, SOCK_STREAM, 0);
bind (s, (struct sockaddr *) &sin, sizeof (sin));
listen (s, SOMAXCONN);

pthread_barrier_wait (&b);

#if USE_SOCKETCALL
s2 = syscall (__NR_socketcall, SYS_ACCEPT4, s, NULL, 0, SOCK_CLOEXEC);
#else
s2 = syscall (__NR_accept4, s, NULL, 0, SOCK_CLOEXEC);
#endif
if (s2 < 0)
{
puts ("accept4 failed");
status = 1;
}
else
{
int fl = fcntl (s2, F_GETFD);
if ((fl & FD_CLOEXEC) == 0)
{
puts ("accept4 did not set CLOEXEC");
status = 1;
}

close (s2);
}

close (s);
}

return status;
}


arch/x86/include/asm/unistd_64.h | 4 -
include/linux/net.h | 6 --
include/linux/syscalls.h | 3 -
kernel/sys_ni.c | 2
net/compat.c | 50 ++----------------------
net/socket.c | 80 ++++-----------------------------------
6 files changed, 21 insertions(+), 124 deletions(-)


Signed-off-by: Ulrich Drepper <[email protected]>

diff --git a/arch/x86/include/asm/unistd_64.h b/arch/x86/include/asm/unistd_64.h
index 834b2c1..d2e415e 100644
--- a/arch/x86/include/asm/unistd_64.h
+++ b/arch/x86/include/asm/unistd_64.h
@@ -639,8 +639,8 @@ __SYSCALL(__NR_fallocate, sys_fallocate)
__SYSCALL(__NR_timerfd_settime, sys_timerfd_settime)
#define __NR_timerfd_gettime 287
__SYSCALL(__NR_timerfd_gettime, sys_timerfd_gettime)
-#define __NR_paccept 288
-__SYSCALL(__NR_paccept, sys_paccept)
+#define __NR_accept4 288
+__SYSCALL(__NR_accept4, sys_accept4)
#define __NR_signalfd4 289
__SYSCALL(__NR_signalfd4, sys_signalfd4)
#define __NR_eventfd2 290
diff --git a/include/linux/net.h b/include/linux/net.h
index 6dc14a2..4515efa 100644
--- a/include/linux/net.h
+++ b/include/linux/net.h
@@ -40,7 +40,7 @@
#define SYS_GETSOCKOPT 15 /* sys_getsockopt(2) */
#define SYS_SENDMSG 16 /* sys_sendmsg(2) */
#define SYS_RECVMSG 17 /* sys_recvmsg(2) */
-#define SYS_PACCEPT 18 /* sys_paccept(2) */
+#define SYS_ACCEPT4 18 /* sys_accept4(2) */

typedef enum {
SS_FREE = 0, /* not allocated */
@@ -100,7 +100,7 @@ enum sock_type {
* remaining bits are used as flags. */
#define SOCK_TYPE_MASK 0xf

-/* Flags for socket, socketpair, paccept */
+/* Flags for socket, socketpair, accept4 */
#define SOCK_CLOEXEC O_CLOEXEC
#ifndef SOCK_NONBLOCK
#define SOCK_NONBLOCK O_NONBLOCK
@@ -223,8 +223,6 @@ extern int sock_map_fd(struct socket *sock, int flags);
extern struct socket *sockfd_lookup(int fd, int *err);
#define sockfd_put(sock) fput(sock->file)
extern int net_ratelimit(void);
-extern long do_accept(int fd, struct sockaddr __user *upeer_sockaddr,
- int __user *upeer_addrlen, int flags);

#define net_random() random32()
#define net_srandom(seed) srandom32((__force u32)seed)
diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index d6ff145..04fb47b 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -410,8 +410,7 @@ asmlinkage long sys_getsockopt(int fd, int level, int optname,
asmlinkage long sys_bind(int, struct sockaddr __user *, int);
asmlinkage long sys_connect(int, struct sockaddr __user *, int);
asmlinkage long sys_accept(int, struct sockaddr __user *, int __user *);
-asmlinkage long sys_paccept(int, struct sockaddr __user *, int __user *,
- const __user sigset_t *, size_t, int);
+asmlinkage long sys_accept4(int, struct sockaddr __user *, int __user *, int);
asmlinkage long sys_getsockname(int, struct sockaddr __user *, int __user *);
asmlinkage long sys_getpeername(int, struct sockaddr __user *, int __user *);
asmlinkage long sys_send(int, void __user *, size_t, unsigned);
diff --git a/kernel/sys_ni.c b/kernel/sys_ni.c
index a77b27b..e14a232 100644
--- a/kernel/sys_ni.c
+++ b/kernel/sys_ni.c
@@ -31,7 +31,7 @@ cond_syscall(sys_socketpair);
cond_syscall(sys_bind);
cond_syscall(sys_listen);
cond_syscall(sys_accept);
-cond_syscall(sys_paccept);
+cond_syscall(sys_accept4);
cond_syscall(sys_connect);
cond_syscall(sys_getsockname);
cond_syscall(sys_getpeername);
diff --git a/net/compat.c b/net/compat.c
index 67fb6a3..50ce0a9 100644
--- a/net/compat.c
+++ b/net/compat.c
@@ -725,7 +725,7 @@ EXPORT_SYMBOL(compat_mc_getsockopt);
static unsigned char nas[19]={AL(0),AL(3),AL(3),AL(3),AL(2),AL(3),
AL(3),AL(3),AL(4),AL(4),AL(4),AL(6),
AL(6),AL(2),AL(5),AL(5),AL(3),AL(3),
- AL(6)};
+ AL(4)};
#undef AL

asmlinkage long compat_sys_sendmsg(int fd, struct compat_msghdr __user *msg, unsigned flags)
@@ -738,52 +738,13 @@ asmlinkage long compat_sys_recvmsg(int fd, struct compat_msghdr __user *msg, uns
return sys_recvmsg(fd, (struct msghdr __user *)msg, flags | MSG_CMSG_COMPAT);
}

-asmlinkage long compat_sys_paccept(int fd, struct sockaddr __user *upeer_sockaddr,
- int __user *upeer_addrlen,
- const compat_sigset_t __user *sigmask,
- compat_size_t sigsetsize, int flags)
-{
- compat_sigset_t ss32;
- sigset_t ksigmask, sigsaved;
- int ret;
-
- if (sigmask) {
- if (sigsetsize != sizeof(compat_sigset_t))
- return -EINVAL;
- if (copy_from_user(&ss32, sigmask, sizeof(ss32)))
- return -EFAULT;
- sigset_from_compat(&ksigmask, &ss32);
-
- sigdelsetmask(&ksigmask, sigmask(SIGKILL)|sigmask(SIGSTOP));
- sigprocmask(SIG_SETMASK, &ksigmask, &sigsaved);
- }
-
- ret = do_accept(fd, upeer_sockaddr, upeer_addrlen, flags);
-
- if (ret == -ERESTARTNOHAND) {
- /*
- * Don't restore the signal mask yet. Let do_signal() deliver
- * the signal on the way back to userspace, before the signal
- * mask is restored.
- */
- if (sigmask) {
- memcpy(&current->saved_sigmask, &sigsaved,
- sizeof(sigsaved));
- set_restore_sigmask();
- }
- } else if (sigmask)
- sigprocmask(SIG_SETMASK, &sigsaved, NULL);
-
- return ret;
-}
-
asmlinkage long compat_sys_socketcall(int call, u32 __user *args)
{
int ret;
u32 a[6];
u32 a0, a1;

- if (call < SYS_SOCKET || call > SYS_PACCEPT)
+ if (call < SYS_SOCKET || call > SYS_ACCEPT4)
return -EINVAL;
if (copy_from_user(a, args, nas[call]))
return -EFAULT;
@@ -804,7 +765,7 @@ asmlinkage long compat_sys_socketcall(int call, u32 __user *args)
ret = sys_listen(a0, a1);
break;
case SYS_ACCEPT:
- ret = do_accept(a0, compat_ptr(a1), compat_ptr(a[2]), 0);
+ ret = sys_accept4(a0, compat_ptr(a1), compat_ptr(a[2]), 0);
break;
case SYS_GETSOCKNAME:
ret = sys_getsockname(a0, compat_ptr(a1), compat_ptr(a[2]));
@@ -844,9 +805,8 @@ asmlinkage long compat_sys_socketcall(int call, u32 __user *args)
case SYS_RECVMSG:
ret = compat_sys_recvmsg(a0, compat_ptr(a1), a[2]);
break;
- case SYS_PACCEPT:
- ret = compat_sys_paccept(a0, compat_ptr(a1), compat_ptr(a[2]),
- compat_ptr(a[3]), a[4], a[5]);
+ case SYS_ACCEPT4:
+ ret = sys_accept4(a0, compat_ptr(a1), compat_ptr(a[2]), a[3]);
break;
default:
ret = -EINVAL;
diff --git a/net/socket.c b/net/socket.c
index 2b7a4b5..8e72806 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -1427,8 +1427,8 @@ asmlinkage long sys_listen(int fd, int backlog)
* clean when we restucture accept also.
*/

-long do_accept(int fd, struct sockaddr __user *upeer_sockaddr,
- int __user *upeer_addrlen, int flags)
+asmlinkage long sys_accept4(int fd, struct sockaddr __user *upeer_sockaddr,
+ int __user *upeer_addrlen, int flags)
{
struct socket *sock, *newsock;
struct file *newfile;
@@ -1511,66 +1511,10 @@ out_fd:
goto out_put;
}

-#if 0
-#ifdef HAVE_SET_RESTORE_SIGMASK
-asmlinkage long sys_paccept(int fd, struct sockaddr __user *upeer_sockaddr,
- int __user *upeer_addrlen,
- const sigset_t __user *sigmask,
- size_t sigsetsize, int flags)
-{
- sigset_t ksigmask, sigsaved;
- int ret;
-
- if (sigmask) {
- /* XXX: Don't preclude handling different sized sigset_t's. */
- if (sigsetsize != sizeof(sigset_t))
- return -EINVAL;
- if (copy_from_user(&ksigmask, sigmask, sizeof(ksigmask)))
- return -EFAULT;
-
- sigdelsetmask(&ksigmask, sigmask(SIGKILL)|sigmask(SIGSTOP));
- sigprocmask(SIG_SETMASK, &ksigmask, &sigsaved);
- }
-
- ret = do_accept(fd, upeer_sockaddr, upeer_addrlen, flags);
-
- if (ret < 0 && signal_pending(current)) {
- /*
- * Don't restore the signal mask yet. Let do_signal() deliver
- * the signal on the way back to userspace, before the signal
- * mask is restored.
- */
- if (sigmask) {
- memcpy(&current->saved_sigmask, &sigsaved,
- sizeof(sigsaved));
- set_restore_sigmask();
- }
- } else if (sigmask)
- sigprocmask(SIG_SETMASK, &sigsaved, NULL);
-
- return ret;
-}
-#else
-asmlinkage long sys_paccept(int fd, struct sockaddr __user *upeer_sockaddr,
- int __user *upeer_addrlen,
- const sigset_t __user *sigmask,
- size_t sigsetsize, int flags)
-{
- /* The platform does not support restoring the signal mask in the
- * return path. So we do not allow using paccept() with a signal
- * mask. */
- if (sigmask)
- return -EINVAL;
-
- return do_accept(fd, upeer_sockaddr, upeer_addrlen, flags);
-}
-#endif
-#endif
-
asmlinkage long sys_accept(int fd, struct sockaddr __user *upeer_sockaddr,
int __user *upeer_addrlen)
{
- return do_accept(fd, upeer_sockaddr, upeer_addrlen, 0);
+ return sys_accept4(fd, upeer_sockaddr, upeer_addrlen, 0);
}

/*
@@ -2097,7 +2041,7 @@ static const unsigned char nargs[19]={
AL(0),AL(3),AL(3),AL(3),AL(2),AL(3),
AL(3),AL(3),AL(4),AL(4),AL(4),AL(6),
AL(6),AL(2),AL(5),AL(5),AL(3),AL(3),
- AL(6)
+ AL(4)
};

#undef AL
@@ -2116,7 +2060,7 @@ asmlinkage long sys_socketcall(int call, unsigned long __user *args)
unsigned long a0, a1;
int err;

- if (call < 1 || call > SYS_PACCEPT)
+ if (call < 1 || call > SYS_ACCEPT4)
return -EINVAL;

/* copy_from_user should be SMP safe. */
@@ -2144,9 +2088,8 @@ asmlinkage long sys_socketcall(int call, unsigned long __user *args)
err = sys_listen(a0, a1);
break;
case SYS_ACCEPT:
- err =
- do_accept(a0, (struct sockaddr __user *)a1,
- (int __user *)a[2], 0);
+ err = sys_accept4(a0, (struct sockaddr __user *)a1,
+ (int __user *)a[2], 0);
break;
case SYS_GETSOCKNAME:
err =
@@ -2193,12 +2136,9 @@ asmlinkage long sys_socketcall(int call, unsigned long __user *args)
case SYS_RECVMSG:
err = sys_recvmsg(a0, (struct msghdr __user *)a1, a[2]);
break;
- case SYS_PACCEPT:
- err =
- sys_paccept(a0, (struct sockaddr __user *)a1,
- (int __user *)a[2],
- (const sigset_t __user *) a[3],
- a[4], a[5]);
+ case SYS_ACCEPT4:
+ err = sys_accept4(a0, (struct sockaddr __user *)a1,
+ (int __user *)a[2], a[3]);
break;
default:
err = -EINVAL;


2008-10-28 03:42:00

by Andrew Morton

[permalink] [raw]
Subject: Re: [PATCH] reintroduce accept4

(cc linux-api)

(cc linux-arch)

On Sun, 26 Oct 2008 12:41:50 -0400 Ulrich Drepper <[email protected]> wrote:

> This patch reintroduces accept4, replacing paccept. It's easy to see that
> the patch only removes code and then redirects existing code away from the
> removed functions. Since the paccept code sans signal handling was never
> in question I think there is no reason to quarantine the patch first.

I'll confess to not having a clue what's going on here.

What is accept4() and why do I want one? Sigh. Hopefully others have
been following more closely and have some context.

> I've updated the test program which now looks as follows:

>
> #include <fcntl.h>
> #include <pthread.h>
> #include <stdio.h>
> #include <unistd.h>
> #include <netinet/in.h>
> #include <sys/socket.h>
> #include <sys/syscall.h>
>
> #ifdef __x86_64__
> #define __NR_accept4 288
> #define SOCK_CLOEXEC O_CLOEXEC
> #elif __i386__
> #define SYS_ACCEPT4 18
> #define USE_SOCKETCALL 1
> #define SOCK_CLOEXEC O_CLOEXEC
> #else

Well. This doesn't actually agree with the kernel patch.

>
> ...
>
> arch/x86/include/asm/unistd_64.h | 4 -
> include/linux/net.h | 6 --
> include/linux/syscalls.h | 3 -
> kernel/sys_ni.c | 2
> net/compat.c | 50 ++----------------------
> net/socket.c | 80 ++++-----------------------------------

I'd suggest that i386 is sufficiently common to warrant its inclusion
in the initial patch.

2008-10-28 04:23:19

by Ulrich Drepper

[permalink] [raw]
Subject: Re: [PATCH] reintroduce accept4

-----BEGIN PGP SIGNED MESSAGE-----
Hash: SHA1

Andrew Morton wrote:
> I'll confess to not having a clue what's going on here.

It has been discussed at length. accept created file descriptors and we
need flags for tis.

>> #elif __i386__
>> #define SYS_ACCEPT4 18
>> #define USE_SOCKETCALL 1
>> #define SOCK_CLOEXEC O_CLOEXEC
>> #else
>
> Well. This doesn't actually agree with the kernel patch.

What doesn't agree?


> I'd suggest that i386 is sufficiently common to warrant its inclusion
> in the initial patch.

The x86 code is included. x86 uses socketcall instead of a syscall.

I changed all paccept occurrences in the tree, not just x86-64.

- --
➧ Ulrich Drepper ➧ Red Hat, Inc. ➧ 444 Castro St ➧ Mountain View, CA ❖
-----BEGIN PGP SIGNATURE-----
Version: GnuPG v1.4.9 (GNU/Linux)
Comment: Using GnuPG with Fedora - http://enigmail.mozdev.org

iEYEARECAAYFAkkGk6MACgkQ2ijCOnn/RHSwSgCfeb/PGDCm2qy0ZESqWb8wgyhX
cHoAoIIXvhBjdxcj2gy09eQBzNKOTwCU
=QSFh
-----END PGP SIGNATURE-----

2008-10-28 04:54:08

by Andrew Morton

[permalink] [raw]
Subject: Re: [PATCH] reintroduce accept4

(really does cc linux-arch this time)

On Mon, 27 Oct 2008 21:22:59 -0700 Ulrich Drepper <[email protected]> wrote:

> -----BEGIN PGP SIGNED MESSAGE-----
> Hash: SHA1
>
> Andrew Morton wrote:
> > I'll confess to not having a clue what's going on here.
>
> It has been discussed at length.

That's of little use to people trying to understand the git commit.

And it's of little use to people who write the documentation.
Hopefully Michael has been following this closely enough to get by with
what we have here (ie: nothing).

And it's of little use to people who are trying to review this patch
and who haven't followed this alleged lengthy discussion. (ie: me)

And it's of little use to people who write for sites such as lwn.net,
http://kernelnewbies.org/LinuxChanges etc.

This is not some personal weekend hack project. It is the Linux
kernel, used by millions around the world.

> accept created file descriptors and we
> need flags for tis.
>
> >> #elif __i386__
> >> #define SYS_ACCEPT4 18
> >> #define USE_SOCKETCALL 1
> >> #define SOCK_CLOEXEC O_CLOEXEC
> >> #else
> >
> > Well. This doesn't actually agree with the kernel patch.
>
> What doesn't agree?
>
>
> > I'd suggest that i386 is sufficiently common to warrant its inclusion
> > in the initial patch.
>
> The x86 code is included. x86 uses socketcall instead of a syscall.

<wonders why arch/x86/include/asm/unistd_64.h defines
__ARCH_WANT_SYS_SOCKETCALL but doesn't wire up sys_socketcall()>

> I changed all paccept occurrences in the tree, not just x86-64.

OK, that covered a lot of architectures.

Subject: Re: [PATCH] reintroduce accept4

[linux-api + many indiiduals added to CC]

Ulrich,

On Sun, Oct 26, 2008 at 11:41 AM, Ulrich Drepper <[email protected]> wrote:
> This patch reintroduces accept4, replacing paccept. It's easy to see that
> the patch only removes code and then redirects existing code away from the
> removed functions. Since the paccept code sans signal handling was never
> in question I think there is no reason to quarantine the patch first.

When you (re)submit a patch:

1. Please take the trouble to CC interested parties. For example, the
people CCed in recent threads that discussed earlier versions of the
the patch. This is basic LKML basic etiquette, since almost no one
reads all LKML, or reads it hourly. Doing this allows interested
prties to comment on the patch, and also avoids patches hitting the
kernel "under the radar".

2. Don't assume anyone has cached earlier discussions of the topic.
Write a clear, complete description of the patch tht could b red by
someone new to the topic, something like a summary of
http://lwn.net/Articles/281965/ and http://udrepper.livejournal.com/20407.html

3. Explain what changes have been made from earlier versions of the
patch, and why.
E.g., some discussion thatsummarizes this:
http://thread.gmane.org/gmane.linux.network/106071

4. CC [email protected] on patches thatare API changes. This
requirement was added to SubmitChecklist in the last few weeks.

5. CC me or [email protected], so that something makes its way
into man-pages.

Cheers,

Michael

> I've updated the test program which now looks as follows:
>
> #include <fcntl.h>
> #include <pthread.h>
> #include <stdio.h>
> #include <unistd.h>
> #include <netinet/in.h>
> #include <sys/socket.h>
> #include <sys/syscall.h>
>
> #ifdef __x86_64__
> #define __NR_accept4 288
> #define SOCK_CLOEXEC O_CLOEXEC
> #elif __i386__
> #define SYS_ACCEPT4 18
> #define USE_SOCKETCALL 1
> #define SOCK_CLOEXEC O_CLOEXEC
> #else
> #error "define syscall numbers for this architecture"
> #endif
>
> #define PORT 57392
>
> static pthread_barrier_t b;
>
> static void *
> tf (void *arg)
> {
> pthread_barrier_wait (&b);
> int s = socket (AF_INET, SOCK_STREAM, 0);
> struct sockaddr_in sin;
> sin.sin_family = AF_INET;
> sin.sin_addr.s_addr = htonl (INADDR_LOOPBACK);
> sin.sin_port = htons (PORT);
> connect (s, (const struct sockaddr *) &sin, sizeof (sin));
> close (s);
> pthread_barrier_wait (&b);
>
> pthread_barrier_wait (&b);
> s = socket (AF_INET, SOCK_STREAM, 0);
> sin.sin_family = AF_INET;
> sin.sin_addr.s_addr = htonl (INADDR_LOOPBACK);
> sin.sin_port = htons (PORT + 1);
> connect (s, (const struct sockaddr *) &sin, sizeof (sin));
> close (s);
> return NULL;
> }
>
> int
> main (void)
> {
> alarm (5);
>
> int status = 0;
>
> pthread_barrier_init (&b, NULL, 2);
>
> pthread_t th;
> if (pthread_create (&th, NULL, tf, NULL) != 0)
> {
> puts ("pthread_create failed");
> status = 1;
> }
> else
> {
> int s = socket (AF_INET, SOCK_STREAM, 0);
> int fl = 1;
> setsockopt(s, SOL_SOCKET, SO_REUSEADDR, &fl, sizeof (fl));
> struct sockaddr_in sin;
> sin.sin_family = AF_INET;
> sin.sin_addr.s_addr = htonl (INADDR_LOOPBACK);
> sin.sin_port = htons (PORT);
> bind (s, (struct sockaddr *) &sin, sizeof (sin));
> listen (s, SOMAXCONN);
>
> pthread_barrier_wait (&b);
>
> int s2 = accept (s, NULL, 0);
> if (s2 < 0)
> {
> puts ("accept failed");
> status = 1;
> }
> else
> {
> int fl = fcntl (s2, F_GETFD);
> if ((fl & FD_CLOEXEC) != 0)
> {
> puts ("accept did set CLOEXEC");
> status = 1;
> }
>
> close (s2);
> }
>
> close (s);
>
> pthread_barrier_wait (&b);
>
> sin.sin_family = AF_INET;
> sin.sin_addr.s_addr = htonl (INADDR_LOOPBACK);
> sin.sin_port = htons (PORT + 1);
> s = socket (AF_INET, SOCK_STREAM, 0);
> bind (s, (struct sockaddr *) &sin, sizeof (sin));
> listen (s, SOMAXCONN);
>
> pthread_barrier_wait (&b);
>
> #if USE_SOCKETCALL
> s2 = syscall (__NR_socketcall, SYS_ACCEPT4, s, NULL, 0, SOCK_CLOEXEC);
> #else
> s2 = syscall (__NR_accept4, s, NULL, 0, SOCK_CLOEXEC);
> #endif
> if (s2 < 0)
> {
> puts ("accept4 failed");
> status = 1;
> }
> else
> {
> int fl = fcntl (s2, F_GETFD);
> if ((fl & FD_CLOEXEC) == 0)
> {
> puts ("accept4 did not set CLOEXEC");
> status = 1;
> }
>
> close (s2);
> }
>
> close (s);
> }
>
> return status;
> }
>
>
> arch/x86/include/asm/unistd_64.h | 4 -
> include/linux/net.h | 6 --
> include/linux/syscalls.h | 3 -
> kernel/sys_ni.c | 2
> net/compat.c | 50 ++----------------------
> net/socket.c | 80 ++++-----------------------------------
> 6 files changed, 21 insertions(+), 124 deletions(-)
>
>
> Signed-off-by: Ulrich Drepper <[email protected]>
>
> diff --git a/arch/x86/include/asm/unistd_64.h b/arch/x86/include/asm/unistd_64.h
> index 834b2c1..d2e415e 100644
> --- a/arch/x86/include/asm/unistd_64.h
> +++ b/arch/x86/include/asm/unistd_64.h
> @@ -639,8 +639,8 @@ __SYSCALL(__NR_fallocate, sys_fallocate)
> __SYSCALL(__NR_timerfd_settime, sys_timerfd_settime)
> #define __NR_timerfd_gettime 287
> __SYSCALL(__NR_timerfd_gettime, sys_timerfd_gettime)
> -#define __NR_paccept 288
> -__SYSCALL(__NR_paccept, sys_paccept)
> +#define __NR_accept4 288
> +__SYSCALL(__NR_accept4, sys_accept4)
> #define __NR_signalfd4 289
> __SYSCALL(__NR_signalfd4, sys_signalfd4)
> #define __NR_eventfd2 290
> diff --git a/include/linux/net.h b/include/linux/net.h
> index 6dc14a2..4515efa 100644
> --- a/include/linux/net.h
> +++ b/include/linux/net.h
> @@ -40,7 +40,7 @@
> #define SYS_GETSOCKOPT 15 /* sys_getsockopt(2) */
> #define SYS_SENDMSG 16 /* sys_sendmsg(2) */
> #define SYS_RECVMSG 17 /* sys_recvmsg(2) */
> -#define SYS_PACCEPT 18 /* sys_paccept(2) */
> +#define SYS_ACCEPT4 18 /* sys_accept4(2) */
>
> typedef enum {
> SS_FREE = 0, /* not allocated */
> @@ -100,7 +100,7 @@ enum sock_type {
> * remaining bits are used as flags. */
> #define SOCK_TYPE_MASK 0xf
>
> -/* Flags for socket, socketpair, paccept */
> +/* Flags for socket, socketpair, accept4 */
> #define SOCK_CLOEXEC O_CLOEXEC
> #ifndef SOCK_NONBLOCK
> #define SOCK_NONBLOCK O_NONBLOCK
> @@ -223,8 +223,6 @@ extern int sock_map_fd(struct socket *sock, int flags);
> extern struct socket *sockfd_lookup(int fd, int *err);
> #define sockfd_put(sock) fput(sock->file)
> extern int net_ratelimit(void);
> -extern long do_accept(int fd, struct sockaddr __user *upeer_sockaddr,
> - int __user *upeer_addrlen, int flags);
>
> #define net_random() random32()
> #define net_srandom(seed) srandom32((__force u32)seed)
> diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
> index d6ff145..04fb47b 100644
> --- a/include/linux/syscalls.h
> +++ b/include/linux/syscalls.h
> @@ -410,8 +410,7 @@ asmlinkage long sys_getsockopt(int fd, int level, int optname,
> asmlinkage long sys_bind(int, struct sockaddr __user *, int);
> asmlinkage long sys_connect(int, struct sockaddr __user *, int);
> asmlinkage long sys_accept(int, struct sockaddr __user *, int __user *);
> -asmlinkage long sys_paccept(int, struct sockaddr __user *, int __user *,
> - const __user sigset_t *, size_t, int);
> +asmlinkage long sys_accept4(int, struct sockaddr __user *, int __user *, int);
> asmlinkage long sys_getsockname(int, struct sockaddr __user *, int __user *);
> asmlinkage long sys_getpeername(int, struct sockaddr __user *, int __user *);
> asmlinkage long sys_send(int, void __user *, size_t, unsigned);
> diff --git a/kernel/sys_ni.c b/kernel/sys_ni.c
> index a77b27b..e14a232 100644
> --- a/kernel/sys_ni.c
> +++ b/kernel/sys_ni.c
> @@ -31,7 +31,7 @@ cond_syscall(sys_socketpair);
> cond_syscall(sys_bind);
> cond_syscall(sys_listen);
> cond_syscall(sys_accept);
> -cond_syscall(sys_paccept);
> +cond_syscall(sys_accept4);
> cond_syscall(sys_connect);
> cond_syscall(sys_getsockname);
> cond_syscall(sys_getpeername);
> diff --git a/net/compat.c b/net/compat.c
> index 67fb6a3..50ce0a9 100644
> --- a/net/compat.c
> +++ b/net/compat.c
> @@ -725,7 +725,7 @@ EXPORT_SYMBOL(compat_mc_getsockopt);
> static unsigned char nas[19]={AL(0),AL(3),AL(3),AL(3),AL(2),AL(3),
> AL(3),AL(3),AL(4),AL(4),AL(4),AL(6),
> AL(6),AL(2),AL(5),AL(5),AL(3),AL(3),
> - AL(6)};
> + AL(4)};
> #undef AL
>
> asmlinkage long compat_sys_sendmsg(int fd, struct compat_msghdr __user *msg, unsigned flags)
> @@ -738,52 +738,13 @@ asmlinkage long compat_sys_recvmsg(int fd, struct compat_msghdr __user *msg, uns
> return sys_recvmsg(fd, (struct msghdr __user *)msg, flags | MSG_CMSG_COMPAT);
> }
>
> -asmlinkage long compat_sys_paccept(int fd, struct sockaddr __user *upeer_sockaddr,
> - int __user *upeer_addrlen,
> - const compat_sigset_t __user *sigmask,
> - compat_size_t sigsetsize, int flags)
> -{
> - compat_sigset_t ss32;
> - sigset_t ksigmask, sigsaved;
> - int ret;
> -
> - if (sigmask) {
> - if (sigsetsize != sizeof(compat_sigset_t))
> - return -EINVAL;
> - if (copy_from_user(&ss32, sigmask, sizeof(ss32)))
> - return -EFAULT;
> - sigset_from_compat(&ksigmask, &ss32);
> -
> - sigdelsetmask(&ksigmask, sigmask(SIGKILL)|sigmask(SIGSTOP));
> - sigprocmask(SIG_SETMASK, &ksigmask, &sigsaved);
> - }
> -
> - ret = do_accept(fd, upeer_sockaddr, upeer_addrlen, flags);
> -
> - if (ret == -ERESTARTNOHAND) {
> - /*
> - * Don't restore the signal mask yet. Let do_signal() deliver
> - * the signal on the way back to userspace, before the signal
> - * mask is restored.
> - */
> - if (sigmask) {
> - memcpy(&current->saved_sigmask, &sigsaved,
> - sizeof(sigsaved));
> - set_restore_sigmask();
> - }
> - } else if (sigmask)
> - sigprocmask(SIG_SETMASK, &sigsaved, NULL);
> -
> - return ret;
> -}
> -
> asmlinkage long compat_sys_socketcall(int call, u32 __user *args)
> {
> int ret;
> u32 a[6];
> u32 a0, a1;
>
> - if (call < SYS_SOCKET || call > SYS_PACCEPT)
> + if (call < SYS_SOCKET || call > SYS_ACCEPT4)
> return -EINVAL;
> if (copy_from_user(a, args, nas[call]))
> return -EFAULT;
> @@ -804,7 +765,7 @@ asmlinkage long compat_sys_socketcall(int call, u32 __user *args)
> ret = sys_listen(a0, a1);
> break;
> case SYS_ACCEPT:
> - ret = do_accept(a0, compat_ptr(a1), compat_ptr(a[2]), 0);
> + ret = sys_accept4(a0, compat_ptr(a1), compat_ptr(a[2]), 0);
> break;
> case SYS_GETSOCKNAME:
> ret = sys_getsockname(a0, compat_ptr(a1), compat_ptr(a[2]));
> @@ -844,9 +805,8 @@ asmlinkage long compat_sys_socketcall(int call, u32 __user *args)
> case SYS_RECVMSG:
> ret = compat_sys_recvmsg(a0, compat_ptr(a1), a[2]);
> break;
> - case SYS_PACCEPT:
> - ret = compat_sys_paccept(a0, compat_ptr(a1), compat_ptr(a[2]),
> - compat_ptr(a[3]), a[4], a[5]);
> + case SYS_ACCEPT4:
> + ret = sys_accept4(a0, compat_ptr(a1), compat_ptr(a[2]), a[3]);
> break;
> default:
> ret = -EINVAL;
> diff --git a/net/socket.c b/net/socket.c
> index 2b7a4b5..8e72806 100644
> --- a/net/socket.c
> +++ b/net/socket.c
> @@ -1427,8 +1427,8 @@ asmlinkage long sys_listen(int fd, int backlog)
> * clean when we restucture accept also.
> */
>
> -long do_accept(int fd, struct sockaddr __user *upeer_sockaddr,
> - int __user *upeer_addrlen, int flags)
> +asmlinkage long sys_accept4(int fd, struct sockaddr __user *upeer_sockaddr,
> + int __user *upeer_addrlen, int flags)
> {
> struct socket *sock, *newsock;
> struct file *newfile;
> @@ -1511,66 +1511,10 @@ out_fd:
> goto out_put;
> }
>
> -#if 0
> -#ifdef HAVE_SET_RESTORE_SIGMASK
> -asmlinkage long sys_paccept(int fd, struct sockaddr __user *upeer_sockaddr,
> - int __user *upeer_addrlen,
> - const sigset_t __user *sigmask,
> - size_t sigsetsize, int flags)
> -{
> - sigset_t ksigmask, sigsaved;
> - int ret;
> -
> - if (sigmask) {
> - /* XXX: Don't preclude handling different sized sigset_t's. */
> - if (sigsetsize != sizeof(sigset_t))
> - return -EINVAL;
> - if (copy_from_user(&ksigmask, sigmask, sizeof(ksigmask)))
> - return -EFAULT;
> -
> - sigdelsetmask(&ksigmask, sigmask(SIGKILL)|sigmask(SIGSTOP));
> - sigprocmask(SIG_SETMASK, &ksigmask, &sigsaved);
> - }
> -
> - ret = do_accept(fd, upeer_sockaddr, upeer_addrlen, flags);
> -
> - if (ret < 0 && signal_pending(current)) {
> - /*
> - * Don't restore the signal mask yet. Let do_signal() deliver
> - * the signal on the way back to userspace, before the signal
> - * mask is restored.
> - */
> - if (sigmask) {
> - memcpy(&current->saved_sigmask, &sigsaved,
> - sizeof(sigsaved));
> - set_restore_sigmask();
> - }
> - } else if (sigmask)
> - sigprocmask(SIG_SETMASK, &sigsaved, NULL);
> -
> - return ret;
> -}
> -#else
> -asmlinkage long sys_paccept(int fd, struct sockaddr __user *upeer_sockaddr,
> - int __user *upeer_addrlen,
> - const sigset_t __user *sigmask,
> - size_t sigsetsize, int flags)
> -{
> - /* The platform does not support restoring the signal mask in the
> - * return path. So we do not allow using paccept() with a signal
> - * mask. */
> - if (sigmask)
> - return -EINVAL;
> -
> - return do_accept(fd, upeer_sockaddr, upeer_addrlen, flags);
> -}
> -#endif
> -#endif
> -
> asmlinkage long sys_accept(int fd, struct sockaddr __user *upeer_sockaddr,
> int __user *upeer_addrlen)
> {
> - return do_accept(fd, upeer_sockaddr, upeer_addrlen, 0);
> + return sys_accept4(fd, upeer_sockaddr, upeer_addrlen, 0);
> }
>
> /*
> @@ -2097,7 +2041,7 @@ static const unsigned char nargs[19]={
> AL(0),AL(3),AL(3),AL(3),AL(2),AL(3),
> AL(3),AL(3),AL(4),AL(4),AL(4),AL(6),
> AL(6),AL(2),AL(5),AL(5),AL(3),AL(3),
> - AL(6)
> + AL(4)
> };
>
> #undef AL
> @@ -2116,7 +2060,7 @@ asmlinkage long sys_socketcall(int call, unsigned long __user *args)
> unsigned long a0, a1;
> int err;
>
> - if (call < 1 || call > SYS_PACCEPT)
> + if (call < 1 || call > SYS_ACCEPT4)
> return -EINVAL;
>
> /* copy_from_user should be SMP safe. */
> @@ -2144,9 +2088,8 @@ asmlinkage long sys_socketcall(int call, unsigned long __user *args)
> err = sys_listen(a0, a1);
> break;
> case SYS_ACCEPT:
> - err =
> - do_accept(a0, (struct sockaddr __user *)a1,
> - (int __user *)a[2], 0);
> + err = sys_accept4(a0, (struct sockaddr __user *)a1,
> + (int __user *)a[2], 0);
> break;
> case SYS_GETSOCKNAME:
> err =
> @@ -2193,12 +2136,9 @@ asmlinkage long sys_socketcall(int call, unsigned long __user *args)
> case SYS_RECVMSG:
> err = sys_recvmsg(a0, (struct msghdr __user *)a1, a[2]);
> break;
> - case SYS_PACCEPT:
> - err =
> - sys_paccept(a0, (struct sockaddr __user *)a1,
> - (int __user *)a[2],
> - (const sigset_t __user *) a[3],
> - a[4], a[5]);
> + case SYS_ACCEPT4:
> + err = sys_accept4(a0, (struct sockaddr __user *)a1,
> + (int __user *)a[2], a[3]);
> break;
> default:
> err = -EINVAL;
> --
> To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
> the body of a message to [email protected]
> More majordomo info at http://vger.kernel.org/majordomo-info.html
> Please read the FAQ at http://www.tux.org/lkml/
>



--
Michael Kerrisk Linux man-pages maintainer;
http://www.kernel.org/doc/man-pages/ Found a documentation bug?
http://www.kernel.org/doc/man-pages/reporting_bugs.html

Subject: Re: [PATCH] reintroduce accept4

Andrew,

On 10/26/08, Ulrich Drepper <[email protected]> wrote:
> This patch reintroduces accept4, replacing paccept. It's easy to see that
> the patch only removes code and then redirects existing code away from the
> removed functions. Since the paccept code sans signal handling was never
> in question I think there is no reason to quarantine the patch first.

I see you accepted this patch into -mm. I've finally got to looking
at and testing this, so:

Tested-by: Michael Kerrisk <[email protected]>
Acked-by: Michael Kerrisk <[email protected]>

In my tests, everything looks fine. I'll forward my test program in a
follow-up mail.

I think Ulrich wanted to try to see this patch in for 2.6.28; it's
past the merge window of course, so it's up to you, but I have no
problem with that. The API is the one that Ulrich initially proposed,
before taking a detour into paccept()
(http://thread.gmane.org/gmane.linux.kernel/671443 ), which I argued
against (http://thread.gmane.org/gmane.linux.kernel/723952,
http://thread.gmane.org/gmane.linux.network/106071/), since I (and
Roland) could see no reason for the added complexity of a signal set
argument (like pselect()/ppoll()/epoll_pwait()). (In any case, if
someone does come up with a compelling reason to add a sigset
argument, then we can add it via the use of a new flag bit.)

My only argument is with the name of the new sysytem call.

> I've updated the test program which now looks as follows:

(I assume that there had been no testing on x86-32, since, the
__i386__ ifdef's notwithstanding, the program below can't work on
x86-32 -- sys_socketcall() takes its arguments packaged into an array
on x86-32, not as an inline list.)

Andrew, you noted a lack of explanation accompanying the original
patch. Here's something to fill the gap, and which may be suitable
for the changelog.

==
Introduce a new accept4() system call. The addition of this system
call matches analogous changes in 2.6.27 (dup3(), evenfd2(),
signalfd4(), inotify_init1(), epoll_create1(), pipe2()) which added
new system calls that differed from analogous traditional system calls
in adding a flags argument that can be used to access additional
functionality. The accept4() system call is exactly the same as
accept(), except that it adds a flags bit-mask argument. Two flags
are initially implemented. (Most of the new system calls in 2.6.27
also had both of these flags.) SOCK_CLOEXEC causes the close-on-exec
(FD_CLOEXEC) flag to be enabled for the new file descriptor returned
by accept4(). This is a useful security feature to avoid leaking
information in a multithreaded program where one thread is doing an
accept() at the same time as another thread is doing a fork() plus
exec(). (More details here:
http://udrepper.livejournal.com/20407.html "Secure File Descriptor
Handling", Ulrich Drepper) The other flag is SOCK_NONBLOCK, which
causes the O_NONBLOCK flag to be enabled on the new open file
description created by accept4(). (This flag is merely a convenience,
saving the use of additional calls fcntl(F_GETFL) and fcntl (F_SETFL)
to achieve the same result.)
==

For the curious, some further background on accept4() and the new
system calls in 2.6.27 is at
http://linux-man-pages.blogspot.com/2008/10/recent-changes-in-file-descriptor.html
and http://lwn.net/Articles/281965/)

Cheers,

Michael

> #include <fcntl.h>
> #include <pthread.h>
> #include <stdio.h>
> #include <unistd.h>
> #include <netinet/in.h>
> #include <sys/socket.h>
> #include <sys/syscall.h>
>
> #ifdef __x86_64__
> #define __NR_accept4 288
> #define SOCK_CLOEXEC O_CLOEXEC
> #elif __i386__
> #define SYS_ACCEPT4 18
> #define USE_SOCKETCALL 1
> #define SOCK_CLOEXEC O_CLOEXEC
> #else
> #error "define syscall numbers for this architecture"
> #endif
>
> #define PORT 57392
>
> static pthread_barrier_t b;
>
> static void *
> tf (void *arg)
> {
> pthread_barrier_wait (&b);
> int s = socket (AF_INET, SOCK_STREAM, 0);
> struct sockaddr_in sin;
> sin.sin_family = AF_INET;
> sin.sin_addr.s_addr = htonl (INADDR_LOOPBACK);
> sin.sin_port = htons (PORT);
> connect (s, (const struct sockaddr *) &sin, sizeof (sin));
> close (s);
> pthread_barrier_wait (&b);
>
> pthread_barrier_wait (&b);
> s = socket (AF_INET, SOCK_STREAM, 0);
> sin.sin_family = AF_INET;
> sin.sin_addr.s_addr = htonl (INADDR_LOOPBACK);
> sin.sin_port = htons (PORT + 1);
> connect (s, (const struct sockaddr *) &sin, sizeof (sin));
> close (s);
> return NULL;
> }
>
> int
> main (void)
> {
> alarm (5);
>
> int status = 0;
>
> pthread_barrier_init (&b, NULL, 2);
>
> pthread_t th;
> if (pthread_create (&th, NULL, tf, NULL) != 0)
> {
> puts ("pthread_create failed");
> status = 1;
> }
> else
> {
> int s = socket (AF_INET, SOCK_STREAM, 0);
> int fl = 1;
> setsockopt(s, SOL_SOCKET, SO_REUSEADDR, &fl, sizeof (fl));
> struct sockaddr_in sin;
> sin.sin_family = AF_INET;
> sin.sin_addr.s_addr = htonl (INADDR_LOOPBACK);
> sin.sin_port = htons (PORT);
> bind (s, (struct sockaddr *) &sin, sizeof (sin));
> listen (s, SOMAXCONN);
>
> pthread_barrier_wait (&b);
>
> int s2 = accept (s, NULL, 0);
> if (s2 < 0)
> {
> puts ("accept failed");
> status = 1;
> }
> else
> {
> int fl = fcntl (s2, F_GETFD);
> if ((fl & FD_CLOEXEC) != 0)
> {
> puts ("accept did set CLOEXEC");
> status = 1;
> }
>
> close (s2);
> }
>
> close (s);
>
> pthread_barrier_wait (&b);
>
> sin.sin_family = AF_INET;
> sin.sin_addr.s_addr = htonl (INADDR_LOOPBACK);
> sin.sin_port = htons (PORT + 1);
> s = socket (AF_INET, SOCK_STREAM, 0);
> bind (s, (struct sockaddr *) &sin, sizeof (sin));
> listen (s, SOMAXCONN);
>
> pthread_barrier_wait (&b);
>
> #if USE_SOCKETCALL
> s2 = syscall (__NR_socketcall, SYS_ACCEPT4, s, NULL, 0, SOCK_CLOEXEC);
> #else
> s2 = syscall (__NR_accept4, s, NULL, 0, SOCK_CLOEXEC);
> #endif
> if (s2 < 0)
> {
> puts ("accept4 failed");
> status = 1;
> }
> else
> {
> int fl = fcntl (s2, F_GETFD);
> if ((fl & FD_CLOEXEC) == 0)
> {
> puts ("accept4 did not set CLOEXEC");
> status = 1;
> }
>
> close (s2);
> }
>
> close (s);
> }
>
> return status;
> }
>
>
> arch/x86/include/asm/unistd_64.h | 4 -
> include/linux/net.h | 6 --
> include/linux/syscalls.h | 3 -
> kernel/sys_ni.c | 2
> net/compat.c | 50 ++----------------------
> net/socket.c | 80
> ++++-----------------------------------
> 6 files changed, 21 insertions(+), 124 deletions(-)
>
>
> Signed-off-by: Ulrich Drepper <[email protected]>
>
> diff --git a/arch/x86/include/asm/unistd_64.h
> b/arch/x86/include/asm/unistd_64.h
> index 834b2c1..d2e415e 100644
> --- a/arch/x86/include/asm/unistd_64.h
> +++ b/arch/x86/include/asm/unistd_64.h
> @@ -639,8 +639,8 @@ __SYSCALL(__NR_fallocate, sys_fallocate)
> __SYSCALL(__NR_timerfd_settime, sys_timerfd_settime)
> #define __NR_timerfd_gettime 287
> __SYSCALL(__NR_timerfd_gettime, sys_timerfd_gettime)
> -#define __NR_paccept 288
> -__SYSCALL(__NR_paccept, sys_paccept)
> +#define __NR_accept4 288
> +__SYSCALL(__NR_accept4, sys_accept4)
> #define __NR_signalfd4 289
> __SYSCALL(__NR_signalfd4, sys_signalfd4)
> #define __NR_eventfd2 290
> diff --git a/include/linux/net.h b/include/linux/net.h
> index 6dc14a2..4515efa 100644
> --- a/include/linux/net.h
> +++ b/include/linux/net.h
> @@ -40,7 +40,7 @@
> #define SYS_GETSOCKOPT 15 /* sys_getsockopt(2) */
> #define SYS_SENDMSG 16 /* sys_sendmsg(2) */
> #define SYS_RECVMSG 17 /* sys_recvmsg(2) */
> -#define SYS_PACCEPT 18 /* sys_paccept(2) */
> +#define SYS_ACCEPT4 18 /* sys_accept4(2) */
>
> typedef enum {
> SS_FREE = 0, /* not allocated */
> @@ -100,7 +100,7 @@ enum sock_type {
> * remaining bits are used as flags. */
> #define SOCK_TYPE_MASK 0xf
>
> -/* Flags for socket, socketpair, paccept */
> +/* Flags for socket, socketpair, accept4 */
> #define SOCK_CLOEXEC O_CLOEXEC
> #ifndef SOCK_NONBLOCK
> #define SOCK_NONBLOCK O_NONBLOCK
> @@ -223,8 +223,6 @@ extern int sock_map_fd(struct socket *sock, int
> flags);
> extern struct socket *sockfd_lookup(int fd, int *err);
> #define sockfd_put(sock) fput(sock->file)
> extern int net_ratelimit(void);
> -extern long do_accept(int fd, struct sockaddr __user *upeer_sockaddr,
> - int __user *upeer_addrlen, int flags);
>
> #define net_random() random32()
> #define net_srandom(seed) srandom32((__force u32)seed)
> diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
> index d6ff145..04fb47b 100644
> --- a/include/linux/syscalls.h
> +++ b/include/linux/syscalls.h
> @@ -410,8 +410,7 @@ asmlinkage long sys_getsockopt(int fd, int level, int
> optname,
> asmlinkage long sys_bind(int, struct sockaddr __user *, int);
> asmlinkage long sys_connect(int, struct sockaddr __user *, int);
> asmlinkage long sys_accept(int, struct sockaddr __user *, int __user *);
> -asmlinkage long sys_paccept(int, struct sockaddr __user *, int __user *,
> - const __user sigset_t *, size_t, int);
> +asmlinkage long sys_accept4(int, struct sockaddr __user *, int __user *,
> int);
> asmlinkage long sys_getsockname(int, struct sockaddr __user *, int __user
> *);
> asmlinkage long sys_getpeername(int, struct sockaddr __user *, int __user
> *);
> asmlinkage long sys_send(int, void __user *, size_t, unsigned);
> diff --git a/kernel/sys_ni.c b/kernel/sys_ni.c
> index a77b27b..e14a232 100644
> --- a/kernel/sys_ni.c
> +++ b/kernel/sys_ni.c
> @@ -31,7 +31,7 @@ cond_syscall(sys_socketpair);
> cond_syscall(sys_bind);
> cond_syscall(sys_listen);
> cond_syscall(sys_accept);
> -cond_syscall(sys_paccept);
> +cond_syscall(sys_accept4);
> cond_syscall(sys_connect);
> cond_syscall(sys_getsockname);
> cond_syscall(sys_getpeername);
> diff --git a/net/compat.c b/net/compat.c
> index 67fb6a3..50ce0a9 100644
> --- a/net/compat.c
> +++ b/net/compat.c
> @@ -725,7 +725,7 @@ EXPORT_SYMBOL(compat_mc_getsockopt);
> static unsigned char nas[19]={AL(0),AL(3),AL(3),AL(3),AL(2),AL(3),
> AL(3),AL(3),AL(4),AL(4),AL(4),AL(6),
> AL(6),AL(2),AL(5),AL(5),AL(3),AL(3),
> - AL(6)};
> + AL(4)};
> #undef AL
>
> asmlinkage long compat_sys_sendmsg(int fd, struct compat_msghdr __user
> *msg, unsigned flags)
> @@ -738,52 +738,13 @@ asmlinkage long compat_sys_recvmsg(int fd, struct
> compat_msghdr __user *msg, uns
> return sys_recvmsg(fd, (struct msghdr __user *)msg, flags |
> MSG_CMSG_COMPAT);
> }
>
> -asmlinkage long compat_sys_paccept(int fd, struct sockaddr __user
> *upeer_sockaddr,
> - int __user *upeer_addrlen,
> - const compat_sigset_t __user *sigmask,
> - compat_size_t sigsetsize, int flags)
> -{
> - compat_sigset_t ss32;
> - sigset_t ksigmask, sigsaved;
> - int ret;
> -
> - if (sigmask) {
> - if (sigsetsize != sizeof(compat_sigset_t))
> - return -EINVAL;
> - if (copy_from_user(&ss32, sigmask, sizeof(ss32)))
> - return -EFAULT;
> - sigset_from_compat(&ksigmask, &ss32);
> -
> - sigdelsetmask(&ksigmask, sigmask(SIGKILL)|sigmask(SIGSTOP));
> - sigprocmask(SIG_SETMASK, &ksigmask, &sigsaved);
> - }
> -
> - ret = do_accept(fd, upeer_sockaddr, upeer_addrlen, flags);
> -
> - if (ret == -ERESTARTNOHAND) {
> - /*
> - * Don't restore the signal mask yet. Let do_signal() deliver
> - * the signal on the way back to userspace, before the signal
> - * mask is restored.
> - */
> - if (sigmask) {
> - memcpy(&current->saved_sigmask, &sigsaved,
> - sizeof(sigsaved));
> - set_restore_sigmask();
> - }
> - } else if (sigmask)
> - sigprocmask(SIG_SETMASK, &sigsaved, NULL);
> -
> - return ret;
> -}
> -
> asmlinkage long compat_sys_socketcall(int call, u32 __user *args)
> {
> int ret;
> u32 a[6];
> u32 a0, a1;
>
> - if (call < SYS_SOCKET || call > SYS_PACCEPT)
> + if (call < SYS_SOCKET || call > SYS_ACCEPT4)
> return -EINVAL;
> if (copy_from_user(a, args, nas[call]))
> return -EFAULT;
> @@ -804,7 +765,7 @@ asmlinkage long compat_sys_socketcall(int call, u32
> __user *args)
> ret = sys_listen(a0, a1);
> break;
> case SYS_ACCEPT:
> - ret = do_accept(a0, compat_ptr(a1), compat_ptr(a[2]), 0);
> + ret = sys_accept4(a0, compat_ptr(a1), compat_ptr(a[2]), 0);
> break;
> case SYS_GETSOCKNAME:
> ret = sys_getsockname(a0, compat_ptr(a1), compat_ptr(a[2]));
> @@ -844,9 +805,8 @@ asmlinkage long compat_sys_socketcall(int call, u32
> __user *args)
> case SYS_RECVMSG:
> ret = compat_sys_recvmsg(a0, compat_ptr(a1), a[2]);
> break;
> - case SYS_PACCEPT:
> - ret = compat_sys_paccept(a0, compat_ptr(a1), compat_ptr(a[2]),
> - compat_ptr(a[3]), a[4], a[5]);
> + case SYS_ACCEPT4:
> + ret = sys_accept4(a0, compat_ptr(a1), compat_ptr(a[2]), a[3]);
> break;
> default:
> ret = -EINVAL;
> diff --git a/net/socket.c b/net/socket.c
> index 2b7a4b5..8e72806 100644
> --- a/net/socket.c
> +++ b/net/socket.c
> @@ -1427,8 +1427,8 @@ asmlinkage long sys_listen(int fd, int backlog)
> * clean when we restucture accept also.
> */
>
> -long do_accept(int fd, struct sockaddr __user *upeer_sockaddr,
> - int __user *upeer_addrlen, int flags)
> +asmlinkage long sys_accept4(int fd, struct sockaddr __user *upeer_sockaddr,
> + int __user *upeer_addrlen, int flags)
> {
> struct socket *sock, *newsock;
> struct file *newfile;
> @@ -1511,66 +1511,10 @@ out_fd:
> goto out_put;
> }
>
> -#if 0
> -#ifdef HAVE_SET_RESTORE_SIGMASK
> -asmlinkage long sys_paccept(int fd, struct sockaddr __user *upeer_sockaddr,
> - int __user *upeer_addrlen,
> - const sigset_t __user *sigmask,
> - size_t sigsetsize, int flags)
> -{
> - sigset_t ksigmask, sigsaved;
> - int ret;
> -
> - if (sigmask) {
> - /* XXX: Don't preclude handling different sized sigset_t's. */
> - if (sigsetsize != sizeof(sigset_t))
> - return -EINVAL;
> - if (copy_from_user(&ksigmask, sigmask, sizeof(ksigmask)))
> - return -EFAULT;
> -
> - sigdelsetmask(&ksigmask, sigmask(SIGKILL)|sigmask(SIGSTOP));
> - sigprocmask(SIG_SETMASK, &ksigmask, &sigsaved);
> - }
> -
> - ret = do_accept(fd, upeer_sockaddr, upeer_addrlen, flags);
> -
> - if (ret < 0 && signal_pending(current)) {
> - /*
> - * Don't restore the signal mask yet. Let do_signal() deliver
> - * the signal on the way back to userspace, before the signal
> - * mask is restored.
> - */
> - if (sigmask) {
> - memcpy(&current->saved_sigmask, &sigsaved,
> - sizeof(sigsaved));
> - set_restore_sigmask();
> - }
> - } else if (sigmask)
> - sigprocmask(SIG_SETMASK, &sigsaved, NULL);
> -
> - return ret;
> -}
> -#else
> -asmlinkage long sys_paccept(int fd, struct sockaddr __user *upeer_sockaddr,
> - int __user *upeer_addrlen,
> - const sigset_t __user *sigmask,
> - size_t sigsetsize, int flags)
> -{
> - /* The platform does not support restoring the signal mask in the
> - * return path. So we do not allow using paccept() with a signal
> - * mask. */
> - if (sigmask)
> - return -EINVAL;
> -
> - return do_accept(fd, upeer_sockaddr, upeer_addrlen, flags);
> -}
> -#endif
> -#endif
> -
> asmlinkage long sys_accept(int fd, struct sockaddr __user *upeer_sockaddr,
> int __user *upeer_addrlen)
> {
> - return do_accept(fd, upeer_sockaddr, upeer_addrlen, 0);
> + return sys_accept4(fd, upeer_sockaddr, upeer_addrlen, 0);
> }
>
> /*
> @@ -2097,7 +2041,7 @@ static const unsigned char nargs[19]={
> AL(0),AL(3),AL(3),AL(3),AL(2),AL(3),
> AL(3),AL(3),AL(4),AL(4),AL(4),AL(6),
> AL(6),AL(2),AL(5),AL(5),AL(3),AL(3),
> - AL(6)
> + AL(4)
> };
>
> #undef AL
> @@ -2116,7 +2060,7 @@ asmlinkage long sys_socketcall(int call, unsigned long
> __user *args)
> unsigned long a0, a1;
> int err;
>
> - if (call < 1 || call > SYS_PACCEPT)
> + if (call < 1 || call > SYS_ACCEPT4)
> return -EINVAL;
>
> /* copy_from_user should be SMP safe. */
> @@ -2144,9 +2088,8 @@ asmlinkage long sys_socketcall(int call, unsigned long
> __user *args)
> err = sys_listen(a0, a1);
> break;
> case SYS_ACCEPT:
> - err =
> - do_accept(a0, (struct sockaddr __user *)a1,
> - (int __user *)a[2], 0);
> + err = sys_accept4(a0, (struct sockaddr __user *)a1,
> + (int __user *)a[2], 0);
> break;
> case SYS_GETSOCKNAME:
> err =
> @@ -2193,12 +2136,9 @@ asmlinkage long sys_socketcall(int call, unsigned
> long __user *args)
> case SYS_RECVMSG:
> err = sys_recvmsg(a0, (struct msghdr __user *)a1, a[2]);
> break;
> - case SYS_PACCEPT:
> - err =
> - sys_paccept(a0, (struct sockaddr __user *)a1,
> - (int __user *)a[2],
> - (const sigset_t __user *) a[3],
> - a[4], a[5]);
> + case SYS_ACCEPT4:
> + err = sys_accept4(a0, (struct sockaddr __user *)a1,
> + (int __user *)a[2], a[3]);
> break;
> default:
> err = -EINVAL;
> --
> To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
> the body of a message to [email protected]
> More majordomo info at http://vger.kernel.org/majordomo-info.html
> Please read the FAQ at http://www.tux.org/lkml/
>


--
Michael Kerrisk Linux man-pages maintainer;
http://www.kernel.org/doc/man-pages/ Found a documentation bug?
http://www.kernel.org/doc/man-pages/reporting_bugs.html

2008-11-13 22:02:45

by Michael Kerrisk

[permalink] [raw]
Subject: Re: [PATCH] reintroduce accept4

Hit the send button a little early on my last mail. Just to complete one piece:

> My only argument is with the name of the new sysytem call.

... Each of these new system calls (accept4(), dup3(), evenfd2(),
signalfd4(), inotify_init1(), epoll_create1(), pipe2()) has a name
that's based on the number of arguments it has. This follows a
convention that was used in a few traditional Unix system calls, e.g.,
wait3(), wait4(), dup2(). However, it's probably a mistake since:

a) The glibc interfaces can have different numbers of arguments from
the system call

b) In the future, we might use the new bits in the flags argument to
signal the presence of additional arguments for the call, in which
case the number in the name no longer matches the number of arguments
in the call signature.

In the end, names like acceptx(), dupx(), ... or acceptfl(), duplf(),
... or somesuch would probably have been better. But given that we
already added the other system calls, it doesn't seem worth bothering
to change things for accept4().

Cheers,

Michael

2008-11-13 22:07:56

by Andrew Morton

[permalink] [raw]
Subject: Re: [PATCH] reintroduce accept4

On Thu, 13 Nov 2008 16:51:56 -0500
"Michael Kerrisk" <[email protected]> wrote:

> Andrew,
>
> On 10/26/08, Ulrich Drepper <[email protected]> wrote:
> > This patch reintroduces accept4, replacing paccept. It's easy to see that
> > the patch only removes code and then redirects existing code away from the
> > removed functions. Since the paccept code sans signal handling was never
> > in question I think there is no reason to quarantine the patch first.
>
> I see you accepted this patch into -mm. I've finally got to looking
> at and testing this, so:
>
> Tested-by: Michael Kerrisk <[email protected]>
> Acked-by: Michael Kerrisk <[email protected]>

Cool, thanks.

> In my tests, everything looks fine. I'll forward my test program in a
> follow-up mail.

OK, I'll add that to the changelog as well.

> I think Ulrich wanted to try to see this patch in for 2.6.28; it's
> past the merge window of course, so it's up to you, but I have no
> problem with that.

That's easy - I'll send it to Linus and let him decide ;)

Realistically, this isn't likely to get much third-party testing in -rc
anyway. Our best defence at this time is careful review and developer
runtime testing, which you've done, thanks.

If it's buggy, we can live with that - fix it later, backport the
fixes. It's security holes (including DoS ones) which we need to be
most concerned about.

> The API is the one that Ulrich initially proposed,
> before taking a detour into paccept()
> (http://thread.gmane.org/gmane.linux.kernel/671443 ), which I argued
> against (http://thread.gmane.org/gmane.linux.kernel/723952,
> http://thread.gmane.org/gmane.linux.network/106071/), since I (and
> Roland) could see no reason for the added complexity of a signal set
> argument (like pselect()/ppoll()/epoll_pwait()). (In any case, if
> someone does come up with a compelling reason to add a sigset
> argument, then we can add it via the use of a new flag bit.)
>
> My only argument is with the name of the new sysytem call.
>
> > I've updated the test program which now looks as follows:
>
> (I assume that there had been no testing on x86-32, since, the
> __i386__ ifdef's notwithstanding, the program below can't work on
> x86-32 -- sys_socketcall() takes its arguments packaged into an array
> on x86-32, not as an inline list.)
>
> Andrew, you noted a lack of explanation accompanying the original
> patch. Here's something to fill the gap, and which may be suitable
> for the changelog.
>
> ==
> Introduce a new accept4() system call. The addition of this system
> call matches analogous changes in 2.6.27 (dup3(), evenfd2(),
> signalfd4(), inotify_init1(), epoll_create1(), pipe2()) which added
> new system calls that differed from analogous traditional system calls
> in adding a flags argument that can be used to access additional
> functionality. The accept4() system call is exactly the same as
> accept(), except that it adds a flags bit-mask argument. Two flags
> are initially implemented. (Most of the new system calls in 2.6.27
> also had both of these flags.) SOCK_CLOEXEC causes the close-on-exec
> (FD_CLOEXEC) flag to be enabled for the new file descriptor returned
> by accept4(). This is a useful security feature to avoid leaking
> information in a multithreaded program where one thread is doing an
> accept() at the same time as another thread is doing a fork() plus
> exec(). (More details here:
> http://udrepper.livejournal.com/20407.html "Secure File Descriptor
> Handling", Ulrich Drepper) The other flag is SOCK_NONBLOCK, which
> causes the O_NONBLOCK flag to be enabled on the new open file
> description created by accept4(). (This flag is merely a convenience,
> saving the use of additional calls fcntl(F_GETFL) and fcntl (F_SETFL)
> to achieve the same result.)

I replaced the existing changelog with the above (plus some paragraph
breaks ;)). Will add the new test app when it comes along.

2008-11-13 22:11:31

by Michael Kerrisk

[permalink] [raw]
Subject: Re: [PATCH] reintroduce accept4

Here's my test program. Works on x86-32. Should work on x86-64, but
I don't have a system to hand to test with.

It tests accept4() with each of the four possible combinations of
SOCK_CLOEXEC and SOCK_NONBLOCK set/clear in 'flags', and verifies that
the appropriate flags are set on the file descriptor/open file
description returned by accept4().

I tested Ulrich's patch in this thread by applying against 2.6.28-rc2,
and it passes according to my test program.

/* test_accept4.c

Copyright (C) 2008, Linux Foundation, written by Michael Kerrisk
<[email protected]>

Licensed under the GNU GPLv2 or later.
*/
#define _GNU_SOURCE
#include <unistd.h>
#include <sys/syscall.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <stdlib.h>
#include <fcntl.h>
#include <stdio.h>
#include <string.h>

#define PORT_NUM 33333

#define die(msg) do { perror(msg); exit(EXIT_FAILURE); } while (0)

/**********************************************************************/

/* The following is what we need until glibc gets a wrapper for
accept4() */

/* Flags for socket(), socketpair(), accept4() */
#ifndef SOCK_CLOEXEC
#define SOCK_CLOEXEC O_CLOEXEC
#endif
#ifndef SOCK_NONBLOCK
#define SOCK_NONBLOCK O_NONBLOCK
#endif

#ifdef __x86_64__
#define SYS_accept4 288
#elif __i386__
#define USE_SOCKETCALL 1
#define SYS_ACCEPT4 18
#else
#error "Sorry -- don't know the syscall # on this architecture"
#endif

static int
accept4(int fd, struct sockaddr *sockaddr, socklen_t *addrlen, int flags)
{
printf("Calling accept4(): flags = %x", flags);
if (flags != 0) {
printf(" (");
if (flags & SOCK_CLOEXEC)
printf("SOCK_CLOEXEC");
if ((flags & SOCK_CLOEXEC) && (flags & SOCK_NONBLOCK))
printf(" ");
if (flags & SOCK_NONBLOCK)
printf("SOCK_NONBLOCK");
printf(")");
}
printf("\n");

#if USE_SOCKETCALL
long args[6];

args[0] = fd;
args[1] = (long) sockaddr;
args[2] = (long) addrlen;
args[3] = flags;

printf("x86-32\n"); /* XXX */
return syscall(SYS_socketcall, SYS_ACCEPT4, args);
#else
printf("x86-64\n"); /* XXX */
return syscall(SYS_accept4, fd, sockaddr, addrlen, flags);
#endif
}

/**********************************************************************/


static int
do_test(int lfd, struct sockaddr_in *conn_addr,
int closeonexec_flag, int nonblock_flag)
{
int connfd, acceptfd;
int fdf, flf, fdf_pass, flf_pass;
struct sockaddr_in claddr;
socklen_t addrlen;

printf("=======================================\n");

connfd = socket(AF_INET, SOCK_STREAM, 0);
if (connfd == -1)
die("socket");
if (connect(connfd, (struct sockaddr *) conn_addr,
sizeof(struct sockaddr_in)) == -1)
die("connect");

addrlen = sizeof(struct sockaddr_in);
acceptfd = accept4(lfd, (struct sockaddr *) &claddr, &addrlen,
closeonexec_flag | nonblock_flag);
if (acceptfd == -1) {
perror("accept4()");
close(connfd);
return 0;
}

fdf = fcntl(acceptfd, F_GETFD);
if (fdf == -1)
die("fcntl:F_GETFD");
fdf_pass = ((fdf & FD_CLOEXEC) != 0) ==
((closeonexec_flag & SOCK_CLOEXEC) != 0);
printf("Close-on-exec flag is %sset (%s); ",
(fdf & FD_CLOEXEC) ? "" : "not ",
fdf_pass ? "OK" : "failed");

flf = fcntl(acceptfd, F_GETFL);
if (flf == -1)
die("fcntl:F_GETFD");
flf_pass = ((flf & O_NONBLOCK) != 0) ==
((nonblock_flag & SOCK_NONBLOCK) !=0);
printf("nonblock flag is %sset (%s)\n",
(flf & O_NONBLOCK) ? "" : "not ",
flf_pass ? "OK" : "failed");

close(acceptfd);
close(connfd);

printf("Test result: %s\n", (fdf_pass && flf_pass) ? "PASS" : "FAIL");
return fdf_pass && flf_pass;
}


static int
create_listening_socket(int port_num)
{
struct sockaddr_in svaddr;
int lfd;
int optval;

memset(&svaddr, 0, sizeof(struct sockaddr_in));
svaddr.sin_family = AF_INET;
svaddr.sin_addr.s_addr = htonl(INADDR_ANY);
svaddr.sin_port = htons(port_num);

lfd = socket(AF_INET, SOCK_STREAM, 0);
if (lfd == -1)
die("socket");

optval = 1;
if (setsockopt(lfd, SOL_SOCKET, SO_REUSEADDR, &optval,
sizeof(optval)) == -1)
die("setsockopt");

if (bind(lfd, (struct sockaddr *) &svaddr,
sizeof(struct sockaddr_in)) == -1)
die("bind");

if (listen(lfd, 5) == -1)
die("listen");

return lfd;
}


int
main(int argc, char *argv[])
{
struct sockaddr_in conn_addr;
int lfd;
int port_num;
int passed;

passed = 1;

port_num = (argc > 1) ? atoi(argv[1]) : PORT_NUM;

memset(&conn_addr, 0, sizeof(struct sockaddr_in));
conn_addr.sin_family = AF_INET;
conn_addr.sin_addr.s_addr = htonl(INADDR_LOOPBACK);
conn_addr.sin_port = htons(port_num);

lfd = create_listening_socket(port_num);

if (!do_test(lfd, &conn_addr, 0, 0))
passed = 0;
if (!do_test(lfd, &conn_addr, SOCK_CLOEXEC, 0))
passed = 0;
if (!do_test(lfd, &conn_addr, 0, SOCK_NONBLOCK))
passed = 0;
if (!do_test(lfd, &conn_addr, SOCK_CLOEXEC, SOCK_NONBLOCK))
passed = 0;

close(lfd);

exit(passed ? EXIT_SUCCESS : EXIT_FAILURE);
}

2008-11-13 22:15:02

by Michael Kerrisk

[permalink] [raw]
Subject: Re: [PATCH] reintroduce accept4

Andrew,

There was a couplke of crufty printf()'s in the preceding version that
I didn't notice until just after I hit send. Paste this version into
the changelog instead.

Cheers,

Michael
/* test_accept4.c

Copyright (C) 2008, Linux Foundation, written by Michael Kerrisk
<[email protected]>

Licensed under the GNU GPLv2 or later.
*/
#define _GNU_SOURCE
#include <unistd.h>
#include <sys/syscall.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <stdlib.h>
#include <fcntl.h>
#include <stdio.h>
#include <string.h>

#define PORT_NUM 33333

#define die(msg) do { perror(msg); exit(EXIT_FAILURE); } while (0)

/**********************************************************************/

/* The following is what we need until glibc gets a wrapper for
accept4() */

/* Flags for socket(), socketpair(), accept4() */
#ifndef SOCK_CLOEXEC
#define SOCK_CLOEXEC O_CLOEXEC
#endif
#ifndef SOCK_NONBLOCK
#define SOCK_NONBLOCK O_NONBLOCK
#endif

#ifdef __x86_64__
#define SYS_accept4 288
#elif __i386__
#define USE_SOCKETCALL 1
#define SYS_ACCEPT4 18
#else
#error "Sorry -- don't know the syscall # on this architecture"
#endif

static int
accept4(int fd, struct sockaddr *sockaddr, socklen_t *addrlen, int flags)
{
printf("Calling accept4(): flags = %x", flags);
if (flags != 0) {
printf(" (");
if (flags & SOCK_CLOEXEC)
printf("SOCK_CLOEXEC");
if ((flags & SOCK_CLOEXEC) && (flags & SOCK_NONBLOCK))
printf(" ");
if (flags & SOCK_NONBLOCK)
printf("SOCK_NONBLOCK");
printf(")");
}
printf("\n");

#if USE_SOCKETCALL
long args[6];

args[0] = fd;
args[1] = (long) sockaddr;
args[2] = (long) addrlen;
args[3] = flags;

return syscall(SYS_socketcall, SYS_ACCEPT4, args);
#else
return syscall(SYS_accept4, fd, sockaddr, addrlen, flags);
#endif
}

/**********************************************************************/


static int
do_test(int lfd, struct sockaddr_in *conn_addr,
int closeonexec_flag, int nonblock_flag)
{
int connfd, acceptfd;
int fdf, flf, fdf_pass, flf_pass;
struct sockaddr_in claddr;
socklen_t addrlen;

printf("=======================================\n");

connfd = socket(AF_INET, SOCK_STREAM, 0);
if (connfd == -1)
die("socket");
if (connect(connfd, (struct sockaddr *) conn_addr,
sizeof(struct sockaddr_in)) == -1)
die("connect");

addrlen = sizeof(struct sockaddr_in);
acceptfd = accept4(lfd, (struct sockaddr *) &claddr, &addrlen,
closeonexec_flag | nonblock_flag);
if (acceptfd == -1) {
perror("accept4()");
close(connfd);
return 0;
}

fdf = fcntl(acceptfd, F_GETFD);
if (fdf == -1)
die("fcntl:F_GETFD");
fdf_pass = ((fdf & FD_CLOEXEC) != 0) ==
((closeonexec_flag & SOCK_CLOEXEC) != 0);
printf("Close-on-exec flag is %sset (%s); ",
(fdf & FD_CLOEXEC) ? "" : "not ",
fdf_pass ? "OK" : "failed");

flf = fcntl(acceptfd, F_GETFL);
if (flf == -1)
die("fcntl:F_GETFD");
flf_pass = ((flf & O_NONBLOCK) != 0) ==
((nonblock_flag & SOCK_NONBLOCK) !=0);
printf("nonblock flag is %sset (%s)\n",
(flf & O_NONBLOCK) ? "" : "not ",
flf_pass ? "OK" : "failed");

close(acceptfd);
close(connfd);

printf("Test result: %s\n", (fdf_pass && flf_pass) ? "PASS" : "FAIL");
return fdf_pass && flf_pass;
}


static int
create_listening_socket(int port_num)
{
struct sockaddr_in svaddr;
int lfd;
int optval;

memset(&svaddr, 0, sizeof(struct sockaddr_in));
svaddr.sin_family = AF_INET;
svaddr.sin_addr.s_addr = htonl(INADDR_ANY);
svaddr.sin_port = htons(port_num);

lfd = socket(AF_INET, SOCK_STREAM, 0);
if (lfd == -1)
die("socket");

optval = 1;
if (setsockopt(lfd, SOL_SOCKET, SO_REUSEADDR, &optval,
sizeof(optval)) == -1)
die("setsockopt");

if (bind(lfd, (struct sockaddr *) &svaddr,
sizeof(struct sockaddr_in)) == -1)
die("bind");

if (listen(lfd, 5) == -1)
die("listen");

return lfd;
}


int
main(int argc, char *argv[])
{
struct sockaddr_in conn_addr;
int lfd;
int port_num;
int passed;

passed = 1;

port_num = (argc > 1) ? atoi(argv[1]) : PORT_NUM;

memset(&conn_addr, 0, sizeof(struct sockaddr_in));
conn_addr.sin_family = AF_INET;
conn_addr.sin_addr.s_addr = htonl(INADDR_LOOPBACK);
conn_addr.sin_port = htons(port_num);

lfd = create_listening_socket(port_num);

if (!do_test(lfd, &conn_addr, 0, 0))
passed = 0;
if (!do_test(lfd, &conn_addr, SOCK_CLOEXEC, 0))
passed = 0;
if (!do_test(lfd, &conn_addr, 0, SOCK_NONBLOCK))
passed = 0;
if (!do_test(lfd, &conn_addr, SOCK_CLOEXEC, SOCK_NONBLOCK))
passed = 0;

close(lfd);

exit(passed ? EXIT_SUCCESS : EXIT_FAILURE);
}

2008-11-13 22:26:22

by Paul Mackerras

[permalink] [raw]
Subject: Re: [PATCH] reintroduce accept4

Andrew Morton writes:

> > I think Ulrich wanted to try to see this patch in for 2.6.28; it's
> > past the merge window of course, so it's up to you, but I have no
> > problem with that.
>
> That's easy - I'll send it to Linus and let him decide ;)

Ulrich's patch only updated x86. If you're going to send it to Linus,
please give us other architecture maintainers a chance to get patches
to you to wire it up on our architectures, and then send Linus the
combined patch.

Paul.

2008-11-13 22:29:01

by Paul Mackerras

[permalink] [raw]
Subject: Re: [PATCH] reintroduce accept4

I wrote:

> Andrew Morton writes:
>
> > > I think Ulrich wanted to try to see this patch in for 2.6.28; it's
> > > past the merge window of course, so it's up to you, but I have no
> > > problem with that.
> >
> > That's easy - I'll send it to Linus and let him decide ;)
>
> Ulrich's patch only updated x86. If you're going to send it to Linus,
> please give us other architecture maintainers a chance to get patches
> to you to wire it up on our architectures, and then send Linus the
> combined patch.

Actually, any architecture that uses sys_socketcall should be OK
already, by the looks, and that includes powerpc.

Paul.

2008-11-13 22:58:33

by Andrew Morton

[permalink] [raw]
Subject: Re: [PATCH] reintroduce accept4

On Fri, 14 Nov 2008 09:28:39 +1100
Paul Mackerras <[email protected]> wrote:

> I wrote:
>
> > Andrew Morton writes:
> >
> > > > I think Ulrich wanted to try to see this patch in for 2.6.28; it's
> > > > past the merge window of course, so it's up to you, but I have no
> > > > problem with that.
> > >
> > > That's easy - I'll send it to Linus and let him decide ;)
> >
> > Ulrich's patch only updated x86. If you're going to send it to Linus,
> > please give us other architecture maintainers a chance to get patches
> > to you to wire it up on our architectures, and then send Linus the
> > combined patch.
>
> Actually, any architecture that uses sys_socketcall should be OK
> already, by the looks, and that includes powerpc.
>

Here's the latest version, for review-n-test enjoyment:

From: Ulrich Drepper <[email protected]>

Introduce a new accept4() system call. The addition of this system call
matches analogous changes in 2.6.27 (dup3(), evenfd2(), signalfd4(),
inotify_init1(), epoll_create1(), pipe2()) which added new system calls
that differed from analogous traditional system calls in adding a flags
argument that can be used to access additional functionality.

The accept4() system call is exactly the same as accept(), except that it
adds a flags bit-mask argument. Two flags are initially implemented.
(Most of the new system calls in 2.6.27 also had both of these flags.)
SOCK_CLOEXEC causes the close-on-exec (FD_CLOEXEC) flag to be enabled for
the new file descriptor returned by accept4(). This is a useful security
feature to avoid leaking information in a multithreaded program where one
thread is doing an accept() at the same time as another thread is doing a
fork() plus exec().

More details here: http://udrepper.livejournal.com/20407.html "Secure File
Descriptor Handling", Ulrich Drepper) The other flag is SOCK_NONBLOCK,
which causes the O_NONBLOCK flag to be enabled on the new open file
description created by accept4(). (This flag is merely a convenience,
saving the use of additional calls fcntl(F_GETFL) and fcntl (F_SETFL) to
achieve the same result.

Here's my test program. Works on x86-32. Should work on x86-64, but
I don't have a system to hand to test with.

It tests accept4() with each of the four possible combinations of
SOCK_CLOEXEC and SOCK_NONBLOCK set/clear in 'flags', and verifies that
the appropriate flags are set on the file descriptor/open file
description returned by accept4().

I tested Ulrich's patch in this thread by applying against 2.6.28-rc2,
and it passes according to my test program.

/* test_accept4.c

Copyright (C) 2008, Linux Foundation, written by Michael Kerrisk
<[email protected]>

Licensed under the GNU GPLv2 or later.
*/
#define _GNU_SOURCE
#include <unistd.h>
#include <sys/syscall.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <stdlib.h>
#include <fcntl.h>
#include <stdio.h>
#include <string.h>

#define PORT_NUM 33333

#define die(msg) do { perror(msg); exit(EXIT_FAILURE); } while (0)

/**********************************************************************/

/* The following is what we need until glibc gets a wrapper for
accept4() */

/* Flags for socket(), socketpair(), accept4() */
#ifndef SOCK_CLOEXEC
#define SOCK_CLOEXEC O_CLOEXEC
#endif
#ifndef SOCK_NONBLOCK
#define SOCK_NONBLOCK O_NONBLOCK
#endif

#ifdef __x86_64__
#define SYS_accept4 288
#elif __i386__
#define USE_SOCKETCALL 1
#define SYS_ACCEPT4 18
#else
#error "Sorry -- don't know the syscall # on this architecture"
#endif

static int
accept4(int fd, struct sockaddr *sockaddr, socklen_t *addrlen, int flags)
{
printf("Calling accept4(): flags = %x", flags);
if (flags != 0) {
printf(" (");
if (flags & SOCK_CLOEXEC)
printf("SOCK_CLOEXEC");
if ((flags & SOCK_CLOEXEC) && (flags & SOCK_NONBLOCK))
printf(" ");
if (flags & SOCK_NONBLOCK)
printf("SOCK_NONBLOCK");
printf(")");
}
printf("\n");

#if USE_SOCKETCALL
long args[6];

args[0] = fd;
args[1] = (long) sockaddr;
args[2] = (long) addrlen;
args[3] = flags;

return syscall(SYS_socketcall, SYS_ACCEPT4, args);
#else
return syscall(SYS_accept4, fd, sockaddr, addrlen, flags);
#endif
}

/**********************************************************************/


static int
do_test(int lfd, struct sockaddr_in *conn_addr,
int closeonexec_flag, int nonblock_flag)
{
int connfd, acceptfd;
int fdf, flf, fdf_pass, flf_pass;
struct sockaddr_in claddr;
socklen_t addrlen;

printf("=======================================\n");

connfd = socket(AF_INET, SOCK_STREAM, 0);
if (connfd == -1)
die("socket");
if (connect(connfd, (struct sockaddr *) conn_addr,
sizeof(struct sockaddr_in)) == -1)
die("connect");

addrlen = sizeof(struct sockaddr_in);
acceptfd = accept4(lfd, (struct sockaddr *) &claddr, &addrlen,
closeonexec_flag | nonblock_flag);
if (acceptfd == -1) {
perror("accept4()");
close(connfd);
return 0;
}

fdf = fcntl(acceptfd, F_GETFD);
if (fdf == -1)
die("fcntl:F_GETFD");
fdf_pass = ((fdf & FD_CLOEXEC) != 0) ==
((closeonexec_flag & SOCK_CLOEXEC) != 0);
printf("Close-on-exec flag is %sset (%s); ",
(fdf & FD_CLOEXEC) ? "" : "not ",
fdf_pass ? "OK" : "failed");

flf = fcntl(acceptfd, F_GETFL);
if (flf == -1)
die("fcntl:F_GETFD");
flf_pass = ((flf & O_NONBLOCK) != 0) ==
((nonblock_flag & SOCK_NONBLOCK) !=0);
printf("nonblock flag is %sset (%s)\n",
(flf & O_NONBLOCK) ? "" : "not ",
flf_pass ? "OK" : "failed");

close(acceptfd);
close(connfd);

printf("Test result: %s\n", (fdf_pass && flf_pass) ? "PASS" : "FAIL");
return fdf_pass && flf_pass;
}


static int
create_listening_socket(int port_num)
{
struct sockaddr_in svaddr;
int lfd;
int optval;

memset(&svaddr, 0, sizeof(struct sockaddr_in));
svaddr.sin_family = AF_INET;
svaddr.sin_addr.s_addr = htonl(INADDR_ANY);
svaddr.sin_port = htons(port_num);

lfd = socket(AF_INET, SOCK_STREAM, 0);
if (lfd == -1)
die("socket");

optval = 1;
if (setsockopt(lfd, SOL_SOCKET, SO_REUSEADDR, &optval,
sizeof(optval)) == -1)
die("setsockopt");

if (bind(lfd, (struct sockaddr *) &svaddr,
sizeof(struct sockaddr_in)) == -1)
die("bind");

if (listen(lfd, 5) == -1)
die("listen");

return lfd;
}


int
main(int argc, char *argv[])
{
struct sockaddr_in conn_addr;
int lfd;
int port_num;
int passed;

passed = 1;

port_num = (argc > 1) ? atoi(argv[1]) : PORT_NUM;

memset(&conn_addr, 0, sizeof(struct sockaddr_in));
conn_addr.sin_family = AF_INET;
conn_addr.sin_addr.s_addr = htonl(INADDR_LOOPBACK);
conn_addr.sin_port = htons(port_num);

lfd = create_listening_socket(port_num);

if (!do_test(lfd, &conn_addr, 0, 0))
passed = 0;
if (!do_test(lfd, &conn_addr, SOCK_CLOEXEC, 0))
passed = 0;
if (!do_test(lfd, &conn_addr, 0, SOCK_NONBLOCK))
passed = 0;
if (!do_test(lfd, &conn_addr, SOCK_CLOEXEC, SOCK_NONBLOCK))
passed = 0;

close(lfd);

exit(passed ? EXIT_SUCCESS : EXIT_FAILURE);
}

[[email protected]: rewrote changelog, updated test program]
Signed-off-by: Ulrich Drepper <[email protected]>
Tested-by: Michael Kerrisk <[email protected]>
Acked-by: Michael Kerrisk <[email protected]>
Cc: <[email protected]>
Cc: <[email protected]>
Signed-off-by: Andrew Morton <[email protected]>
---

arch/x86/include/asm/unistd_64.h | 4 -
include/linux/net.h | 6 --
include/linux/syscalls.h | 3 -
kernel/sys_ni.c | 2
net/compat.c | 50 +-----------------
net/socket.c | 80 +++--------------------------
6 files changed, 21 insertions(+), 124 deletions(-)

diff -puN arch/x86/include/asm/unistd_64.h~reintroduce-accept4 arch/x86/include/asm/unistd_64.h
--- a/arch/x86/include/asm/unistd_64.h~reintroduce-accept4
+++ a/arch/x86/include/asm/unistd_64.h
@@ -639,8 +639,8 @@ __SYSCALL(__NR_fallocate, sys_fallocate)
__SYSCALL(__NR_timerfd_settime, sys_timerfd_settime)
#define __NR_timerfd_gettime 287
__SYSCALL(__NR_timerfd_gettime, sys_timerfd_gettime)
-#define __NR_paccept 288
-__SYSCALL(__NR_paccept, sys_paccept)
+#define __NR_accept4 288
+__SYSCALL(__NR_accept4, sys_accept4)
#define __NR_signalfd4 289
__SYSCALL(__NR_signalfd4, sys_signalfd4)
#define __NR_eventfd2 290
diff -puN include/linux/net.h~reintroduce-accept4 include/linux/net.h
--- a/include/linux/net.h~reintroduce-accept4
+++ a/include/linux/net.h
@@ -40,7 +40,7 @@
#define SYS_GETSOCKOPT 15 /* sys_getsockopt(2) */
#define SYS_SENDMSG 16 /* sys_sendmsg(2) */
#define SYS_RECVMSG 17 /* sys_recvmsg(2) */
-#define SYS_PACCEPT 18 /* sys_paccept(2) */
+#define SYS_ACCEPT4 18 /* sys_accept4(2) */

typedef enum {
SS_FREE = 0, /* not allocated */
@@ -100,7 +100,7 @@ enum sock_type {
* remaining bits are used as flags. */
#define SOCK_TYPE_MASK 0xf

-/* Flags for socket, socketpair, paccept */
+/* Flags for socket, socketpair, accept4 */
#define SOCK_CLOEXEC O_CLOEXEC
#ifndef SOCK_NONBLOCK
#define SOCK_NONBLOCK O_NONBLOCK
@@ -223,8 +223,6 @@ extern int sock_map_fd(struct sock
extern struct socket *sockfd_lookup(int fd, int *err);
#define sockfd_put(sock) fput(sock->file)
extern int net_ratelimit(void);
-extern long do_accept(int fd, struct sockaddr __user *upeer_sockaddr,
- int __user *upeer_addrlen, int flags);

#define net_random() random32()
#define net_srandom(seed) srandom32((__force u32)seed)
diff -puN include/linux/syscalls.h~reintroduce-accept4 include/linux/syscalls.h
--- a/include/linux/syscalls.h~reintroduce-accept4
+++ a/include/linux/syscalls.h
@@ -410,8 +410,7 @@ asmlinkage long sys_getsockopt(int fd, i
asmlinkage long sys_bind(int, struct sockaddr __user *, int);
asmlinkage long sys_connect(int, struct sockaddr __user *, int);
asmlinkage long sys_accept(int, struct sockaddr __user *, int __user *);
-asmlinkage long sys_paccept(int, struct sockaddr __user *, int __user *,
- const __user sigset_t *, size_t, int);
+asmlinkage long sys_accept4(int, struct sockaddr __user *, int __user *, int);
asmlinkage long sys_getsockname(int, struct sockaddr __user *, int __user *);
asmlinkage long sys_getpeername(int, struct sockaddr __user *, int __user *);
asmlinkage long sys_send(int, void __user *, size_t, unsigned);
diff -puN kernel/sys_ni.c~reintroduce-accept4 kernel/sys_ni.c
--- a/kernel/sys_ni.c~reintroduce-accept4
+++ a/kernel/sys_ni.c
@@ -31,7 +31,7 @@ cond_syscall(sys_socketpair);
cond_syscall(sys_bind);
cond_syscall(sys_listen);
cond_syscall(sys_accept);
-cond_syscall(sys_paccept);
+cond_syscall(sys_accept4);
cond_syscall(sys_connect);
cond_syscall(sys_getsockname);
cond_syscall(sys_getpeername);
diff -puN net/compat.c~reintroduce-accept4 net/compat.c
--- a/net/compat.c~reintroduce-accept4
+++ a/net/compat.c
@@ -725,7 +725,7 @@ EXPORT_SYMBOL(compat_mc_getsockopt);
static unsigned char nas[19]={AL(0),AL(3),AL(3),AL(3),AL(2),AL(3),
AL(3),AL(3),AL(4),AL(4),AL(4),AL(6),
AL(6),AL(2),AL(5),AL(5),AL(3),AL(3),
- AL(6)};
+ AL(4)};
#undef AL

asmlinkage long compat_sys_sendmsg(int fd, struct compat_msghdr __user *msg, unsigned flags)
@@ -738,52 +738,13 @@ asmlinkage long compat_sys_recvmsg(int f
return sys_recvmsg(fd, (struct msghdr __user *)msg, flags | MSG_CMSG_COMPAT);
}

-asmlinkage long compat_sys_paccept(int fd, struct sockaddr __user *upeer_sockaddr,
- int __user *upeer_addrlen,
- const compat_sigset_t __user *sigmask,
- compat_size_t sigsetsize, int flags)
-{
- compat_sigset_t ss32;
- sigset_t ksigmask, sigsaved;
- int ret;
-
- if (sigmask) {
- if (sigsetsize != sizeof(compat_sigset_t))
- return -EINVAL;
- if (copy_from_user(&ss32, sigmask, sizeof(ss32)))
- return -EFAULT;
- sigset_from_compat(&ksigmask, &ss32);
-
- sigdelsetmask(&ksigmask, sigmask(SIGKILL)|sigmask(SIGSTOP));
- sigprocmask(SIG_SETMASK, &ksigmask, &sigsaved);
- }
-
- ret = do_accept(fd, upeer_sockaddr, upeer_addrlen, flags);
-
- if (ret == -ERESTARTNOHAND) {
- /*
- * Don't restore the signal mask yet. Let do_signal() deliver
- * the signal on the way back to userspace, before the signal
- * mask is restored.
- */
- if (sigmask) {
- memcpy(&current->saved_sigmask, &sigsaved,
- sizeof(sigsaved));
- set_restore_sigmask();
- }
- } else if (sigmask)
- sigprocmask(SIG_SETMASK, &sigsaved, NULL);
-
- return ret;
-}
-
asmlinkage long compat_sys_socketcall(int call, u32 __user *args)
{
int ret;
u32 a[6];
u32 a0, a1;

- if (call < SYS_SOCKET || call > SYS_PACCEPT)
+ if (call < SYS_SOCKET || call > SYS_ACCEPT4)
return -EINVAL;
if (copy_from_user(a, args, nas[call]))
return -EFAULT;
@@ -804,7 +765,7 @@ asmlinkage long compat_sys_socketcall(in
ret = sys_listen(a0, a1);
break;
case SYS_ACCEPT:
- ret = do_accept(a0, compat_ptr(a1), compat_ptr(a[2]), 0);
+ ret = sys_accept4(a0, compat_ptr(a1), compat_ptr(a[2]), 0);
break;
case SYS_GETSOCKNAME:
ret = sys_getsockname(a0, compat_ptr(a1), compat_ptr(a[2]));
@@ -844,9 +805,8 @@ asmlinkage long compat_sys_socketcall(in
case SYS_RECVMSG:
ret = compat_sys_recvmsg(a0, compat_ptr(a1), a[2]);
break;
- case SYS_PACCEPT:
- ret = compat_sys_paccept(a0, compat_ptr(a1), compat_ptr(a[2]),
- compat_ptr(a[3]), a[4], a[5]);
+ case SYS_ACCEPT4:
+ ret = sys_accept4(a0, compat_ptr(a1), compat_ptr(a[2]), a[3]);
break;
default:
ret = -EINVAL;
diff -puN net/socket.c~reintroduce-accept4 net/socket.c
--- a/net/socket.c~reintroduce-accept4
+++ a/net/socket.c
@@ -1425,8 +1425,8 @@ asmlinkage long sys_listen(int fd, int b
* clean when we restucture accept also.
*/

-long do_accept(int fd, struct sockaddr __user *upeer_sockaddr,
- int __user *upeer_addrlen, int flags)
+asmlinkage long sys_accept4(int fd, struct sockaddr __user *upeer_sockaddr,
+ int __user *upeer_addrlen, int flags)
{
struct socket *sock, *newsock;
struct file *newfile;
@@ -1509,66 +1509,10 @@ out_fd:
goto out_put;
}

-#if 0
-#ifdef HAVE_SET_RESTORE_SIGMASK
-asmlinkage long sys_paccept(int fd, struct sockaddr __user *upeer_sockaddr,
- int __user *upeer_addrlen,
- const sigset_t __user *sigmask,
- size_t sigsetsize, int flags)
-{
- sigset_t ksigmask, sigsaved;
- int ret;
-
- if (sigmask) {
- /* XXX: Don't preclude handling different sized sigset_t's. */
- if (sigsetsize != sizeof(sigset_t))
- return -EINVAL;
- if (copy_from_user(&ksigmask, sigmask, sizeof(ksigmask)))
- return -EFAULT;
-
- sigdelsetmask(&ksigmask, sigmask(SIGKILL)|sigmask(SIGSTOP));
- sigprocmask(SIG_SETMASK, &ksigmask, &sigsaved);
- }
-
- ret = do_accept(fd, upeer_sockaddr, upeer_addrlen, flags);
-
- if (ret < 0 && signal_pending(current)) {
- /*
- * Don't restore the signal mask yet. Let do_signal() deliver
- * the signal on the way back to userspace, before the signal
- * mask is restored.
- */
- if (sigmask) {
- memcpy(&current->saved_sigmask, &sigsaved,
- sizeof(sigsaved));
- set_restore_sigmask();
- }
- } else if (sigmask)
- sigprocmask(SIG_SETMASK, &sigsaved, NULL);
-
- return ret;
-}
-#else
-asmlinkage long sys_paccept(int fd, struct sockaddr __user *upeer_sockaddr,
- int __user *upeer_addrlen,
- const sigset_t __user *sigmask,
- size_t sigsetsize, int flags)
-{
- /* The platform does not support restoring the signal mask in the
- * return path. So we do not allow using paccept() with a signal
- * mask. */
- if (sigmask)
- return -EINVAL;
-
- return do_accept(fd, upeer_sockaddr, upeer_addrlen, flags);
-}
-#endif
-#endif
-
asmlinkage long sys_accept(int fd, struct sockaddr __user *upeer_sockaddr,
int __user *upeer_addrlen)
{
- return do_accept(fd, upeer_sockaddr, upeer_addrlen, 0);
+ return sys_accept4(fd, upeer_sockaddr, upeer_addrlen, 0);
}

/*
@@ -2095,7 +2039,7 @@ static const unsigned char nargs[19]={
AL(0),AL(3),AL(3),AL(3),AL(2),AL(3),
AL(3),AL(3),AL(4),AL(4),AL(4),AL(6),
AL(6),AL(2),AL(5),AL(5),AL(3),AL(3),
- AL(6)
+ AL(4)
};

#undef AL
@@ -2114,7 +2058,7 @@ asmlinkage long sys_socketcall(int call,
unsigned long a0, a1;
int err;

- if (call < 1 || call > SYS_PACCEPT)
+ if (call < 1 || call > SYS_ACCEPT4)
return -EINVAL;

/* copy_from_user should be SMP safe. */
@@ -2142,9 +2086,8 @@ asmlinkage long sys_socketcall(int call,
err = sys_listen(a0, a1);
break;
case SYS_ACCEPT:
- err =
- do_accept(a0, (struct sockaddr __user *)a1,
- (int __user *)a[2], 0);
+ err = sys_accept4(a0, (struct sockaddr __user *)a1,
+ (int __user *)a[2], 0);
break;
case SYS_GETSOCKNAME:
err =
@@ -2191,12 +2134,9 @@ asmlinkage long sys_socketcall(int call,
case SYS_RECVMSG:
err = sys_recvmsg(a0, (struct msghdr __user *)a1, a[2]);
break;
- case SYS_PACCEPT:
- err =
- sys_paccept(a0, (struct sockaddr __user *)a1,
- (int __user *)a[2],
- (const sigset_t __user *) a[3],
- a[4], a[5]);
+ case SYS_ACCEPT4:
+ err = sys_accept4(a0, (struct sockaddr __user *)a1,
+ (int __user *)a[2], a[3]);
break;
default:
err = -EINVAL;
_

2008-11-14 00:08:36

by David Miller

[permalink] [raw]
Subject: Re: [PATCH] reintroduce accept4

From: Andrew Morton <[email protected]>
Date: Thu, 13 Nov 2008 14:57:37 -0800

> Here's the latest version, for review-n-test enjoyment:

This adds the sparc syscall hookups.

Signed-off-by: David S. Miller <[email protected]>

diff --git a/arch/sparc/include/asm/unistd_32.h b/arch/sparc/include/asm/unistd_32.h
index 648643a..0d13d2a 100644
--- a/arch/sparc/include/asm/unistd_32.h
+++ b/arch/sparc/include/asm/unistd_32.h
@@ -338,8 +338,9 @@
#define __NR_dup3 320
#define __NR_pipe2 321
#define __NR_inotify_init1 322
+#define __NR_accept4 323

-#define NR_SYSCALLS 323
+#define NR_SYSCALLS 324

/* Sparc 32-bit only has the "setresuid32", "getresuid32" variants,
* it never had the plain ones and there is no value to adding those
diff --git a/arch/sparc/include/asm/unistd_64.h b/arch/sparc/include/asm/unistd_64.h
index c5cc0e0..fa5d3c0 100644
--- a/arch/sparc/include/asm/unistd_64.h
+++ b/arch/sparc/include/asm/unistd_64.h
@@ -340,8 +340,9 @@
#define __NR_dup3 320
#define __NR_pipe2 321
#define __NR_inotify_init1 322
+#define __NR_accept4 323

-#define NR_SYSCALLS 323
+#define NR_SYSCALLS 324

#ifdef __KERNEL__
#define __ARCH_WANT_IPC_PARSE_VERSION
diff --git a/arch/sparc/kernel/systbls.S b/arch/sparc/kernel/systbls.S
index e1b9233..7d08075 100644
--- a/arch/sparc/kernel/systbls.S
+++ b/arch/sparc/kernel/systbls.S
@@ -81,4 +81,4 @@ sys_call_table:
/*305*/ .long sys_set_mempolicy, sys_kexec_load, sys_move_pages, sys_getcpu, sys_epoll_pwait
/*310*/ .long sys_utimensat, sys_signalfd, sys_timerfd_create, sys_eventfd, sys_fallocate
/*315*/ .long sys_timerfd_settime, sys_timerfd_gettime, sys_signalfd4, sys_eventfd2, sys_epoll_create1
-/*320*/ .long sys_dup3, sys_pipe2, sys_inotify_init1
+/*320*/ .long sys_dup3, sys_pipe2, sys_inotify_init1, sys_accept4
diff --git a/arch/sparc64/kernel/sys32.S b/arch/sparc64/kernel/sys32.S
index ade18ba..f061c4d 100644
--- a/arch/sparc64/kernel/sys32.S
+++ b/arch/sparc64/kernel/sys32.S
@@ -150,7 +150,7 @@ sys32_mmap2:
sys32_socketcall: /* %o0=call, %o1=args */
cmp %o0, 1
bl,pn %xcc, do_einval
- cmp %o0, 17
+ cmp %o0, 18
bg,pn %xcc, do_einval
sub %o0, 1, %o0
sllx %o0, 5, %o0
@@ -319,6 +319,15 @@ do_sys_recvmsg: /* compat_sys_recvmsg(int, struct compat_msghdr *, unsigned int)
nop
nop
nop
+do_sys_accept4: /* sys_accept4(int, struct sockaddr *, int *, int) */
+63: ldswa [%o1 + 0x0] %asi, %o0
+ sethi %hi(sys_accept4), %g1
+64: lduwa [%o1 + 0x8] %asi, %o2
+65: ldswa [%o1 + 0xc] %asi, %o3
+ jmpl %g1 + %lo(sys_accept4), %g0
+66: lduwa [%o1 + 0x4] %asi, %o1
+ nop
+ nop

.section __ex_table,"a"
.align 4
@@ -353,4 +362,6 @@ do_sys_recvmsg: /* compat_sys_recvmsg(int, struct compat_msghdr *, unsigned int)
.word 57b, __retl_efault, 58b, __retl_efault
.word 59b, __retl_efault, 60b, __retl_efault
.word 61b, __retl_efault, 62b, __retl_efault
+ .word 63b, __retl_efault, 64b, __retl_efault
+ .word 65b, __retl_efault, 66b, __retl_efault
.previous
diff --git a/arch/sparc64/kernel/systbls.S b/arch/sparc64/kernel/systbls.S
index b2fa4c1..9fc78cf 100644
--- a/arch/sparc64/kernel/systbls.S
+++ b/arch/sparc64/kernel/systbls.S
@@ -82,7 +82,7 @@ sys_call_table32:
.word compat_sys_set_mempolicy, compat_sys_kexec_load, compat_sys_move_pages, sys_getcpu, compat_sys_epoll_pwait
/*310*/ .word compat_sys_utimensat, compat_sys_signalfd, sys_timerfd_create, sys_eventfd, compat_sys_fallocate
.word compat_sys_timerfd_settime, compat_sys_timerfd_gettime, compat_sys_signalfd4, sys_eventfd2, sys_epoll_create1
-/*320*/ .word sys_dup3, sys_pipe2, sys_inotify_init1
+/*320*/ .word sys_dup3, sys_pipe2, sys_inotify_init1, sys_accept4

#endif /* CONFIG_COMPAT */

@@ -156,4 +156,4 @@ sys_call_table:
.word sys_set_mempolicy, sys_kexec_load, sys_move_pages, sys_getcpu, sys_epoll_pwait
/*310*/ .word sys_utimensat, sys_signalfd, sys_timerfd_create, sys_eventfd, sys_fallocate
.word sys_timerfd_settime, sys_timerfd_gettime, sys_signalfd4, sys_eventfd2, sys_epoll_create1
-/*320*/ .word sys_dup3, sys_pipe2, sys_inotify_init1
+/*320*/ .word sys_dup3, sys_pipe2, sys_inotify_init1, sys_accept4

Subject: Re: [PATCH] reintroduce accept4

>> Andrew, you noted a lack of explanation accompanying the original
>> patch. Here's something to fill the gap, and which may be suitable
>> for the changelog.
>>
>> ==
>> Introduce a new accept4() system call. The addition of this system
>> call matches analogous changes in 2.6.27 (dup3(), evenfd2(),
>> signalfd4(), inotify_init1(), epoll_create1(), pipe2()) which added
>> new system calls that differed from analogous traditional system calls
>> in adding a flags argument that can be used to access additional
>> functionality. The accept4() system call is exactly the same as
>> accept(), except that it adds a flags bit-mask argument. Two flags
>> are initially implemented. (Most of the new system calls in 2.6.27
>> also had both of these flags.) SOCK_CLOEXEC causes the close-on-exec
>> (FD_CLOEXEC) flag to be enabled for the new file descriptor returned
>> by accept4(). This is a useful security feature to avoid leaking
>> information in a multithreaded program where one thread is doing an
>> accept() at the same time as another thread is doing a fork() plus
>> exec(). (More details here:
>> http://udrepper.livejournal.com/20407.html "Secure File Descriptor
>> Handling", Ulrich Drepper) The other flag is SOCK_NONBLOCK, which
>> causes the O_NONBLOCK flag to be enabled on the new open file
>> description created by accept4(). (This flag is merely a convenience,
>> saving the use of additional calls fcntl(F_GETFL) and fcntl (F_SETFL)
>> to achieve the same result.)
>
> I replaced the existing changelog with the above (plus some paragraph
> breaks ;)). Will add the new test app when it comes along.

Git allows paragraph breaks in changelogs?! You gotta love technology ;-).

--
Michael Kerrisk Linux man-pages maintainer;
http://www.kernel.org/doc/man-pages/ Found a documentation bug?
http://www.kernel.org/doc/man-pages/reporting_bugs.html

Subject: Re: [PATCH] reintroduce accept4

Andrew,

> From: Ulrich Drepper <[email protected]>
>
> Introduce a new accept4() system call. The addition of this system call
> matches analogous changes in 2.6.27 (dup3(), evenfd2(), signalfd4(),
> inotify_init1(), epoll_create1(), pipe2()) which added new system calls
> that differed from analogous traditional system calls in adding a flags
> argument that can be used to access additional functionality.
>
> The accept4() system call is exactly the same as accept(), except that it
> adds a flags bit-mask argument. Two flags are initially implemented.
> (Most of the new system calls in 2.6.27 also had both of these flags.)

Add a para break here.

> SOCK_CLOEXEC causes the close-on-exec (FD_CLOEXEC) flag to be enabled for
> the new file descriptor returned by accept4(). This is a useful security
> feature to avoid leaking information in a multithreaded program where one
> thread is doing an accept() at the same time as another thread is doing a
> fork() plus exec().
>

This para break is in the wrong place.

> More details here: http://udrepper.livejournal.com/20407.html "Secure File
> Descriptor Handling", Ulrich Drepper) The other flag is SOCK_NONBLOCK,

The para break should actually be at the sentence break in the previous line.

Cheers,

Michael

2008-11-14 17:41:19

by Michael Kerrisk

[permalink] [raw]
Subject: Re: [PATCH] reintroduce accept4

> Here's the latest version, for review-n-test enjoyment:

Andrew,

I made a few small tweaks to the changelog: wording fixes, and para
break fixes; other than that, changed the text to say that I rewrote
(rather than updated) the test program. Please substitute it with the
following.

Cheers,

Michael

===
From: Ulrich Drepper <[email protected]>

Introduce a new accept4() system call. The addition of this system call
matches analogous changes in 2.6.27 (dup3(), evenfd2(), signalfd4(),
inotify_init1(), epoll_create1(), pipe2()) which added new system calls
that differed from analogous traditional system calls in adding a flags
argument that can be used to access additional functionality.

The accept4() system call is exactly the same as accept(), except that
it adds a flags bit-mask argument. Two flags are initially implemented.
(Most of the new system calls in 2.6.27 also had both of these flags.)

SOCK_CLOEXEC causes the close-on-exec (FD_CLOEXEC) flag to be enabled
for the new file descriptor returned by accept4(). This is a useful
security feature to avoid leaking information in a multithreaded
program where one thread is doing an accept() at the same time as
another thread is doing a fork() plus exec(). More details here:
http://udrepper.livejournal.com/20407.html "Secure File Descriptor Handling",
Ulrich Drepper).

The other flag is SOCK_NONBLOCK, which causes the O_NONBLOCK flag
to be enabled on the new open file description created by accept4().
(This flag is merely a convenience, saving the use of additional calls
fcntl(F_GETFL) and fcntl (F_SETFL) to achieve the same result.

Here's a test program. Works on x86-32. Should work on x86-64, but
I (mtk) don't have a system to hand to test with.

It tests accept4() with each of the four possible combinations of
SOCK_CLOEXEC and SOCK_NONBLOCK set/clear in 'flags', and verifies
that the appropriate flags are set on the file descriptor/open file
description returned by accept4().

I tested Ulrich's patch in this thread by applying against 2.6.28-rc2,
and it passes according to my test program.

/* test_accept4.c

Copyright (C) 2008, Linux Foundation, written by Michael Kerrisk
<[email protected]>

Licensed under the GNU GPLv2 or later.
*/
#define _GNU_SOURCE
#include <unistd.h>
#include <sys/syscall.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <stdlib.h>
#include <fcntl.h>
#include <stdio.h>
#include <string.h>

#define PORT_NUM 33333

#define die(msg) do { perror(msg); exit(EXIT_FAILURE); } while (0)

/**********************************************************************/

/* The following is what we need until glibc gets a wrapper for
accept4() */

/* Flags for socket(), socketpair(), accept4() */
#ifndef SOCK_CLOEXEC
#define SOCK_CLOEXEC O_CLOEXEC
#endif
#ifndef SOCK_NONBLOCK
#define SOCK_NONBLOCK O_NONBLOCK
#endif

#ifdef __x86_64__
#define SYS_accept4 288
#elif __i386__
#define USE_SOCKETCALL 1
#define SYS_ACCEPT4 18
#else
#error "Sorry -- don't know the syscall # on this architecture"
#endif

static int
accept4(int fd, struct sockaddr *sockaddr, socklen_t *addrlen, int flags)
{
printf("Calling accept4(): flags = %x", flags);
if (flags != 0) {
printf(" (");
if (flags & SOCK_CLOEXEC)
printf("SOCK_CLOEXEC");
if ((flags & SOCK_CLOEXEC) && (flags & SOCK_NONBLOCK))
printf(" ");
if (flags & SOCK_NONBLOCK)
printf("SOCK_NONBLOCK");
printf(")");
}
printf("\n");

#if USE_SOCKETCALL
long args[6];

args[0] = fd;
args[1] = (long) sockaddr;
args[2] = (long) addrlen;
args[3] = flags;

return syscall(SYS_socketcall, SYS_ACCEPT4, args);
#else
return syscall(SYS_accept4, fd, sockaddr, addrlen, flags);
#endif
}

/**********************************************************************/


static int
do_test(int lfd, struct sockaddr_in *conn_addr,
int closeonexec_flag, int nonblock_flag)
{
int connfd, acceptfd;
int fdf, flf, fdf_pass, flf_pass;
struct sockaddr_in claddr;
socklen_t addrlen;

printf("=======================================\n");

connfd = socket(AF_INET, SOCK_STREAM, 0);
if (connfd == -1)
die("socket");
if (connect(connfd, (struct sockaddr *) conn_addr,
sizeof(struct sockaddr_in)) == -1)
die("connect");

addrlen = sizeof(struct sockaddr_in);
acceptfd = accept4(lfd, (struct sockaddr *) &claddr, &addrlen,
closeonexec_flag | nonblock_flag);
if (acceptfd == -1) {
perror("accept4()");
close(connfd);
return 0;
}

fdf = fcntl(acceptfd, F_GETFD);
if (fdf == -1)
die("fcntl:F_GETFD");
fdf_pass = ((fdf & FD_CLOEXEC) != 0) ==
((closeonexec_flag & SOCK_CLOEXEC) != 0);
printf("Close-on-exec flag is %sset (%s); ",
(fdf & FD_CLOEXEC) ? "" : "not ",
fdf_pass ? "OK" : "failed");

flf = fcntl(acceptfd, F_GETFL);
if (flf == -1)
die("fcntl:F_GETFD");
flf_pass = ((flf & O_NONBLOCK) != 0) ==
((nonblock_flag & SOCK_NONBLOCK) !=0);
printf("nonblock flag is %sset (%s)\n",
(flf & O_NONBLOCK) ? "" : "not ",
flf_pass ? "OK" : "failed");

close(acceptfd);
close(connfd);

printf("Test result: %s\n", (fdf_pass && flf_pass) ? "PASS" : "FAIL");
return fdf_pass && flf_pass;
}


static int
create_listening_socket(int port_num)
{
struct sockaddr_in svaddr;
int lfd;
int optval;

memset(&svaddr, 0, sizeof(struct sockaddr_in));
svaddr.sin_family = AF_INET;
svaddr.sin_addr.s_addr = htonl(INADDR_ANY);
svaddr.sin_port = htons(port_num);

lfd = socket(AF_INET, SOCK_STREAM, 0);
if (lfd == -1)
die("socket");

optval = 1;
if (setsockopt(lfd, SOL_SOCKET, SO_REUSEADDR, &optval,
sizeof(optval)) == -1)
die("setsockopt");

if (bind(lfd, (struct sockaddr *) &svaddr,
sizeof(struct sockaddr_in)) == -1)
die("bind");

if (listen(lfd, 5) == -1)
die("listen");

return lfd;
}


int
main(int argc, char *argv[])
{
struct sockaddr_in conn_addr;
int lfd;
int port_num;
int passed;

passed = 1;

port_num = (argc > 1) ? atoi(argv[1]) : PORT_NUM;

memset(&conn_addr, 0, sizeof(struct sockaddr_in));
conn_addr.sin_family = AF_INET;
conn_addr.sin_addr.s_addr = htonl(INADDR_LOOPBACK);
conn_addr.sin_port = htons(port_num);

lfd = create_listening_socket(port_num);

if (!do_test(lfd, &conn_addr, 0, 0))
passed = 0;
if (!do_test(lfd, &conn_addr, SOCK_CLOEXEC, 0))
passed = 0;
if (!do_test(lfd, &conn_addr, 0, SOCK_NONBLOCK))
passed = 0;
if (!do_test(lfd, &conn_addr, SOCK_CLOEXEC, SOCK_NONBLOCK))
passed = 0;

close(lfd);

exit(passed ? EXIT_SUCCESS : EXIT_FAILURE);
}

[[email protected]: rewrote changelog and test program]
Signed-off-by: Ulrich Drepper <[email protected]>
Tested-by: Michael Kerrisk <[email protected]>
Acked-by: Michael Kerrisk <[email protected]>
Cc: <[email protected]>
Cc: <[email protected]>
Signed-off-by: Andrew Morton <[email protected]>