2010-02-24 07:08:04

by André Goddard Rosa

[permalink] [raw]
Subject: [PATCH 0/6] Fix file descriptor leak on user-space processes and cleanup

Fix a file descriptor leak on user-space processes and perform a cleanup,
reducing the code size:
text data bss dec hex filename
9949 72 16 10037 2735 ipc/mqueue-BEFORE.o
9885 72 16 9973 26f5 ipc/mqueue-AFTER.o

André Goddard Rosa (6):
mqueue: remove unneeded info->messages initialization
mqueue: apply mathematics distributivity on mq_bytes calculation
mqueue: simplify do_open() error handling
mqueue: only set error codes if they are really necessary
mqueue: fix typo "failues" -> "failures"
mqueue: fix mq_open() file descriptor leak on user-space processes


2010-02-24 07:08:11

by André Goddard Rosa

[permalink] [raw]
Subject: [PATCH 3/6] mqueue: simplify do_open() error handling

It reduces code size:
text data bss dec hex filename
9925 72 16 10013 271d ipc/mqueue-BEFORE.o
9885 72 16 9973 26f5 ipc/mqueue-AFTER.o

Signed-off-by: André Goddard Rosa <[email protected]>
---
ipc/mqueue.c | 16 ++++++++++------
1 files changed, 10 insertions(+), 6 deletions(-)

diff --git a/ipc/mqueue.c b/ipc/mqueue.c
index 04403fd..2cddf93 100644
--- a/ipc/mqueue.c
+++ b/ipc/mqueue.c
@@ -657,24 +657,28 @@ out:
static struct file *do_open(struct ipc_namespace *ipc_ns,
struct dentry *dentry, int oflag)
{
+ int ret;
const struct cred *cred = current_cred();

static const int oflag2acc[O_ACCMODE] = { MAY_READ, MAY_WRITE,
MAY_READ | MAY_WRITE };

if ((oflag & O_ACCMODE) == (O_RDWR | O_WRONLY)) {
- dput(dentry);
- mntput(ipc_ns->mq_mnt);
- return ERR_PTR(-EINVAL);
+ ret = -EINVAL;
+ goto err;
}

if (inode_permission(dentry->d_inode, oflag2acc[oflag & O_ACCMODE])) {
- dput(dentry);
- mntput(ipc_ns->mq_mnt);
- return ERR_PTR(-EACCES);
+ ret = -EACCES;
+ goto err;
}

return dentry_open(dentry, ipc_ns->mq_mnt, oflag, cred);
+
+err:
+ dput(dentry);
+ mntput(ipc_ns->mq_mnt);
+ return ERR_PTR(ret);
}

SYSCALL_DEFINE4(mq_open, const char __user *, u_name, int, oflag, mode_t, mode,
--
1.7.0.87.g0901d

2010-02-24 07:08:15

by André Goddard Rosa

[permalink] [raw]
Subject: [PATCH 4/6] mqueue: only set error codes if they are really necessary

... postponing assignments until they're needed. Doesn't change code size.

Signed-off-by: André Goddard Rosa <[email protected]>
---
ipc/mqueue.c | 77 +++++++++++++++++++++++++++++++++++++--------------------
1 files changed, 50 insertions(+), 27 deletions(-)

diff --git a/ipc/mqueue.c b/ipc/mqueue.c
index 2cddf93..906e873 100644
--- a/ipc/mqueue.c
+++ b/ipc/mqueue.c
@@ -184,7 +184,7 @@ static int mqueue_fill_super(struct super_block *sb, void *data, int silent)
{
struct inode *inode;
struct ipc_namespace *ns = data;
- int error = 0;
+ int error;

sb->s_blocksize = PAGE_CACHE_SIZE;
sb->s_blocksize_bits = PAGE_CACHE_SHIFT;
@@ -202,7 +202,9 @@ static int mqueue_fill_super(struct super_block *sb, void *data, int silent)
if (!sb->s_root) {
iput(inode);
error = -ENOMEM;
+ goto out;
}
+ error = 0;

out:
return error;
@@ -621,9 +623,10 @@ static struct file *do_create(struct ipc_namespace *ipc_ns, struct dentry *dir,
int ret;

if (attr) {
- ret = -EINVAL;
- if (!mq_attr_ok(ipc_ns, attr))
+ if (!mq_attr_ok(ipc_ns, attr)) {
+ ret = -EINVAL;
goto out;
+ }
/* store for use during create */
dentry->d_fsdata = attr;
}
@@ -714,9 +717,10 @@ SYSCALL_DEFINE4(mq_open, const char __user *, u_name, int, oflag, mode_t, mode,
if (oflag & O_CREAT) {
if (dentry->d_inode) { /* entry already exists */
audit_inode(name, dentry);
- error = -EEXIST;
- if (oflag & O_EXCL)
+ if (oflag & O_EXCL) {
+ error = -EEXIST;
goto out;
+ }
filp = do_open(ipc_ns, dentry, oflag);
} else {
filp = do_create(ipc_ns, ipc_ns->mq_mnt->mnt_root,
@@ -724,9 +728,10 @@ SYSCALL_DEFINE4(mq_open, const char __user *, u_name, int, oflag, mode_t, mode,
u_attr ? &attr : NULL);
}
} else {
- error = -ENOENT;
- if (!dentry->d_inode)
+ if (!dentry->d_inode) {
+ error = -ENOENT;
goto out;
+ }
audit_inode(name, dentry);
filp = do_open(ipc_ns, dentry, oflag);
}
@@ -874,19 +879,24 @@ SYSCALL_DEFINE5(mq_timedsend, mqd_t, mqdes, const char __user *, u_msg_ptr,
audit_mq_sendrecv(mqdes, msg_len, msg_prio, p);
timeout = prepare_timeout(p);

- ret = -EBADF;
filp = fget(mqdes);
- if (unlikely(!filp))
+ if (unlikely(!filp)) {
+ ret = -EBADF;
goto out;
+ }

inode = filp->f_path.dentry->d_inode;
- if (unlikely(filp->f_op != &mqueue_file_operations))
+ if (unlikely(filp->f_op != &mqueue_file_operations)) {
+ ret = -EBADF;
goto out_fput;
+ }
info = MQUEUE_I(inode);
audit_inode(NULL, filp->f_path.dentry);

- if (unlikely(!(filp->f_mode & FMODE_WRITE)))
+ if (unlikely(!(filp->f_mode & FMODE_WRITE))) {
+ ret = -EBADF;
goto out_fput;
+ }

if (unlikely(msg_len > info->attr.mq_msgsize)) {
ret = -EMSGSIZE;
@@ -963,19 +973,24 @@ SYSCALL_DEFINE5(mq_timedreceive, mqd_t, mqdes, char __user *, u_msg_ptr,
audit_mq_sendrecv(mqdes, msg_len, 0, p);
timeout = prepare_timeout(p);

- ret = -EBADF;
filp = fget(mqdes);
- if (unlikely(!filp))
+ if (unlikely(!filp)) {
+ ret = -EBADF;
goto out;
+ }

inode = filp->f_path.dentry->d_inode;
- if (unlikely(filp->f_op != &mqueue_file_operations))
+ if (unlikely(filp->f_op != &mqueue_file_operations)) {
+ ret = -EBADF;
goto out_fput;
+ }
info = MQUEUE_I(inode);
audit_inode(NULL, filp->f_path.dentry);

- if (unlikely(!(filp->f_mode & FMODE_READ)))
+ if (unlikely(!(filp->f_mode & FMODE_READ))) {
+ ret = -EBADF;
goto out_fput;
+ }

/* checks if buffer is big enough */
if (unlikely(msg_len < info->attr.mq_msgsize)) {
@@ -1065,13 +1080,14 @@ SYSCALL_DEFINE2(mq_notify, mqd_t, mqdes,

/* create the notify skb */
nc = alloc_skb(NOTIFY_COOKIE_LEN, GFP_KERNEL);
- ret = -ENOMEM;
- if (!nc)
+ if (!nc) {
+ ret = -ENOMEM;
goto out;
- ret = -EFAULT;
+ }
if (copy_from_user(nc->data,
notification.sigev_value.sival_ptr,
NOTIFY_COOKIE_LEN)) {
+ ret = -EFAULT;
goto out;
}

@@ -1080,9 +1096,10 @@ SYSCALL_DEFINE2(mq_notify, mqd_t, mqdes,
/* and attach it to the socket */
retry:
filp = fget(notification.sigev_signo);
- ret = -EBADF;
- if (!filp)
+ if (!filp) {
+ ret = -EBADF;
goto out;
+ }
sock = netlink_getsockbyfilp(filp);
fput(filp);
if (IS_ERR(sock)) {
@@ -1094,7 +1111,7 @@ retry:
timeo = MAX_SCHEDULE_TIMEOUT;
ret = netlink_attachskb(sock, nc, &timeo, NULL);
if (ret == 1)
- goto retry;
+ goto retry;
if (ret) {
sock = NULL;
nc = NULL;
@@ -1103,14 +1120,17 @@ retry:
}
}

- ret = -EBADF;
filp = fget(mqdes);
- if (!filp)
+ if (!filp) {
+ ret = -EBADF;
goto out;
+ }

inode = filp->f_path.dentry->d_inode;
- if (unlikely(filp->f_op != &mqueue_file_operations))
+ if (unlikely(filp->f_op != &mqueue_file_operations)) {
+ ret = -EBADF;
goto out_fput;
+ }
info = MQUEUE_I(inode);

ret = 0;
@@ -1173,14 +1193,17 @@ SYSCALL_DEFINE3(mq_getsetattr, mqd_t, mqdes,
return -EINVAL;
}

- ret = -EBADF;
filp = fget(mqdes);
- if (!filp)
+ if (!filp) {
+ ret = -EBADF;
goto out;
+ }

inode = filp->f_path.dentry->d_inode;
- if (unlikely(filp->f_op != &mqueue_file_operations))
+ if (unlikely(filp->f_op != &mqueue_file_operations)) {
+ ret = -EBADF;
goto out_fput;
+ }
info = MQUEUE_I(inode);

spin_lock(&info->lock);
--
1.7.0.87.g0901d

2010-02-24 07:13:16

by André Goddard Rosa

[permalink] [raw]
Subject: [PATCH 1/6] mqueue: remove unneeded info->messages initialization

... and abort earlier if we couldn't allocate the message pointers array,
avoiding the u->mq_bytes accounting logic.

It reduces code size:
text data bss dec hex filename
9949 72 16 10037 2735 ipc/mqueue-BEFORE.o
9941 72 16 10029 272d ipc/mqueue-AFTER.o

Signed-off-by: André Goddard Rosa <[email protected]>
---
ipc/mqueue.c | 13 +++++--------
1 files changed, 5 insertions(+), 8 deletions(-)

diff --git a/ipc/mqueue.c b/ipc/mqueue.c
index c79bd57..2d76647 100644
--- a/ipc/mqueue.c
+++ b/ipc/mqueue.c
@@ -134,7 +134,6 @@ static struct inode *mqueue_get_inode(struct super_block *sb,
init_waitqueue_head(&info->wait_q);
INIT_LIST_HEAD(&info->e_wait_q[0].list);
INIT_LIST_HEAD(&info->e_wait_q[1].list);
- info->messages = NULL;
info->notify_owner = NULL;
info->qsize = 0;
info->user = NULL; /* set when all is ok */
@@ -146,6 +145,10 @@ static struct inode *mqueue_get_inode(struct super_block *sb,
info->attr.mq_msgsize = attr->mq_msgsize;
}
mq_msg_tblsz = info->attr.mq_maxmsg * sizeof(struct msg_msg *);
+ info->messages = kmalloc(mq_msg_tblsz, GFP_KERNEL);
+ if (!info->messages)
+ goto out_inode;
+
mq_bytes = (mq_msg_tblsz +
(info->attr.mq_maxmsg * info->attr.mq_msgsize));

@@ -154,18 +157,12 @@ static struct inode *mqueue_get_inode(struct super_block *sb,
u->mq_bytes + mq_bytes >
p->signal->rlim[RLIMIT_MSGQUEUE].rlim_cur) {
spin_unlock(&mq_lock);
+ kfree(info->messages);
goto out_inode;
}
u->mq_bytes += mq_bytes;
spin_unlock(&mq_lock);

- info->messages = kmalloc(mq_msg_tblsz, GFP_KERNEL);
- if (!info->messages) {
- spin_lock(&mq_lock);
- u->mq_bytes -= mq_bytes;
- spin_unlock(&mq_lock);
- goto out_inode;
- }
/* all is ok */
info->user = get_uid(u);
} else if (S_ISDIR(mode)) {
--
1.7.0.87.g0901d

2010-02-24 07:13:45

by André Goddard Rosa

[permalink] [raw]
Subject: [PATCH 6/6] mqueue: fix mq_open() file descriptor leak on user-space processes

It can be triggered by the following test program:

#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <mqueue.h>
#include <fcntl.h>
#include <errno.h>
#include <sys/stat.h>
#include <sys/resource.h>

int main(int argc, char *argv[])
{
struct rlimit limit;
char queue_name[] = "/mq_open/bug";
char tmp_name[] = "/tmp/tmp";
int fd, i = 1;

if (getrlimit(RLIMIT_NOFILE, &limit) != 0) {
printf("%s\n", "Failed to get RLIMIT_NOFILE");
return EXIT_FAILURE;
}
printf("Max number of open files is: %d\n", limit.rlim_cur);

while (i <= limit.rlim_cur) {
mqd_t queue;

errno = 0;
queue = mq_open(queue_name, O_CREAT |O_RDWR, S_IRUSR | S_IWUSR
, NULL);
if (queue != (mqd_t)-1) {
/* Success opening mqueue, no leak will happen */
printf("Successfully opened an mqueue[%d]\n", queue);
printf("mq_close(%d) = %d\n", queue, mq_close(queue));
return EXIT_SUCCESS;
}
/* Failed to open mqueue, maybe a leak is happening... */
if (errno == EMFILE)
{
printf("\nRun out of file descriptors");
break;
}
printf("\rLeaking [%d] files?!?!", i++);
fflush(stdout);
usleep(500);
}
/* Double check that no file descriptor is available anymore indeed */
putchar('\n');
errno = 0;
fd = open(tmp_name, O_CREAT | O_RDWR, S_IRUSR | S_IWUSR);
if (fd == -1) {
printf("open() failed: %s\n", strerror(errno));
if (errno == EMFILE) {
printf("%s\n", "Cannot open new files, fds exhausted");
return EXIT_FAILURE;
}
} else
printf("close(%d) = %d\n", fd, close(fd));
printf("%s\n", "Expected output: kernel is not leaking any fds!");

return EXIT_SUCCESS;
}

## Preparing for testing

$ touch /tmp/tmp
$ gcc -g main_mq_open_fd_leak.c -lrt

## Linux kernel with the fix applied:

$ ./a.out
Max number of open files is: 1024
Leaking [1024] files?!?!
close(3) = 0
Expected output: kernel is not leaking any fds!

## Linux kernel without the fix:

## Shell execution:

$ ./a.out
Max number of open files is: 1024
Leaking [1019] files?!?!
Run out of file descriptors
Segmentation fault

## Valgrind execution:

$ valgrind --track-fds=yes ./a.out
==2895== Memcheck, a memory error detector
==2895== Copyright (C) 2002-2009, and GNU GPL'd, by Julian Seward et al.
==2895== Using Valgrind-3.6.0.SVN and LibVEX; rerun with -h for copyright info
==2895== Command: ./a.out
==2895==
Max number of open files is: 1024
Leaking [1024] files?!?!
open() failed: Too many open files
Cannot open new files, fds exhausted
==2895==
==2895== FILE DESCRIPTORS: 5 open at exit.
==2895== Open file descriptor 13:
==2895== <inherited from parent>
==2895==
==2895== Open file descriptor 12:
==2895== <inherited from parent>
==2895==
==2895== Open file descriptor 2: /dev/pts/1
==2895== <inherited from parent>
==2895==
==2895== Open file descriptor 1: /dev/pts/1
==2895== <inherited from parent>
==2895==
==2895== Open file descriptor 0: /dev/pts/1
==2895== <inherited from parent>
==2895==
==2895==
==2895== HEAP SUMMARY:
==2895== in use at exit: 0 bytes in 0 blocks
==2895== total heap usage: 0 allocs, 0 frees, 0 bytes allocated
==2895==
==2895== All heap blocks were freed -- no leaks are possible
==2895==
==2895== For counts of detected and suppressed errors, rerun with: -v
==2895== ERROR SUMMARY: 0 errors from 0 contexts (suppressed: 2 from 2)

When not running valgrind, user-space program segfaults trying to execute
strerror(errno). With valgrind, it executes successfully and prints the
5 open files: stdin, stdout, stderr, pipe[0] and pipe[1].

Signed-off-by: André Goddard Rosa <[email protected]>
---
ipc/mqueue.c | 3 +--
1 files changed, 1 insertions(+), 2 deletions(-)

diff --git a/ipc/mqueue.c b/ipc/mqueue.c
index e47c9eb..b6cb064 100644
--- a/ipc/mqueue.c
+++ b/ipc/mqueue.c
@@ -710,7 +710,7 @@ SYSCALL_DEFINE4(mq_open, const char __user *, u_name, int, oflag, mode_t, mode,
dentry = lookup_one_len(name, ipc_ns->mq_mnt->mnt_root, strlen(name));
if (IS_ERR(dentry)) {
error = PTR_ERR(dentry);
- goto out_err;
+ goto out_putfd;
}
mntget(ipc_ns->mq_mnt);

@@ -749,7 +749,6 @@ out:
mntput(ipc_ns->mq_mnt);
out_putfd:
put_unused_fd(fd);
-out_err:
fd = error;
out_upsem:
mutex_unlock(&ipc_ns->mq_mnt->mnt_root->d_inode->i_mutex);
--
1.7.0.87.g0901d

2010-02-24 07:14:30

by André Goddard Rosa

[permalink] [raw]
Subject: [PATCH 5/6] mqueue: fix typo "failues" -> "failures"

Signed-off-by: André Goddard Rosa <[email protected]>
---
ipc/mqueue.c | 2 +-
1 files changed, 1 insertions(+), 1 deletions(-)

diff --git a/ipc/mqueue.c b/ipc/mqueue.c
index 906e873..e47c9eb 100644
--- a/ipc/mqueue.c
+++ b/ipc/mqueue.c
@@ -1297,7 +1297,7 @@ static int __init init_mqueue_fs(void)
if (mqueue_inode_cachep == NULL)
return -ENOMEM;

- /* ignore failues - they are not fatal */
+ /* ignore failures - they are not fatal */
mq_sysctl_table = mq_register_sysctl_table();

error = register_filesystem(&mqueue_fs_type);
--
1.7.0.87.g0901d

2010-02-24 07:14:54

by André Goddard Rosa

[permalink] [raw]
Subject: [PATCH 2/6] mqueue: apply mathematics distributivity on mq_bytes calculation

Code size reduction:
text data bss dec hex filename
9941 72 16 10029 272d ipc/mqueue-BEFORE.o
9925 72 16 10013 271d ipc/mqueue-AFTER.o

Signed-off-by: André Goddard Rosa <[email protected]>
---
ipc/mqueue.c | 9 +++++----
1 files changed, 5 insertions(+), 4 deletions(-)

diff --git a/ipc/mqueue.c b/ipc/mqueue.c
index 2d76647..04403fd 100644
--- a/ipc/mqueue.c
+++ b/ipc/mqueue.c
@@ -261,8 +261,9 @@ static void mqueue_delete_inode(struct inode *inode)

clear_inode(inode);

- mq_bytes = (info->attr.mq_maxmsg * sizeof(struct msg_msg *) +
- (info->attr.mq_maxmsg * info->attr.mq_msgsize));
+ /* Total amount of bytes accounted for the mqueue */
+ mq_bytes = info->attr.mq_maxmsg * (sizeof(struct msg_msg *)
+ + info->attr.mq_msgsize);
user = info->user;
if (user) {
spin_lock(&mq_lock);
@@ -601,8 +602,8 @@ static int mq_attr_ok(struct ipc_namespace *ipc_ns, struct mq_attr *attr)
/* check for overflow */
if (attr->mq_msgsize > ULONG_MAX/attr->mq_maxmsg)
return 0;
- if ((unsigned long)(attr->mq_maxmsg * attr->mq_msgsize) +
- (attr->mq_maxmsg * sizeof (struct msg_msg *)) <
+ if ((unsigned long)(attr->mq_maxmsg * (attr->mq_msgsize
+ + sizeof (struct msg_msg *))) <
(unsigned long)(attr->mq_maxmsg * attr->mq_msgsize))
return 0;
return 1;
--
1.7.0.87.g0901d

2010-02-24 22:03:10

by Andrew Morton

[permalink] [raw]
Subject: Re: [PATCH 0/6] Fix file descriptor leak on user-space processes and cleanup

On Tue, 23 Feb 2010 04:04:22 -0300 Andr__ Goddard Rosa <[email protected]> wrote:

> Fix a file descriptor leak on user-space processes and perform a cleanup,
> reducing the code size:
> text data bss dec hex filename
> 9949 72 16 10037 2735 ipc/mqueue-BEFORE.o
> 9885 72 16 9973 26f5 ipc/mqueue-AFTER.o
>
> Andr__ Goddard Rosa (6):
> mqueue: remove unneeded info->messages initialization
> mqueue: apply mathematics distributivity on mq_bytes calculation
> mqueue: simplify do_open() error handling
> mqueue: only set error codes if they are really necessary
> mqueue: fix typo "failues" -> "failures"
> mqueue: fix mq_open() file descriptor leak on user-space processes

Fixing the leak is far more important than the other five patches, and
we'll want to backport the leak fix into earlier kernels. So the
bugfix patch should have been the first in the series!

So I've reordered the patches in that fashion and shall tag "mqueue:
fix mq_open() file descriptor leak on user-space processes" as needing
-stable backporting.

The patches apply and build OK with that reordering, but please do
double-check it, thanks.

2010-02-25 03:35:06

by Cong Wang

[permalink] [raw]
Subject: Re: [PATCH 6/6] mqueue: fix mq_open() file descriptor leak on user-space processes

On Tue, Feb 23, 2010 at 3:04 PM, André Goddard Rosa
<[email protected]> wrote:
> It can be triggered by the following test program:
>

<snip>

>
> When not running valgrind, user-space program segfaults trying to execute
> strerror(errno). With valgrind, it executes successfully and prints the
> 5 open files: stdin, stdout, stderr, pipe[0] and pipe[1].
>
> Signed-off-by: André Goddard Rosa <[email protected]>
> ---

The code has more than just this problem, could you please try
my patch below?

Thanks.

---------------------------->

Clean up the failure path of sys_mq_open().

Reorder the goto labels;
Rename 'upsem' to 'upunlock';
Remove some unused labels;
Fix some wrong goto path.

Signed-off-by: WANG Cong <[email protected]>

---


Attachments:
ipc-mqueue_c-cleanup-failure-path.diff (1.32 kB)

2010-02-25 04:09:10

by Xiaotian Feng

[permalink] [raw]
Subject: Re: [PATCH 6/6] mqueue: fix mq_open() file descriptor leak on user-space processes

2010/2/25 Américo Wang <[email protected]>:
> On Tue, Feb 23, 2010 at 3:04 PM, André Goddard Rosa
> <[email protected]> wrote:
>> It can be triggered by the following test program:
>>
>
> <snip>
>
>>
>> When not running valgrind, user-space program segfaults trying to execute
>> strerror(errno). With valgrind, it executes successfully and prints the
>> 5 open files: stdin, stdout, stderr, pipe[0] and pipe[1].
>>
>> Signed-off-by: André Goddard Rosa <[email protected]>
>> ---
>
> The code has more than just this problem, could you please try
> my patch below?
>
> Thanks.
>
> ---------------------------->
>
> Clean up the failure path of sys_mq_open().
>
> Reorder the goto labels;
> Rename 'upsem' to 'upunlock';
> Remove some unused labels;
> Fix some wrong goto path.
>

I think it's wrong to move dput after mntput

> Signed-off-by: WANG Cong <[email protected]>
>
> ---
>

2010-02-25 04:26:04

by Cong Wang

[permalink] [raw]
Subject: Re: [PATCH 6/6] mqueue: fix mq_open() file descriptor leak on user-space processes

On Thu, Feb 25, 2010 at 12:00 PM, Xiaotian Feng <[email protected]> wrote:
> 2010/2/25 Américo Wang <[email protected]>:
>> On Tue, Feb 23, 2010 at 3:04 PM, André Goddard Rosa
>> <[email protected]> wrote:
>>> It can be triggered by the following test program:
>>>
>>
>> <snip>
>>
>>>
>>> When not running valgrind, user-space program segfaults trying to execute
>>> strerror(errno). With valgrind, it executes successfully and prints the
>>> 5 open files: stdin, stdout, stderr, pipe[0] and pipe[1].
>>>
>>> Signed-off-by: André Goddard Rosa <[email protected]>
>>> ---
>>
>> The code has more than just this problem, could you please try
>> my patch below?
>>
>> Thanks.
>>
>> ---------------------------->
>>
>> Clean up the failure path of sys_mq_open().
>>
>> Reorder the goto labels;
>> Rename 'upsem' to 'upunlock';
>> Remove some unused labels;
>> Fix some wrong goto path.
>>
>
> I think it's wrong to move dput after mntput

Oh, this is to say mntget() should be called before lookup_one_len(),
the original code seems wrong again...

2010-02-25 06:59:27

by Cong Wang

[permalink] [raw]
Subject: Re: [PATCH 6/6] mqueue: fix mq_open() file descriptor leak on user-space processes

On Thu, Feb 25, 2010 at 12:25 PM, Américo Wang <[email protected]> wrote:
> On Thu, Feb 25, 2010 at 12:00 PM, Xiaotian Feng <[email protected]> wrote:
>> 2010/2/25 Américo Wang <[email protected]>:
>>> On Tue, Feb 23, 2010 at 3:04 PM, André Goddard Rosa
>>> <[email protected]> wrote:
>>>> It can be triggered by the following test program:
>>>>
>>>
>>> <snip>
>>>
>>>>
>>>> When not running valgrind, user-space program segfaults trying to execute
>>>> strerror(errno). With valgrind, it executes successfully and prints the
>>>> 5 open files: stdin, stdout, stderr, pipe[0] and pipe[1].
>>>>
>>>> Signed-off-by: André Goddard Rosa <[email protected]>
>>>> ---
>>>
>>> The code has more than just this problem, could you please try
>>> my patch below?
>>>
>>> Thanks.
>>>
>>> ---------------------------->
>>>
>>> Clean up the failure path of sys_mq_open().
>>>
>>> Reorder the goto labels;
>>> Rename 'upsem' to 'upunlock';
>>> Remove some unused labels;
>>> Fix some wrong goto path.
>>>
>>
>> I think it's wrong to move dput after mntput
>
> Oh, this is to say mntget() should be called before lookup_one_len(),
> the original code seems wrong again...
>

How about the one below?
---------

Signed-off-by: WANG Cong <[email protected]>


Attachments:
ipc-mqueue_c-cleanup-failure-path.diff (1.53 kB)

2010-02-25 10:49:12

by Xiaotian Feng

[permalink] [raw]
Subject: Re: [PATCH 6/6] mqueue: fix mq_open() file descriptor leak on user-space processes

On Thu, Feb 25, 2010 at 2:59 PM, Américo Wang <[email protected]> wrote:
> On Thu, Feb 25, 2010 at 12:25 PM, Américo Wang <[email protected]> wrote:
>> On Thu, Feb 25, 2010 at 12:00 PM, Xiaotian Feng <[email protected]> wrote:
>>> 2010/2/25 Américo Wang <[email protected]>:
>>>> On Tue, Feb 23, 2010 at 3:04 PM, André Goddard Rosa
>>>> <[email protected]> wrote:
>>>>> It can be triggered by the following test program:
>>>>>
>>>>
>>>> <snip>
>>>>
>>>>>
>>>>> When not running valgrind, user-space program segfaults trying to execute
>>>>> strerror(errno). With valgrind, it executes successfully and prints the
>>>>> 5 open files: stdin, stdout, stderr, pipe[0] and pipe[1].
>>>>>
>>>>> Signed-off-by: André Goddard Rosa <[email protected]>
>>>>> ---
>>>>
>>>> The code has more than just this problem, could you please try
>>>> my patch below?
>>>>
>>>> Thanks.
>>>>
>>>> ---------------------------->
>>>>
>>>> Clean up the failure path of sys_mq_open().
>>>>
>>>> Reorder the goto labels;
>>>> Rename 'upsem' to 'upunlock';
>>>> Remove some unused labels;
>>>> Fix some wrong goto path.
>>>>
>>>
>>> I think it's wrong to move dput after mntput
>>
>> Oh, this is to say mntget() should be called before lookup_one_len(),
>> the original code seems wrong again...
>>
>
> How about the one below?

This is definitely wrong,

if (IS_ERR(filp)) {
error = PTR_ERR(filp);
- goto out_putfd;
+ goto out;
}

filp is assigned by do_open or do_create in mqueue.c, take a look at
the code, if do_open/do_create is failed, kernel is already dput &
mntput...

So I think original patch from André is enough....

> ---------
>
> Signed-off-by: WANG Cong <[email protected]>
>

2010-02-25 10:56:57

by Xiaotian Feng

[permalink] [raw]
Subject: Re: [PATCH 6/6] mqueue: fix mq_open() file descriptor leak on user-space processes

On Tue, Feb 23, 2010 at 3:04 PM, André Goddard Rosa
<[email protected]> wrote:
> It can be triggered by the following test program:
>
> #include <stdio.h>
> #include <stdlib.h>
> #include <unistd.h>
> #include <mqueue.h>
> #include <fcntl.h>
> #include <errno.h>
> #include <sys/stat.h>
> #include <sys/resource.h>
>
> int main(int argc, char *argv[])
> {
>        struct rlimit limit;
>        char queue_name[] = "/mq_open/bug";
>        char tmp_name[]   = "/tmp/tmp";
>        int fd, i = 1;
>
>        if (getrlimit(RLIMIT_NOFILE, &limit) != 0) {
>                printf("%s\n", "Failed to get RLIMIT_NOFILE");
>                return EXIT_FAILURE;
>        }
>        printf("Max number of open files is: %d\n", limit.rlim_cur);
>
>        while (i <= limit.rlim_cur) {
>                mqd_t queue;
>
>                errno = 0;
>                queue = mq_open(queue_name, O_CREAT |O_RDWR, S_IRUSR | S_IWUSR
>                    , NULL);
>                if (queue != (mqd_t)-1) {
>                        /* Success opening mqueue, no leak will happen */
>                        printf("Successfully opened an mqueue[%d]\n", queue);
>                        printf("mq_close(%d) = %d\n", queue, mq_close(queue));
>                        return EXIT_SUCCESS;
>                }
>                /* Failed to open mqueue, maybe a leak is happening... */
>                if (errno == EMFILE)
>                {
>                        printf("\nRun out of file descriptors");
>                        break;
>                }
>                printf("\rLeaking [%d] files?!?!", i++);
>                fflush(stdout);
>                usleep(500);
>        }
>        /* Double check that no file descriptor is available anymore indeed */
>        putchar('\n');
>        errno = 0;
>        fd = open(tmp_name, O_CREAT | O_RDWR, S_IRUSR | S_IWUSR);
>        if (fd == -1) {
>                printf("open() failed: %s\n", strerror(errno));
>                if (errno == EMFILE) {
>                        printf("%s\n", "Cannot open new files, fds exhausted");
>                        return EXIT_FAILURE;
>                }
>        } else
>                printf("close(%d) = %d\n", fd, close(fd));
>        printf("%s\n", "Expected output: kernel is not leaking any fds!");
>
>        return EXIT_SUCCESS;
> }
>
> ## Preparing for testing
>
> $ touch /tmp/tmp
> $ gcc -g main_mq_open_fd_leak.c -lrt
>
> ## Linux kernel with the fix applied:
>
> $ ./a.out
> Max number of open files is: 1024
> Leaking [1024] files?!?!
> close(3) = 0
> Expected output: kernel is not leaking any fds!
>
> ## Linux kernel without the fix:
>
> ## Shell execution:
>
> $ ./a.out
> Max number of open files is: 1024
> Leaking [1019] files?!?!
> Run out of file descriptors
> Segmentation fault
>
> ## Valgrind execution:
>
> $ valgrind --track-fds=yes ./a.out
> ==2895== Memcheck, a memory error detector
> ==2895== Copyright (C) 2002-2009, and GNU GPL'd, by Julian Seward et al.
> ==2895== Using Valgrind-3.6.0.SVN and LibVEX; rerun with -h for copyright info
> ==2895== Command: ./a.out
> ==2895==
> Max number of open files is: 1024
> Leaking [1024] files?!?!
> open() failed: Too many open files
> Cannot open new files, fds exhausted
> ==2895==
> ==2895== FILE DESCRIPTORS: 5 open at exit.
> ==2895== Open file descriptor 13:
> ==2895==    <inherited from parent>
> ==2895==
> ==2895== Open file descriptor 12:
> ==2895==    <inherited from parent>
> ==2895==
> ==2895== Open file descriptor 2: /dev/pts/1
> ==2895==    <inherited from parent>
> ==2895==
> ==2895== Open file descriptor 1: /dev/pts/1
> ==2895==    <inherited from parent>
> ==2895==
> ==2895== Open file descriptor 0: /dev/pts/1
> ==2895==    <inherited from parent>
> ==2895==
> ==2895==
> ==2895== HEAP SUMMARY:
> ==2895==     in use at exit: 0 bytes in 0 blocks
> ==2895==   total heap usage: 0 allocs, 0 frees, 0 bytes allocated
> ==2895==
> ==2895== All heap blocks were freed -- no leaks are possible
> ==2895==
> ==2895== For counts of detected and suppressed errors, rerun with: -v
> ==2895== ERROR SUMMARY: 0 errors from 0 contexts (suppressed: 2 from 2)
>
> When not running valgrind, user-space program segfaults trying to execute
> strerror(errno). With valgrind, it executes successfully and prints the
> 5 open files: stdin, stdout, stderr, pipe[0] and pipe[1].
>
> Signed-off-by: André Goddard Rosa <[email protected]>

If kernel failed to lookup the dentry in mqueue, the fd allocated by
get_unused_fd_flags will be leaked then.
I think this is a good catch ;-)

Acked-by: Xiaotian Feng <[email protected]>

> ---
>  ipc/mqueue.c |    3 +--
>  1 files changed, 1 insertions(+), 2 deletions(-)
>
> diff --git a/ipc/mqueue.c b/ipc/mqueue.c
> index e47c9eb..b6cb064 100644
> --- a/ipc/mqueue.c
> +++ b/ipc/mqueue.c
> @@ -710,7 +710,7 @@ SYSCALL_DEFINE4(mq_open, const char __user *, u_name, int, oflag, mode_t, mode,
>        dentry = lookup_one_len(name, ipc_ns->mq_mnt->mnt_root, strlen(name));
>        if (IS_ERR(dentry)) {
>                error = PTR_ERR(dentry);
> -               goto out_err;
> +               goto out_putfd;
>        }
>        mntget(ipc_ns->mq_mnt);
>
> @@ -749,7 +749,6 @@ out:
>        mntput(ipc_ns->mq_mnt);
>  out_putfd:
>        put_unused_fd(fd);
> -out_err:
>        fd = error;
>  out_upsem:
>        mutex_unlock(&ipc_ns->mq_mnt->mnt_root->d_inode->i_mutex);
> --
> 1.7.0.87.g0901d
>
> --
> To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
> the body of a message to [email protected]
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
> Please read the FAQ at  http://www.tux.org/lkml/
>

2010-02-25 13:14:54

by Cong Wang

[permalink] [raw]
Subject: Re: [PATCH 6/6] mqueue: fix mq_open() file descriptor leak on user-space processes

On Thu, Feb 25, 2010 at 06:49:09PM +0800, Xiaotian Feng wrote:
>On Thu, Feb 25, 2010 at 2:59 PM, Américo Wang <[email protected]> wrote:
>> On Thu, Feb 25, 2010 at 12:25 PM, Américo Wang <[email protected]> wrote:
>>> On Thu, Feb 25, 2010 at 12:00 PM, Xiaotian Feng <[email protected]> wrote:
>>>> 2010/2/25 Américo Wang <[email protected]>:
>>>>> On Tue, Feb 23, 2010 at 3:04 PM, André Goddard Rosa
>>>>> <[email protected]> wrote:
>>>>>> It can be triggered by the following test program:
>>>>>>
>>>>>
>>>>> <snip>
>>>>>
>>>>>>
>>>>>> When not running valgrind, user-space program segfaults trying to execute
>>>>>> strerror(errno). With valgrind, it executes successfully and prints the
>>>>>> 5 open files: stdin, stdout, stderr, pipe[0] and pipe[1].
>>>>>>
>>>>>> Signed-off-by: André Goddard Rosa <[email protected]>
>>>>>> ---
>>>>>
>>>>> The code has more than just this problem, could you please try
>>>>> my patch below?
>>>>>
>>>>> Thanks.
>>>>>
>>>>> ---------------------------->
>>>>>
>>>>> Clean up the failure path of sys_mq_open().
>>>>>
>>>>> Reorder the goto labels;
>>>>> Rename 'upsem' to 'upunlock';
>>>>> Remove some unused labels;
>>>>> Fix some wrong goto path.
>>>>>
>>>>
>>>> I think it's wrong to move dput after mntput
>>>
>>> Oh, this is to say mntget() should be called before lookup_one_len(),
>>> the original code seems wrong again...
>>>
>>
>> How about the one below?
>
>This is definitely wrong,
>
> if (IS_ERR(filp)) {
> error = PTR_ERR(filp);
>- goto out_putfd;
>+ goto out;
> }
>
>filp is assigned by do_open or do_create in mqueue.c, take a look at
>the code, if do_open/do_create is failed, kernel is already dput &
>mntput...
>

Clearly the original code is a piece of sh*t.

2010-02-25 13:37:37

by Cong Wang

[permalink] [raw]
Subject: [Patch] mqueue: fix the bad code in sys_mq_open()


Fix bad code in ipc/mqueue.c.

Inspired by André Goddard Rosa's patch.

a) do_create() and do_open() should not release the resources which
are not acquired within themselves, it's their caller's work;

b) Fix an fd leak;

c) The goto label 'out_upsem' doesn't make any sense, rename to
'out_unlock';

d) mntget() should be called before looking up dentry within
->mnt->mnt_root;

e) When dealing with failure case, we should release the resources
in a reversed order of acquiring, so reorder the goto labels;

f) Remove some unused labels due to reorder.

Also, this shrinks the binary by 60 bytes:

text data bss dec hex filename
7674 1684 8 9366 2496 ipc/mqueue.o.BEFORE
7614 1684 8 9306 245a ipc/mqueue.o.AFTER


Signed-off-by: WANG Cong <[email protected]>
Cc: André Goddard Rosa <[email protected]>

---
diff --git a/ipc/mqueue.c b/ipc/mqueue.c
index c79bd57..fdd09da 100644
--- a/ipc/mqueue.c
+++ b/ipc/mqueue.c
@@ -650,8 +650,6 @@ static struct file *do_create(struct ipc_namespace *ipc_ns, struct dentry *dir,
out_drop_write:
mnt_drop_write(ipc_ns->mq_mnt);
out:
- dput(dentry);
- mntput(ipc_ns->mq_mnt);
return ERR_PTR(ret);
}

@@ -664,17 +662,11 @@ static struct file *do_open(struct ipc_namespace *ipc_ns,
static const int oflag2acc[O_ACCMODE] = { MAY_READ, MAY_WRITE,
MAY_READ | MAY_WRITE };

- if ((oflag & O_ACCMODE) == (O_RDWR | O_WRONLY)) {
- dput(dentry);
- mntput(ipc_ns->mq_mnt);
+ if ((oflag & O_ACCMODE) == (O_RDWR | O_WRONLY))
return ERR_PTR(-EINVAL);
- }

- if (inode_permission(dentry->d_inode, oflag2acc[oflag & O_ACCMODE])) {
- dput(dentry);
- mntput(ipc_ns->mq_mnt);
+ if (inode_permission(dentry->d_inode, oflag2acc[oflag & O_ACCMODE]))
return ERR_PTR(-EACCES);
- }

return dentry_open(dentry, ipc_ns->mq_mnt, oflag, cred);
}
@@ -686,7 +678,7 @@ SYSCALL_DEFINE4(mq_open, const char __user *, u_name, int, oflag, mode_t, mode,
struct file *filp;
char *name;
struct mq_attr attr;
- int fd, error;
+ int fd, error = 0;
struct ipc_namespace *ipc_ns = current->nsproxy->ipc_ns;

if (u_attr && copy_from_user(&attr, u_attr, sizeof(struct mq_attr)))
@@ -701,13 +693,13 @@ SYSCALL_DEFINE4(mq_open, const char __user *, u_name, int, oflag, mode_t, mode,
if (fd < 0)
goto out_putname;

+ mntget(ipc_ns->mq_mnt);
mutex_lock(&ipc_ns->mq_mnt->mnt_root->d_inode->i_mutex);
dentry = lookup_one_len(name, ipc_ns->mq_mnt->mnt_root, strlen(name));
if (IS_ERR(dentry)) {
error = PTR_ERR(dentry);
- goto out_err;
+ goto out_unlock;
}
- mntget(ipc_ns->mq_mnt);

if (oflag & O_CREAT) {
if (dentry->d_inode) { /* entry already exists */
@@ -731,24 +723,23 @@ SYSCALL_DEFINE4(mq_open, const char __user *, u_name, int, oflag, mode_t, mode,

if (IS_ERR(filp)) {
error = PTR_ERR(filp);
- goto out_putfd;
+ goto out;
}

fd_install(fd, filp);
- goto out_upsem;
+ goto out_unlock;

out:
dput(dentry);
- mntput(ipc_ns->mq_mnt);
-out_putfd:
- put_unused_fd(fd);
-out_err:
- fd = error;
-out_upsem:
+out_unlock:
mutex_unlock(&ipc_ns->mq_mnt->mnt_root->d_inode->i_mutex);
+ if (error) {
+ mntput(ipc_ns->mq_mnt);
+ put_unused_fd(fd);
+ }
out_putname:
putname(name);
- return fd;
+ return error;
}

SYSCALL_DEFINE1(mq_unlink, const char __user *, u_name)

2010-02-25 15:42:10

by André Goddard Rosa

[permalink] [raw]
Subject: Re: [Patch] mqueue: fix the bad code in sys_mq_open()

Hi, Am?rico!

On Thu, Feb 25, 2010 at 10:40 AM, Am?rico Wang <[email protected]> wrote:
>
> Fix bad code in ipc/mqueue.c.
>
> Inspired by Andr? Goddard Rosa's ?patch.
>
> a) do_create() and do_open() should not release the resources which
> ? are not acquired within themselves, it's their caller's work;
>
> b) Fix an fd leak;
>
> c) The goto label 'out_upsem' doesn't make any sense, rename to
> ? 'out_unlock';
>
> d) mntget() should be called before looking up dentry within
> ? ->mnt->mnt_root;
>
> e) When dealing with failure case, we should release the resources
> ? in a reversed order of acquiring, so reorder the goto labels;
>
> f) Remove some unused labels due to reorder.
>
> Also, this shrinks the binary by 60 bytes:
>
> ? text ? ? ? data ? ? bss ? ? ? dec ? ? ? ?hex filename
> ? 7674 ? ? ? 1684 ? ? ? ?8 ? ? ?9366 ? ? ?2496 ipc/mqueue.o.BEFORE
> ? 7614 ? ? ? 1684 ? ? ? ?8 ? ? ?9306 ? ? ?245a ipc/mqueue.o.AFTER
>
>
> Signed-off-by: WANG Cong <[email protected]>
> Cc: Andr? Goddard Rosa <[email protected]>
>
> ---
> diff --git a/ipc/mqueue.c b/ipc/mqueue.c
> index c79bd57..fdd09da 100644
> --- a/ipc/mqueue.c
> +++ b/ipc/mqueue.c
> @@ -650,8 +650,6 @@ static struct file *do_create(struct ipc_namespace *ipc_ns, struct dentry *dir,
> ?out_drop_write:
> ? ? ? ?mnt_drop_write(ipc_ns->mq_mnt);
> ?out:
> - ? ? ? dput(dentry);
> - ? ? ? mntput(ipc_ns->mq_mnt);
> ? ? ? ?return ERR_PTR(ret);
> ?}
>
> @@ -664,17 +662,11 @@ static struct file *do_open(struct ipc_namespace *ipc_ns,
> ? ? ? ?static const int oflag2acc[O_ACCMODE] = { MAY_READ, MAY_WRITE,
> ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?MAY_READ | MAY_WRITE };
>
> - ? ? ? if ((oflag & O_ACCMODE) == (O_RDWR | O_WRONLY)) {
> - ? ? ? ? ? ? ? dput(dentry);
> - ? ? ? ? ? ? ? mntput(ipc_ns->mq_mnt);
> + ? ? ? if ((oflag & O_ACCMODE) == (O_RDWR | O_WRONLY))
> ? ? ? ? ? ? ? ?return ERR_PTR(-EINVAL);
> - ? ? ? }
>
> - ? ? ? if (inode_permission(dentry->d_inode, oflag2acc[oflag & O_ACCMODE])) {
> - ? ? ? ? ? ? ? dput(dentry);
> - ? ? ? ? ? ? ? mntput(ipc_ns->mq_mnt);
> + ? ? ? if (inode_permission(dentry->d_inode, oflag2acc[oflag & O_ACCMODE]))
> ? ? ? ? ? ? ? ?return ERR_PTR(-EACCES);
> - ? ? ? }
>
> ? ? ? ?return dentry_open(dentry, ipc_ns->mq_mnt, oflag, cred);
> ?}
> @@ -686,7 +678,7 @@ SYSCALL_DEFINE4(mq_open, const char __user *, u_name, int, oflag, mode_t, mode,
> ? ? ? ?struct file *filp;
> ? ? ? ?char *name;
> ? ? ? ?struct mq_attr attr;
> - ? ? ? int fd, error;
> + ? ? ? int fd, error = 0;
> ? ? ? ?struct ipc_namespace *ipc_ns = current->nsproxy->ipc_ns;
>
> ? ? ? ?if (u_attr && copy_from_user(&attr, u_attr, sizeof(struct mq_attr)))
> @@ -701,13 +693,13 @@ SYSCALL_DEFINE4(mq_open, const char __user *, u_name, int, oflag, mode_t, mode,
> ? ? ? ?if (fd < 0)
> ? ? ? ? ? ? ? ?goto out_putname;
>
> + ? ? ? mntget(ipc_ns->mq_mnt);
> ? ? ? ?mutex_lock(&ipc_ns->mq_mnt->mnt_root->d_inode->i_mutex);
> ? ? ? ?dentry = lookup_one_len(name, ipc_ns->mq_mnt->mnt_root, strlen(name));
> ? ? ? ?if (IS_ERR(dentry)) {
> ? ? ? ? ? ? ? ?error = PTR_ERR(dentry);
> - ? ? ? ? ? ? ? goto out_err;
> + ? ? ? ? ? ? ? goto out_unlock;
> ? ? ? ?}
> - ? ? ? mntget(ipc_ns->mq_mnt);
>
> ? ? ? ?if (oflag & O_CREAT) {
> ? ? ? ? ? ? ? ?if (dentry->d_inode) { ?/* entry already exists */
> @@ -731,24 +723,23 @@ SYSCALL_DEFINE4(mq_open, const char __user *, u_name, int, oflag, mode_t, mode,
>
> ? ? ? ?if (IS_ERR(filp)) {
> ? ? ? ? ? ? ? ?error = PTR_ERR(filp);
> - ? ? ? ? ? ? ? goto out_putfd;
> + ? ? ? ? ? ? ? goto out;
> ? ? ? ?}
>
> ? ? ? ?fd_install(fd, filp);
> - ? ? ? goto out_upsem;
> + ? ? ? goto out_unlock;
>
> ?out:
> ? ? ? ?dput(dentry);
> - ? ? ? mntput(ipc_ns->mq_mnt);
> -out_putfd:
> - ? ? ? put_unused_fd(fd);
> -out_err:
> - ? ? ? fd = error;
> -out_upsem:
> +out_unlock:
> ? ? ? ?mutex_unlock(&ipc_ns->mq_mnt->mnt_root->d_inode->i_mutex);
> + ? ? ? if (error) {
> + ? ? ? ? ? ? ? mntput(ipc_ns->mq_mnt);
> + ? ? ? ? ? ? ? put_unused_fd(fd);
> + ? ? ? }
> ?out_putname:
> ? ? ? ?putname(name);
> - ? ? ? return fd;
> + ? ? ? return error;
> ?}
>
> ?SYSCALL_DEFINE1(mq_unlink, const char __user *, u_name)

I have some questions below:

Inside do_open() and do_create() on mqueue.c, we call
dentry_open()/__dentry_open().

If dentry_open() fails, it'll automatically call:
dput(dentry);
mntput(mnt);

Then we'll go here and set "error":

> ? ? ? ?if (IS_ERR(filp)) {
> ? ? ? ? ? ? ? ?error = PTR_ERR(filp);
> - ? ? ? ? ? ? ? goto out_putfd;
> + ? ? ? ? ? ? ? goto out;
> ? ? ? ?}
>

And finally here:

> ?out:
> ? ? ? ?dput(dentry);
> - ? ? ? mntput(ipc_ns->mq_mnt);
> -out_putfd:
> - ? ? ? put_unused_fd(fd);
> -out_err:
> - ? ? ? fd = error;
> -out_upsem:
> +out_unlock:
> ? ? ? ?mutex_unlock(&ipc_ns->mq_mnt->mnt_root->d_inode->i_mutex);
> + ? ? ? if (error) {
> + ? ? ? ? ? ? ? mntput(ipc_ns->mq_mnt);
> + ? ? ? ? ? ? ? put_unused_fd(fd);
> + ? ? ? }

Is it ok to call:
dput(dentry);
mntput(ipc_ns->mq_mnt);

multiple times?

I also do not see where you set "error" to "fd":
> - ? ? ? return fd;
> + ? ? ? return error;

Am I missing something?

Could you please base your patch on top of mine?

Thank you,
Andr?

2010-02-25 15:53:08

by André Goddard Rosa

[permalink] [raw]
Subject: Re: [PATCH 0/6] Fix file descriptor leak on user-space processes and cleanup

Hi, Andrew!

On Wed, Feb 24, 2010 at 7:01 PM, Andrew Morton
<[email protected]> wrote:
> On Tue, 23 Feb 2010 04:04:22 -0300 Andr__ Goddard Rosa <[email protected]> wrote:
>
>> Fix a file descriptor leak on user-space processes and perform a cleanup,
>> reducing the code size:
>> text ? ?data ? ? bss ? ? dec ? ? hex filename
>> 9949 ? ? ?72 ? ? ?16 ? 10037 ? ?2735 ipc/mqueue-BEFORE.o
>> 9885 ? ? ?72 ? ? ?16 ? ?9973 ? ?26f5 ipc/mqueue-AFTER.o
>>
>> Andr__ Goddard Rosa (6):
>> ? mqueue: remove unneeded info->messages initialization
>> ? mqueue: apply mathematics distributivity on mq_bytes calculation
>> ? mqueue: simplify do_open() error handling
>> ? mqueue: only set error codes if they are really necessary
>> ? mqueue: fix typo "failues" -> "failures"
>> ? mqueue: fix mq_open() file descriptor leak on user-space processes
>
> Fixing the leak is far more important than the other five patches, and
> we'll want to backport the leak fix into earlier kernels. ?So the
> bugfix patch should have been the first in the series!

Sure, thank you for that, I'll consider your good advice.

> So I've reordered the patches in that fashion and shall tag "mqueue:
> fix mq_open() file descriptor leak on user-space processes" as needing
> -stable backporting.
>
> The patches apply and build OK with that reordering, but please do
> double-check it, thanks.
>

I have double checked and they look good; thanks for the follow-up
patch pleasing checkpatch.
:)

Do I need to send another patch adding the Acked-by's or is it done by
email system automatically?

Best regards,
Andr?

2010-02-25 16:13:10

by Cong Wang

[permalink] [raw]
Subject: Re: [Patch] mqueue: fix the bad code in sys_mq_open()

On Thu, Feb 25, 2010 at 12:41:47PM -0300, André Goddard Rosa wrote:
>Hi, Américo!
>
...
>I have some questions below:
>
>Inside do_open() and do_create() on mqueue.c, we call
>dentry_open()/__dentry_open().
>
>If dentry_open() fails, it'll automatically call:
> dput(dentry);
> mntput(mnt);
>

Oh, I trusted the current code too much, clearly this needs to be fixed
too. I already checked the 14 callers of dentry_open(), and will send
out a patchset to fix this tomorrow. (And the mqueue part will be based
on your patch.)

Thanks!