Return-Path: Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id ; Sun, 24 Nov 2002 19:50:16 -0500 Received: (majordomo@vger.kernel.org) by vger.kernel.org id ; Sun, 24 Nov 2002 19:50:16 -0500 Received: from smtpout.mac.com ([17.250.248.97]:34257 "EHLO smtpout.mac.com") by vger.kernel.org with ESMTP id ; Sun, 24 Nov 2002 19:50:00 -0500 From: Peter Waechtler To: Andrew Morton Subject: Re: [PATCH] unified SysV and POSIX mqueues - complete rewrite Date: Mon, 25 Nov 2002 02:02:17 +0100 User-Agent: KMail/1.4.3 References: <200211250005.00447.pwaechtler@mac.com> <3DE15F9E.A585E26F@digeo.com> In-Reply-To: <3DE15F9E.A585E26F@digeo.com> Cc: linux-kernel@vger.kernel.org MIME-Version: 1.0 Content-Type: Multipart/Mixed; boundary="------------Boundary-00=_TJX3848ZAMQ7XYRYQZZP" Message-Id: <200211250202.17120.pwaechtler@mac.com> Sender: linux-kernel-owner@vger.kernel.org X-Mailing-List: linux-kernel@vger.kernel.org Content-Length: 29444 Lines: 1102 --------------Boundary-00=_TJX3848ZAMQ7XYRYQZZP Content-Type: text/plain; charset="iso-8859-1" Content-Transfer-Encoding: 8bit Am Montag, 25. November 2002 00:24 schrieb Andrew Morton: > Peter Waechtler wrote: > > ... > > kernel.patch Type: text/x-diff > > It's not legal to call copy_*_user inside spinlock. You > sem to dothin in several places. Yes, I know. here a fixed version - there was also an error in putting the mqflags together in setattr/getattr --------------Boundary-00=_TJX3848ZAMQ7XYRYQZZP Content-Type: text/x-diff; charset="iso-8859-1"; name="kernel.patch" Content-Transfer-Encoding: 8bit Content-Disposition: attachment; filename="kernel.patch" diff -X dontdiff -Nur vanilla-2.5.49/arch/i386/kernel/entry.S linux-2.5.49/arch/i386/kernel/entry.S --- vanilla-2.5.49/arch/i386/kernel/entry.S 2002-11-23 17:04:27.000000000 +0100 +++ linux-2.5.49/arch/i386/kernel/entry.S 2002-11-23 17:13:38.000000000 +0100 @@ -741,8 +741,19 @@ .long sys_epoll_create .long sys_epoll_ctl /* 255 */ .long sys_epoll_wait - .long sys_remap_file_pages - .long sys_set_tid_address + .long sys_remap_file_pages + .long sys_set_tid_address + .long sys_ni_syscall + .long sys_ni_syscall /* 260 */ + .long sys_ni_syscall + .long sys_ni_syscall + .long sys_mq_open + .long sys_mq_unlink + .long sys_mq_timedsend /* 265 */ + .long sys_mq_timedreceive + .long sys_mq_notify + .long sys_mq_getattr + .long sys_mq_setattr .rept NR_syscalls-(.-sys_call_table)/4 diff -X dontdiff -Nur vanilla-2.5.49/include/asm-i386/unistd.h linux-2.5.49/include/asm-i386/unistd.h --- vanilla-2.5.49/include/asm-i386/unistd.h 2002-11-21 00:18:34.000000000 +0100 +++ linux-2.5.49/include/asm-i386/unistd.h 2002-11-24 22:31:00.000000000 +0100 @@ -261,9 +261,16 @@ #define __NR_sys_epoll_create 254 #define __NR_sys_epoll_ctl 255 #define __NR_sys_epoll_wait 256 -#define __NR_remap_file_pages 257 +#define __NR_remap_file_pages 257 #define __NR_set_tid_address 258 - +#define __NR_sys_mq_open 263 +#define __NR_sys_mq_unlink 264 +#define __NR_mq_timedsend 265 +#define __NR_mq_timedreceive 266 +#define __NR_mq_notify 267 +#define __NR_mq_getattr 268 +#define __NR_mq_setattr 269 + /* user-visible error numbers are in the range -1 - -124: see */ diff -X dontdiff -Nur vanilla-2.5.49/include/linux/mqueue.h linux-2.5.49/include/linux/mqueue.h --- vanilla-2.5.49/include/linux/mqueue.h 1970-01-01 01:00:00.000000000 +0100 +++ linux-2.5.49/include/linux/mqueue.h 2002-11-24 17:20:20.000000000 +0100 @@ -0,0 +1,49 @@ +#ifndef _LINUX_MQUEUE_H +#define _LINUX_MQUEUE_H + +#define MQ_MAXMSG 40 /* max number of messages in each queue */ +#define MQ_MAXSYSSIZE 1048576 /* max size that all m.q. can have together */ +#define MQ_PRIO_MAX 100000 /* max priority */ + +#define MSGFS_MAGIC 0x4D455347 +#define _POSIX_MESSAGE_PASSING 1 + +typedef int mqd_t; /* message queue descriptor */ + +struct mq_attr { + long mq_flags; /* message queue flags */ + long mq_maxmsg; /* maximum number of messages */ + long mq_msgsize; /* maximum message size */ + long mq_curmsgs; /* number of messages currently queued */ +}; + +asmlinkage mqd_t sys_mq_open(const char *u_path, int oflag, mode_t mode, + struct mq_attr *u_attr); +asmlinkage int sys_mq_close(mqd_t mqdes); +asmlinkage int sys_mq_unlink(const char *u_name); +asmlinkage int sys_mq_timedsend(mqd_t mqdes, const char *msg_ptr, + size_t msg_len, unsigned int msg_prio, struct timespec *utime); +asmlinkage ssize_t sys_mq_timedreceive(mqd_t mqdes, char *msg_ptr, + size_t msg_len, unsigned int *msg_prio, struct timespec *utime); +asmlinkage int sys_mq_notify(mqd_t mqdes, + const struct sigevent *u_notification); +asmlinkage int sys_mq_getattr(mqd_t mqdes, struct mq_attr *u_mqstat); +asmlinkage int sys_mq_setattr(mqd_t mqdes, const struct mq_attr *u_mqstat, + struct mq_attr *u_omqstat); + +#ifdef __KERNEL__ + +struct mqueue_ds { /* queue */ + struct mq_attr attr; + struct msg_queue queue; /* ipc/msg */ + + spinlock_t lock; + wait_queue_head_t wait_recv; + wait_queue_head_t wait_send; + + pid_t notify_pid; /* who we have to notify (or 0) */ + struct sigevent notify; /* notification */ +}; +#endif /* __KERNEL__ */ + +#endif diff -X dontdiff -Nur vanilla-2.5.49/include/linux/sys.h linux-2.5.49/include/linux/sys.h --- vanilla-2.5.49/include/linux/sys.h 2002-11-01 01:15:04.000000000 +0100 +++ linux-2.5.49/include/linux/sys.h 2002-11-01 16:36:48.000000000 +0100 @@ -4,7 +4,7 @@ /* * system call entry points ... but not all are defined */ -#define NR_syscalls 260 +#define NR_syscalls 270 /* * These are system calls that will be removed at some time diff -X dontdiff -Nur vanilla-2.5.49/init/Kconfig linux-2.5.49/init/Kconfig --- vanilla-2.5.49/init/Kconfig 2002-11-21 00:18:37.000000000 +0100 +++ linux-2.5.49/init/Kconfig 2002-11-21 00:26:40.000000000 +0100 @@ -69,6 +69,13 @@ section 6.4 of the Linux Programmer's Guide, available from . +config POSIXMSG + bool "POSIX message queues" + depends on SYSVIPC + ---help--- + This gives you POSIX compliant interfaces for message queues. + + config BSD_PROCESS_ACCT bool "BSD Process Accounting" help diff -X dontdiff -Nur vanilla-2.5.49/ipc/Makefile linux-2.5.49/ipc/Makefile --- vanilla-2.5.49/ipc/Makefile 2002-09-23 15:54:57.000000000 +0200 +++ linux-2.5.49/ipc/Makefile 2002-11-04 17:50:24.000000000 +0100 @@ -6,4 +6,8 @@ obj-$(CONFIG_SYSVIPC) += msg.o sem.o shm.o +ifeq ($(CONFIG_POSIXMSG),y) +obj-$(CONFIG_SYSVIPC) += posixmsg.o +endif + include $(TOPDIR)/Rules.make diff -X dontdiff -Nur vanilla-2.5.49/ipc/msg.c linux-2.5.49/ipc/msg.c --- vanilla-2.5.49/ipc/msg.c 2002-11-21 00:18:37.000000000 +0100 +++ linux-2.5.49/ipc/msg.c 2002-11-21 00:26:40.000000000 +0100 @@ -127,7 +127,7 @@ return msg_buildid(id,msq->q_perm.seq); } -static void free_msg(struct msg_msg* msg) +void free_msg(struct msg_msg* msg) { struct msg_msgseg* seg; seg = msg->next; @@ -139,7 +139,7 @@ } } -static struct msg_msg* load_msg(void* src, int len) +struct msg_msg* load_msg(void* src, int len) { struct msg_msg* msg; struct msg_msgseg** pseg; @@ -191,7 +191,7 @@ return ERR_PTR(err); } -static int store_msg(void* dest, struct msg_msg* msg, int len) +int store_msg(void* dest, struct msg_msg* msg, int len) { int alen; struct msg_msgseg *seg; diff -X dontdiff -Nur vanilla-2.5.49/ipc/posixmsg.c linux-2.5.49/ipc/posixmsg.c --- vanilla-2.5.49/ipc/posixmsg.c 1970-01-01 01:00:00.000000000 +0100 +++ linux-2.5.49/ipc/posixmsg.c 2002-11-25 01:18:27.000000000 +0100 @@ -0,0 +1,828 @@ +/* + * linux/ipc/posixmsg.c + * + * Copyright 2002 Peter W?chtler + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * root can mount the filesystem to see the names of the currently + * used mqueues. The i_size shows the number of msgs in the queue. + * The only shell operation on the "fifos" is an unlink() via rm(1) + * The queues are accessed through syscalls - poll is supported + * + * put MSGFS_MAGIC into mqueue.h + * update inode->m_time on send (yes, no posix semantics on this) + * update inode->a_time on recv + * translate absolute timeouts in kernel to prevent too long sleeps + * corrected spinlocks + [gs]etattr/mq_flags + * + * TODO: + * check for more sysv msg limits (or add some new, e.g. MQ_PRIO_MAX)? + * implement SIGEV_THREAD with clone_startup(hey, where is it?) + * think about prio based waitqueues: simply enqueue them in order? + * bits/posix_opt.h claims to support _POSIX_PRIORITY_SCHEDULING + */ +#include +#include +#include +#include +#include +#include +#include +#include /* PAGE_CACHE_SIZE */ +#include + +#include +#include +#include + +/* functions used from ipc/msg.c */ +extern void free_msg(struct msg_msg *msg); +extern int store_msg(void *dest, struct msg_msg *msg, int len); +extern struct msg_msg *load_msg(void *src, int len); + +extern int msg_ctlmnb; /* default max size of a message queue (all msgs sum up) */ +extern int msg_ctlmni; /* max # of msg queue identifiers */ +extern int msg_ctlmax; /* max size of one message (bytes) */ + +static int mqueue_release(struct inode *inode, struct dentry *dentry); +static int mqueue_close(struct inode *inode, struct file *filp); +static unsigned int mqueue_poll(struct file *, struct poll_table_struct *); + +static atomic_t msg_bytes = ATOMIC_INIT(0); +static struct vfsmount *msg_mnt; + +static struct file_operations msg_fops = { + .llseek = no_llseek, + .release = mqueue_close, + .poll = mqueue_poll, +}; +static struct inode_operations msg_dir_inode_operations = { + .lookup = simple_lookup, + .unlink = mqueue_release, +}; + +static struct msg_queue * +init_queue(struct msg_queue *queue) +{ + int retval; + + queue->q_perm.mode = 0; + queue->q_perm.key = 0; + queue->q_perm.security = NULL; + retval = security_ops->msg_queue_alloc_security(queue); + if (retval) + return ERR_PTR(retval);; + + queue->q_stime = queue->q_rtime = 0; + queue->q_qbytes = queue->q_cbytes = queue->q_qnum = 0; + queue->q_lspid = queue->q_lrpid = 0; + INIT_LIST_HEAD(&queue->q_messages); + INIT_LIST_HEAD(&queue->q_receivers); /* unused */ + INIT_LIST_HEAD(&queue->q_senders); /* unused */ + + return queue; +} + +static struct inode * +get_msg_inode(struct super_block *sb, mode_t mode) +{ + struct mqueue_ds *q; + struct inode *inode = new_inode(sb); + + if (inode) { + inode->i_mode = mode; + inode->i_uid = current->fsuid; + inode->i_gid = current->fsgid; + inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME; + inode->i_blksize = 1024; + switch (mode & S_IFMT) { + default: + case S_IFIFO: + case S_IFREG: + inode->i_fop = &msg_fops; + if ((q = kmalloc(sizeof (*q), GFP_KERNEL))) { + if (!init_queue(&q->queue)) { + kfree(q); + iput(inode); + inode = ERR_PTR(-EACCES); + } + inode->u.generic_ip = q; + } else { + iput(inode); + inode = ERR_PTR(-ENOSPC); + } + break; + case S_IFDIR: + inode->i_op = &msg_dir_inode_operations; + inode->i_fop = &simple_dir_operations; + /* directory inodes start off with i_nlink == 2 (for "." entry) */ + inode->i_nlink++; + break; + case S_IFLNK: + break; + } + } else + inode = ERR_PTR(-ENOSPC); + + return inode; +} + +#define get_mqueue(filp)\ + ((filp) ? filp->f_dentry->d_inode->u.generic_ip: filp) + +/* don't use fget() to avoid the fput() for no good reason */ +static struct file * +mqueue_lookup(mqd_t fd) +{ + struct files_struct *files = current->files; + + read_lock(&files->file_lock); + if (fd >= files->max_fds || fd < 0) + goto out_unlock; + read_unlock(&files->file_lock); + return files->fd[fd]; + + out_unlock: + read_unlock(&files->file_lock); + return NULL; +} + +static inline int +freespace(struct mqueue_ds *q, size_t msg_len) +{ + int rc; + spin_lock(&q->lock); + + rc = msg_len + q->queue.q_cbytes <= q->attr.mq_msgsize && + q->queue.q_qnum < q->attr.mq_maxmsg; + + spin_unlock(&q->lock); + return rc; +} + +/* cleans up after close() or exit() */ +static int +mqueue_close(struct inode *inode, struct file *filp) +{ + struct mqueue_ds *q = inode->u.generic_ip; + + if (!q || !filp) + return -EBADFD; + + spin_lock(&q->lock); + /* remove possible notification; + * sys_getpid() returns the tgid if multithreaded */ + if (q->notify_pid == current->pid) { + q->notify_pid = 0; + q->notify.sigev_signo = 0; + q->notify.sigev_notify = 0; + } + spin_unlock(&q->lock); + return 0; +} + +/* removes a queue - inode is the inode of the directory */ +static int +mqueue_release(struct inode *inode, struct dentry *dentry) +{ + struct mqueue_ds *q = dentry->d_inode->u.generic_ip; + struct msg_queue *queue; + struct list_head *tmp; + + if (!q) + return -EBADFD; + + queue = &q->queue; + + tmp = queue->q_messages.next; + while (tmp != &queue->q_messages) { + struct msg_msg *msg = list_entry(tmp, struct msg_msg, m_list); + tmp = tmp->next; + free_msg(msg); + } + atomic_sub(queue->q_cbytes, &msg_bytes); + security_ops->msg_queue_free_security(queue); + dentry->d_inode->u.generic_ip = NULL; + + kfree(q); + return 0; +} + +static unsigned int +mqueue_poll(struct file *filp, struct poll_table_struct *wait) +{ + struct mqueue_ds *q = get_mqueue(filp); + int ret = 0; + + poll_wait(filp, &q->wait_recv, wait); + poll_wait(filp, &q->wait_send, wait); + + if (q->queue.q_qnum) + ret = POLLIN | POLLRDNORM; + + if (q->queue.q_qnum < q->attr.mq_maxmsg) + ret |= POLLOUT | POLLWRNORM; + + return ret; +} + +static int +create_queue(struct dentry *dir, struct qstr *qname, + int oflag, mode_t mode, struct mq_attr *u_attr) +{ + int ret, fd; + struct file *filp; + struct mqueue_ds *q; + struct inode *inode; + struct dentry *dentry; + + inode = get_msg_inode(msg_mnt->mnt_sb, S_IFIFO | (mode & S_IRWXUGO)); + if (IS_ERR(inode)) { + ret = PTR_ERR(inode); + goto out_ret; + } + q = inode->u.generic_ip; + + if (u_attr != NULL) { + if (copy_from_user(&q->attr, u_attr, sizeof (struct mq_attr))) { + ret = -EFAULT; + goto out_inode; + } + if (q->attr.mq_maxmsg <= 0 + || q->attr.mq_msgsize <= 0 + || q->attr.mq_maxmsg > MQ_MAXMSG + || q->attr.mq_msgsize > msg_ctlmax) { + ret = -EINVAL; + goto out_inode; + } + } else { /* implementation defined */ + q->attr.mq_maxmsg = MQ_MAXMSG; + q->attr.mq_msgsize = 1024 /*msg_ctlmax */ ; + } + q->attr.mq_flags = oflag & O_ACCMODE; + q->notify_pid = 0; + q->notify.sigev_signo = 0; + q->notify.sigev_notify = 0; + init_waitqueue_head(&q->wait_send); + init_waitqueue_head(&q->wait_recv); + + ret = -ENFILE; + if ((fd = get_unused_fd()) < 0) + goto out_inode; + if (!(filp = get_empty_filp())) + goto out_fd; + + qname->hash = full_name_hash(qname->name, qname->len); + dentry = d_alloc(dir, qname); + if (!dentry) { + ret = -ENOMEM; + goto out_filp; + } + d_add(dentry, inode); + ret = get_write_access(inode); + if (ret) + goto out_filp; + + filp->f_vfsmnt = mntget(msg_mnt); + filp->f_dentry = dget(dentry); /* leave it active */ + filp->f_op = &msg_fops; + filp->f_mode = (q->attr.mq_flags + 1) & O_ACCMODE; + filp->f_flags = oflag; + + /* Now we map fd to filp, so userspace can access it */ + fd_install(fd, filp); + ret = fd; + goto out_ret; + + out_filp: + put_filp(filp); + out_fd: + put_unused_fd(fd); + out_inode: + kfree(q); + iput(inode); + out_ret: + return ret; +} + +/** + * sys_mq_open - opens a message queue associated with @u_name + * @mqdes: descriptor of mqueue + * @oflag: flags like O_CREAT, O_EXCL, O_RDWR + * @mode: when O_CREAT is specified, the permission bits + * @u_attr: pointer to the attributes, like max msgsize, when creating + * + * returns a descriptor to the opened queue or negative value on error + */ +asmlinkage mqd_t +sys_mq_open(const char *u_name, int oflag, mode_t mode, struct mq_attr * u_attr) +{ + struct file *filp; + struct dentry *dentry; + struct qstr this; + static int oflag2acc[O_ACCMODE] = + { MAY_READ, MAY_WRITE, MAY_READ | MAY_WRITE }; + int fd, ret; + + if (IS_ERR(this.name = getname(u_name))) + return -ENOMEM; + this.len = strlen(this.name); + dentry = lookup_one_len(this.name, msg_mnt->mnt_root, this.len); + + if (IS_ERR(dentry)) { + ret = PTR_ERR(dentry); + goto out_ret; + } + if (oflag & O_CREAT) { + if (dentry->d_inode) { + /* entry exists already */ + if (oflag & O_EXCL) { + ret = -EEXIST; + } else { + goto open_existing; + } + goto out_dput; + } else { + ret = + create_queue(msg_mnt->mnt_root, &this, oflag, mode, + u_attr); + } + + } else { /* O_CREAT */ + if (!dentry->d_inode) { + ret = -ENOENT; + } else { +open_existing: + if (permission + (dentry->d_inode, oflag2acc[oflag & O_ACCMODE])) { + ret = -EACCES; + } else { + fd = get_unused_fd(); + if (fd >= 0) { + mntget(msg_mnt); + filp = + dentry_open(dentry, msg_mnt, oflag); + filp->f_op = &msg_fops; + if (IS_ERR(filp)) { + ret = PTR_ERR(filp); + put_unused_fd(fd); + goto out_dput; + } + dget(dentry); + fd_install(fd, filp); + } + ret = fd; + } + } + } +out_dput: + dput(dentry); + out_ret: + putname(this.name); + + return ret; +} + +/** + * sys_mq_unlink - removes a message queue from the namespace + * + * @u_name: pointer to the name + */ +asmlinkage int +sys_mq_unlink(const char *u_name) +{ + int err; + struct dentry *dentry, *dir; + char *name = getname(u_name); + + if (IS_ERR(name)) + return PTR_ERR(name); + + dentry = lookup_one_len(name, msg_mnt->mnt_root, strlen(name)); + putname(name); + if (IS_ERR(dentry)) + return PTR_ERR(dentry); + + if (!dentry->d_inode) + return -ENOENT; + + err = -EACCES; + dir = dentry->d_parent; + if (dir) { + err = vfs_unlink(dir->d_inode, dentry); + if (!err) + d_delete(dentry); + } + dput(dentry); + return err; +} + +static inline long get_timeout( struct timespec *abs) +{ + struct timespec t; + + if (abs->tv_nsec >= 1000000000L || abs->tv_nsec < 0 || abs->tv_sec < 0) + return -EINVAL; + t=current_kernel_time(); + if (t.tv_sec > abs->tv_sec || + (t.tv_sec == abs->tv_sec && t.tv_nsec > abs->tv_nsec)) + return -ETIMEDOUT; + + t.tv_sec = abs->tv_sec - t.tv_sec; + t.tv_nsec = abs->tv_nsec - t.tv_nsec; + if (t.tv_nsec < 0){ + t.tv_sec--; + t.tv_nsec+= 1000000000; + } + return timespec_to_jiffies(&t) + 1; +} + +/** + * sys_mq_timedsend - send a message to the queue associated + * with the descriptor mqdes + * @mqdes: descriptor of mqueue + * @msg_ptr: pointer to buffer holding the message + * @msg_len: length of the message + * @msg_prio: the priority of the message + * @utime: if !NULL the function will only block for specified time + */ +asmlinkage int +sys_mq_timedsend(mqd_t mqdes, + const char *msg_ptr, size_t msg_len, + unsigned int msg_prio, struct timespec *utime) +{ + struct siginfo sig_i; + struct msg_msg *msg; + struct list_head *p; + struct msg_queue *queue; + int err; + long timeout; + struct timespec ts; + struct mqueue_ds *q; + struct file *filp = mqueue_lookup(mqdes); + + if (!(q = get_mqueue(filp))) + return -EBADF; + if ((filp->f_mode & O_ACCMODE) == O_RDONLY) + return -EBADF; + if ((unsigned int) msg_prio >= (unsigned int) MQ_PRIO_MAX) + return -EINVAL; + if (msg_len > q->attr.mq_msgsize) + return -EMSGSIZE; + + queue = &q->queue; + if ((filp->f_flags & O_NONBLOCK) && !freespace(q, msg_len)) + return -EAGAIN; + + /* check if this message will exceed overall limit for messages */ + if (atomic_read(&msg_bytes) + msg_len > MQ_MAXSYSSIZE) + return -ENOMEM; + + if (utime) { + if (copy_from_user(&ts, utime, sizeof (ts))) + return -EFAULT; + if ((timeout = get_timeout(&ts))<0) + return timeout; + } else + timeout = 0L; + + msg = load_msg((char *) msg_ptr, msg_len); + if (IS_ERR(msg)) + return PTR_ERR(msg); + + msg->m_type = msg_prio; + msg->m_ts = msg_len; + + if (!timeout) + timeout = MAX_SCHEDULE_TIMEOUT; + + err = wait_event_interruptible_timeout(q->wait_send, + freespace(q, msg_len), timeout); + + if (err == -ERESTARTSYS) + return -EINTR; + if (err == 0) + return -ETIMEDOUT; + + err = 0; + /* if we lose the race for the lock, we can overflow the limits */ + spin_lock(&q->lock); + /* enqueue message in prio order */ + p = queue->q_messages.next; /* used as flag if msg was queued */ + if (msg_prio > 0 && !list_empty(&queue->q_messages)) { + + list_for_each(p, &queue->q_messages) { + struct msg_msg *tmp = + list_entry(p, struct msg_msg, m_list); + if (tmp->m_type < msg_prio) { + list_add_tail(&msg->m_list, p); + p = NULL; + break; + } + } + } + if (p) /* ok, put it at the end */ + list_add_tail(&msg->m_list, &queue->q_messages); + + queue->q_lspid = current->pid; + queue->q_cbytes += msg_len; + atomic_add(msg_len, &msg_bytes); + queue->q_qnum++; + filp->f_dentry->d_inode->i_size = queue->q_qnum; + filp->f_dentry->d_inode->i_mtime = CURRENT_TIME; + + if (waitqueue_active(&q->wait_recv)) { + wake_up_interruptible(&q->wait_recv); + } else { + /* since there was no synchronously waiting process for message + * we notify it when the state of queue changed from + * empty to not empty */ + if (q->notify_pid != 0 && queue->q_qnum == 1) { + /* TODO: Add support for sigev_notify==SIGEV_THREAD + * should we really create a thread? I think so. + */ + if (q->notify.sigev_notify == SIGEV_THREAD) { + + err = -ENOSYS; + pr_info("mq_*send: SIGEV_THREAD not supported\n"); + } + /* sends signal */ + if (q->notify.sigev_notify == SIGEV_SIGNAL) { + sig_i.si_signo = q->notify.sigev_signo; + sig_i.si_errno = 0; + sig_i.si_code = SI_MESGQ; + sig_i.si_pid = current->pid; + sig_i.si_uid = current->uid; + kill_proc_info(q->notify.sigev_signo, + &sig_i, q->notify_pid); + } + /* after notification unregisters process */ + q->notify_pid = 0; + } + } + spin_unlock(&q->lock); + return err; +} + +/** + * sys_mq_timedreceive - receive a message from the queue associated + * with the descriptor mqdes + * @mqdes: descriptor of mqueue + * @msg_ptr: pointer to buffer to hold the message + * @msg_len: length of the userspace buffer + * @msg_prio: will hold the priority if a message was received + * @utime: if !NULL the function will only block for specified time + */ +asmlinkage ssize_t +sys_mq_timedreceive(mqd_t mqdes, + char *msg_ptr, size_t msg_len, + unsigned int *msg_prio, struct timespec * utime) +{ + struct msg_queue *queue; + struct msg_msg *msg; + int err; + long timeout; + struct timespec ts; + struct mqueue_ds *q; + struct file *filp = mqueue_lookup(mqdes); + + if (!(q = get_mqueue(filp))) + return -EBADF; + + if (!(filp->f_mode & FMODE_READ)) + return -EBADF; + + queue = &q->queue; + if ((filp->f_flags & O_NONBLOCK) && queue->q_qnum == 0) + return -EAGAIN; + + if (utime) { + if (copy_from_user(&ts, utime, sizeof (ts))) + return -EFAULT; + if ((timeout = get_timeout(&ts))<0) + return timeout; + } else + timeout = 0L; +wait_on_msg: + if (!timeout) + timeout = MAX_SCHEDULE_TIMEOUT; + + err = wait_event_interruptible_timeout(q->wait_recv, + queue->q_qnum > 0, timeout); + + if (err == -ERESTARTSYS) + return -EINTR; + if (err == 0) + return -ETIMEDOUT; + + err = 0; + spin_lock(&q->lock); + if (!list_empty(&queue->q_messages)) { + + msg = + list_entry(queue->q_messages.next, struct msg_msg, m_list); + if (msg_len < msg->m_ts) { + err = -EMSGSIZE; + goto out_unlock; + } + list_del(&msg->m_list); + queue->q_lrpid = current->pid; + queue->q_cbytes -= msg->m_ts; + atomic_sub(msg->m_ts, &msg_bytes); + queue->q_qnum--; + filp->f_dentry->d_inode->i_size = queue->q_qnum; + filp->f_dentry->d_inode->i_atime = CURRENT_TIME; + /* msg is removed from list, we're already safe */ + spin_unlock(&q->lock); + + wake_up_interruptible(&q->wait_send); + + msg_len = (msg_len > msg->m_ts) ? msg->m_ts : msg_len; + + if ((err = store_msg(msg_ptr, msg, msg_len)) || + put_user(msg->m_type, msg_prio)) { + msg_len = -EFAULT; /* hmh, now the msg is lost */ + } + free_msg(msg); + return msg_len; + } else { + spin_unlock(&q->lock); + goto wait_on_msg; + } +out_unlock: + spin_unlock(&q->lock); + return err; +} + +/** + * sys_mq_notify - set or remove a notification on the queue associated + * with the descriptor mqdes + * @mqdes: descriptor of mqueue + * @u_notification: pointer to struct sigevent + * + */ +asmlinkage int +sys_mq_notify(mqd_t mqdes, const struct sigevent *u_notification) +{ + struct sigevent notify; + struct inode *inode; + struct file *filp = mqueue_lookup(mqdes); + struct mqueue_ds *q = get_mqueue(filp); + int err = 0; + + if (!q) + return -EBADF; + if (u_notification != NULL) + if (copy_from_user + (¬ify, u_notification, sizeof (struct sigevent))) + return -EFAULT; + + inode = filp->f_dentry->d_inode; + spin_lock(&q->lock); + if (q->notify_pid == current->pid + && (u_notification == NULL || notify.sigev_notify == SIGEV_NONE)) { + q->notify_pid = 0; /* remove notification */ + q->notify.sigev_signo = 0; + q->notify.sigev_notify = 0; + } else if (q->notify_pid > 0) { + err = -EBUSY; + } else if (u_notification != NULL) { + if (notify.sigev_notify == SIGEV_SIGNAL) { + /* add notification */ + q->notify_pid = current->pid; + q->notify.sigev_signo = notify.sigev_signo; + q->notify.sigev_notify = notify.sigev_notify; + } else if (notify.sigev_notify == SIGEV_THREAD) { + err = -ENOSYS; + pr_info("mq_*send: SIGEV_THREAD not supported yet\n"); + } + } + spin_unlock(&q->lock); + return err; +} + +static inline void +fill_flags(struct mqueue_ds *q, struct file *filp) +{ + spin_lock(&q->lock); + q->attr.mq_flags = (filp->f_mode - 1) & O_ACCMODE; + q->attr.mq_flags |= (filp->f_flags) & O_NONBLOCK; + q->attr.mq_curmsgs = q->queue.q_qnum; + spin_unlock(&q->lock); +} + +/** + * sys_mq_getattr - get the attributes of the queue associated + * with the descriptor mqdes + * @mqdes: descriptor of mqueue + * @u_mqstat: pointer to struct holding the new values + * + */ +asmlinkage int +sys_mq_getattr(mqd_t mqdes, struct mq_attr *u_mqstat) +{ + int err = 0; + struct mqueue_ds *q; + struct file *filp = mqueue_lookup(mqdes); + + if (!(q = get_mqueue(filp))) + return -EBADF; + + fill_flags(q,filp); + if (copy_to_user(u_mqstat, &q->attr, sizeof (struct mq_attr))) + err = -EFAULT; + return err; +} + +/** + * sys_mq_setattr - set the attributes of the queue associated + * with the descriptor mqdes + * @mqdes: descriptor of mqueue + * @u_mqstat: pointer to struct holding the new values + * @u_omqstat: pointer to store the original attributes (if !NULL) + * + */ +asmlinkage int +sys_mq_setattr(mqd_t mqdes, const struct mq_attr *u_mqstat, + struct mq_attr *u_omqstat) +{ + struct mq_attr mqstat; + struct mqueue_ds *q; + struct file *filp = mqueue_lookup(mqdes); + + if (!(q = get_mqueue(filp))) + return -EBADF; + + if (u_omqstat != NULL) { + fill_flags(q,filp); + if (copy_to_user(u_omqstat, &q->attr, sizeof (struct mq_attr))) + return -EFAULT; + } + if (copy_from_user(&mqstat, u_mqstat, sizeof (struct mq_attr))) + return -EFAULT; + if (mqstat.mq_flags & O_NONBLOCK) + filp->f_flags |= O_NONBLOCK; + else + filp->f_flags &= ~O_NONBLOCK; + + return 0; +} + +static struct super_operations msg_s_ops = { + .statfs = simple_statfs, + .drop_inode = generic_delete_inode, +}; + +static int +msg_fill_super(struct super_block *sb, void *data, int silent) +{ + struct inode *root; + struct dentry *root_dentry; + + sb->s_blocksize = PAGE_CACHE_SIZE; + sb->s_blocksize_bits = PAGE_CACHE_SHIFT; + sb->s_magic = MSGFS_MAGIC; + sb->s_op = &msg_s_ops; + + root = get_msg_inode(sb, S_IFDIR | S_IRWXUGO | S_ISVTX); + if (!root) + goto out; + root_dentry = d_alloc_root(root); + if (!root_dentry) + goto out_iput; + sb->s_root = root_dentry; + return 0; + +out_iput: + iput(root); +out: + return -ENOMEM; +} + +static struct super_block * +msgfs_get_sb(struct file_system_type *fs_type, + int flags, char *dev_name, void *data) +{ + return get_sb_single(fs_type, flags, data, msg_fill_super); +} + +static struct file_system_type msg_fs_type = { + .name = "msgfs", + .get_sb = msgfs_get_sb, + .kill_sb = kill_anon_super, +}; + +static int __init +mqueue_init(void) +{ + register_filesystem(&msg_fs_type); + if (IS_ERR(msg_mnt = kern_mount(&msg_fs_type))) + return PTR_ERR(msg_mnt); + + return 0; +} + +__initcall(mqueue_init); diff -X dontdiff -Nur vanilla-2.5.49/ipc/util.c linux-2.5.49/ipc/util.c --- vanilla-2.5.49/ipc/util.c 2002-11-23 17:04:49.000000000 +0100 +++ linux-2.5.49/ipc/util.c 2002-11-23 17:14:05.000000000 +0100 @@ -24,6 +24,7 @@ #include #include #include +#include #if defined(CONFIG_SYSVIPC) @@ -585,3 +586,52 @@ } #endif /* CONFIG_SYSVIPC */ + +#if defined(CONFIG_POSIXMSG) +/* nothing yet */ +#else +/* + * Dummy functions when POSIXMSG isn't configured + */ + +asmlinkage mqd_t sys_mq_open(const char *u_path, int oflag, mode_t mode, + struct mq_attr *u_attr) +{ + return (mqd_t) -ENOSYS; +} + +asmlinkage int sys_mq_unlink(const char *u_name) +{ + return -ENOSYS; +} + +asmlinkage int sys_mq_timedsend(mqd_t mqdes, const char *msg_ptr, + size_t msg_len, unsigned int msg_prio, struct timespec *utime) +{ + return -ENOSYS; +} + +asmlinkage ssize_t sys_mq_timedreceive(mqd_t mqdes, char *msg_ptr, + size_t msg_len, unsigned int *msg_prio, struct timespec *utime) +{ + return -ENOSYS; +} + +asmlinkage int sys_mq_notify(mqd_t mqdes, + const struct sigevent *u_notification) +{ + return -ENOSYS; +} + +asmlinkage int sys_mq_getattr(mqd_t mqdes, struct mq_attr *u_mqstat) +{ + return -ENOSYS; +} + +asmlinkage int sys_mq_setattr(mqd_t mqdes, const struct mq_attr *u_mqstat, + struct mq_attr *u_omqstat) +{ + return -ENOSYS; +} + +#endif /* CONFIG_POSIXMSG */ --------------Boundary-00=_TJX3848ZAMQ7XYRYQZZP-- - To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/