2002-10-27 15:25:42

by Peter Waechtler

[permalink] [raw]
Subject: [PATCH] unified SysV and Posix mqueues as FS

diff -Nur -X dontdiff vanilla-2.5.44/Documentation/ioctl-number.txt linux-2.5.44/Documentation/ioctl-number.txt
--- vanilla-2.5.44/Documentation/ioctl-number.txt 2002-04-20 18:22:08.000000000 +0200
+++ linux-2.5.44/Documentation/ioctl-number.txt 2002-10-27 15:33:23.000000000 +0100
@@ -186,6 +186,7 @@
0xB0 all RATIO devices in development:
<mailto:[email protected]>
0xB1 00-1F PPPoX <mailto:[email protected]>
+0xB2 00-1F linux/mqueue.h
0xCB 00-1F CBM serial IEC bus in development:
<mailto:[email protected]>

diff -Nur -X dontdiff vanilla-2.5.44/include/linux/mqueue.h linux-2.5.44/include/linux/mqueue.h
--- vanilla-2.5.44/include/linux/mqueue.h 1970-01-01 01:00:00.000000000 +0100
+++ linux-2.5.44/include/linux/mqueue.h 2002-10-23 14:48:31.000000000 +0200
@@ -0,0 +1,37 @@
+#ifndef _LINUX_MQUEUE_H
+#define _LINUX_MQUEUE_H
+
+#include <linux/types.h>
+#include <linux/ioctl.h>
+#include <asm/siginfo.h>
+
+/*
+ * Queue attributes exchanged with user space: passed in as part of
+ * struct mq_open for MQ_OPEN and returned by MQ_GETATTR.
+ */
+struct mq_attr {
+ long mq_flags; /* O_NONBLOCK or 0 */
+ long mq_maxmsg; /* Maximum number of messages in the queue */
+ long mq_msgsize; /* Maximum size of one message in bytes */
+ long mq_curmsgs; /* Current number of messages in the queue */
+ long __pad[2]; /* not referenced by this patch; presumably reserved */
+};
+
+/*
+ * Argument of the MQ_OPEN ioctl.  mq_name, mq_oflag and mq_mode are
+ * handed to sys_open(); mq_attr supplies the limits used if the open
+ * creates a new queue.
+ */
+struct mq_open {
+ char *mq_name; /* pathname */
+ int mq_oflag; /* flags */
+ mode_t mq_mode; /* mode */
+ struct mq_attr mq_attr; /* attributes */
+};
+
+/*
+ * Argument of the MQ_SEND and MQ_RECEIVE ioctls.  For MQ_SEND mq_type
+ * must be greater than zero; for MQ_RECEIVE the kernel writes the type
+ * of the received message back into mq_type.
+ */
+struct mq_sndrcv {
+ size_t mq_len; /* message length */
+ long mq_type; /* message type */
+ char *mq_buf; /* message buffer */
+};
+
+/*
+ * ioctl commands, magic 0xB2.  MQ_OPEN is accepted on the root directory
+ * of the message queue filesystem; the others on an open queue file.
+ */
+#define MQ_OPEN _IOW(0xB2, 0, struct mq_open)
+#define MQ_GETATTR _IOR(0xB2, 1, struct mq_attr)
+#define MQ_SEND _IOW(0xB2, 2, struct mq_sndrcv)
+#define MQ_RECEIVE _IOWR(0xB2, 3, struct mq_sndrcv)
+#define MQ_NOTIFY _IOW(0xB2, 4, struct sigevent)
+
+/* message type used for messages sent with plain write(2) */
+#define MQ_DEFAULT_TYPE 0x7FFFFFFE
+
+#endif /* _LINUX_MQUEUE_H */
diff -Nur -X dontdiff vanilla-2.5.44/include/linux/msg.h linux-2.5.44/include/linux/msg.h
--- vanilla-2.5.44/include/linux/msg.h 2002-08-10 00:09:02.000000000 +0200
+++ linux-2.5.44/include/linux/msg.h 2002-10-25 20:06:47.000000000 +0200
@@ -2,6 +2,7 @@
#define _LINUX_MSG_H

#include <linux/ipc.h>
+#include <linux/signal.h>

/* ipcs ctl commands */
#define MSG_STAT 11
@@ -49,7 +50,7 @@
unsigned short msgseg;
};

-#define MSGMNI 16 /* <= IPCMNI */ /* max # of msg queue identifiers */
+#define MSGMNI 128 /* <= IPCMNI */ /* max # of msg queue identifiers */
#define MSGMAX 8192 /* <= INT_MAX */ /* max size of message (bytes) */
#define MSGMNB 16384 /* <= INT_MAX */ /* default max size of a message queue */

@@ -63,33 +64,88 @@

#ifdef __KERNEL__

+#define SEARCH_ANY 1
+#define SEARCH_EQUAL 2
+#define SEARCH_NOTEQUAL 3
+#define SEARCH_LESSEQUAL 4
+
+#define DATALEN_MSG (PAGE_SIZE-sizeof(struct msg_msg))
+#define DATALEN_SEG (PAGE_SIZE-sizeof(struct msg_msgseg))
+
+/*
+ * used by sys_msgctl(,IPC_SET,): version independent copy of the values
+ * user space asked to change.
+ */
+struct msq_setbuf {
+ unsigned long qbytes;
+ uid_t uid;
+ gid_t gid;
+ mode_t mode;
+};
+
+/*
+ * one msg_receiver structure for each sleeping receiver; it is linked
+ * into the queue's q_receivers list while the receiver sleeps.
+ */
+struct msg_receiver {
+ struct list_head r_list;
+ struct task_struct *r_tsk; /* the sleeping task */
+
+ int r_mode; /* one of the SEARCH_* modes */
+ long r_msgtype; /* message type being waited for */
+ long r_maxsize; /* largest message the receiver accepts */
+
+ /* set to ERR_PTR(-EAGAIN) before sleeping and inspected after wakeup */
+ struct msg_msg* volatile r_msg;
+};
+
+/* one msg_sender for each sleeping sender (sender blocked on a full queue) */
+struct msg_sender {
+ struct list_head list;
+ struct task_struct *tsk;
+};
+
+/*
+ * Header of one continuation segment of a message; segments form a
+ * singly linked list.
+ */
+struct msg_msgseg {
+ struct msg_msgseg *next;
+ /* the next part of the message follows immediately */
+};
+
/* one msg_msg structure for each message */
struct msg_msg {
struct list_head m_list;
long m_type;
int m_ts; /* message text size */
- struct msg_msgseg* next;
+ struct msg_msgseg *next;
/* the actual message follows immediately */
};

-#define DATALEN_MSG (PAGE_SIZE-sizeof(struct msg_msg))
-#define DATALEN_SEG (PAGE_SIZE-sizeof(struct msg_msgseg))
+/*
+ * Hands the attributes given to an MQ_OPEN ioctl over to the ->create
+ * method: the ioctl puts one of these on the mq_open_links list around
+ * its sys_open() call and the create method looks up the entry whose
+ * tsk is the current task.
+ */
+struct mq_link {
+ struct list_head link; /* entry in mq_open_links */
+ struct task_struct *tsk; /* task performing the MQ_OPEN */
+ struct mq_attr *attr; /* attributes requested by that task */
+};

/* one msq_queue structure for each present queue on the system */
struct msg_queue {
struct kern_ipc_perm q_perm;
- time_t q_stime; /* last msgsnd time */
- time_t q_rtime; /* last msgrcv time */
- time_t q_ctime; /* last change time */
- unsigned long q_cbytes; /* current number of bytes on queue */
- unsigned long q_qnum; /* number of messages in queue */
- unsigned long q_qbytes; /* max number of bytes on queue */
- pid_t q_lspid; /* pid of last msgsnd */
- pid_t q_lrpid; /* last receive pid */
+#define q_flags q_perm.mode
+ time_t q_stime; /* last msgsnd time */
+ time_t q_rtime; /* last msgrcv time */
+ time_t q_ctime; /* last change time */
+ unsigned long q_cbytes; /* current number of bytes on queue */
+ unsigned long q_qnum; /* number of messages in queue */
+ unsigned long q_qbytes; /* max number of bytes on queue */
+
+ unsigned int q_msgsize; /* max number of bytes for one message */
+ unsigned int q_maxmsg; /* max number of outstanding messages */
+
+ pid_t q_lspid; /* pid of last msgsnd */
+ pid_t q_lrpid; /* last receive pid */
+
+ int q_signo; /* signal to send when a message is queued on an empty
+ queue with no waiting receiver; 0 if none is registered */
+ pid_t q_pid; /* to which pid */
+ sigval_t q_sigval; /* which value to pass */
+ int id;

struct list_head q_messages;
struct list_head q_receivers;
struct list_head q_senders;
+ unsigned int q_namelen;
+ unsigned char q_name[0];
};

asmlinkage long sys_msgget (key_t key, int msgflg);
diff -Nur -X dontdiff vanilla-2.5.44/ipc/msg.c linux-2.5.44/ipc/msg.c
--- vanilla-2.5.44/ipc/msg.c 2002-10-13 23:03:57.000000000 +0200
+++ linux-2.5.44/ipc/msg.c 2002-10-27 15:42:12.000000000 +0100
@@ -13,15 +13,23 @@
* mostly rewritten, threaded and wake-one semantics added
* MSGMAX limit removed, sysctl's added
* (c) 1999 Manfred Spraul <[email protected]>
+ *
+ * make it a filesystem (based on Christoph Rohland's work on shmfs),
+ * (c) 2000 Jakub Jelinek <[email protected]>
+ * adapted and cleaned up for 2.5.44 by Peter Waechtler <[email protected]>
*/

#include <linux/config.h>
#include <linux/slab.h>
-#include <linux/msg.h>
#include <linux/spinlock.h>
#include <linux/init.h>
+#include <linux/fs.h>
#include <linux/proc_fs.h>
#include <linux/list.h>
+#include <linux/signal.h>
+#include <linux/mqueue.h>
+#include <linux/msg.h>
+#include <linux/namei.h>
#include <linux/security.h>
#include <asm/uaccess.h>
#include "util.h"
@@ -30,34 +38,87 @@
int msg_ctlmax = MSGMAX;
int msg_ctlmnb = MSGMNB;
int msg_ctlmni = MSGMNI;
+/* permission bits of the filesystem root, set from the "mode" mount option */
+static int msg_mode;
+
+/* value reported as superblock magic and as statfs f_type */
+#define MSG_FS_MAGIC 822419456
+
+#define MSG_NAME_LEN NAME_MAX
+/* file name generated for a SysV queue from its id: ".IPC_" + 8 hex digits */
+#define MSG_FMT ".IPC_%08x"
+/* length of a name produced by MSG_FMT, without the terminating NUL */
+#define MSG_FMT_LEN 13
+
+/* extra flag bits kept in q_flags (i.e. q_perm.mode) above the permission bits */
+#define MSG_UNLK 0010000 /* filename is unlinked */
+#define MSG_SYSV 0020000 /* It is a SYSV message queue */
+
+/* the superblock of the filesystem, recorded by msg_fill_super() */
+static struct super_block * msg_sb;
+
+static struct super_block *msg_read_super(struct file_system_type *,int , char *, void *);
+static void msg_put_super(struct super_block *);
+static int msg_remount_fs(struct super_block *, int *, char *);
+static void msg_fill_inode(struct inode *);
+static int msg_statfs(struct super_block *, struct statfs *);
+static int msg_create(struct inode *,struct dentry *,int);
+static struct dentry *msg_lookup(struct inode *,struct dentry *);
+static int msg_unlink(struct inode *,struct dentry *);
+static int msg_setattr(struct dentry *dent, struct iattr *attr);
+static void msg_delete(struct inode *);
+static int msg_readdir(struct file *, void *, filldir_t);
+static int msg_remove_name(int id);
+static int msg_ioctl(struct inode *, struct file *, unsigned int, unsigned long);
+static int msg_root_ioctl(struct inode *, struct file *, unsigned int, unsigned long);
+static ssize_t msg_read(struct file *, char *, size_t, loff_t *);
+static ssize_t msg_write(struct file *, const char *, size_t, loff_t *);
+/* FIXME: Support poll on mq
+static unsigned int msg_poll(struct file *, poll_table *);
+ */
+static ssize_t msg_send (struct inode *, struct file *, const char *, size_t, long);
+static ssize_t msg_receive (struct inode *, struct file *, char *, size_t, long *);
+static int msg_flush (struct file *);
+static int msg_release (struct inode *, struct file *);

-/* one msg_receiver structure for each sleeping receiver */
-struct msg_receiver {
- struct list_head r_list;
- struct task_struct* r_tsk;
-
- int r_mode;
- long r_msgtype;
- long r_maxsize;
+static void freeque (int id);
+static int newque (key_t key, const char *name, int namelen, struct mq_attr *attr, int msgflg);

- struct msg_msg* volatile r_msg;
+static struct file_system_type msg_fs_type = {
+ .name = "msgfs",
+ .get_sb = msg_read_super,
+ .kill_sb = kill_litter_super,
};

-/* one msg_sender for each sleeping sender */
-struct msg_sender {
- struct list_head list;
- struct task_struct* tsk;
+static struct super_operations msg_sops = {
+ .read_inode= msg_fill_inode,
+ .delete_inode= msg_delete,
+ .put_super= msg_put_super,
+ .statfs= msg_statfs,
+ .remount_fs= msg_remount_fs,
};

-struct msg_msgseg {
- struct msg_msgseg* next;
- /* the next part of the message follows immediately */
+static struct file_operations msg_root_operations = {
+ .readdir= msg_readdir,
+ .ioctl= msg_root_ioctl,
};

-#define SEARCH_ANY 1
-#define SEARCH_EQUAL 2
-#define SEARCH_NOTEQUAL 3
-#define SEARCH_LESSEQUAL 4
+/* inode operations of the filesystem's root directory */
+static struct inode_operations msg_root_inode_operations = {
+ .create= msg_create,
+ .lookup= msg_lookup,
+ .unlink= msg_unlink,
+};
+
+/* file operations of an open message queue */
+static struct file_operations msg_file_operations = {
+ .read= msg_read,
+ .write= msg_write,
+ .ioctl= msg_ioctl,
+/* FIXME: Support poll on mq *
+ poll= msg_poll,
+ */
+ .flush= msg_flush,
+ .release= msg_release,
+};
+
+/* inode operations of a message queue file */
+static struct inode_operations msg_inode_operations = {
+ .setattr= msg_setattr,
+};
+
+static LIST_HEAD(mq_open_links);

static atomic_t msg_bytes = ATOMIC_INIT(0);
static atomic_t msg_hdrs = ATOMIC_INIT(0);
@@ -67,33 +128,529 @@
#define msg_lock(id) ((struct msg_queue*)ipc_lock(&msg_ids,id))
#define msg_unlock(id) ipc_unlock(&msg_ids,id)
#define msg_rmid(id) ((struct msg_queue*)ipc_rmid(&msg_ids,id))
-#define msg_checkid(msq, msgid) \
- ipc_checkid(&msg_ids,&msq->q_perm,msgid)
-#define msg_buildid(id, seq) \
- ipc_buildid(&msg_ids, id, seq)
+#define msg_get(id) ((struct msg_queue*)ipc_get(&msg_ids,id))
+#define msg_buildid(id, seq) ipc_buildid(&msg_ids, id, seq)

-static void freeque (int id);
-static int newque (key_t key, int msgflg);
#ifdef CONFIG_PROC_FS
static int sysvipc_msg_read_proc(char *buffer, char **start, off_t offset, int length, int *eof, void *data);
#endif

void __init msg_init (void)
{
+ struct vfsmount *res;
ipc_init_ids(&msg_ids,msg_ctlmni);
-
+ register_filesystem (&msg_fs_type);
+ res = kern_mount(&msg_fs_type);
+ if (IS_ERR(res)) {
+ unregister_filesystem(&msg_fs_type);
+ return;
+ }
#ifdef CONFIG_PROC_FS
create_proc_read_entry("sysvipc/msg", 0, 0, sysvipc_msg_read_proc, NULL);
#endif
}

-static int newque (key_t key, int msgflg)
+/*
+ * Parse a comma separated mount option string ("name=value,...").
+ *
+ * Known options are "nr_blocks" and "nr_inodes" (number base detected
+ * from the prefix) and "mode" (octal).  Only the mode is stored, in
+ * msg_mode; the other two are validated but not applied yet (see the
+ * FIXME below).  The string is modified in place.
+ *
+ * Returns 0 on success and 1 for an unknown option or a missing or
+ * malformed value.
+ */
+static int msg_parse_options(char *options)
+{
+	enum { OPT_BLOCKS, OPT_INODES, OPT_MODE } which;
+	int blocks = msg_ctlmnb * msg_ctlmni;
+	int inodes = msg_ctlmni;
+	umode_t mode = msg_mode;
+	char *token, *val;
+	unsigned long num;
+
+	/* strsep() returns NULL right away when options is NULL */
+	while ((token = strsep(&options, ",")) != NULL) {
+		/* split "name=value" at the '=' */
+		val = strchr(token, '=');
+		if (val != NULL)
+			*val++ = 0;
+
+		if (strcmp(token, "nr_blocks") == 0)
+			which = OPT_BLOCKS;
+		else if (strcmp(token, "nr_inodes") == 0)
+			which = OPT_INODES;
+		else if (strcmp(token, "mode") == 0)
+			which = OPT_MODE;
+		else
+			return 1;
+
+		/* every option needs a non-empty, fully numeric value */
+		if (val == NULL || *val == 0)
+			return 1;
+		num = simple_strtoul(val, &val, which == OPT_MODE ? 8 : 0);
+		if (*val != 0)
+			return 1;
+
+		switch (which) {
+		case OPT_BLOCKS:
+			blocks = num;
+			break;
+		case OPT_INODES:
+			inodes = num;
+			break;
+		case OPT_MODE:
+			mode = num;
+			break;
+		}
+	}
+/* FIXME *
+	msg_ctlmni = inodes;
+	msg_ctlmnb = inodes ? blocks / inodes : 0;
+ */
+	msg_mode = mode;
+
+	return 0;
+}
+
+/*
+ * Set up the superblock: parse the mount options, fill in the block
+ * size, magic and operations, and create the root directory inode and
+ * dentry.  On success the superblock is remembered in msg_sb.
+ *
+ * Returns 0, -EINVAL for bad options or -ENOMEM if the root inode or
+ * root dentry cannot be obtained.
+ */
+static int
+msg_fill_super (struct super_block *sb, void *data, int silent)
+{
+	struct inode *root;
+
+/* FIXME *
+	msg_ctlmnb = MSGMNB;
+	msg_ctlmni = MSGMNI;
+ */
+	/* default root mode, may be replaced by the "mode" option */
+	msg_mode = S_IRWXUGO | S_ISVTX;
+	if (msg_parse_options (data) != 0) {
+		printk(KERN_ERR "msg fs invalid option\n");
+		return -EINVAL;
+	}
+
+	sb->s_magic = MSG_FS_MAGIC;
+	sb->s_op = &msg_sops;
+	sb->s_blocksize_bits = PAGE_SHIFT;
+	sb->s_blocksize = PAGE_SIZE;
+
+	/* inode number SEQ_MULTIPLIER is the root directory */
+	root = iget (sb, SEQ_MULTIPLIER);
+	if (root == NULL)
+		return -ENOMEM;
+
+	root->i_op = &msg_root_inode_operations;
+	root->i_sb = sb;
+	root->i_nlink = 2;
+	root->i_mode = S_IFDIR | msg_mode;
+
+	sb->s_root = d_alloc_root(root);
+	if (sb->s_root == NULL) {
+		printk(KERN_ERR "msg_fill_super: get root inode failed\n");
+		iput(root);
+		return -ENOMEM;
+	}
+
+	msg_sb = sb;
+	return 0;
+}
+
+/*
+ * ->get_sb method of the filesystem type: obtains the superblock
+ * through get_sb_single(), with msg_fill_super() as the fill callback.
+ * dev_name is not used.
+ */
+static struct super_block *msg_read_super(struct file_system_type *fs_type,
+	int flags, char *dev_name, void *data)
+{
+	struct super_block *sb;
+
+	sb = get_sb_single (fs_type, flags, data, msg_fill_super);
+	return sb;
+}
+
+/*
+ * ->remount_fs: re-parse the option string.  Returns -EINVAL if the
+ * options are rejected by msg_parse_options(), 0 otherwise.
+ */
+static int msg_remount_fs (struct super_block *sb, int *flags, char *data)
+{
+	return msg_parse_options (data) ? -EINVAL : 0;
+}
+
+/*
+ * Validate a SysV message queue id against a locked queue.
+ *
+ * Returns -EINVAL if the queue is not a SysV queue, -EIDRM if
+ * ipc_checkid() rejects the id, 0 if the id is valid.
+ */
+static inline int msg_checkid(struct msg_queue *msq, int id)
+{
+	if ((msq->q_flags & MSG_SYSV) == 0)
+		return -EINVAL;
+
+	return ipc_checkid(&msg_ids,&msq->q_perm,id) ? -EIDRM : 0;
+}
+
+/*
+ * ->put_super: destroy every queue that still exists and drop the
+ * reference on the root dentry, all under msg_ids.sem.
+ */
+static void msg_put_super(struct super_block *sb)
+{
+	int slot;
+
+	down(&msg_ids.sem);
+	for (slot = 0; slot <= msg_ids.max_id; slot++) {
+		/* freeque() is called with the queue locked */
+		if (msg_lock (slot) != NULL)
+			freeque(slot);
+	}
+	dput (sb->s_root);
+	up(&msg_ids.sem);
+}
+
+/*
+ * ->statfs: report the message queue limits as filesystem statistics.
+ * Blocks are pages derived from msg_ctlmnb * msg_ctlmni and the bytes
+ * currently queued; files are derived from msg_ctlmni and the msg_hdrs
+ * counter.  Always returns 0.
+ */
+static int msg_statfs(struct super_block *sb, struct statfs *buf)
+{
+	buf->f_type = MSG_FS_MAGIC;
+	buf->f_namelen = MSG_NAME_LEN;
+	buf->f_bsize = PAGE_SIZE;
+
+	buf->f_blocks = (msg_ctlmnb * msg_ctlmni) >> PAGE_SHIFT;
+	buf->f_bfree = buf->f_blocks - (atomic_read(&msg_bytes) >> PAGE_SHIFT);
+	buf->f_bavail = buf->f_bfree;
+
+	buf->f_files = msg_ctlmni;
+	buf->f_ffree = msg_ctlmni - atomic_read(&msg_hdrs);
+	return 0;
+}
+
+/*
+ * ->read_inode: initialise an inode from its inode number.
+ *
+ * Numbers below SEQ_MULTIPLIER denote a queue slot: mode, owner, size
+ * and times are copied from the queue.  If no queue occupies the slot
+ * the inode is left with i_mode 0 and no operations.  Any other number
+ * is set up as the root directory.
+ */
+static void msg_fill_inode(struct inode * inode)
+{
+	struct msg_queue *msq;
+	int id = inode->i_ino;
+
+	inode->i_op = NULL;
+	inode->i_mode = 0;
+
+	if (id >= SEQ_MULTIPLIER) {
+		/* the root directory */
+		inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
+		inode->i_op = &msg_root_inode_operations;
+		inode->i_fop = &msg_root_operations;
+		inode->i_sb = msg_sb;
+		inode->i_nlink = 2;
+		inode->i_mode = S_IFDIR | msg_mode;
+		inode->i_uid = inode->i_gid = 0;
+		return;
+	}
+
+	msq = msg_lock (id);
+	if (msq == NULL)
+		return;
+
+	inode->i_mode = (msq->q_flags & S_IRWXUGO) | S_IFIFO;
+	inode->i_uid = msq->q_perm.uid;
+	inode->i_gid = msq->q_perm.gid;
+	inode->i_size = msq->q_cbytes;
+	inode->i_mtime = msq->q_stime;
+	/* access time is the later of last send and last receive */
+	if (msq->q_stime > msq->q_rtime)
+		inode->i_atime = msq->q_stime;
+	else
+		inode->i_atime = msq->q_rtime;
+	inode->i_ctime = msq->q_ctime;
+	msg_unlock (id);
+
+	inode->i_op = &msg_inode_operations;
+	inode->i_fop = &msg_file_operations;
+}
+
+/*
+ * ->create method of the root directory: create a new named queue.
+ *
+ * The queue limits come from the mq_attr registered on mq_open_links by
+ * an MQ_OPEN ioctl of the current task; if there is none, the defaults
+ * of 32 messages of 64 bytes are used.
+ *
+ * Returns 0 on success, the error of newque(), or -ENOMEM if no inode
+ * can be obtained.  In the latter case the queue that was just created
+ * is destroyed again, so a failed create leaves nothing behind.
+ */
+static int msg_create (struct inode *dir, struct dentry *dent, int mode)
+{
+	int id, err;
+	struct inode *inode;
+	struct mq_attr attr, *p;
+	struct list_head *tmp;
+
+	attr.mq_maxmsg = 32;
+	attr.mq_msgsize = 64;
+	p = &attr;
+
+	down(&msg_ids.sem);
+	list_for_each(tmp, &mq_open_links) {
+		struct mq_link *l = list_entry(tmp, struct mq_link, link);
+		if (l->tsk == current) {
+			p = l->attr;
+			break;
+		}
+	}
+	err = id = newque (IPC_PRIVATE, dent->d_name.name, dent->d_name.len, p, mode);
+	if (err < 0)
+		goto out;
+
+	inode = iget (msg_sb, id % SEQ_MULTIPLIER);
+	if (!inode){
+		/*
+		 * Undo newque(): freeque() expects the queue to be locked
+		 * and msg_ids.sem to be held, as in msg_delete().
+		 */
+		if (msg_lock (id))
+			freeque (id);
+		err = -ENOMEM;
+		goto out;
+	}
+	err = 0;
+	down (&inode->i_sem);
+	inode->i_mode = (mode & S_IRWXUGO) | S_IFIFO;
+	inode->i_op = &msg_inode_operations;
+	d_instantiate(dent, inode);
+	up (&inode->i_sem);
+
+out:
+	up(&msg_ids.sem);
+	return err;
+}
+
+/*
+ * ->readdir of the root directory.
+ *
+ * Positions 0 and 1 are "." and "..", position n >= 2 corresponds to
+ * queue slot n-2.  Empty slots and queues flagged MSG_UNLK are skipped.
+ * Stops as soon as filldir() reports that the buffer is full.  Always
+ * returns 0.
+ */
+static int msg_readdir (struct file *filp, void *dirent, filldir_t filldir)
+{
+	struct inode * dir = filp->f_dentry->d_inode;
+	struct msg_queue *msq;
+	off_t pos = filp->f_pos;
+
+	if (pos == 0) {
+		if (filldir(dirent, ".", 1, pos, dir->i_ino, DT_DIR) < 0)
+			return 0;
+		filp->f_pos = ++pos;
+	}
+	if (pos == 1) {
+		if (filldir(dirent, "..", 2, pos, dir->i_ino, DT_DIR) < 0)
+			return 0;
+		filp->f_pos = ++pos;
+	}
+
+	down(&msg_ids.sem);
+	while (pos-2 <= msg_ids.max_id) {
+		msq = msg_get (pos-2);
+		if (msq && !(msq->q_flags & MSG_UNLK)) {
+			/* the directory position doubles as the reported inode number */
+			if (filldir(dirent, msq->q_name, msq->q_namelen, pos, pos, DT_FIFO) < 0)
+				break;
+		}
+		pos++;
+	}
+	filp->f_pos = pos;
+	up(&msg_ids.sem);
+
+	UPDATE_ATIME(dir);
+	return 0;
+}
+
+/*
+ * ->lookup of the root directory: find the queue named dent->d_name.
+ *
+ * All queue slots are scanned under msg_ids.sem; queues flagged
+ * MSG_UNLK are ignored.  On a match the inode of the slot is attached
+ * to the dentry.  Without a match a negative dentry is added, except
+ * for names that look like the reserved SysV names (MSG_FMT), which
+ * fail with -EINVAL.
+ *
+ * Returns ERR_PTR(err), i.e. NULL when err is 0.
+ */
+static struct dentry *msg_lookup (struct inode *dir, struct dentry *dent)
+{
+ int i, err = 0;
+ struct msg_queue* msq;
+ struct inode *inode = NULL;
+
+ if (dent->d_name.len > MSG_NAME_LEN)
+ return ERR_PTR(-ENAMETOOLONG);
+
+ down(&msg_ids.sem);
+ for(i = 0; i <= msg_ids.max_id; i++) {
+ if (!(msq = msg_lock(i)))
+ continue;
+ if (!(msq->q_flags & MSG_UNLK) &&
+ dent->d_name.len == msq->q_namelen &&
+ strncmp(dent->d_name.name, msq->q_name, msq->q_namelen) == 0)
+ goto found;
+ msg_unlock(i);
+ }
+
+ /*
+ * prevent the reserved names as negative dentries.
+ * This also prevents object creation through the filesystem
+ */
+ /* MSG_FMT_LEN - 8 is the length of the literal ".IPC_" prefix */
+ if (dent->d_name.len == MSG_FMT_LEN &&
+ memcmp (MSG_FMT, dent->d_name.name, MSG_FMT_LEN - 8) == 0)
+ err = -EINVAL; /* EINVAL to give IPC_RMID the right error */
+
+ goto out;
+
+found:
+ /* the slot index is the inode number of the queue */
+ msg_unlock(i);
+ inode = iget(dir->i_sb, i);
+
+ if (!inode)
+ err = -EACCES;
+out:
+ /* inode is NULL here if nothing was found: negative dentry */
+ if (err == 0)
+ d_add (dent, inode);
+ up (&msg_ids.sem);
+ return ERR_PTR(err);
+}
+
+/*
+ * Common part of unlink(2) and msgctl(IPC_RMID): mark the queue behind
+ * dent as unlinked (MSG_UNLK) and reset its key to IPC_PRIVATE so that
+ * it is not found any more, then drop one link of the inode.
+ *
+ * If sysv is non-zero the SysV style checks are done first: the queue
+ * must be a SysV queue (-EINVAL otherwise) and the caller must be its
+ * creator or owner or have CAP_SYS_ADMIN (-EPERM otherwise).
+ *
+ * Returns 0 on success.
+ */
+static inline int msg_do_unlink (struct inode *dir, struct dentry *dent, int sysv)
+{
+ struct inode * inode = dent->d_inode;
+ struct msg_queue *msq;
+
+ down (&msg_ids.sem);
+ if (!(msq = msg_lock (inode->i_ino)))
+ BUG();
+ if (sysv) {
+ int ret = 0;
+
+ if (!(msq->q_flags & MSG_SYSV))
+ ret = -EINVAL;
+ else if (current->euid != msq->q_perm.cuid &&
+ current->euid != msq->q_perm.uid && !capable(CAP_SYS_ADMIN))
+ ret = -EPERM;
+ if (ret) {
+ msg_unlock (inode->i_ino);
+ up (&msg_ids.sem);
+ return ret;
+ }
+ }
+ msq->q_flags |= MSG_UNLK;
+ msq->q_perm.key = IPC_PRIVATE; /* Do not find it any more */
+ msg_unlock (inode->i_ino);
+ up (&msg_ids.sem);
+ inode->i_nlink -= 1;
+ /*
+ * If it's a reserved name we have to drop the dentry instead
+ * of creating a negative dentry
+ */
+ if (dent->d_name.len == MSG_FMT_LEN &&
+ memcmp (MSG_FMT, dent->d_name.name, MSG_FMT_LEN - 8) == 0)
+ d_drop (dent);
+ return 0;
+}
+
+/* ->unlink of the root directory: unlink without the SysV style checks */
+static int msg_unlink (struct inode *dir, struct dentry *dent)
+{
+ return msg_do_unlink (dir, dent, 0);
+}
+/*
+ * ->setattr of a queue inode: mirror mode, uid and gid changes into the
+ * queue itself before updating the inode.  Size changes are rejected
+ * with -EINVAL.  Returns the result of inode_change_ok().
+ */
+static int msg_setattr (struct dentry *dentry, struct iattr *attr)
+{
+ int error;
+ struct inode *inode = dentry->d_inode;
+ struct msg_queue *msq;
+
+ error = inode_change_ok(inode, attr);
+ if (error)
+ return error;
+ if (attr->ia_valid & ATTR_SIZE)
+ return -EINVAL;
+
+ if (attr->ia_valid & (ATTR_MODE | ATTR_UID | ATTR_GID)) {
+ if (!(msq = msg_lock(inode->i_ino)))
+ BUG();
+ /* only the permission bits change; MSG_UNLK/MSG_SYSV are kept */
+ if (attr->ia_valid & ATTR_MODE)
+ msq->q_flags = (msq->q_flags & ~S_IRWXUGO)
+ | (S_IRWXUGO & attr->ia_mode);
+ if (attr->ia_valid & ATTR_UID)
+ msq->q_perm.uid = attr->ia_uid;
+ if (attr->ia_valid & ATTR_GID)
+ msq->q_perm.gid = attr->ia_gid;
+ /* NOTE(review): ia_ctime is used without testing ATTR_CTIME - confirm the caller always sets it */
+ msq->q_ctime = attr->ia_ctime;
+ msg_unlock (inode->i_ino);
+ }
+
+ /* NOTE(review): the return value of inode_setattr() is ignored */
+ inode_setattr(inode, attr);
+ return error;
+}
+
+/*
+ * ioctl on the root directory; only MQ_OPEN is supported.
+ *
+ * Copies a struct mq_open from user space, checks the requested limits
+ * against msg_ctlmnb and then opens mq_name with sys_open().  While the
+ * open runs, the requested attributes are published on mq_open_links
+ * so that msg_create() can pick them up for the current task.
+ *
+ * Returns the result of sys_open(), -EINVAL for an unknown command or
+ * limits that are too large, or -EFAULT.
+ */
+static int msg_root_ioctl (struct inode * inode, struct file * filp, unsigned int cmd, unsigned long arg)
+{
+ struct mq_open o;
+ struct mq_link link;
+ int ret;
+
+ if (cmd != MQ_OPEN)
+ return -EINVAL;
+ ret = -EFAULT;
+ if (copy_from_user(&o, (struct mq_open *)arg, sizeof(struct mq_open)))
+ goto out;
+ ret = -EINVAL;
+ /* the unsigned casts make negative values fail the comparison too */
+ if ((unsigned long)o.mq_attr.mq_msgsize > msg_ctlmnb ||
+ (unsigned long)o.mq_attr.mq_maxmsg > msg_ctlmnb ||
+ o.mq_attr.mq_msgsize * o.mq_attr.mq_maxmsg > msg_ctlmnb)
+ goto out;
+ /* link lives on this stack frame and is removed again below */
+ link.attr = &o.mq_attr;
+ link.tsk = current;
+ down(&msg_ids.sem);
+ list_add(&link.link, &mq_open_links);
+ up(&msg_ids.sem);
+ /* FIXME: Shouldn't we check here whether mq_name is really a file within the msg filesystem?
+ Otherwise people tracing the open(2) syscall might miss this place... */
+ ret = sys_open(o.mq_name, o.mq_oflag, o.mq_mode);
+ down(&msg_ids.sem);
+ list_del(&link.link);
+ up(&msg_ids.sem);
+out:
+ return ret;
+}
+
+/*
+ * ioctl on an open message queue.
+ *
+ *   MQ_GETATTR  copy the queue limits, the current number of messages
+ *               and the O_NONBLOCK state of the file to user space
+ *   MQ_SEND     send a message of the given type (must be > 0); the
+ *               file must be open for writing
+ *   MQ_RECEIVE  receive a message and write its type back to user
+ *               space; the file must be open for reading
+ *   MQ_NOTIFY   register the calling process for a signal (see
+ *               msg_do_send() for when it is delivered); fails with
+ *               -EBUSY if a registration already exists
+ *
+ * Returns the result of the operation or a negative errno; unknown
+ * commands yield -EINVAL.
+ */
+static int msg_ioctl (struct inode * inode, struct file * filp, unsigned int cmd, unsigned long arg)
+{
+	int ret = -EINVAL;
+	struct msg_queue *msq;
+	struct mq_sndrcv sr;
+
+	switch (cmd) {
+	case MQ_GETATTR: {
+		struct mq_attr attr;
+		/* clear padding so that no kernel stack data reaches user space */
+		memset(&attr, 0, sizeof(attr));
+		msq = msg_lock (inode->i_ino);
+		if (msq == NULL)
+			BUG();
+		attr.mq_maxmsg = msq->q_maxmsg;
+		attr.mq_msgsize = msq->q_msgsize;
+		attr.mq_curmsgs = msq->q_qnum;
+		attr.mq_flags = filp->f_flags & O_NONBLOCK;
+		msg_unlock (inode->i_ino);
+		ret = copy_to_user((struct mq_attr *)arg, &attr, sizeof(attr)) ? -EFAULT : 0;
+		break;
+	}
+	case MQ_SEND:
+		ret = -EBADF;
+		if (!(filp->f_mode & FMODE_WRITE))
+			break;
+		ret = -EFAULT;
+		if (copy_from_user(&sr, (struct mq_sndrcv *)arg, sizeof(sr)))
+			break;
+		ret = -EINVAL;
+		if (sr.mq_type <= 0)
+			break;
+		ret = msg_send (inode, filp, sr.mq_buf, sr.mq_len, sr.mq_type);
+		break;
+	case MQ_RECEIVE:
+		ret = -EBADF;
+		if (!(filp->f_mode & FMODE_READ))
+			break;
+		ret = -EFAULT;
+		if (copy_from_user(&sr, (struct mq_sndrcv *)arg, sizeof(sr)))
+			break;
+		ret = msg_receive (inode, filp, sr.mq_buf, sr.mq_len, &sr.mq_type);
+		/*
+		 * msg_receive() returns ssize_t and also backs read(2), so
+		 * success is presumably a non-negative byte count: report the
+		 * type for every successful receive, not only when 0 is
+		 * returned.
+		 */
+		if (ret >= 0 && put_user (sr.mq_type, &((struct mq_sndrcv *)arg)->mq_type))
+			ret = -EFAULT;
+		break;
+	case MQ_NOTIFY: {
+		struct sigevent sev;
+		struct msg_queue *msg;
+		ret = -EFAULT;
+		if (copy_from_user(&sev, (struct sigevent *)arg, sizeof(sev)))
+			break;
+		ret = -EINVAL;
+		if (sev.sigev_notify != SIGEV_SIGNAL && sev.sigev_notify != SIGEV_NONE)
+			break;
+		if (sev.sigev_signo <= 0 || sev.sigev_signo > _NSIG)
+			break;
+		msg = msg_lock(inode->i_ino);
+		if (!msg) BUG();
+		ret = 0;
+		if (msg->q_signo)
+			ret = -EBUSY;
+		else if (sev.sigev_notify == SIGEV_SIGNAL) {
+			msg->q_signo = sev.sigev_signo;
+			msg->q_sigval = sev.sigev_value;
+			/*
+			 * Remember who registered: msg_do_send() signals
+			 * q_pid and msg_release() drops the registration
+			 * when that process closes the queue.
+			 */
+			msg->q_pid = current->pid;
+		} else
+			msg->q_signo = 0;
+		msg_unlock(inode->i_ino);
+		break;
+	}
+	default:
+		break;
+	}
+	return ret;
+}
+
+/*
+ * write(2) on a queue: send the buffer as one message of type
+ * MQ_DEFAULT_TYPE.  Returns count on success, otherwise the error from
+ * msg_send().
+ */
+static ssize_t msg_write(struct file * file,
+ const char * buf, size_t count, loff_t *ppos)
+{
+ int ret = msg_send(file->f_dentry->d_inode, file, buf, count, MQ_DEFAULT_TYPE);
+ /* a zero result from msg_send() means success */
+ return ret ?: count;
+}
+
+/*
+ * read(2) on a queue: hand the request to msg_receive() without a
+ * location for the message type (NULL) and return its result.
+ */
+static ssize_t msg_read(struct file * file,
+ char * buf, size_t count, loff_t *ppos)
+{
+ return msg_receive(file->f_dentry->d_inode, file, buf, count, NULL);
+}
+
+/*
+ * ->release: if the calling process is the one registered for the
+ * notification signal (q_pid), cancel the registration.  Always
+ * returns 0.
+ */
+static int msg_release (struct inode *ino, struct file *filp)
+{
+ struct msg_queue *msq = msg_lock(ino->i_ino);
+ if (!msq) BUG();
+ if (msq->q_signo && msq->q_pid == current->pid)
+ msq->q_signo = 0;
+ msg_unlock(ino->i_ino);
+ return 0;
+}
+
+/* ->flush: same notification cleanup as msg_release() */
+static int msg_flush (struct file *filp)
+{
+ return msg_release(filp->f_dentry->d_inode, filp);
+}
+
+static int newque (key_t key, const char *name, int namelen,
+ struct mq_attr *attr, int msgflg)
{
int id;
int retval;
struct msg_queue *msq;

- msq = (struct msg_queue *) kmalloc (sizeof (*msq), GFP_KERNEL);
+ if (namelen > MSG_NAME_LEN)
+ return -ENAMETOOLONG;
+ msq = (struct msg_queue *) kmalloc (sizeof (*msq) + namelen, GFP_KERNEL);
+
if (!msq)
return -ENOMEM;

@@ -113,18 +670,94 @@
kfree(msq);
return -ENOSPC;
}
+ msq->q_flags = (msgflg & S_IRWXUGO);
+ msq->q_perm.key = key;

msq->q_stime = msq->q_rtime = 0;
msq->q_ctime = CURRENT_TIME;
msq->q_cbytes = msq->q_qnum = 0;
msq->q_qbytes = msg_ctlmnb;
msq->q_lspid = msq->q_lrpid = 0;
+ msq->q_signo = 0;
+
INIT_LIST_HEAD(&msq->q_messages);
INIT_LIST_HEAD(&msq->q_receivers);
INIT_LIST_HEAD(&msq->q_senders);
+ msq->id = msg_buildid(id, msq->q_perm.seq);
+ if (name) {
+ msq->q_maxmsg = attr->mq_maxmsg;
+ msq->q_msgsize = attr->mq_msgsize;
+ msq->q_qbytes = msq->q_maxmsg * msq->q_msgsize;
+ msq->q_namelen = namelen;
+ memcpy(msq->q_name, name, namelen);
+ } else {
+ msq->q_qbytes = msg_ctlmnb;
+ msq->q_maxmsg = msg_ctlmnb;
+ msq->q_msgsize = msg_ctlmax;
+ msq->q_flags |= MSG_SYSV;
+ msq->q_namelen = sprintf(msq->q_name, MSG_FMT, msq->id);
+ }
msg_unlock(id);

- return msg_buildid(id,msq->q_perm.seq);
+ return msq->id;
+}
+
+/* FIXME: maybe we need lock_kernel() here */
+/*
+ * ->delete_inode: destroy the queue whose slot is the inode number and
+ * then clear the inode.  The queue must still exist (BUG otherwise).
+ */
+static void msg_delete (struct inode *ino)
+{
+ int msgid = ino->i_ino;
+ struct msg_queue *msq;
+
+ down(&msg_ids.sem);
+ /* freeque() is called with the queue locked */
+ msq = msg_lock(msgid);
+ if(msq==NULL)
+ BUG();
+ freeque(msgid);
+ up(&msg_ids.sem);
+ clear_inode(ino);
+}
+
+/*
+ * msgctl(IPC_RMID): remove the directory entry of the SysV queue with
+ * id msqid.
+ *
+ * The id is validated first, then the reserved name of the queue
+ * (MSG_FMT) is looked up in the root directory and unlinked through
+ * msg_do_unlink(), which performs the SysV permission check.
+ *
+ * Returns 0 on success, -EINVAL if there is no such queue, the error
+ * of msg_checkid() (-EINVAL or -EIDRM) for an invalid id, or the error
+ * of the lookup or unlink.
+ */
+static int msg_remove_name(int msqid)
+{
+	struct dentry *dir;
+	struct dentry *dentry;
+	struct msg_queue *msq;
+	int error, id;
+	char name[MSG_FMT_LEN+1];
+
+	down(&msg_ids.sem);
+	msq = msg_lock(msqid);
+	if (msq == NULL) {
+		/* do not leave with msg_ids.sem held */
+		up(&msg_ids.sem);
+		return -EINVAL;
+	}
+	id = msq->id;
+	error = msg_checkid (msq, msqid);
+	msg_unlock(msqid);
+	up(&msg_ids.sem);
+	if (error)
+		return error;
+
+	sprintf (name, MSG_FMT, id);
+	dir=msg_sb->s_root;
+	down(&dir->d_inode->i_sem);
+	dentry = lookup_one_len(name, dir, strlen(name) );
+	error = PTR_ERR(dentry);
+	if (!IS_ERR(dentry)) {
+		/*
+		 * We have to do our own unlink to prevent the vfs
+		 * permission check. We'll do the SYSV IPC style check
+		 * inside of msg_do_unlink when we hold msg lock and
+		 * msg_ids semaphore.
+		 *
+		 * The directory's i_sem is already held at this point
+		 * and must not be taken a second time.
+		 */
+		error = msg_do_unlink(dir->d_inode, dentry, 1);
+		if (!error)
+			d_delete(dentry);
+		dput(dentry);
+	}
+	up(&dir->d_inode->i_sem);
+	return error;
+}

static void free_msg(struct msg_msg* msg)
@@ -139,7 +772,7 @@
}
}

-static struct msg_msg* load_msg(void* src, int len)
+static struct msg_msg* load_msg(const char * src, int len)
{
struct msg_msg* msg;
struct msg_msgseg** pseg;
@@ -191,9 +824,9 @@
return ERR_PTR(err);
}

-static int store_msg(void* dest, struct msg_msg* msg, int len)
+static int store_msg(void* dest, struct msg_msg* msg, size_t len)
{
- int alen;
+ size_t alen;
struct msg_msgseg *seg;

alen = len;
@@ -213,7 +846,7 @@
return -1;
len -= alen;
dest = ((char*)dest)+alen;
- seg=seg->next;
+ seg = seg->next;
}
return 0;
}
@@ -272,7 +905,7 @@
expunge_all(msq,-EIDRM);
ss_wakeup(&msq->q_senders,1);
msg_unlock(id);
-
+
tmp = msq->q_messages.next;
while(tmp != &msq->q_messages) {
struct msg_msg* msg = list_entry(tmp,struct msg_msg,m_list);
@@ -292,12 +925,12 @@

down(&msg_ids.sem);
if (key == IPC_PRIVATE)
- ret = newque(key, msgflg);
+ ret = newque(key, NULL, MSG_FMT_LEN + 1, NULL, msgflg);
else if ((id = ipc_findkey(&msg_ids, key)) == -1) { /* key not used */
if (!(msgflg & IPC_CREAT))
ret = -ENOENT;
else
- ret = newque(key, msgflg);
+ ret = newque(key, NULL, MSG_FMT_LEN + 1, NULL, msgflg);
} else if (msgflg & IPC_CREAT && msgflg & IPC_EXCL) {
ret = -EEXIST;
} else {
@@ -358,13 +991,6 @@
}
}

-struct msq_setbuf {
- unsigned long qbytes;
- uid_t uid;
- gid_t gid;
- mode_t mode;
-};
-
static inline unsigned long copy_msqid_from_user(struct msq_setbuf *out, void *buf, int version)
{
switch(version) {
@@ -468,10 +1094,13 @@
return -EINVAL;

if(cmd == MSG_STAT) {
+ err = -EINVAL;
+ if (!(msq->q_flags & MSG_SYSV))
+ goto out_unlock;
success_return = msg_buildid(msqid, msq->q_perm.seq);
} else {
- err = -EIDRM;
- if (msg_checkid(msq,msqid))
+ err = msg_checkid(msq,msqid);
+ if (err)
goto out_unlock;
success_return = 0;
}
@@ -480,6 +1109,7 @@
goto out_unlock;

kernel_to_ipc64_perm(&msq->q_perm, &tbuf.msg_perm);
+ tbuf.msg_perm.mode &= S_IRWXUGO;
tbuf.msg_stime = msq->q_stime;
tbuf.msg_rtime = msq->q_rtime;
tbuf.msg_ctime = msq->q_ctime;
@@ -500,7 +1130,7 @@
return -EFAULT;
break;
case IPC_RMID:
- break;
+ return msg_remove_name(msqid);
default:
return -EINVAL;
}
@@ -521,12 +1151,11 @@
/* We _could_ check for CAP_CHOWN above, but we don't */
goto out_unlock_up;

- switch (cmd) {
- case IPC_SET:
- {
+ if (cmd == IPC_SET) {
if (setbuf.qbytes > msg_ctlmnb && !capable(CAP_SYS_RESOURCE))
goto out_unlock_up;
msq->q_qbytes = setbuf.qbytes;
+ msq->q_maxmsg = setbuf.qbytes;

ipcp->uid = setbuf.uid;
ipcp->gid = setbuf.gid;
@@ -542,11 +1171,6 @@
*/
ss_wakeup(&msq->q_senders,0);
msg_unlock(msqid);
- break;
- }
- case IPC_RMID:
- freeque (msqid);
- break;
}
err = 0;
out_up:
@@ -608,6 +1232,105 @@
return 0;
}

+/*
+ * Try once to put msg on the queue *msqp, which the caller has locked.
+ *
+ * If the queue is full the caller either gets -EAGAIN (nowait) or is
+ * put to sleep.  After the sleep the queue is locked again and stored
+ * in *msqp; the result is then
+ *   -EIDRM  the queue is gone, *msqp is NULL and no lock is held
+ *   -EINTR  a signal is pending
+ *   -EBUSY  the caller should redo its checks and call again
+ *
+ * Otherwise the message is handed to a waiting receiver or appended to
+ * the queue.  If that made the queue non-empty and a notification is
+ * registered (q_signo), the registered process is signalled and the
+ * registration is cleared.
+ *
+ * Returns 0 once the message has been delivered or queued.
+ */
+static int msg_do_send (struct msg_queue **msqp, int msqid,
+	struct msg_msg *msg, size_t msgsz, int nowait)
+{
+	struct msg_queue *msq = *msqp;
+
+	if(msgsz + msq->q_cbytes > msq->q_qbytes ||
+		1 + msq->q_qnum > msq->q_maxmsg) {
+		struct msg_sender s;
+
+		if(nowait)
+			return -EAGAIN;
+
+		ss_add(msq, &s);
+		msg_unlock(msqid);
+		schedule();
+		current->state = TASK_RUNNING;
+
+		*msqp = msq = msg_lock(msqid);
+		if(msq==NULL)
+			return -EIDRM;
+		ss_del(&s);
+
+		if (signal_pending(current))
+			return -EINTR;
+		return -EBUSY;
+	}
+
+	if(!pipelined_send(msq,msg)) {
+		/* no one is waiting for this message, enqueue it */
+		list_add_tail(&msg->m_list,&msq->q_messages);
+		msq->q_cbytes += msgsz;
+		msq->q_qnum++;
+		atomic_add(msgsz,&msg_bytes);
+		atomic_inc(&msg_hdrs);
+		if (msq->q_qnum == 1 && msq->q_signo) {
+			struct task_struct *p;
+			siginfo_t si;
+
+			/* do not pass uninitialised stack bytes along with the signal */
+			memset(&si, 0, sizeof(si));
+			read_lock(&tasklist_lock);
+			p = find_task_by_pid(msq->q_pid);
+			if (p) {
+				si.si_signo = msq->q_signo;
+				si.si_errno = 0;
+				si.si_code = SI_MESGQ;
+				si.si_pid = current->pid;
+				si.si_uid = current->euid;
+				si.si_value = msq->q_sigval;
+				/*
+				 * send_sig_info() returns 0 on success; fall
+				 * back to a plain signal only if it failed,
+				 * so the signal is not delivered twice.
+				 */
+				if (send_sig_info(msq->q_signo, &si, p))
+					send_sig(msq->q_signo, p, 1);
+			}
+			read_unlock(&tasklist_lock);
+			/* the registration is used up after one notification */
+			msq->q_signo = 0;
+		}
+	}
+
+	msq->q_lspid = current->pid;
+	msq->q_stime = CURRENT_TIME;
+	return 0;
+}
+
+/*
+ * Send msgsz bytes from the user buffer mtext as one message of type
+ * mtype to the queue behind inode ino (write(2) and MQ_SEND path).
+ *
+ * Blocks while the queue is full unless filp has O_NONBLOCK set.
+ *
+ * Returns 0 on success, -EINVAL for mtype < 1, -EMSGSIZE if the
+ * message exceeds the queue's q_msgsize, -EACCES if a SysV queue denies
+ * write permission, the error of load_msg(), or the error of
+ * msg_do_send() (-EAGAIN, -EINTR, -EIDRM).
+ */
+static ssize_t msg_send (struct inode *ino, struct file *filp, const char *mtext, size_t msgsz, long mtype)
+{
+	struct msg_queue *msq;
+	struct msg_msg *msg;
+	int err = 0;
+
+	if (mtype < 1)
+		return -EINVAL;
+	/* check the size limit before copying the message from user space */
+	msq = msg_lock(ino->i_ino);
+	if (!msq) BUG();
+	if (msgsz > msq->q_msgsize)
+		err = -EMSGSIZE;
+	msg_unlock(ino->i_ino);
+	if (err) return err;
+
+	/* load_msg() runs without the queue lock held */
+	msg = load_msg(mtext, msgsz);
+	if(IS_ERR(msg))
+		return PTR_ERR(msg);
+
+	msg->m_type = mtype;
+	msg->m_ts = msgsz;
+
+	msq = msg_lock(ino->i_ino);
+	if (!msq) BUG();
+
+	do {
+		err = -EACCES;
+		if (msq->q_flags & MSG_SYSV && ipcperms(&msq->q_perm, S_IWUGO))
+			break;
+
+		err = msg_do_send(&msq, ino->i_ino, msg, msgsz, filp->f_flags & O_NONBLOCK);
+
+	} while (err == -EBUSY);
+
+	/*
+	 * msg_do_send() sets msq to NULL when the queue vanished while we
+	 * slept; in that case no lock is held and none must be released.
+	 */
+	if (msq)
+		msg_unlock(ino->i_ino);
+	/* on success the message now belongs to the queue or a receiver */
+	if (err)
+		free_msg(msg);
+	return err;
+}
+
asmlinkage long sys_msgsnd (int msqid, struct msgbuf *msgp, size_t msgsz, int msgflg)
{
struct msg_queue *msq;
@@ -633,60 +1356,23 @@
err=-EINVAL;
if(msq==NULL)
goto out_free;
-retry:
- err= -EIDRM;
- if (msg_checkid(msq,msqid))
- goto out_unlock_free;
-
- err=-EACCES;
- if (ipcperms(&msq->q_perm, S_IWUGO))
- goto out_unlock_free;
-
- if(msgsz + msq->q_cbytes > msq->q_qbytes ||
- 1 + msq->q_qnum > msq->q_qbytes) {
- struct msg_sender s;
-
- if(msgflg&IPC_NOWAIT) {
- err=-EAGAIN;
- goto out_unlock_free;
- }
- ss_add(msq, &s);
- msg_unlock(msqid);
- schedule();
- current->state= TASK_RUNNING;
+ do {
+ err= -EIDRM;
+ if (msg_checkid(msq,msqid))
+ break;

- msq = msg_lock(msqid);
- err = -EIDRM;
- if(msq==NULL)
- goto out_free;
- ss_del(&s);
-
- if (signal_pending(current)) {
- err=-EINTR;
- goto out_unlock_free;
- }
- goto retry;
- }
+ err=-EACCES;
+ if (ipcperms(&msq->q_perm, S_IWUGO))
+ break;

- msq->q_lspid = current->pid;
- msq->q_stime = CURRENT_TIME;
+ err = msg_do_send(&msq, msqid, msg, msgsz, msgflg & IPC_NOWAIT);

- if(!pipelined_send(msq,msg)) {
- /* noone is waiting for this message, enqueue it */
- list_add_tail(&msg->m_list,&msq->q_messages);
- msq->q_cbytes += msgsz;
- msq->q_qnum++;
- atomic_add(msgsz,&msg_bytes);
- atomic_inc(&msg_hdrs);
- }
-
- err = 0;
- msg = NULL;
+ } while (err == -EBUSY);

-out_unlock_free:
- msg_unlock(msqid);
+ if (msq)
+ msg_unlock(msqid);
out_free:
- if(msg!=NULL)
+ if (msg && err)
free_msg(msg);
return err;
}
@@ -710,127 +1396,169 @@
return SEARCH_EQUAL;
}

+static struct msg_msg *
+msg_do_receive (struct msg_queue *msq, int *msqidp, size_t msgsz,
+ long msgtyp, int mode, int msgflg)
+{
+ struct msg_receiver msr_d;
+ struct list_head *tmp;
+ struct msg_msg *msg, *found_msg;
+ int msqid = *msqidp;
+
+ for (;;) {
+ if (msq->q_flags & MSG_SYSV && ipcperms (&msq->q_perm, S_IRUGO))
+ return ERR_PTR(-EACCES);
+
+ tmp = msq->q_messages.next;
+ found_msg = NULL;
+ while (tmp != &msq->q_messages) {
+ msg = list_entry(tmp,struct msg_msg,m_list);
+ if(testmsg(msg, msgtyp, mode)) {
+ found_msg = msg;
+ if(mode == SEARCH_LESSEQUAL && msg->m_type != 1)
+ msgtyp = msg->m_type - 1;
+ else
+ break;
+ }
+ tmp = tmp->next;
+ }
+ if (found_msg) {
+ msg = found_msg;
+ if ((msgsz < msg->m_ts) && !(msgflg & MSG_NOERROR))
+ return ERR_PTR(-E2BIG);
+ list_del(&msg->m_list);
+ msq->q_qnum--;
+ msq->q_rtime = CURRENT_TIME;
+ msq->q_lrpid = current->pid;
+ msq->q_cbytes -= msg->m_ts;
+ atomic_sub(msg->m_ts,&msg_bytes);
+ atomic_dec(&msg_hdrs);
+ ss_wakeup(&msq->q_senders,0);
+ msg_unlock(msqid);
+ return msg;
+ } else {
+ struct msg_queue *t;
+ /* no message waiting. Prepare for pipelined
+ * receive.
+ */
+ if (msgflg & IPC_NOWAIT)
+ return ERR_PTR(-ENOMSG);
+ list_add_tail(&msr_d.r_list,&msq->q_receivers);
+ msr_d.r_tsk = current;
+ msr_d.r_msgtype = msgtyp;
+ msr_d.r_mode = mode;
+ if(msgflg & MSG_NOERROR)
+ msr_d.r_maxsize = INT_MAX;
+ else
+ msr_d.r_maxsize = msgsz;
+ msr_d.r_msg = ERR_PTR(-EAGAIN);
+ current->state = TASK_INTERRUPTIBLE;
+ msg_unlock(msqid);
+
+ schedule();
+ current->state = TASK_RUNNING;
+
+ msg = (struct msg_msg*) msr_d.r_msg;
+ if(!IS_ERR(msg))
+ return msg;
+
+ t = msg_lock(msqid);
+ if(t == NULL)
+ *msqidp = msqid = -1;
+ msg = (struct msg_msg*)msr_d.r_msg;
+ if(!IS_ERR(msg)) {
+ /* our message arived while we waited for
+ * the spinlock. Process it.
+ */
+ if (msqid != -1)
+ msg_unlock(msqid);
+ return msg;
+ }
+ if(PTR_ERR(msg) == -EAGAIN) {
+ if(msqid == -1)
+ BUG();
+ list_del(&msr_d.r_list);
+ if (signal_pending(current))
+ return ERR_PTR(-EINTR);
+ else
+ continue;
+ }
+ return msg;
+ }
+ }
+}
+
+static int msg_receive (struct inode *ino, struct file *filp, char *mtext,
+ size_t msgsz, long *msgtypp)
+{
+ struct msg_queue *msq;
+ struct msg_msg *msg;
+ long msgtyp;
+ int err, mode, msqid = ino->i_ino;
+
+ if (msgtypp)
+ msgtyp = *msgtypp;
+ else
+ msgtyp = -MQ_DEFAULT_TYPE;
+ mode = convert_mode(&msgtyp, 0);
+ msq = msg_lock(msqid);
+ if (!msq) BUG();
+ if (msgtypp && msgsz < msq->q_msgsize) {
+ msg_unlock(msqid);
+ return -EMSGSIZE;
+ }
+
+ msg = msg_do_receive (msq, &msqid, msgsz, msgtyp, mode,
+ (filp->f_flags & O_NONBLOCK) ? IPC_NOWAIT : 0);
+ if (!IS_ERR (msg)) {
+ msgsz = (msgsz > msg->m_ts) ? msg->m_ts : msgsz;
+ if (store_msg(mtext, msg, msgsz))
+ msgsz = -EFAULT;
+ else if (msgtypp)
+ *msgtypp = msg->m_type;
+ free_msg(msg);
+ return msgsz;
+ }
+ if (msqid != -1)
+ msg_unlock(msqid);
+ err = PTR_ERR(msg);
+ switch (err) {
+ case -ENOMSG: err = -EAGAIN; break;
+ case -E2BIG: err = -EMSGSIZE; break;
+ }
+ return err;
+}
+
asmlinkage long sys_msgrcv (int msqid, struct msgbuf *msgp, size_t msgsz,
long msgtyp, int msgflg)
{
struct msg_queue *msq;
- struct msg_receiver msr_d;
- struct list_head* tmp;
- struct msg_msg* msg, *found_msg;
- int err;
+ struct msg_msg *msg;
int mode;

if (msqid < 0 || (long) msgsz < 0)
return -EINVAL;
- mode = convert_mode(&msgtyp,msgflg);
+ mode = convert_mode(&msgtyp, msgflg);

- msq = msg_lock(msqid);
- if(msq==NULL)
+ msq = msg_lock (msqid);
+ if (msq==NULL)
+ return -EINVAL;
+ if (!(msq->q_flags & MSG_SYSV)) {
+ msg_unlock (msqid);
return -EINVAL;
-retry:
- err = -EIDRM;
- if (msg_checkid(msq,msqid))
- goto out_unlock;
-
- err=-EACCES;
- if (ipcperms (&msq->q_perm, S_IRUGO))
- goto out_unlock;
-
- tmp = msq->q_messages.next;
- found_msg=NULL;
- while (tmp != &msq->q_messages) {
- msg = list_entry(tmp,struct msg_msg,m_list);
- if(testmsg(msg,msgtyp,mode)) {
- found_msg = msg;
- if(mode == SEARCH_LESSEQUAL && msg->m_type != 1) {
- found_msg=msg;
- msgtyp=msg->m_type-1;
- } else {
- found_msg=msg;
- break;
- }
- }
- tmp = tmp->next;
}
- if(found_msg) {
- msg=found_msg;
- if ((msgsz < msg->m_ts) && !(msgflg & MSG_NOERROR)) {
- err=-E2BIG;
- goto out_unlock;
- }
- list_del(&msg->m_list);
- msq->q_qnum--;
- msq->q_rtime = CURRENT_TIME;
- msq->q_lrpid = current->pid;
- msq->q_cbytes -= msg->m_ts;
- atomic_sub(msg->m_ts,&msg_bytes);
- atomic_dec(&msg_hdrs);
- ss_wakeup(&msq->q_senders,0);
- msg_unlock(msqid);
-out_success:
+ msg = msg_do_receive (msq, &msqid, msgsz, msgtyp, mode, msgflg);
+ if (!IS_ERR (msg)) {
msgsz = (msgsz > msg->m_ts) ? msg->m_ts : msgsz;
- if (put_user (msg->m_type, &msgp->mtype) ||
- store_msg(msgp->mtext, msg, msgsz)) {
- msgsz = -EFAULT;
- }
- free_msg(msg);
- return msgsz;
- } else
- {
- struct msg_queue *t;
- /* no message waiting. Prepare for pipelined
- * receive.
- */
- if (msgflg & IPC_NOWAIT) {
- err=-ENOMSG;
- goto out_unlock;
- }
- list_add_tail(&msr_d.r_list,&msq->q_receivers);
- msr_d.r_tsk = current;
- msr_d.r_msgtype = msgtyp;
- msr_d.r_mode = mode;
- if(msgflg & MSG_NOERROR)
- msr_d.r_maxsize = INT_MAX;
- else
- msr_d.r_maxsize = msgsz;
- msr_d.r_msg = ERR_PTR(-EAGAIN);
- current->state = TASK_INTERRUPTIBLE;
- msg_unlock(msqid);
-
- schedule();
- current->state = TASK_RUNNING;
-
- msg = (struct msg_msg*) msr_d.r_msg;
- if(!IS_ERR(msg))
- goto out_success;
-
- t = msg_lock(msqid);
- if(t==NULL)
- msqid=-1;
- msg = (struct msg_msg*)msr_d.r_msg;
- if(!IS_ERR(msg)) {
- /* our message arived while we waited for
- * the spinlock. Process it.
- */
- if(msqid!=-1)
- msg_unlock(msqid);
- goto out_success;
- }
- err = PTR_ERR(msg);
- if(err == -EAGAIN) {
- if(msqid==-1)
- BUG();
- list_del(&msr_d.r_list);
- if (signal_pending(current))
- err=-EINTR;
- else
- goto retry;
- }
- }
-out_unlock:
- if(msqid!=-1)
- msg_unlock(msqid);
- return err;
+ if (put_user (msg->m_type, &msgp->mtype) ||
+ store_msg(msgp->mtext, msg, msgsz))
+ msgsz = -EFAULT;
+ free_msg(msg);
+ return msgsz;
+ }
+ if (msqid != -1)
+ msg_unlock(msqid);
+ return PTR_ERR(msg);
}

#ifdef CONFIG_PROC_FS
@@ -841,16 +1569,16 @@
int i, len = 0;

down(&msg_ids.sem);
- len += sprintf(buffer, " key msqid perms cbytes qnum lspid lrpid uid gid cuid cgid stime rtime ctime\n");
+ len += sprintf(buffer, " key msqid perms cbytes qnum lspid lrpid uid gid cuid cgid stime rtime ctime name(POSIX)\n");

for(i = 0; i <= msg_ids.max_id; i++) {
struct msg_queue * msq;
msq = msg_lock(i);
if(msq != NULL) {
- len += sprintf(buffer + len, "%10d %10d %4o %10lu %10lu %5u %5u %5u %5u %5u %5u %10lu %10lu %10lu\n",
+ len += sprintf(buffer + len, "%10d %10d %4o %10lu %10lu %5u %5u %5u %5u %5u %5u %10lu %10lu %10lu %.*s%s\n",
msq->q_perm.key,
msg_buildid(i,msq->q_perm.seq),
- msq->q_perm.mode,
+ msq->q_flags & S_IRWXUGO,
msq->q_cbytes,
msq->q_qnum,
msq->q_lspid,
@@ -861,7 +1589,10 @@
msq->q_perm.cgid,
msq->q_stime,
msq->q_rtime,
- msq->q_ctime);
+ msq->q_ctime,
+ msq->q_namelen,
+ msq->q_name,
+ msq->q_flags & MSG_UNLK ? " (deleted)" : "");
msg_unlock(i);

pos += len;


Attachments:
posix-mqueue.txt (41.38 kB)

2002-10-27 15:41:25

by Jeff Garzik

[permalink] [raw]
Subject: Re: [PATCH] unified SysV and Posix mqueues as FS

Peter Waechtler wrote:

>I applied the patch from Jakub against 2.5.44
>There are still open issues but it's important to get this in before
>feature freeze.
>
>While you can implement Posix mqueues in userland (Irix is doing this
>with fcntl(fd,F_SETLKW,) and shmem) a kernel implementation has some advantages:
>
>a) no hassle with locks in case an app crashes
>b) guaranteed notification with signals (you can have two apps with
> different uid that can access the queue but aren't allowed to
> send signals)
>c) surprisingly, seems a little faster - did not test with NPT
>
>
>Open issues are:
>
>- notification not tested
>- still linear search in queues
>- I would really enhance the sys_ipc for handling posix mqueue as well
> (yes, perhaps it's more ugly - but it fits naturally, you can't
> specify a priority with a read() - ending up with ioctl())
>- funny "locking" in ipc/util.c
>- check the ipc ids
>
>
>

I won't comment on the overall concept of the patch itself; it's not my
area of expertise and it's too early in the morning to think about it ;-)

However, there are three issues to consider in the meantime:
* Documentation/CodingStyle problems. You need to use standard
one-tab-for-indentation formatting, just like the code around what you
are adding/modifying.
* There is weird text translation in the patch (short example follows).
It may be better if you use mutt and vi to include your patch directly,
without word wrapping, if attachments are getting mangled.

- msq =3D msg_lock(msqid);
- err =3D -EIDRM;
- if(msq=3D=3DNULL)
- goto out_free;
- ss_del(&s);
- =

* Linus probably won't see your email, he has threatened to flush his entire inbox when he returns from his trip ;-)

Regards,

Jeff





2002-10-27 15:47:18

by Alexander Viro

[permalink] [raw]
Subject: Re: [PATCH] unified SysV and Posix mqueues as FS



On Sun, 27 Oct 2002, Peter Waechtler wrote:

> I applied the patch from Jakub against 2.5.44
> There are still open issues but it's important to get this in before
> feature freeze.
>
> While you can implement Posix mqueues in userland (Irix is doing this
> with fcntl(fd,F_SETLKW,) and shmem) a kernel implementation has some advantages:

*thud*

ioctls on _directories_, of all things?

2002-10-27 19:43:32

by H. Peter Anvin

[permalink] [raw]
Subject: Re: [PATCH] unified SysV and Posix mqueues as FS

Followup to: <[email protected]>
By author: Alexander Viro <[email protected]>
In newsgroup: linux.dev.kernel
>
> On Sun, 27 Oct 2002, Peter Waechtler wrote:
>
> > I applied the patch from Jakub against 2.5.44
> > There are still open issues but it's important to get this in before
> > feature freeze.
> >
> > While you can implement Posix mqueues in userland (Irix is doing this
> > with fcntl(fd,F_SETLKW,) and shmem) a kernel implementation has some advantages:
>
> *thud*
>
> ioctls on _directories_, of all things?
>

Yup. There are plenty of them already.

-hpa
--
<[email protected]> at work, <[email protected]> in private!
"Unix gives you enough rope to shoot yourself in the foot."
http://www.zytor.com/~hpa/puzzle.txt <[email protected]>

2002-10-27 21:56:07

by Peter Waechtler

[permalink] [raw]
Subject: Re: [PATCH] unified SysV and Posix mqueues as FS

Manfred Spraul schrieb:
>
> > - notification not tested
> > - still linear search in queues
>
> Is that a problem? Receive does one linear search of the queued
> messages, send does one linear search of the waiting receivers. Both
> lists should be short.
>

Yes, they _should_ but don't have to be.
It only matters if you ask for specific priority/type of message.

> Could you split your patch into the functional changes and cleanup?
> (const, size_t, you move a few definitions around, whitespace cleanups)
>
> I don't like the deep integration of the mqueues into the sysv code - is
> that really needed?
> For example, you add the mqueue messages into the sysv array, and then
> add lots of code to separate both again - IPC_RMID cannot remove posix
> queues, etc.
>
> Have you tried to separate both further? Create a ramfs like filesystem,
> store msg_queue in the inode structure?
> The ids array is only for sysv, only the actual message handling is
> shared between sysv msg and posix mqueues
>

I plan to separate the interfaces and just share the message stuff.
But time was getting short. :)

2002-10-27 21:53:09

by Peter Waechtler

[permalink] [raw]
Subject: Re: [PATCH] unified SysV and Posix mqueues as FS

Alexander Viro schrieb:
>
> On Sun, 27 Oct 2002, Peter Waechtler wrote:
>
> > I applied the patch from Jakub against 2.5.44
> > There are still open issues but it's important to get this in before
> > feature freeze.
> >
> > While you can implement Posix mqueues in userland (Irix is doing this
> > with fcntl(fd,F_SETLKW,) and shmem) a kernel implementation has some advantages:
>
> *thud*
>
> ioctls on _directories_, of all things?

Pardon? Where are directories used?
create a file, give it a size, mmap it and serialize access to it with locks.
That's all.

2002-10-28 01:48:02

by Alexander Viro

[permalink] [raw]
Subject: Re: [PATCH] unified SysV and Posix mqueues as FS



On Sun, 27 Oct 2002, Peter Waechtler wrote:

> Alexander Viro schrieb:
> >
> > On Sun, 27 Oct 2002, Peter Waechtler wrote:
> >
> > > I applied the patch from Jakub against 2.5.44
> > > There are still open issues but it's important to get this in before
> > > feature freeze.
> > >
> > > While you can implement Posix mqueues in userland (Irix is doing this
> > > with fcntl(fd,F_SETLKW,) and shmem) a kernel implementation has some advantages:
> >
> > *thud*
> >
> > ioctls on _directories_, of all things?
>
> Pardon? Where are directories used?
> create a file, give it a size, mmap it and serialize access to it with locks.
> That's all.

Check your file_operations for root directory.

2002-10-28 10:35:35

by Peter Waechtler

[permalink] [raw]
Subject: Re: [PATCH] unified SysV and Posix mqueues as FS

Alexander Viro schrieb:
>
> On Sun, 27 Oct 2002, Peter Waechtler wrote:
>
> > Alexander Viro schrieb:
> > >
> > > On Sun, 27 Oct 2002, Peter Waechtler wrote:
> > >
> > > > I applied the patch from Jakub against 2.5.44
> > > > There are still open issues but it's important to get this in before
> > > > feature freeze.
> > > >
> > > > While you can implement Posix mqueues in userland (Irix is doing this
> > > > with fcntl(fd,F_SETLKW,) and shmem) a kernel implementation has some advantages:
> > >
> > > *thud*
> > >
> > > ioctls on _directories_, of all things?
> >
> > Pardon? Where are directories used?
> > create a file, give it a size, mmap it and serialize access to it with locks.
> > That's all.
>
> Check your file_operations for root directory.

Umh, misunderstanding: I thought you commented on the Irix part.
And it's not my patch; it's Jakub Jelinek's patch, which I applied against current 2.5.

I don't even see an advantage in having them as a filesystem - I just think
that the SysV and Posix mqueues should share most of the code.

2002-10-28 10:40:18

by Peter Waechtler

[permalink] [raw]
Subject: Re: [PATCH] unified SysV and Posix mqueues as FS

Manfred Spraul schrieb:
>
> Peter Waechtler wrote:
>
> >I plan to separate the interfaces and just share the message stuff.
> >But time was getting short. :)
> >
>
> Ok, you plan to rewrite the patch entirely, and what you have posted is
> a placeholder.
>
> What would the result look like?
> I'm thinking about
> - real syscalls
> - pipefs like filesystem stub, kern-only mounted, not visible for normal
> fs operations.
> - not using the sysv array
>
> Could you check the sus standard if that is permitted? A child would
> inherit the mqueue on fork().
>

Yes, I will check that - but I'm afraid of submitting too late.. :(

> For the locking stuff, the patch should probably depend on the sysv rcu
> patch, it cleans up locking a bit.
>

BTW, please have a look at ipc/util.c - the spinlock is held once
ipc_addid() is called and will deadlock when the array has to grow...

2002-10-28 21:29:37

by Peter Waechtler

[permalink] [raw]
Subject: Re: [PATCH] unified SysV and Posix mqueues as FS

Manfred Spraul schrieb:
>
> Peter Waechtler wrote:
>
> >I plan to separate the interfaces and just share the message stuff.
> >But time was getting short. :)
> >
>
> Ok, you plan to rewrite the patch entirely, and what you have posted is
> a placeholder.
>
> What would the result look like?
> I'm thinking about
> - real syscalls
> - pipefs like filesystem stub, kern-only mounted, not visible for normal
> fs operations.
> - not using the sysv array
>
> Could you check the sus standard if that is permitted? A child would
> inherit the mqueue on fork().
>

SUSv3:
[MSG] The child process shall have its own copy of the message queue
descriptors of the parent. Each of the message descriptors of the child
shall refer to the same open message queue description as the
corresponding message descriptor of the parent.

> For the locking stuff, the patch should probably depend on the sysv rcu
> patch, it cleans up locking a bit.
>

Well, I am a victim of "information hiding" ;-)

msg_lock(id) does not lock a queue, it locks/unlocks the whole array.
Forget my post about a deadlock in ipc_addid()

2002-10-28 21:37:14

by Manfred Spraul

[permalink] [raw]
Subject: Re: [PATCH] unified SysV and Posix mqueues as FS

Peter Waechtler wrote:

>>For the locking stuff, the patch should probably depend on the sysv rcu
>>patch, it cleans up locking a bit.
>>
>>
>>
>
>Well, I am a victim of "information hiding" ;-)
>
>
That's not information hiding - there were 3 different locking
implementations: first one with a per-queue spinlock that didn't support
growing the number of queues at runtime, then one with a global rw lock
and a per-queue spinlock, and the simple, global spinlock version that's
part of 2.3.x-2.5.4x.

>msg_lock(id) does not lock a queue, it locks/unlocks the whole array.
>Forget my post about a deadlock in ipc_addid()
>
>
Which deadlock did you see? With the RCU ipc patch [part of 2.5.44-mm6],
msg_lock again locks the queue, not the whole array. Calls to
ipc_addid() with the msq spinlock acquired are not permitted.

--
Manfred

2002-10-28 22:15:55

by Bill Davidsen

[permalink] [raw]
Subject: Re: [PATCH] unified SysV and Posix mqueues as FS

diff -Nur -X dontdiff vanilla-2.5.44/Documentation/ioctl-number.txt linux-2.5.44/Documentation/ioctl-number.txt
--- vanilla-2.5.44/Documentation/ioctl-number.txt 2002-04-20 18:22:08.000000000 +0200
+++ linux-2.5.44/Documentation/ioctl-number.txt 2002-10-27 15:33:23.000000000 +0100
@@ -186,6 +186,7 @@
0xB0 all RATIO devices in development:
<mailto:[email protected]>
0xB1 00-1F PPPoX <mailto:[email protected]>
+0xB2 00-1F linux/mqueue.h
0xCB 00-1F CBM serial IEC bus in development:
<mailto:[email protected]>

diff -Nur -X dontdiff vanilla-2.5.44/include/linux/mqueue.h linux-2.5.44/include/linux/mqueue.h
--- vanilla-2.5.44/include/linux/mqueue.h 1970-01-01 01:00:00.000000000 +0100
+++ linux-2.5.44/include/linux/mqueue.h 2002-10-23 14:48:31.000000000 +0200
@@ -0,0 +1,37 @@
+#ifndef _LINUX_MQUEUE_H
+#define _LINUX_MQUEUE_H
+
+#include <linux/types.h>
+#include <linux/ioctl.h>
+#include <asm/siginfo.h>
+
+struct mq_attr {
+ long mq_flags; /* O_NONBLOCK or 0 */
+ long mq_maxmsg; /* Maximum number of messages in the queue */
+ long mq_msgsize; /* Maximum size of one message in bytes */
+ long mq_curmsgs; /* Current number of messages in the queue */
+ long __pad[2];
+};
+
+struct mq_open {
+ char *mq_name; /* pathname */
+ int mq_oflag; /* flags */
+ mode_t mq_mode; /* mode */
+ struct mq_attr mq_attr; /* attributes */
+};
+
+struct mq_sndrcv {
+ size_t mq_len; /* message length */
+ long mq_type; /* message type */
+ char *mq_buf; /* message buffer */
+};
+
+#define MQ_OPEN _IOW(0xB2, 0, struct mq_open)
+#define MQ_GETATTR _IOR(0xB2, 1, struct mq_attr)
+#define MQ_SEND _IOW(0xB2, 2, struct mq_sndrcv)
+#define MQ_RECEIVE _IOWR(0xB2, 3, struct mq_sndrcv)
+#define MQ_NOTIFY _IOW(0xB2, 4, struct sigevent)
+
+#define MQ_DEFAULT_TYPE 0x7FFFFFFE
+
+#endif /* _LINUX_MQUEUE_H */
diff -Nur -X dontdiff vanilla-2.5.44/include/linux/msg.h linux-2.5.44/include/linux/msg.h
--- vanilla-2.5.44/include/linux/msg.h 2002-08-10 00:09:02.000000000 +0200
+++ linux-2.5.44/include/linux/msg.h 2002-10-25 20:06:47.000000000 +0200
@@ -2,6 +2,7 @@
#define _LINUX_MSG_H

#include <linux/ipc.h>
+#include <linux/signal.h>

/* ipcs ctl commands */
#define MSG_STAT 11
@@ -49,7 +50,7 @@
unsigned short msgseg;
};

-#define MSGMNI 16 /* <= IPCMNI */ /* max # of msg queue identifiers */
+#define MSGMNI 128 /* <= IPCMNI */ /* max # of msg queue identifiers */
#define MSGMAX 8192 /* <= INT_MAX */ /* max size of message (bytes) */
#define MSGMNB 16384 /* <= INT_MAX */ /* default max size of a message queue */

@@ -63,33 +64,88 @@

#ifdef __KERNEL__

+#define SEARCH_ANY 1
+#define SEARCH_EQUAL 2
+#define SEARCH_NOTEQUAL 3
+#define SEARCH_LESSEQUAL 4
+
+#define DATALEN_MSG (PAGE_SIZE-sizeof(struct msg_msg))
+#define DATALEN_SEG (PAGE_SIZE-sizeof(struct msg_msgseg))
+
+/* used by sys_msgctl(,IPC_SET,) */
+struct msq_setbuf {
+ unsigned long qbytes;
+ uid_t uid;
+ gid_t gid;
+ mode_t mode;
+};
+
+/* one msg_receiver structure for each sleeping receiver */
+struct msg_receiver {
+ struct list_head r_list;
+ struct task_struct *r_tsk;
+
+ int r_mode;
+ long r_msgtype;
+ long r_maxsize;
+
+ struct msg_msg* volatile r_msg;
+};
+
+/* one msg_sender for each sleeping sender */
+struct msg_sender {
+ struct list_head list;
+ struct task_struct *tsk;
+};
+
+struct msg_msgseg {
+ struct msg_msgseg *next;
+ /* the next part of the message follows immediately */
+};
+
/* one msg_msg structure for each message */
struct msg_msg {
struct list_head m_list;
long m_type;
int m_ts; /* message text size */
- struct msg_msgseg* next;
+ struct msg_msgseg *next;
/* the actual message follows immediately */
};

-#define DATALEN_MSG (PAGE_SIZE-sizeof(struct msg_msg))
-#define DATALEN_SEG (PAGE_SIZE-sizeof(struct msg_msgseg))
+struct mq_link {
+ struct list_head link;
+ struct task_struct *tsk;
+ struct mq_attr *attr;
+};

/* one msq_queue structure for each present queue on the system */
struct msg_queue {
struct kern_ipc_perm q_perm;
- time_t q_stime; /* last msgsnd time */
- time_t q_rtime; /* last msgrcv time */
- time_t q_ctime; /* last change time */
- unsigned long q_cbytes; /* current number of bytes on queue */
- unsigned long q_qnum; /* number of messages in queue */
- unsigned long q_qbytes; /* max number of bytes on queue */
- pid_t q_lspid; /* pid of last msgsnd */
- pid_t q_lrpid; /* last receive pid */
+#define q_flags q_perm.mode
+ time_t q_stime; /* last msgsnd time */
+ time_t q_rtime; /* last msgrcv time */
+ time_t q_ctime; /* last change time */
+ unsigned long q_cbytes; /* current number of bytes on queue */
+ unsigned long q_qnum; /* number of messages in queue */
+ unsigned long q_qbytes; /* max number of bytes on queue */
+
+ unsigned int q_msgsize; /* max number of bytes for one message */
+ unsigned int q_maxmsg; /* max number of outstanding messages */
+
+ pid_t q_lspid; /* pid of last msgsnd */
+ pid_t q_lrpid; /* last receive pid */
+
+ int q_signo; /* signal to be sent if empty queue with no waiting
+ receivers should be sent */
+ pid_t q_pid; /* to which pid */
+ sigval_t q_sigval; /* which value to pass */
+ int id;

struct list_head q_messages;
struct list_head q_receivers;
struct list_head q_senders;
+ unsigned int q_namelen;
+ unsigned char q_name[0];
};

asmlinkage long sys_msgget (key_t key, int msgflg);
diff -Nur -X dontdiff vanilla-2.5.44/ipc/msg.c linux-2.5.44/ipc/msg.c
--- vanilla-2.5.44/ipc/msg.c 2002-10-13 23:03:57.000000000 +0200
+++ linux-2.5.44/ipc/msg.c 2002-10-27 15:42:12.000000000 +0100
@@ -13,15 +13,23 @@
* mostly rewritten, threaded and wake-one semantics added
* MSGMAX limit removed, sysctl's added
* (c) 1999 Manfred Spraul <[email protected]>
+ *
+ * make it a filesystem (based on Christoph Rohland's work on shmfs),
+ * (c) 2000 Jakub Jelinek <[email protected]>
+ * adapted and cleaned up for 2.5.44 by Peter Wächtler <[email protected]>
*/

#include <linux/config.h>
#include <linux/slab.h>
-#include <linux/msg.h>
#include <linux/spinlock.h>
#include <linux/init.h>
+#include <linux/fs.h>
#include <linux/proc_fs.h>
#include <linux/list.h>
+#include <linux/signal.h>
+#include <linux/mqueue.h>
+#include <linux/msg.h>
+#include <linux/namei.h>
#include <linux/security.h>
#include <asm/uaccess.h>
#include "util.h"
@@ -30,34 +38,87 @@
int msg_ctlmax = MSGMAX;
int msg_ctlmnb = MSGMNB;
int msg_ctlmni = MSGMNI;
+static int msg_mode;
+
+#define MSG_FS_MAGIC 822419456
+
+#define MSG_NAME_LEN NAME_MAX
+#define MSG_FMT ".IPC_%08x"
+#define MSG_FMT_LEN 13
+
+#define MSG_UNLK 0010000 /* filename is unlinked */
+#define MSG_SYSV 0020000 /* It is a SYSV message queue */
+
+static struct super_block * msg_sb;
+
+static struct super_block *msg_read_super(struct file_system_type *,int , char *, void *);
+static void msg_put_super(struct super_block *);
+static int msg_remount_fs(struct super_block *, int *, char *);
+static void msg_fill_inode(struct inode *);
+static int msg_statfs(struct super_block *, struct statfs *);
+static int msg_create(struct inode *,struct dentry *,int);
+static struct dentry *msg_lookup(struct inode *,struct dentry *);
+static int msg_unlink(struct inode *,struct dentry *);
+static int msg_setattr(struct dentry *dent, struct iattr *attr);
+static void msg_delete(struct inode *);
+static int msg_readdir(struct file *, void *, filldir_t);
+static int msg_remove_name(int id);
+static int msg_ioctl(struct inode *, struct file *, unsigned int, unsigned long);
+static int msg_root_ioctl(struct inode *, struct file *, unsigned int, unsigned long);
+static ssize_t msg_read(struct file *, char *, size_t, loff_t *);
+static ssize_t msg_write(struct file *, const char *, size_t, loff_t *);
+/* FIXME: Support poll on mq
+static unsigned int msg_poll(struct file *, poll_table *);
+ */
+static ssize_t msg_send (struct inode *, struct file *, const char *, size_t, long);
+static ssize_t msg_receive (struct inode *, struct file *, char *, size_t, long *);
+static int msg_flush (struct file *);
+static int msg_release (struct inode *, struct file *);

-/* one msg_receiver structure for each sleeping receiver */
-struct msg_receiver {
- struct list_head r_list;
- struct task_struct* r_tsk;
-
- int r_mode;
- long r_msgtype;
- long r_maxsize;
+static void freeque (int id);
+static int newque (key_t key, const char *name, int namelen, struct mq_attr *attr, int msgflg);

- struct msg_msg* volatile r_msg;
+static struct file_system_type msg_fs_type = {
+ .name = "msgfs",
+ .get_sb = msg_read_super,
+ .kill_sb = kill_litter_super,
};

-/* one msg_sender for each sleeping sender */
-struct msg_sender {
- struct list_head list;
- struct task_struct* tsk;
+static struct super_operations msg_sops = {
+ .read_inode= msg_fill_inode,
+ .delete_inode= msg_delete,
+ .put_super= msg_put_super,
+ .statfs= msg_statfs,
+ .remount_fs= msg_remount_fs,
};

-struct msg_msgseg {
- struct msg_msgseg* next;
- /* the next part of the message follows immediately */
+static struct file_operations msg_root_operations = {
+ .readdir= msg_readdir,
+ .ioctl= msg_root_ioctl,
};

-#define SEARCH_ANY 1
-#define SEARCH_EQUAL 2
-#define SEARCH_NOTEQUAL 3
-#define SEARCH_LESSEQUAL 4
+static struct inode_operations msg_root_inode_operations = {
+ .create= msg_create,
+ .lookup= msg_lookup,
+ .unlink= msg_unlink,
+};
+
+static struct file_operations msg_file_operations = {
+ .read= msg_read,
+ .write= msg_write,
+ .ioctl= msg_ioctl,
+/* FIXME: Support poll on mq *
+ poll= msg_poll,
+ */
+ .flush= msg_flush,
+ .release= msg_release,
+};
+
+static struct inode_operations msg_inode_operations = {
+ .setattr= msg_setattr,
+};
+
+static LIST_HEAD(mq_open_links);

static atomic_t msg_bytes = ATOMIC_INIT(0);
static atomic_t msg_hdrs = ATOMIC_INIT(0);
@@ -67,33 +128,529 @@
#define msg_lock(id) ((struct msg_queue*)ipc_lock(&msg_ids,id))
#define msg_unlock(id) ipc_unlock(&msg_ids,id)
#define msg_rmid(id) ((struct msg_queue*)ipc_rmid(&msg_ids,id))
-#define msg_checkid(msq, msgid) \
- ipc_checkid(&msg_ids,&msq->q_perm,msgid)
-#define msg_buildid(id, seq) \
- ipc_buildid(&msg_ids, id, seq)
+#define msg_get(id) ((struct msg_queue*)ipc_get(&msg_ids,id))
+#define msg_buildid(id, seq) ipc_buildid(&msg_ids, id, seq)

-static void freeque (int id);
-static int newque (key_t key, int msgflg);
#ifdef CONFIG_PROC_FS
static int sysvipc_msg_read_proc(char *buffer, char **start, off_t offset, int length, int *eof, void *data);
#endif

void __init msg_init (void)
{
+ struct vfsmount *res;
ipc_init_ids(&msg_ids,msg_ctlmni);
-
+ register_filesystem (&msg_fs_type);
+ res = kern_mount(&msg_fs_type);
+ if (IS_ERR(res)) {
+ unregister_filesystem(&msg_fs_type);
+ return;
+ }
#ifdef CONFIG_PROC_FS
create_proc_read_entry("sysvipc/msg", 0, 0, sysvipc_msg_read_proc, NULL);
#endif
}

-static int newque (key_t key, int msgflg)
+/*
+ * Parse a comma separated mount option string ("nr_blocks=", "nr_inodes=",
+ * "mode=").  Every option needs a non-empty numeric value with no trailing
+ * characters; "mode" is read as octal, the others with automatic base.
+ *
+ * Returns 0 on success and 1 on any unknown or malformed option.  Only
+ * msg_mode is updated, and only when the whole string parsed cleanly.
+ */
+static int msg_parse_options(char *options)
+{
+	int blocks = msg_ctlmnb * msg_ctlmni;
+	int inodes = msg_ctlmni;
+	umode_t mode = msg_mode;
+	char *opt;
+
+	while (options && (opt = strsep(&options, ",")) != NULL) {
+		char *arg = strchr(opt, '=');
+		char *end;
+		unsigned long num;
+		int is_blocks, is_inodes, is_mode;
+
+		/* split "name=value" in place */
+		if (arg != NULL)
+			*arg++ = 0;
+
+		is_blocks = !strcmp(opt, "nr_blocks");
+		is_inodes = !is_blocks && !strcmp(opt, "nr_inodes");
+		is_mode = !is_blocks && !is_inodes && !strcmp(opt, "mode");
+		if (!is_blocks && !is_inodes && !is_mode)
+			return 1;
+
+		if (arg == NULL || *arg == 0)
+			return 1;
+		num = simple_strtoul(arg, &end, is_mode ? 8 : 0);
+		if (*end)
+			return 1;
+
+		if (is_blocks)
+			blocks = num;
+		else if (is_inodes)
+			inodes = num;
+		else
+			mode = num;
+	}
+/* FIXME: blocks/inodes are parsed but not applied yet *
+	msg_ctlmni = inodes;
+	msg_ctlmnb = inodes ? blocks / inodes : 0;
+ */
+	msg_mode = mode;
+
+	return 0;
+}
+
+/*
+ * Fill in the superblock for the msg filesystem.
+ *
+ * Resets msg_mode to its default, applies the mount options in @data,
+ * sets the block size/magic/super operations and creates the root
+ * directory inode (inode number SEQ_MULTIPLIER) and its dentry.  On
+ * success the superblock is remembered in msg_sb.
+ *
+ * Returns 0 on success, -EINVAL for bad options, -ENOMEM if the root
+ * inode or root dentry cannot be obtained.  @silent is not used.
+ */
+static int
+msg_fill_super (struct super_block *sb, void *data, int silent)
+{
+ struct inode * root_inode;
+
+/* FIXME *
+ msg_ctlmnb = MSGMNB;
+ msg_ctlmni = MSGMNI;
+ */
+ msg_mode = S_IRWXUGO | S_ISVTX;
+ if (msg_parse_options (data)) {
+ printk(KERN_ERR "msg fs invalid option\n");
+ return -EINVAL;
+ }
+
+ sb->s_blocksize = PAGE_SIZE;
+ sb->s_blocksize_bits = PAGE_SHIFT;
+ sb->s_magic = MSG_FS_MAGIC;
+ sb->s_op = &msg_sops;
+ /* inode numbers below SEQ_MULTIPLIER are queue ids; this one is the root */
+ root_inode = iget (sb, SEQ_MULTIPLIER);
+ if (!root_inode)
+ return -ENOMEM;
+ root_inode->i_op = &msg_root_inode_operations;
+ root_inode->i_sb = sb;
+ root_inode->i_nlink = 2;
+ root_inode->i_mode = S_IFDIR | msg_mode;
+ sb->s_root = d_alloc_root(root_inode);
+ if (!sb->s_root)
+ goto out_no_root;
+ msg_sb = sb;
+ return 0;
+
+out_no_root:
+ printk(KERN_ERR "msg_fill_super: get root inode failed\n");
+ /* drop the reference taken by iget above */
+ iput(root_inode);
+ return -ENOMEM;
+}
+
+/*
+ * Mount entry point: hand the request to get_sb_single() with
+ * msg_fill_super as the fill callback.  @dev_name is ignored.
+ */
+static struct super_block *msg_read_super(struct file_system_type *fs_type,
+	int flags, char *dev_name, void *data)
+{
+	struct super_block *sb;
+
+	sb = get_sb_single(fs_type, flags, data, msg_fill_super);
+	return sb;
+}
+
+/*
+ * Remount: re-parse the option string.  Returns -EINVAL if
+ * msg_parse_options rejects it, 0 otherwise.
+ */
+static int msg_remount_fs (struct super_block *sb, int *flags, char *data)
+{
+	return msg_parse_options(data) ? -EINVAL : 0;
+}
+
+/*
+ * Validate a SysV msqid against a queue.
+ * Returns -EINVAL if the queue does not carry MSG_SYSV, -EIDRM if
+ * ipc_checkid() reports a mismatch for @id, and 0 otherwise.
+ */
+static inline int msg_checkid(struct msg_queue *msq, int id)
+{
+	if ((msq->q_flags & MSG_SYSV) == 0)
+		return -EINVAL;
+	return ipc_checkid(&msg_ids, &msq->q_perm, id) ? -EIDRM : 0;
+}
+
+/*
+ * Superblock teardown: under msg_ids.sem, free every queue slot that
+ * is still in use, then drop the root dentry.
+ */
+static void msg_put_super(struct super_block *sb)
+{
+ int i;
+ struct msg_queue *msq;
+
+ down(&msg_ids.sem);
+ for(i = 0; i <= msg_ids.max_id; i++) {
+ /* empty slot: nothing to free */
+ if (!(msq = msg_lock (i)))
+ continue;
+ /* NOTE(review): freeque is entered with the queue lock held and
+ * is presumably responsible for releasing it -- confirm */
+ freeque(i);
+ }
+ dput (sb->s_root);
+ up(&msg_ids.sem);
+}
+
+/*
+ * statfs for the msg filesystem.  Totals are derived from the
+ * msg_ctlmnb/msg_ctlmni limits, usage from the global msg_bytes and
+ * msg_hdrs counters.  Always returns 0.
+ */
+static int msg_statfs(struct super_block *sb, struct statfs *buf)
+{
+	/* fixed properties */
+	buf->f_type = MSG_FS_MAGIC;
+	buf->f_bsize = PAGE_SIZE;
+	buf->f_namelen = MSG_NAME_LEN;
+
+	/* space, in PAGE_SIZE units */
+	buf->f_blocks = (msg_ctlmnb * msg_ctlmni) >> PAGE_SHIFT;
+	buf->f_bfree = buf->f_blocks - (atomic_read(&msg_bytes) >> PAGE_SHIFT);
+	buf->f_bavail = buf->f_bfree;
+
+	/* "files" are queued message headers */
+	buf->f_files = msg_ctlmni;
+	buf->f_ffree = msg_ctlmni - atomic_read(&msg_hdrs);
+	return 0;
+}
+
+/*
+ * Initialise an in-core inode from its inode number.
+ *
+ * Numbers below SEQ_MULTIPLIER are treated as queue ids: mode, owner,
+ * size and timestamps are copied from the queue and the per-queue
+ * inode/file operations are installed.  If no queue exists for the id
+ * the inode is left with i_op == NULL and i_mode == 0.  Any other
+ * number is set up as the root directory.
+ */
+static void msg_fill_inode(struct inode * inode)
+{
+ int id;
+ struct msg_queue *msq;
+ id = inode->i_ino;
+ inode->i_op = NULL;
+ inode->i_mode = 0;
+
+ if (id < SEQ_MULTIPLIER) {
+ if (!(msq = msg_lock (id)))
+ return;
+ /* permission bits are kept in q_flags; queues show up as FIFOs */
+ inode->i_mode = (msq->q_flags & S_IRWXUGO) | S_IFIFO;
+ inode->i_uid = msq->q_perm.uid;
+ inode->i_gid = msq->q_perm.gid;
+ inode->i_size = msq->q_cbytes;
+ inode->i_mtime = msq->q_stime;
+ /* atime is the later of the last send and the last receive */
+ inode->i_atime = msq->q_stime > msq->q_rtime ? msq->q_stime : msq->q_rtime;
+ inode->i_ctime = msq->q_ctime;
+ msg_unlock (id);
+ inode->i_op = &msg_inode_operations;
+ inode->i_fop = &msg_file_operations;
+ return;
+ }
+ /* root directory */
+ inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
+ inode->i_op = &msg_root_inode_operations;
+ inode->i_fop = &msg_root_operations;
+ inode->i_sb = msg_sb;
+ inode->i_nlink = 2;
+ inode->i_mode = S_IFDIR | msg_mode;
+ inode->i_uid = inode->i_gid = 0;
+}
+
+/*
+ * ->create for the root directory: make a new named queue.
+ *
+ * Queue attributes default to 32 messages of 64 bytes.  If the current
+ * task has an entry on mq_open_links (placed there by msg_root_ioctl
+ * around its sys_open call) the attributes from that entry are used
+ * instead.
+ *
+ * Returns 0 on success, a negative error from newque, or -ENOMEM if
+ * no inode could be obtained for the new queue.
+ */
+static int msg_create (struct inode *dir, struct dentry *dent, int mode)
+{
+ int id, err;
+ struct inode *inode;
+ struct mq_attr attr, *p;
+ struct list_head *tmp;
+
+ /* only these two fields of the default attributes are filled in */
+ attr.mq_maxmsg = 32;
+ attr.mq_msgsize = 64;
+ p = &attr;
+
+ down(&msg_ids.sem);
+ list_for_each(tmp, &mq_open_links) {
+ struct mq_link *l = list_entry(tmp, struct mq_link, link);
+ if (l->tsk == current) {
+ p = l->attr;
+ break;
+ }
+ }
+ err = id = newque (IPC_PRIVATE, dent->d_name.name, dent->d_name.len, p, mode);
+ if (err < 0)
+ goto out;
+
+ /* the inode number is the id with the sequence part stripped */
+ inode = iget (msg_sb, id % SEQ_MULTIPLIER);
+ if (!inode){
+ /* NOTE(review): the queue just created by newque is not
+ * released on this path -- confirm whether that is intended */
+ err = -ENOMEM;
+ goto out;
+ }
+ err = 0;
+ down (&inode->i_sem);
+ inode->i_mode = (mode & S_IRWXUGO) | S_IFIFO;
+ inode->i_op = &msg_inode_operations;
+ d_instantiate(dent, inode);
+ up (&inode->i_sem);
+
+out:
+ up(&msg_ids.sem);
+ return err;
+}
+
+/*
+ * ->readdir for the root directory.
+ *
+ * Position 0 is ".", position 1 is "..", and position n >= 2 refers to
+ * queue slot n-2.  Empty slots and queues flagged MSG_UNLK are skipped.
+ * Stops early when filldir reports that the caller's buffer is full.
+ * Always returns 0.
+ */
+static int msg_readdir (struct file *filp, void *dirent, filldir_t filldir)
+{
+ struct inode * inode = filp->f_dentry->d_inode;
+ struct msg_queue *msq;
+ off_t nr;
+
+ nr = filp->f_pos;
+
+ switch(nr)
+ {
+ case 0:
+ if (filldir(dirent, ".", 1, nr, inode->i_ino, DT_DIR) < 0)
+ return 0;
+ filp->f_pos = ++nr;
+ /* fall through */
+ case 1:
+ if (filldir(dirent, "..", 2, nr, inode->i_ino, DT_DIR) < 0)
+ return 0;
+ filp->f_pos = ++nr;
+ /* fall through */
+ default:
+ down(&msg_ids.sem);
+ for (; nr-2 <= msg_ids.max_id; nr++) {
+ if (!(msq = msg_get (nr-2)))
+ continue;
+ if (msq->q_flags & MSG_UNLK)
+ continue;
+ /* NOTE(review): the inode number passed here is nr, while
+ * msg_lookup uses the slot index (nr-2) -- confirm */
+ if (filldir(dirent, msq->q_name, msq->q_namelen, nr, nr, DT_FIFO) < 0)
+ break;;
+ }
+ /* remember where to resume on the next call */
+ filp->f_pos = nr;
+ up(&msg_ids.sem);
+ break;
+ }
+
+ UPDATE_ATIME(inode);
+ return 0;
+}
+
+/*
+ * ->lookup for the root directory: find a queue by name.
+ *
+ * Scans every queue slot under msg_ids.sem for a queue that is not
+ * flagged MSG_UNLK and whose name matches @dent.  On a match the
+ * queue's inode is attached to the dentry; otherwise a negative dentry
+ * is added, except for names that look like the reserved MSG_FMT
+ * pattern, which fail with -EINVAL.
+ *
+ * Returns NULL on success (the dentry has been d_add'ed) or an
+ * ERR_PTR: -ENAMETOOLONG, -EINVAL or -EACCES.
+ */
+static struct dentry *msg_lookup (struct inode *dir, struct dentry *dent)
+{
+ int i, err = 0;
+ struct msg_queue* msq;
+ struct inode *inode = NULL;
+
+ if (dent->d_name.len > MSG_NAME_LEN)
+ return ERR_PTR(-ENAMETOOLONG);
+
+ down(&msg_ids.sem);
+ for(i = 0; i <= msg_ids.max_id; i++) {
+ if (!(msq = msg_lock(i)))
+ continue;
+ if (!(msq->q_flags & MSG_UNLK) &&
+ dent->d_name.len == msq->q_namelen &&
+ strncmp(dent->d_name.name, msq->q_name, msq->q_namelen) == 0)
+ goto found;
+ msg_unlock(i);
+ }
+
+ /*
+ * prevent the reserved names as negative dentries.
+ * This also prevents object creation through the filesystem
+ */
+ if (dent->d_name.len == MSG_FMT_LEN &&
+ memcmp (MSG_FMT, dent->d_name.name, MSG_FMT_LEN - 8) == 0)
+ err = -EINVAL; /* EINVAL to give IPC_RMID the right error */
+
+ goto out;
+
+found:
+ /* i is the slot index of the matching queue and doubles as inode number */
+ msg_unlock(i);
+ inode = iget(dir->i_sb, i);
+
+ if (!inode)
+ err = -EACCES;
+out:
+ /* inode is still NULL when nothing matched: that adds a negative dentry */
+ if (err == 0)
+ d_add (dent, inode);
+ up (&msg_ids.sem);
+ return ERR_PTR(err);
+}
+
+/*
+ * Common unlink code for the filesystem path (@sysv == 0) and the
+ * IPC_RMID path (@sysv != 0).
+ *
+ * With @sysv set, the queue must carry MSG_SYSV (-EINVAL otherwise)
+ * and the caller's euid must match the queue's creator or owner uid
+ * unless it has CAP_SYS_ADMIN (-EPERM otherwise).
+ *
+ * The queue is flagged MSG_UNLK and its key reset to IPC_PRIVATE; the
+ * inode's link count is then decremented.  Returns 0 on success.
+ */
+static inline int msg_do_unlink (struct inode *dir, struct dentry *dent, int sysv)
+{
+ struct inode * inode = dent->d_inode;
+ struct msg_queue *msq;
+
+ down (&msg_ids.sem);
+ if (!(msq = msg_lock (inode->i_ino)))
+ BUG();
+ if (sysv) {
+ int ret = 0;
+
+ if (!(msq->q_flags & MSG_SYSV))
+ ret = -EINVAL;
+ else if (current->euid != msq->q_perm.cuid &&
+ current->euid != msq->q_perm.uid && !capable(CAP_SYS_ADMIN))
+ ret = -EPERM;
+ if (ret) {
+ msg_unlock (inode->i_ino);
+ up (&msg_ids.sem);
+ return ret;
+ }
+ }
+ msq->q_flags |= MSG_UNLK;
+ msq->q_perm.key = IPC_PRIVATE; /* Do not find it any more */
+ msg_unlock (inode->i_ino);
+ up (&msg_ids.sem);
+ inode->i_nlink -= 1;
+ /*
+ * If it's a reserved name we have to drop the dentry instead
+ * of creating a negative dentry
+ */
+ if (dent->d_name.len == MSG_FMT_LEN &&
+ memcmp (MSG_FMT, dent->d_name.name, MSG_FMT_LEN - 8) == 0)
+ d_drop (dent);
+ return 0;
+}
+
+/*
+ * ->unlink for the root directory: shared unlink code without the
+ * SysV ownership check.
+ */
+static int msg_unlink (struct inode *dir, struct dentry *dent)
+{
+	const int sysv_check = 0;
+
+	return msg_do_unlink(dir, dent, sysv_check);
+}
+/*
+ * ->setattr for queue inodes.
+ *
+ * Size changes are refused with -EINVAL.  Mode, uid and gid changes
+ * are mirrored into the queue (permission bits live in q_flags) before
+ * the generic inode_setattr is applied.  Returns the result of
+ * inode_change_ok, i.e. 0 when the change was permitted.
+ */
+static int msg_setattr (struct dentry *dentry, struct iattr *attr)
+{
+ int error;
+ struct inode *inode = dentry->d_inode;
+ struct msg_queue *msq;
+
+ error = inode_change_ok(inode, attr);
+ if (error)
+ return error;
+ if (attr->ia_valid & ATTR_SIZE)
+ return -EINVAL;
+
+ if (attr->ia_valid & (ATTR_MODE | ATTR_UID | ATTR_GID)) {
+ if (!(msq = msg_lock(inode->i_ino)))
+ BUG();
+ if (attr->ia_valid & ATTR_MODE)
+ msq->q_flags = (msq->q_flags & ~S_IRWXUGO)
+ | (S_IRWXUGO & attr->ia_mode);
+ if (attr->ia_valid & ATTR_UID)
+ msq->q_perm.uid = attr->ia_uid;
+ if (attr->ia_valid & ATTR_GID)
+ msq->q_perm.gid = attr->ia_gid;
+ /* NOTE(review): ia_ctime is copied without testing ATTR_CTIME -- confirm */
+ msq->q_ctime = attr->ia_ctime;
+ msg_unlock (inode->i_ino);
+ }
+
+ /* the return value of inode_setattr is not checked */
+ inode_setattr(inode, attr);
+ return error;
+}
+
+/*
+ * ioctl on the root directory; only MQ_OPEN is accepted.
+ *
+ * Copies a struct mq_open from user space, checks the requested
+ * attributes against msg_ctlmnb and then calls sys_open on the given
+ * name.  While sys_open runs, an on-stack mq_link naming the current
+ * task and the requested attributes sits on mq_open_links so that
+ * msg_create can pick the attributes up.
+ *
+ * Returns the result of sys_open, -EINVAL for an unknown command or
+ * out-of-range attributes, or -EFAULT if the argument cannot be read.
+ */
+static int msg_root_ioctl (struct inode * inode, struct file * filp, unsigned int cmd, unsigned long arg)
+{
+ struct mq_open o;
+ struct mq_link link;
+ int ret;
+
+ if (cmd != MQ_OPEN)
+ return -EINVAL;
+ ret = -EFAULT;
+ if (copy_from_user(&o, (struct mq_open *)arg, sizeof(struct mq_open)))
+ goto out;
+ ret = -EINVAL;
+ /* the unsigned casts make negative values fail the first two tests */
+ if ((unsigned long)o.mq_attr.mq_msgsize > msg_ctlmnb ||
+ (unsigned long)o.mq_attr.mq_maxmsg > msg_ctlmnb ||
+ o.mq_attr.mq_msgsize * o.mq_attr.mq_maxmsg > msg_ctlmnb)
+ goto out;
+ link.attr = &o.mq_attr;
+ link.tsk = current;
+ down(&msg_ids.sem);
+ list_add(&link.link, &mq_open_links);
+ up(&msg_ids.sem);
+ /* FIXME: Shouldn't we check here whether mq_name is really a file within the msg filesystem?
+ Otherwise people tracing the open(2) syscall might miss this place... */
+ ret = sys_open(o.mq_name, o.mq_oflag, o.mq_mode);
+ /* link lives on this stack frame: it must come off the list before returning */
+ down(&msg_ids.sem);
+ list_del(&link.link);
+ up(&msg_ids.sem);
+out:
+ return ret;
+}
+
+/*
+ * ioctl on an open queue.
+ *
+ *   MQ_GETATTR  copy the queue limits, the current message count and the
+ *               file's O_NONBLOCK flag to a user struct mq_attr
+ *   MQ_SEND     send one typed message (needs FMODE_WRITE, mq_type > 0)
+ *   MQ_RECEIVE  receive one message (needs FMODE_READ); returns the
+ *               number of bytes stored and writes the message type back
+ *               into the user's struct mq_sndrcv
+ *   MQ_NOTIFY   register the calling process for a signal when a message
+ *               arrives; -EBUSY if a registration already exists
+ *
+ * Returns a negative error code on failure; unknown commands yield
+ * -EINVAL.
+ */
+static int msg_ioctl (struct inode * inode, struct file * filp, unsigned int cmd, unsigned long arg)
+{
+	int ret = -EINVAL;
+	struct msg_queue *msq;
+	struct mq_sndrcv sr;
+
+	switch (cmd) {
+	case MQ_GETATTR: {
+		struct mq_attr attr;
+		/* zero first so the padding never leaks kernel stack */
+		memset(&attr, 0, sizeof(attr));
+		msq = msg_lock (inode->i_ino);
+		if (msq == NULL)
+			BUG();
+		attr.mq_maxmsg = msq->q_maxmsg;
+		attr.mq_msgsize = msq->q_msgsize;
+		attr.mq_curmsgs = msq->q_qnum;
+		attr.mq_flags = filp->f_flags & O_NONBLOCK;
+		msg_unlock (inode->i_ino);
+		ret = copy_to_user((struct mq_attr *)arg, &attr, sizeof(attr)) ? -EFAULT : 0;
+		break;
+	}
+	case MQ_SEND:
+		ret = -EBADF;
+		if (!(filp->f_mode & FMODE_WRITE))
+			break;
+		ret = -EFAULT;
+		if (copy_from_user(&sr, (struct mq_sndrcv *)arg, sizeof(sr)))
+			break;
+		ret = -EINVAL;
+		if (sr.mq_type <= 0)
+			break;
+		ret = msg_send (inode, filp, sr.mq_buf, sr.mq_len, sr.mq_type);
+		break;
+	case MQ_RECEIVE:
+		ret = -EBADF;
+		if (!(filp->f_mode & FMODE_READ))
+			break;
+		ret = -EFAULT;
+		if (copy_from_user(&sr, (struct mq_sndrcv *)arg, sizeof(sr)))
+			break;
+		ret = msg_receive (inode, filp, sr.mq_buf, sr.mq_len, &sr.mq_type);
+		/*
+		 * msg_receive returns the byte count on success, so every
+		 * non-negative value is a success and the type must be
+		 * handed back (testing for 0 only covered empty messages).
+		 */
+		if (ret >= 0 && put_user (sr.mq_type, &((struct mq_sndrcv *)arg)->mq_type))
+			ret = -EFAULT;
+		break;
+	case MQ_NOTIFY: {
+		struct sigevent sev;
+		struct msg_queue *msg;
+		ret = -EFAULT;
+		if (copy_from_user(&sev, (struct sigevent *)arg, sizeof(sev)))
+			break;
+		ret = -EINVAL;
+		if (sev.sigev_notify != SIGEV_SIGNAL && sev.sigev_notify != SIGEV_NONE)
+			break;
+		if (sev.sigev_signo <= 0 || sev.sigev_signo > _NSIG)
+			break;
+		msg = msg_lock(inode->i_ino);
+		if (!msg) BUG();
+		ret = 0;
+		if (msg->q_signo)
+			ret = -EBUSY;
+		else if (sev.sigev_notify == SIGEV_SIGNAL) {
+			msg->q_signo = sev.sigev_signo;
+			msg->q_sigval = sev.sigev_value;
+			/*
+			 * Remember who registered: msg_do_send looks the
+			 * target up by q_pid and msg_release compares it
+			 * with the closing process.
+			 */
+			msg->q_pid = current->pid;
+		} else
+			msg->q_signo = 0;
+		msg_unlock(inode->i_ino);
+		break;
+	}
+	default:
+		break;
+	}
+	return ret;
+}
+
+/*
+ * write(2) on a queue: send the buffer as one message of type
+ * MQ_DEFAULT_TYPE.  Returns @count on success, otherwise the error
+ * from msg_send.  @ppos is not used.
+ */
+static ssize_t msg_write(struct file * file,
+	const char * buf, size_t count, loff_t *ppos)
+{
+	struct inode *ino = file->f_dentry->d_inode;
+	int err;
+
+	err = msg_send(ino, file, buf, count, MQ_DEFAULT_TYPE);
+	if (err)
+		return err;
+	return count;
+}
+
+/*
+ * read(2) on a queue: receive one message into @buf without reporting
+ * its type.  Returns whatever msg_receive returns.  @ppos is not used.
+ */
+static ssize_t msg_read(struct file * file,
+	char * buf, size_t count, loff_t *ppos)
+{
+	struct inode *ino = file->f_dentry->d_inode;
+
+	return msg_receive(ino, file, buf, count, NULL);
+}
+
+/*
+ * ->release for a queue: if a notification signal is registered and
+ * q_pid is the calling process, clear the registration.  Always
+ * returns 0.
+ */
+static int msg_release (struct inode *ino, struct file *filp)
+{
+	struct msg_queue *msq;
+
+	msq = msg_lock(ino->i_ino);
+	if (msq == NULL)
+		BUG();
+	if (msq->q_signo != 0) {
+		if (msq->q_pid == current->pid)
+			msq->q_signo = 0;
+	}
+	msg_unlock(ino->i_ino);
+	return 0;
+}
+
+/*
+ * ->flush for a queue: same work as msg_release on the file's inode.
+ */
+static int msg_flush (struct file *filp)
+{
+	struct inode *ino = filp->f_dentry->d_inode;
+
+	return msg_release(ino, filp);
+}
+
+static int newque (key_t key, const char *name, int namelen,
+ struct mq_attr *attr, int msgflg)
{
int id;
int retval;
struct msg_queue *msq;

- msq = (struct msg_queue *) kmalloc (sizeof (*msq), GFP_KERNEL);
+ if (namelen > MSG_NAME_LEN)
+ return -ENAMETOOLONG;
+ msq = (struct msg_queue *) kmalloc (sizeof (*msq) + namelen, GFP_KERNEL);
+
if (!msq)
return -ENOMEM;

@@ -113,18 +670,94 @@
kfree(msq);
return -ENOSPC;
}
+ msq->q_flags = (msgflg & S_IRWXUGO);
+ msq->q_perm.key = key;

msq->q_stime = msq->q_rtime = 0;
msq->q_ctime = CURRENT_TIME;
msq->q_cbytes = msq->q_qnum = 0;
msq->q_qbytes = msg_ctlmnb;
msq->q_lspid = msq->q_lrpid = 0;
+ msq->q_signo = 0;
+
INIT_LIST_HEAD(&msq->q_messages);
INIT_LIST_HEAD(&msq->q_receivers);
INIT_LIST_HEAD(&msq->q_senders);
+ msq->id = msg_buildid(id, msq->q_perm.seq);
+ if (name) {
+ msq->q_maxmsg = attr->mq_maxmsg;
+ msq->q_msgsize = attr->mq_msgsize;
+ msq->q_qbytes = msq->q_maxmsg * msq->q_msgsize;
+ msq->q_namelen = namelen;
+ memcpy(msq->q_name, name, namelen);
+ } else {
+ msq->q_qbytes = msg_ctlmnb;
+ msq->q_maxmsg = msg_ctlmnb;
+ msq->q_msgsize = msg_ctlmax;
+ msq->q_flags |= MSG_SYSV;
+ msq->q_namelen = sprintf(msq->q_name, MSG_FMT, msq->id);
+ }
msg_unlock(id);

- return msg_buildid(id,msq->q_perm.seq);
+ return msq->id;
+}
+
+/*
+ * ->delete_inode: free the queue whose id is the inode number, under
+ * msg_ids.sem, then clear the inode.  A missing queue is treated as a
+ * kernel bug.
+ */
+/* FIXME: maybe we need lock_kernel() here */
+static void msg_delete (struct inode *ino)
+{
+ int msgid = ino->i_ino;
+ struct msg_queue *msq;
+
+ down(&msg_ids.sem);
+ msq = msg_lock(msgid);
+ if(msq==NULL)
+ BUG();
+ /* NOTE(review): freeque is entered with the queue lock held and is
+ * presumably responsible for releasing it -- confirm */
+ freeque(msgid);
+ up(&msg_ids.sem);
+ clear_inode(ino);
+}
+
+/*
+ * IPC_RMID back end: unlink the filesystem name of a SysV queue.
+ *
+ * The msqid is validated under msg_ids.sem, the reserved name is
+ * rebuilt from the queue id with MSG_FMT, and the dentry is looked up
+ * in the root directory and unlinked through msg_do_unlink with the
+ * SysV ownership check enabled.
+ *
+ * Returns 0 on success, -EINVAL if no queue exists for @msqid or it is
+ * not a SysV queue, -EIDRM if the id is stale, or the error from the
+ * lookup or from msg_do_unlink.
+ */
+static int msg_remove_name(int msqid)
+{
+	struct dentry *dir;
+	struct dentry *dentry;
+	struct msg_queue *msq;
+	int error, id;
+	char name[MSG_FMT_LEN+1];
+
+	down(&msg_ids.sem);
+	msq = msg_lock(msqid);
+	if (msq == NULL) {
+		/* do not leave msg_ids.sem held on the error path */
+		up(&msg_ids.sem);
+		return -EINVAL;
+	}
+	id = msq->id;
+	error = msg_checkid(msq, msqid);
+	msg_unlock(msqid);
+	up(&msg_ids.sem);
+	if (error)
+		return error;
+
+	sprintf (name, MSG_FMT, id);
+	dir = msg_sb->s_root;
+	down(&dir->d_inode->i_sem);
+	dentry = lookup_one_len(name, dir, strlen(name));
+	error = PTR_ERR(dentry);
+	if (!IS_ERR(dentry)) {
+		/*
+		 * We have to do our own unlink to prevent the vfs
+		 * permission check. We'll do the SYSV IPC style check
+		 * inside of msg_do_unlink when we hold msg lock and
+		 * msg_ids semaphore.
+		 *
+		 * The directory's i_sem is already held from above; it
+		 * must not be taken a second time here.
+		 */
+		error = msg_do_unlink(dir->d_inode, dentry, 1);
+		if (!error)
+			d_delete(dentry);
+		dput(dentry);
+	}
+	up(&dir->d_inode->i_sem);
+	return error;
+}

static void free_msg(struct msg_msg* msg)
@@ -139,7 +772,7 @@
}
}

-static struct msg_msg* load_msg(void* src, int len)
+static struct msg_msg* load_msg(const char * src, int len)
{
struct msg_msg* msg;
struct msg_msgseg** pseg;
@@ -191,9 +824,9 @@
return ERR_PTR(err);
}

-static int store_msg(void* dest, struct msg_msg* msg, int len)
+static int store_msg(void* dest, struct msg_msg* msg, size_t len)
{
- int alen;
+ size_t alen;
struct msg_msgseg *seg;

alen = len;
@@ -213,7 +846,7 @@
return -1;
len -= alen;
dest = ((char*)dest)+alen;
- seg=seg->next;
+ seg = seg->next;
}
return 0;
}
@@ -272,7 +905,7 @@
expunge_all(msq,-EIDRM);
ss_wakeup(&msq->q_senders,1);
msg_unlock(id);
-
+
tmp = msq->q_messages.next;
while(tmp != &msq->q_messages) {
struct msg_msg* msg = list_entry(tmp,struct msg_msg,m_list);
@@ -292,12 +925,12 @@

down(&msg_ids.sem);
if (key == IPC_PRIVATE)
- ret = newque(key, msgflg);
+ ret = newque(key, NULL, MSG_FMT_LEN + 1, NULL, msgflg);
else if ((id = ipc_findkey(&msg_ids, key)) == -1) { /* key not used */
if (!(msgflg & IPC_CREAT))
ret = -ENOENT;
else
- ret = newque(key, msgflg);
+ ret = newque(key, NULL, MSG_FMT_LEN + 1, NULL, msgflg);
} else if (msgflg & IPC_CREAT && msgflg & IPC_EXCL) {
ret = -EEXIST;
} else {
@@ -358,13 +991,6 @@
}
}

-struct msq_setbuf {
- unsigned long qbytes;
- uid_t uid;
- gid_t gid;
- mode_t mode;
-};
-
static inline unsigned long copy_msqid_from_user(struct msq_setbuf *out, void *buf, int version)
{
switch(version) {
@@ -468,10 +1094,13 @@
return -EINVAL;

if(cmd == MSG_STAT) {
+ err = -EINVAL;
+ if (!(msq->q_flags & MSG_SYSV))
+ goto out_unlock;
success_return = msg_buildid(msqid, msq->q_perm.seq);
} else {
- err = -EIDRM;
- if (msg_checkid(msq,msqid))
+ err = msg_checkid(msq,msqid);
+ if (err)
goto out_unlock;
success_return = 0;
}
@@ -480,6 +1109,7 @@
goto out_unlock;

kernel_to_ipc64_perm(&msq->q_perm, &tbuf.msg_perm);
+ tbuf.msg_perm.mode &= S_IRWXUGO;
tbuf.msg_stime = msq->q_stime;
tbuf.msg_rtime = msq->q_rtime;
tbuf.msg_ctime = msq->q_ctime;
@@ -500,7 +1130,7 @@
return -EFAULT;
break;
case IPC_RMID:
- break;
+ return msg_remove_name(msqid);
default:
return -EINVAL;
}
@@ -521,12 +1151,11 @@
/* We _could_ check for CAP_CHOWN above, but we don't */
goto out_unlock_up;

- switch (cmd) {
- case IPC_SET:
- {
+ if (cmd == IPC_SET) {
if (setbuf.qbytes > msg_ctlmnb && !capable(CAP_SYS_RESOURCE))
goto out_unlock_up;
msq->q_qbytes = setbuf.qbytes;
+ msq->q_maxmsg = setbuf.qbytes;

ipcp->uid = setbuf.uid;
ipcp->gid = setbuf.gid;
@@ -542,11 +1171,6 @@
*/
ss_wakeup(&msq->q_senders,0);
msg_unlock(msqid);
- break;
- }
- case IPC_RMID:
- freeque (msqid);
- break;
}
err = 0;
out_up:
@@ -608,6 +1232,105 @@
return 0;
}

+/*
+ * One attempt at placing @msg on the queue *@msqp, which the caller
+ * holds locked.
+ *
+ * If the queue has no room (bytes or message count) the function
+ * either fails at once (@nowait) or sleeps as a registered sender and
+ * asks the caller to retry.  Otherwise the message is handed directly
+ * to a waiting receiver or appended to the queue; when the queue goes
+ * from empty to one message and a notification is registered, the
+ * registered process is signalled and the registration is cleared.
+ *
+ * Returns
+ *    0        message delivered or queued
+ *   -EAGAIN   queue full and @nowait set
+ *   -EBUSY    slept and woke up; lock re-taken, caller should retry
+ *   -EINTR    slept and a signal is pending; lock re-taken
+ *   -EIDRM    queue vanished while sleeping; *@msqp is NULL and no
+ *             lock is held
+ */
+static int msg_do_send (struct msg_queue **msqp, int msqid,
+	struct msg_msg *msg, size_t msgsz, int nowait)
+{
+	struct msg_queue *msq = *msqp;
+
+	if(msgsz + msq->q_cbytes > msq->q_qbytes ||
+	   1 + msq->q_qnum > msq->q_maxmsg) {
+		struct msg_sender s;
+
+		if(nowait)
+			return -EAGAIN;
+
+		ss_add(msq, &s);
+		msg_unlock(msqid);
+		schedule();
+		current->state = TASK_RUNNING;
+
+		*msqp = msq = msg_lock(msqid);
+		if(msq==NULL)
+			return -EIDRM;
+		ss_del(&s);
+
+		if (signal_pending(current))
+			return -EINTR;
+		return -EBUSY;
+	}
+
+	if(!pipelined_send(msq,msg)) {
+		/* no one is waiting for this message, enqueue it */
+		list_add_tail(&msg->m_list,&msq->q_messages);
+		msq->q_cbytes += msgsz;
+		msq->q_qnum++;
+		atomic_add(msgsz,&msg_bytes);
+		atomic_inc(&msg_hdrs);
+		if (msq->q_qnum == 1 && msq->q_signo) {
+			struct task_struct *p;
+			siginfo_t si;
+			read_lock(&tasklist_lock);
+			p = find_task_by_pid(msq->q_pid);
+			if (p) {
+				si.si_signo = msq->q_signo;
+				si.si_errno = 0;
+				si.si_code = SI_MESGQ;
+				si.si_pid = current->pid;
+				si.si_uid = current->euid;
+				si.si_value = msq->q_sigval;
+				/*
+				 * send_sig_info returns 0 on success.  Only
+				 * fall back to the plain signal when the
+				 * detailed one could not be delivered, so
+				 * the target is never signalled twice.
+				 */
+				if (send_sig_info(msq->q_signo, &si, p) != 0)
+					send_sig(msq->q_signo, p, 1);
+			}
+			read_unlock(&tasklist_lock);
+			/* a registration is good for one notification only */
+			msq->q_signo = 0;
+		}
+	}
+
+	msq->q_lspid = current->pid;
+	msq->q_stime = CURRENT_TIME;
+	return 0;
+}
+
+/*
+ * Send one message of type @mtype to the queue behind @ino (file
+ * based path used by msg_write and the MQ_SEND ioctl).
+ *
+ * @mtext is a user space buffer of @msgsz bytes.  O_NONBLOCK on @filp
+ * selects non-blocking behaviour.  For queues flagged MSG_SYSV the
+ * SysV write permission check is applied as well.
+ *
+ * Returns 0 on success, -EINVAL for @mtype < 1, -EMSGSIZE if the
+ * message exceeds the queue's q_msgsize, -EACCES on a failed
+ * permission check, the error from load_msg, or the final error from
+ * msg_do_send (-EAGAIN, -EINTR, -EIDRM).
+ */
+static ssize_t msg_send (struct inode *ino, struct file *filp, const char *mtext, size_t msgsz, long mtype)
+{
+	struct msg_queue *msq;
+	struct msg_msg *msg;
+	int err = 0;
+
+	if (mtype < 1)
+		return -EINVAL;
+
+	/* check the size limit before copying anything from user space */
+	msq = msg_lock(ino->i_ino);
+	if (!msq) BUG();
+	if (msgsz > msq->q_msgsize)
+		err = -EMSGSIZE;
+	msg_unlock(ino->i_ino);
+	if (err)
+		return err;
+
+	msg = load_msg(mtext, msgsz);
+	if(IS_ERR(msg))
+		return PTR_ERR(msg);
+
+	msg->m_type = mtype;
+	msg->m_ts = msgsz;
+
+	msq = msg_lock(ino->i_ino);
+	if (!msq) BUG();
+
+	do {
+		err = -EACCES;
+		if (msq->q_flags & MSG_SYSV && ipcperms(&msq->q_perm, S_IWUGO))
+			break;
+
+		err = msg_do_send(&msq, ino->i_ino, msg, msgsz, filp->f_flags & O_NONBLOCK);
+
+	} while (err == -EBUSY);
+
+	/*
+	 * msg_do_send sets msq to NULL when the queue disappeared while
+	 * sleeping; in that case no lock is held and none may be dropped.
+	 */
+	if (msq)
+		msg_unlock(ino->i_ino);
+	if (msg && err)
+		free_msg(msg);
+	return err;
+}
+
asmlinkage long sys_msgsnd (int msqid, struct msgbuf *msgp, size_t msgsz, int msgflg)
{
struct msg_queue *msq;
@@ -633,60 +1356,23 @@
err=-EINVAL;
if(msq==NULL)
goto out_free;
-retry:
- err= -EIDRM;
- if (msg_checkid(msq,msqid))
- goto out_unlock_free;
-
- err=-EACCES;
- if (ipcperms(&msq->q_perm, S_IWUGO))
- goto out_unlock_free;
-
- if(msgsz + msq->q_cbytes > msq->q_qbytes ||
- 1 + msq->q_qnum > msq->q_qbytes) {
- struct msg_sender s;
-
- if(msgflg&IPC_NOWAIT) {
- err=-EAGAIN;
- goto out_unlock_free;
- }
- ss_add(msq, &s);
- msg_unlock(msqid);
- schedule();
- current->state= TASK_RUNNING;
+ do {
+ err= -EIDRM;
+ if (msg_checkid(msq,msqid))
+ break;

- msq = msg_lock(msqid);
- err = -EIDRM;
- if(msq==NULL)
- goto out_free;
- ss_del(&s);
-
- if (signal_pending(current)) {
- err=-EINTR;
- goto out_unlock_free;
- }
- goto retry;
- }
+ err=-EACCES;
+ if (ipcperms(&msq->q_perm, S_IWUGO))
+ break;

- msq->q_lspid = current->pid;
- msq->q_stime = CURRENT_TIME;
+ err = msg_do_send(&msq, msqid, msg, msgsz, msgflg & IPC_NOWAIT);

- if(!pipelined_send(msq,msg)) {
- /* noone is waiting for this message, enqueue it */
- list_add_tail(&msg->m_list,&msq->q_messages);
- msq->q_cbytes += msgsz;
- msq->q_qnum++;
- atomic_add(msgsz,&msg_bytes);
- atomic_inc(&msg_hdrs);
- }
-
- err = 0;
- msg = NULL;
+ } while (err == -EBUSY);

-out_unlock_free:
- msg_unlock(msqid);
+ if (msq)
+ msg_unlock(msqid);
out_free:
- if(msg!=NULL)
+ if (msg && err)
free_msg(msg);
return err;
}
@@ -710,127 +1396,169 @@
return SEARCH_EQUAL;
}

+/*
+ * Core receive loop shared by sys_msgrcv and msg_receive.
+ *
+ * Called with @msq locked.  Searches the queue for a message matching
+ * @msgtyp/@mode.  If one is found it is unlinked, the accounting is
+ * updated, blocked senders are woken and the message is returned with
+ * the queue lock released.  If none is found the caller either gets
+ * -ENOMSG (IPC_NOWAIT in @msgflg) or is put on the receiver list and
+ * sleeps until a sender hands over a message, the wait is interrupted,
+ * or r_msg is set to some other error.
+ *
+ * Lock state on return: released when a message is returned.  On an
+ * ERR_PTR return the lock is held, unless the queue could not be
+ * re-locked after sleeping, in which case *@msqidp is set to -1.
+ *
+ * Errors: -EACCES, -E2BIG, -ENOMSG, -EINTR, or whatever error value
+ * was left in the receiver's r_msg slot.
+ */
+static struct msg_msg *
+msg_do_receive (struct msg_queue *msq, int *msqidp, size_t msgsz,
+ long msgtyp, int mode, int msgflg)
+{
+ struct msg_receiver msr_d;
+ struct list_head *tmp;
+ struct msg_msg *msg, *found_msg;
+ int msqid = *msqidp;
+
+ for (;;) {
+ /* the SysV permission check only applies to queues flagged MSG_SYSV */
+ if (msq->q_flags & MSG_SYSV && ipcperms (&msq->q_perm, S_IRUGO))
+ return ERR_PTR(-EACCES);
+
+ tmp = msq->q_messages.next;
+ found_msg = NULL;
+ while (tmp != &msq->q_messages) {
+ msg = list_entry(tmp,struct msg_msg,m_list);
+ if(testmsg(msg, msgtyp, mode)) {
+ found_msg = msg;
+ /* for SEARCH_LESSEQUAL keep scanning for a lower type */
+ if(mode == SEARCH_LESSEQUAL && msg->m_type != 1)
+ msgtyp = msg->m_type - 1;
+ else
+ break;
+ }
+ tmp = tmp->next;
+ }
+ if (found_msg) {
+ msg = found_msg;
+ if ((msgsz < msg->m_ts) && !(msgflg & MSG_NOERROR))
+ return ERR_PTR(-E2BIG);
+ list_del(&msg->m_list);
+ msq->q_qnum--;
+ msq->q_rtime = CURRENT_TIME;
+ msq->q_lrpid = current->pid;
+ msq->q_cbytes -= msg->m_ts;
+ atomic_sub(msg->m_ts,&msg_bytes);
+ atomic_dec(&msg_hdrs);
+ ss_wakeup(&msq->q_senders,0);
+ msg_unlock(msqid);
+ return msg;
+ } else {
+ struct msg_queue *t;
+ /* no message waiting. Prepare for pipelined
+ * receive.
+ */
+ if (msgflg & IPC_NOWAIT)
+ return ERR_PTR(-ENOMSG);
+ list_add_tail(&msr_d.r_list,&msq->q_receivers);
+ msr_d.r_tsk = current;
+ msr_d.r_msgtype = msgtyp;
+ msr_d.r_mode = mode;
+ if(msgflg & MSG_NOERROR)
+ msr_d.r_maxsize = INT_MAX;
+ else
+ msr_d.r_maxsize = msgsz;
+ /* -EAGAIN marks "nothing delivered yet" */
+ msr_d.r_msg = ERR_PTR(-EAGAIN);
+ current->state = TASK_INTERRUPTIBLE;
+ msg_unlock(msqid);
+
+ schedule();
+ current->state = TASK_RUNNING;
+
+ msg = (struct msg_msg*) msr_d.r_msg;
+ if(!IS_ERR(msg))
+ return msg;
+
+ t = msg_lock(msqid);
+ if(t == NULL)
+ *msqidp = msqid = -1;
+ msg = (struct msg_msg*)msr_d.r_msg;
+ if(!IS_ERR(msg)) {
+ /* our message arrived while we waited for
+ * the spinlock. Process it.
+ */
+ if (msqid != -1)
+ msg_unlock(msqid);
+ return msg;
+ }
+ if(PTR_ERR(msg) == -EAGAIN) {
+ /* still nothing delivered: take ourselves off the list */
+ if(msqid == -1)
+ BUG();
+ list_del(&msr_d.r_list);
+ if (signal_pending(current))
+ return ERR_PTR(-EINTR);
+ else
+ continue;
+ }
+ return msg;
+ }
+ }
+}
+
+/*
+ * Receive one message from the queue behind @ino (file based path
+ * used by msg_read and the MQ_RECEIVE ioctl).
+ *
+ * @mtext is the user buffer of @msgsz bytes.  @msgtypp, when not NULL,
+ * is dereferenced directly (not through a user-copy helper): it
+ * supplies the requested type and receives the type of the delivered
+ * message.  With @msgtypp == NULL the search uses -MQ_DEFAULT_TYPE.
+ * O_NONBLOCK on @filp selects non-blocking behaviour.
+ *
+ * Returns the number of bytes stored on success.  Errors: -EMSGSIZE
+ * (buffer smaller than q_msgsize when a type pointer is given, or
+ * message larger than the buffer), -EAGAIN (nothing to read in
+ * non-blocking mode), -EFAULT, or any other error from msg_do_receive.
+ */
+static int msg_receive (struct inode *ino, struct file *filp, char *mtext,
+ size_t msgsz, long *msgtypp)
+{
+ struct msg_queue *msq;
+ struct msg_msg *msg;
+ long msgtyp;
+ int err, mode, msqid = ino->i_ino;
+
+ if (msgtypp)
+ msgtyp = *msgtypp;
+ else
+ msgtyp = -MQ_DEFAULT_TYPE;
+ mode = convert_mode(&msgtyp, 0);
+ msq = msg_lock(msqid);
+ if (!msq) BUG();
+ if (msgtypp && msgsz < msq->q_msgsize) {
+ msg_unlock(msqid);
+ return -EMSGSIZE;
+ }
+
+ msg = msg_do_receive (msq, &msqid, msgsz, msgtyp, mode,
+ (filp->f_flags & O_NONBLOCK) ? IPC_NOWAIT : 0);
+ if (!IS_ERR (msg)) {
+ /* success: msg_do_receive has already dropped the queue lock */
+ msgsz = (msgsz > msg->m_ts) ? msg->m_ts : msgsz;
+ if (store_msg(mtext, msg, msgsz))
+ msgsz = -EFAULT;
+ else if (msgtypp)
+ *msgtypp = msg->m_type;
+ free_msg(msg);
+ return msgsz;
+ }
+ /* msqid is -1 when the queue could not be re-locked after sleeping */
+ if (msqid != -1)
+ msg_unlock(msqid);
+ err = PTR_ERR(msg);
+ /* translate the SysV error codes for the file based interface */
+ switch (err) {
+ case -ENOMSG: err = -EAGAIN; break;
+ case -E2BIG: err = -EMSGSIZE; break;
+ }
+ return err;
+}
+
asmlinkage long sys_msgrcv (int msqid, struct msgbuf *msgp, size_t msgsz,
long msgtyp, int msgflg)
{
struct msg_queue *msq;
- struct msg_receiver msr_d;
- struct list_head* tmp;
- struct msg_msg* msg, *found_msg;
- int err;
+ struct msg_msg *msg;
int mode;

if (msqid < 0 || (long) msgsz < 0)
return -EINVAL;
- mode = convert_mode(&msgtyp,msgflg);
+ mode = convert_mode(&msgtyp, msgflg);

- msq = msg_lock(msqid);
- if(msq==NULL)
+ msq = msg_lock (msqid);
+ if (msq==NULL)
+ return -EINVAL;
+ if (!(msq->q_flags & MSG_SYSV)) {
+ msg_unlock (msqid);
return -EINVAL;
-retry:
- err = -EIDRM;
- if (msg_checkid(msq,msqid))
- goto out_unlock;
-
- err=-EACCES;
- if (ipcperms (&msq->q_perm, S_IRUGO))
- goto out_unlock;
-
- tmp = msq->q_messages.next;
- found_msg=NULL;
- while (tmp != &msq->q_messages) {
- msg = list_entry(tmp,struct msg_msg,m_list);
- if(testmsg(msg,msgtyp,mode)) {
- found_msg = msg;
- if(mode == SEARCH_LESSEQUAL && msg->m_type != 1) {
- found_msg=msg;
- msgtyp=msg->m_type-1;
- } else {
- found_msg=msg;
- break;
- }
- }
- tmp = tmp->next;
}
- if(found_msg) {
- msg=found_msg;
- if ((msgsz < msg->m_ts) && !(msgflg & MSG_NOERROR)) {
- err=-E2BIG;
- goto out_unlock;
- }
- list_del(&msg->m_list);
- msq->q_qnum--;
- msq->q_rtime = CURRENT_TIME;
- msq->q_lrpid = current->pid;
- msq->q_cbytes -= msg->m_ts;
- atomic_sub(msg->m_ts,&msg_bytes);
- atomic_dec(&msg_hdrs);
- ss_wakeup(&msq->q_senders,0);
- msg_unlock(msqid);
-out_success:
+ msg = msg_do_receive (msq, &msqid, msgsz, msgtyp, mode, msgflg);
+ if (!IS_ERR (msg)) {
msgsz = (msgsz > msg->m_ts) ? msg->m_ts : msgsz;
- if (put_user (msg->m_type, &msgp->mtype) ||
- store_msg(msgp->mtext, msg, msgsz)) {
- msgsz = -EFAULT;
- }
- free_msg(msg);
- return msgsz;
- } else
- {
- struct msg_queue *t;
- /* no message waiting. Prepare for pipelined
- * receive.
- */
- if (msgflg & IPC_NOWAIT) {
- err=-ENOMSG;
- goto out_unlock;
- }
- list_add_tail(&msr_d.r_list,&msq->q_receivers);
- msr_d.r_tsk = current;
- msr_d.r_msgtype = msgtyp;
- msr_d.r_mode = mode;
- if(msgflg & MSG_NOERROR)
- msr_d.r_maxsize = INT_MAX;
- else
- msr_d.r_maxsize = msgsz;
- msr_d.r_msg = ERR_PTR(-EAGAIN);
- current->state = TASK_INTERRUPTIBLE;
- msg_unlock(msqid);
-
- schedule();
- current->state = TASK_RUNNING;
-
- msg = (struct msg_msg*) msr_d.r_msg;
- if(!IS_ERR(msg))
- goto out_success;
-
- t = msg_lock(msqid);
- if(t==NULL)
- msqid=-1;
- msg = (struct msg_msg*)msr_d.r_msg;
- if(!IS_ERR(msg)) {
- /* our message arived while we waited for
- * the spinlock. Process it.
- */
- if(msqid!=-1)
- msg_unlock(msqid);
- goto out_success;
- }
- err = PTR_ERR(msg);
- if(err == -EAGAIN) {
- if(msqid==-1)
- BUG();
- list_del(&msr_d.r_list);
- if (signal_pending(current))
- err=-EINTR;
- else
- goto retry;
- }
- }
-out_unlock:
- if(msqid!=-1)
- msg_unlock(msqid);
- return err;
+ if (put_user (msg->m_type, &msgp->mtype) ||
+ store_msg(msgp->mtext, msg, msgsz))
+ msgsz = -EFAULT;
+ free_msg(msg);
+ return msgsz;
+ }
+ if (msqid != -1)
+ msg_unlock(msqid);
+ return PTR_ERR(msg);
}

#ifdef CONFIG_PROC_FS
@@ -841,16 +1569,16 @@
int i, len = 0;

down(&msg_ids.sem);
- len += sprintf(buffer, " key msqid perms cbytes qnum lspid lrpid uid gid cuid cgid stime rtime ctime\n");
+ len += sprintf(buffer, " key msqid perms cbytes qnum lspid lrpid uid gid cuid cgid stime rtime ctime name(POSIX)\n");

for(i = 0; i <= msg_ids.max_id; i++) {
struct msg_queue * msq;
msq = msg_lock(i);
if(msq != NULL) {
- len += sprintf(buffer + len, "%10d %10d %4o %10lu %10lu %5u %5u %5u %5u %5u %5u %10lu %10lu %10lu\n",
+ len += sprintf(buffer + len, "%10d %10d %4o %10lu %10lu %5u %5u %5u %5u %5u %5u %10lu %10lu %10lu %.*s%s\n",
msq->q_perm.key,
msg_buildid(i,msq->q_perm.seq),
- msq->q_perm.mode,
+ msq->q_flags & S_IRWXUGO,
msq->q_cbytes,
msq->q_qnum,
msq->q_lspid,
@@ -861,7 +1589,10 @@
msq->q_perm.cgid,
msq->q_stime,
msq->q_rtime,
- msq->q_ctime);
+ msq->q_ctime,
+ msq->q_namelen,
+ msq->q_name,
+ msq->q_flags & MSG_UNLK ? " (deleted)" : "");
msg_unlock(i);

pos += len;